1 "abstract classes for deep models"
2
3 import inspect, logging, time, cPickle, abc
4 from collections import Sequence
5 from operator import itemgetter, attrgetter
6 from functools import partial
7
8 import numpy as np
9 import pandas as pd
10 import theano
11
12 from . import config_spec
13 from .. import archive, deprecated
14
15 __spec_cls__ = lambda t: (t[0].endswith("Spec") and inspect.isclass(t[1]))
16 spec_list = map(itemgetter(1),
17 filter(__spec_cls__, inspect.getmembers(config_spec)))
18
19
20 log = logging.getLogger(__name__)
23 """abstract layer class.
24
25 A layer is a list of weights. Weights are
26 theano shared tensors that are needed for example to compute activation.
27 You can use them also for other reasons e.g., speeds to implement
28 momentum GSD.
29
30 """
31
32 __metaclass__ = abc.ABCMeta
33
    def __init__(self, wshape_lst, wnames, fanin_lst, rng, wdtype):
        """initialize from the given weight properties; all params are lists
        of the same length

        @param wshape_lst: shapes of weights
        @param wnames: names of weights
        @param fanin_lst: fan-in of weights
        @param rng: a random number generator or init value (see alloc_shared_weights_)
        @param wdtype: data type of weights
        """

        if len(wshape_lst) != len(wnames):
            raise ValueError("lengths of names and shapes differ")

        self._weights_ = []
        for (sh, n, fi, r, dt) in zip(wshape_lst, wnames, fanin_lst, rng, wdtype):
            self._weights_.append( alloc_shared_weights_(sh, dt, n, fi, r) )

    @abc.abstractmethod
    def get_params(self):
        """weights as symbolic variables"""
        pass

    @abc.abstractmethod
    def activation(self, *args, **kwargs):  # name/signature assumed; original def line missing
        "the symbolic activation function"
        pass

63 """weights as ndarrays"""
64
65 return map(lambda w: w.get_value(), self.get_params())
66
    @abc.abstractmethod
    def set_weights(self, params):
        """set weights

        @param params: tuple of (ndarray)
        @return none"""

        pass

77 """set the weight values of this layer"""
78
79 assert len(vlst) == len(self.get_params())
80
81 for v, sym_w in zip(vlst, self.get_params()):
82 sym_w.set_value( v )
83
84
86 """compute the L1 or L2 norm of weights
87
88 @param degree: keyword `l1` or `l2`
89 @return : the L1 or L2 norm of weights (float)"""
90
91 f = {"l1" : lambda w: np.abs( w ).sum(),
92 "l2" : lambda w: (np.abs( w )**2).sum()}[degree]
93
94 return sum( map(f, self.get_weights()) )
95
97 """get a list of the flattened version of the weights of this layer
98
99 @return: list[ndarray]"""
100
101 olst = []
102 for w in self.get_weights():
103 olst.append( w.reshape(-1,) )
104 return olst
105
107 """reshape the given flattened weights into the correct dimensions
108 for this layer
109
110 @param flatp: flattened weights (list[ndarray])
111 @return: the same given weights reshaped"""
112
113 for i in range(len(flatp)):
114 flatp[i] = flatp[i].reshape( self.get_weights()[i].shape )
115 return flatp
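
    # Round-trip sketch: get_flat_weights() returns every weight as a 1-d
    # vector, and load_flat_weights() reshapes those vectors back using the
    # current weight shapes, e.g. a (100, 50) matrix is stored as a
    # length-5000 vector and recovered on load.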

class SpeedLayer( Layer ):
    """This layer keeps an extra set of weights to support the momentum
    algorithm for SGD. At time point t, we need the weights at t-1 and the
    gradient at t to update the weights. Namely

    w(t+1) - w(t) = - rho * dE(w)/dw + mu * s(t), for t = 0, 1, ... and s(0) = 0

    where I call s(t) = w(t) - w(t-1) the speed.
    """

    __metaclass__ = abc.ABCMeta

    def __init__(self, ws, wn, rng, wdtype):
        super(SpeedLayer, self).__init__(ws + ws,
                                         wn + map(lambda n: "%s_speed" % n, wn),
                                         map(sum, ws) + map(sum, ws),
                                         rng + [0.0]*len(rng),
                                         wdtype + wdtype)
        self.__wl = len(ws)

138 """get speeds
139
140 @return: a tuple of (ndarray) speeds """
141
142 return map(lambda i: self._weights_[self.__wl+i].get_value(),
143 range(self.__wl))
144
146 """set speeds
147
148 @param v: tuple of (ndarray)
149 @return: none"""
150
151 for i in range(self.__wl):
152 self._weights_[self.__wl + i].set_value( v[i] )
153
155 """get weights
156
157 @return: a tuple of (theano.tensor) weights"""
158
159 return self._weights_[0:self.__wl]
160
162 """set weights
163
164 @param params: tuple of (ndarray)
165 @return none"""
166
167 for i in range(len(self.__wl)):
168 self._weights_[i] = params[i]
169
    @classmethod
    def _speed_update_f(cls, s, g, mu, rho):
        """speed update formula

        new_speed = -rho * g + mu*s

        @param s: cur speed
        @param g: gradient
        @param mu: momentum
        @param rho: learning rate
        @return: new speed"""

        return (s*mu) - (rho * g)

    @classmethod
    def _weight_update_f(cls, cur_weight, speed):
        """weight update for the given speed

        new_weight = cur_weight + speed

        @param cur_weight: current weight (ndarray)
        @param speed: speed (ndarray)
        @return: new_weight (ndarray)
        """

        return cur_weight + speed

198 """update speeds for the given gradients
199
200 new_speed = -rho * gradient + mu*cur_speed
201
202 @param gradient: gradient (list of ndarray)
203 @param mu: momentum (float)
204 @param rho: learning rate (float)
205 @return: new_speed (ndarray)
206 """
207
208 upd_f = self._speed_update_f
209 new_spd = []
210 for i in range(self.__wl):
211 new_spd.append( upd_f(self.get_speeds()[i], gradient[i], mu, rho) )
212 self.set_speeds(new_spd)
213
215 """update speeds for the current speed
216
217 new_weight = cur_weight + cur_speed
218
219 """
220 upd_f = self._weight_update_f
221 new_w = []
222 for i in range(self.__wl):
223 new_w.append( upd_f(self.get_weights()[i], self.get_speeds()[i]) )
224 self.set_weights(new_w)
225
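# A small worked example of the momentum update implemented above (illustration
# only, with made-up numbers): with rho = 0.1, mu = 0.9, a gradient g = 2.0 and
# an initial speed s(0) = 0,
#   s(1) = -0.1*2.0 + 0.9*0.0   = -0.2        (_speed_update_f)
#   w(1) = w(0) + s(1)          = w(0) - 0.2  (_weight_update_f)
# and if the next gradient is again 2.0,
#   s(2) = -0.1*2.0 + 0.9*(-0.2) = -0.38
# so repeated gradients of the same sign make the steps grow.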

class Model( object ):
    """generic model class with basic functionality"""

    __metaclass__ = abc.ABCMeta

    def __init__(self, layers):
        self.__layers = layers
        log.info("declared model\n%s", str(self))

    def __getitem__(self, i):
        return self.__layers[i]

    def __len__(self):
        return len(self.__layers)

    def __setitem__(self, i, it):
        self.__layers[i] = it

    def load(self, path):
        archpath = archive.archname(path)

        def load_by_name(layer_key, wn, what):
            kp = {"weights" : "%s/%s", "shape" : "%s/%s_shape"}[what]
            return archive.load_object(archpath, kp % (layer_key, wn) ).values

        for i, layer in enumerate(self):
            layer_key = archive.basename( path ) + ("/layer_%d" % i)
            layer_weights = []
            for wn in map(attrgetter("name"), layer.get_params()):
                layer_weights.append( load_by_name(layer_key, wn, "weights") )
            layer.set_weights( layer.load_flat_weights(layer_weights) )

    def save(self, path):
        def weights_names_shapes(layer):
            "[(wname, w_flat, w_shape), ...]"
            layer_weights = layer.get_flat_weights()
            layer_weight_names = map(attrgetter("name"), layer.get_params())
            layer_shapes = map(lambda w: w.shape, layer.get_weights())
            return zip(layer_weight_names, layer_weights, layer_shapes)

        log.info("saving model to %s", path)
        for i, layer in enumerate(self):
            layer_key = archive.basename( path ) + ("/layer_%d" % i)
            for wn, w, wsh in weights_names_shapes(layer):
                key = layer_key + "/" + wn
                archive.save_object( archive.archname(path), key, pd.Series(w) )
                key = layer_key + "/" + wn + "_shape"
                archive.save_object( archive.archname(path), key,
                                     pd.Series(wsh, index=range(len(wsh))) )
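
    # Sketch of the resulting archive layout (names illustrative only): for a
    # layer whose weights are called "W" and "b", `save` writes, per layer,
    #   <basename(path)>/layer_0/W        <basename(path)>/layer_0/W_shape
    #   <basename(path)>/layer_0/b        <basename(path)>/layer_0/b_shape
    # which is the "<layer_key>/<weight name>" scheme that `load` reads back.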

    def weight_norm(self, degree):
        """compute the L1 or L2 norm of weights

        @param degree: keyword `l1` or `l2`
        @return: the L1 or L2 norm of weights (float)"""

        return sum( map(lambda l: l.weight_norm(degree), self) )

    def get_weights(self):
        """numpy arrays for the weights

        @return: the list of weights of the layers
        """

        return map(lambda l: l.get_weights(), self.__layers)

    def set_weights(self, wlst):
        for i, w in enumerate(wlst):
            self.__layers[i].set_weights( w )

    def get_params(self):
        """(theano) symbolic variables for the weights

        @return: the list of parameters of layers
        """

        return map(lambda l: l.get_params(), self.__layers)

    def __str__(self):
        return "\n".join( map(str, self) )

315 """decorator that explains what is being theano-compiled, by logging the functions __doc__"""
316 def newfunc(*args, **kwargs):
317 log.info( func.__doc__ )
318 return func(*args, **kwargs)
319 return newfunc
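
# Hypothetical usage sketch of the decorator above (the decorated function and
# its docstring are made up for illustration): the docstring is logged just
# before the wrapped call runs, so slow theano compilations announce themselves.
#
#   @log_doc
#   def compile_trainer(cost, params):
#       """compiling the training function"""
#       return theano.function([...], cost, updates=[...])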


def alloc_shared_weights_(shape, dtype, name, fan_in, rng):
    """alloc a matrix of weights and return a theano shared variable
    Note : optimal initialization of weights is dependent on the
    activation function used (among other things). Here I replicate
    http://www.deeplearning.net/tutorial/mlp.html#mlp
    """

    if type(rng) == float:
        val = str(rng)
        var = theano.shared(value=np.zeros(shape, dtype=dtype) + rng, name=name)
    elif type(rng) == np.random.RandomState:
        val = "rnd"
        thr = np.sqrt(3. / fan_in)
        var = theano.shared(value=np.asarray(rng.uniform(low=-thr, high=thr,
                                                         size=shape), dtype=dtype),
                            name=name)
    else:
        raise ValueError("cannot understand `rng`")

    log.debug("%s (%s) = %s (%s)", str(var), str(var.get_value().shape),
              val, str(var.get_value().dtype))

    return var
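

# A minimal usage sketch of alloc_shared_weights_ (hypothetical shapes and
# names; the argument order is the one used from Layer.__init__): a randomly
# initialised weight matrix and a constant-initialised bias vector.
def _example_alloc_weights():  # illustration only; not called by the module
    rng = np.random.RandomState(1234)
    W = alloc_shared_weights_((100, 50), theano.config.floatX, "W_example", 100, rng)
    b = alloc_shared_weights_((50,), theano.config.floatX, "b_example", 50, 0.0)
    return W, b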


def adjust_lrate(err, lrmax):  # name assumed; original def line missing
    "adjust the learning rate for the next batch, given the recent error history"

    if len(err) < 2:
        return lrmax
    E = max( err[-2], err[-1] )
    if E == 0:
        return lrmax
    if (err[-2] - err[-1]) / E < -0.001:
        return float( np.array([lrmax * np.sqrt((len(err)-1) / float(len(err)))],
                               dtype=np.float32)[0] )
    return lrmax
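
# Illustration with made-up numbers: for lrmax = 0.1 and err = [0.30, 0.31] the
# relative change (0.30 - 0.31) / 0.31 ~ -0.03 < -0.001, i.e. the error went up,
# so the returned rate is 0.1 * sqrt(1/2) ~ 0.0707; with a flat or decreasing
# error history the function returns lrmax unchanged.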


@deprecated
def get_data(low_idx, high_idx, path, borrow=True, as_dtypeX=None, as_dtypeY=None):
    """load rasterized data into shared theano variables"""

    x, y = archive.get_target_dataset(path, low_idx, high_idx)

    SH = partial(theano.shared, borrow=borrow)

    if as_dtypeX:
        x = np.asarray(x, dtype=as_dtypeX)
    if as_dtypeY:
        y = np.asarray(y, dtype=as_dtypeY)

    return SH(x.reshape(x.shape[0], np.prod(x.shape[1:]))), SH(y)


def shift_to_zero_mean(x):  # name assumed; original def line missing
    """transform each component of X so that it has zero mean

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: (the shifted input, the mean of each input component as an
              array of shape (<tracks>, <genome position>))
    """

    m = np.empty( x.shape[1:] )
    for i in xrange(m.shape[0]):
        for j in xrange(m.shape[1]):
            m[i, j] = x.values[:, i, j].mean()
    for a in x:
        x[a] -= m
    return x, m


def scale_to_unit_variance(x):  # name assumed; original def line missing
    """transform each component of X so that it has unit variance

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: (the scaled input, the std of each input component as an
              array of shape (<tracks>, <genome position>))
    """

    m = np.empty( x.shape[1:] )
    for i in xrange(m.shape[0]):
        for j in xrange(m.shape[1]):
            m[i, j] = x.values[:, i, j].std()
    if np.any( m == 0 ):
        log.warning("zero variance components")
        m[ m == 0 ] = 1
    for a in x:
        x[a] /= m
    return x, m


def scale_to_unit_interval(x):  # name assumed; original def line missing
    """transform each component of X so that it fits in the interval [-1, 1]

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: the scaled input
    """

    md = np.empty( x.shape[1:] )
    rg = np.empty( x.shape[1:] )
    for i in xrange(md.shape[0]):
        for j in xrange(md.shape[1]):
            md[i, j] = (np.max(x.values[:, i, j]) + np.min(x.values[:, i, j])) / 2
            rg[i, j] = (np.max(x.values[:, i, j]) - np.min(x.values[:, i, j]))
    if np.any( rg == 0 ):
        log.warning("zero variability components")
        rg[ rg == 0 ] = 1

    for a in x:
        x[a] = (x[a] - md) / rg
    return x
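

# Hypothetical end-to-end preprocessing sketch (the three function names above
# are reconstructions, and `panel` stands for a pandas Panel of the form
# <anchors> X <tracks> X <genome position>):
#
#   panel, mean = shift_to_zero_mean(panel)
#   panel, std  = scale_to_unit_variance(panel)
#   # or, alternatively, squash every component into [-1, 1]:
#   panel = scale_to_unit_interval(panel)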