1 "abstract classes for deep models"
2
3 import inspect, logging, time, cPickle, abc
4 from collections import Sequence
5 from operator import itemgetter, attrgetter
6 from functools import partial
7
8 import numpy as np
9 import pandas as pd
10 import theano
11
12 from . import config_spec
13 from .. import archive, deprecated
14
15 __spec_cls__ = lambda t: (t[0].endswith("Spec") and inspect.isclass(t[1]))
16 spec_list = map(itemgetter(1),
17 filter(__spec_cls__, inspect.getmembers(config_spec)))
18
19
20 log = logging.getLogger(__name__)
23 """abstract layer class.
24
25 A layer is a list of weights. Weights are
26 theano shared tensors that are needed for example to compute activation.
27 You can use them also for other reasons e.g., speeds to implement
28 momentum GSD.
29
30 """
31
32 __metaclass__ = abc.ABCMeta
33
    def __init__(self, wshape_lst, wnames, fanin_lst, rng, wdtype):
        """initialize from the given weight properties; all params are lists
        of the same length

        @param wshape_lst: shapes of weights
        @param wnames: names of weights
        @param fanin_lst: fan-in of weights
        @param rng: a random number generator or init value (see alloc_shared_weights_)
        @param wdtype: data type of weights
        """

        if len(wshape_lst) != len(wnames):
            raise ValueError("lengths of names and shapes differ")

        self._weights_ = []
        for (sh, n, fi, r, dt) in zip(wshape_lst, wnames, fanin_lst, rng, wdtype):
            self._weights_.append( alloc_shared_weights_(sh, dt, n, fi, r) )

    @abc.abstractmethod
    def get_params(self):
        """weights as symbolic variables"""
        pass

    @abc.abstractmethod
    def activation(self, *args, **kwargs):  # name/signature assumed; original def line missing
        "the symbolic activation function"
        pass

63 """weights as ndarrays"""
64
65 return map(lambda w: w.get_value(), self.get_params())
66
    @abc.abstractmethod
    def set_weights(self, params):
        """set weights

        @param params: tuple of (ndarray)
        @return none"""

        pass

77 """set the weight values of this layer"""
78
79 assert len(vlst) == len(self.get_params())
80
81 for v, sym_w in zip(vlst, self.get_params()):
82 sym_w.set_value( v )
83
84
86 """compute the L1 or L2 norm of weights
87
88 @param degree: keyword `l1` or `l2`
89 @return : the L1 or L2 norm of weights (float)"""
90
91 f = {"l1" : lambda w: np.abs( w ).sum(),
92 "l2" : lambda w: (np.abs( w )**2).sum()}[degree]
93
94 return sum( map(f, self.get_weights()) )
95
97 """get a list of the flattened version of the weights of this layer
98
99 @return: list[ndarray]"""
100
101 olst = []
102 for w in self.get_weights():
103 olst.append( w.reshape(-1,) )
104 return olst
105
107 """reshape the given flattened weights into the correct dimensions
108 for this layer
109
110 @param flatp: flattened weights (list[ndarray])
111 @return: the same given weights reshaped"""
112
113 for i in range(len(flatp)):
114 flatp[i] = flatp[i].reshape( self.get_weights()[i].shape )
115 return flatp
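
    # Round-trip sketch: get_flat_weights() returns every weight as a 1-d
    # vector, and load_flat_weights() reshapes those vectors back using the
    # current weight shapes, e.g. a (100, 50) matrix is stored as a
    # length-5000 vector and recovered on load.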

class SpeedLayer( Layer ):
    """This layer keeps an extra set of weights to support the momentum
    algorithm for SGD. At time point t, we need the weights at t-1 and the
    gradient at t to update the weights. Namely

    w(t+1) - w(t) = - rho * dE(w)/dw + mu * s(t), for t = 0, 1, ... and s(0) = 0

    where I call s(t) = w(t) - w(t-1) the speed.
    """

    __metaclass__ = abc.ABCMeta

    def __init__(self, ws, wn, rng, wdtype):
        super(SpeedLayer, self).__init__(ws + ws,
                                         wn + map(lambda n: "%s_speed" % n, wn),
                                         map(sum, ws) + map(sum, ws),
                                         rng + [0.0]*len(rng),
                                         wdtype + wdtype)
        self.__wl = len(ws)

138 """get speeds
139
140 @return: a tuple of (ndarray) speeds """
141
142 return map(lambda i: self._weights_[self.__wl+i].get_value(),
143 range(self.__wl))
144
146 """set speeds
147
148 @param v: tuple of (ndarray)
149 @return: none"""
150
151 for i in range(self.__wl):
152 self._weights_[self.__wl + i].set_value( v[i] )
153
155 """get weights
156
157 @return: a tuple of (theano.tensor) weights"""
158
159 return self._weights_[0:self.__wl]
160
162 """set weights
163
164 @param params: tuple of (ndarray)
165 @return none"""
166
167 for i in range(len(self.__wl)):
168 self._weights_[i] = params[i]
169
    @classmethod
    def _speed_update_f(cls, s, g, mu, rho):
        """speed update formula

        new_speed = -rho * g + mu*s

        @param s: cur speed
        @param g: gradient
        @param mu: momentum
        @param rho: learning rate
        @return: new speed"""

        return (s*mu) - (rho * g)

    @classmethod
    def _weight_update_f(cls, cur_weight, speed):
        """weight update for the given speed

        new_weight = cur_weight + speed

        @param cur_weight: current weight (ndarray)
        @param speed: speed (ndarray)
        @return: new_weight (ndarray)
        """

        return cur_weight + speed

198 """update speeds for the given gradients
199
200 new_speed = -rho * gradient + mu*cur_speed
201
202 @param gradient: gradient (list of ndarray)
203 @param mu: momentum (float)
204 @param rho: learning rate (float)
205 @return: new_speed (ndarray)
206 """
207
208 upd_f = self._speed_update_f
209 new_spd = []
210 for i in range(self.__wl):
211 new_spd.append( upd_f(self.get_speeds()[i], gradient[i], mu, rho) )
212 self.set_speeds(new_spd)
213
215 """update speeds for the current speed
216
217 new_weight = cur_weight + cur_speed
218
219 """
220 upd_f = self._weight_update_f
221 new_w = []
222 for i in range(self.__wl):
223 new_w.append( upd_f(self.get_weights()[i], self.get_speeds()[i]) )
224 self.set_weights(new_w)
225
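# A small worked example of the momentum update implemented above (illustration
# only, with made-up numbers): with rho = 0.1, mu = 0.9, a gradient g = 2.0 and
# an initial speed s(0) = 0,
#   s(1) = -0.1*2.0 + 0.9*0.0   = -0.2        (_speed_update_f)
#   w(1) = w(0) + s(1)          = w(0) - 0.2  (_weight_update_f)
# and if the next gradient is again 2.0,
#   s(2) = -0.1*2.0 + 0.9*(-0.2) = -0.38
# so repeated gradients of the same sign make the steps grow.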

class Model( object ):
    """generic model class with basic functionality"""

    __metaclass__ = abc.ABCMeta

    def __init__(self, layers):
        self.__layers = layers
        log.info("declared model\n%s", str(self))

    def __getitem__(self, i):
        return self.__layers[i]

    def __len__(self):
        return len(self.__layers)

    def __setitem__(self, i, it):
        self.__layers[i] = it

    def load(self, path):
        archpath = archive.archname(path)

        def load_by_name(layer_key, wn, what):
            kp = {"weights" : "%s/%s", "shape" : "%s/%s_shape"}[what]
            return archive.load_object(archpath, kp % (layer_key, wn) ).values

        for i, layer in enumerate(self):
            layer_key = archive.basename( path ) + ("/layer_%d" % i)
            layer_weights = []
            for wn in map(attrgetter("name"), layer.get_params()):
                layer_weights.append( load_by_name(layer_key, wn, "weights") )
            layer.set_weights( layer.load_flat_weights(layer_weights) )

    def save(self, path):
        def weights_names_shapes(layer):
            "[(wname, w_flat, w_shape), ...]"
            layer_weights = layer.get_flat_weights()
            layer_weight_names = map(attrgetter("name"), layer.get_params())
            layer_shapes = map(lambda w: w.shape, layer.get_weights())
            return zip(layer_weight_names, layer_weights, layer_shapes)

        log.info("saving model to %s", path)
        for i, layer in enumerate(self):
            layer_key = archive.basename( path ) + ("/layer_%d" % i)
            for wn, w, wsh in weights_names_shapes(layer):
                key = layer_key + "/" + wn
                archive.save_object( archive.archname(path), key, pd.Series(w) )
                key = layer_key + "/" + wn + "_shape"
                archive.save_object( archive.archname(path), key,
                                     pd.Series(wsh, index=range(len(wsh))) )
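
    # Sketch of the resulting archive layout (names illustrative only): for a
    # layer whose weights are called "W" and "b", `save` writes, per layer,
    #   <basename(path)>/layer_0/W        <basename(path)>/layer_0/W_shape
    #   <basename(path)>/layer_0/b        <basename(path)>/layer_0/b_shape
    # which is the "<layer_key>/<weight name>" scheme that `load` reads back.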

    def weight_norm(self, degree):
        """compute the L1 or L2 norm of weights

        @param degree: keyword `l1` or `l2`
        @return: the L1 or L2 norm of weights (float)"""

        return sum( map(lambda l: l.weight_norm(degree), self) )

    def get_weights(self):
        """numpy arrays for the weights

        @return: the list of weights of the layers
        """

        return map(lambda l: l.get_weights(), self.__layers)

    def set_weights(self, wlst):
        for i, w in enumerate(wlst):
            self.__layers[i].set_weights( w )

    def get_params(self):
        """(theano) symbolic variables for the weights

        @return: the list of parameters of layers
        """

        return map(lambda l: l.get_params(), self.__layers)

    def __str__(self):
        return "\n".join( map(str, self) )

315 """decorator that explains what is being theano-compiled, by logging the functions __doc__"""
316 def newfunc(*args, **kwargs):
317 log.info( func.__doc__ )
318 return func(*args, **kwargs)
319 return newfunc
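
# Hypothetical usage sketch of the decorator above (the decorated function and
# its docstring are made up for illustration): the docstring is logged just
# before the wrapped call runs, so slow theano compilations announce themselves.
#
#   @log_doc
#   def compile_trainer(cost, params):
#       """compiling the training function"""
#       return theano.function([...], cost, updates=[...])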


def alloc_shared_weights_(shape, dtype, name, fan_in, rng):
    """alloc a matrix of weights and return a theano shared variable
    Note : optimal initialization of weights is dependent on the
    activation function used (among other things). Here I replicate
    http://www.deeplearning.net/tutorial/mlp.html#mlp
    """

    if type(rng) == float:
        val = str(rng)
        var = theano.shared(value=np.zeros(shape, dtype=dtype) + rng, name=name)
    elif type(rng) == np.random.RandomState:
        val = "rnd"
        thr = np.sqrt(3. / fan_in)
        var = theano.shared(value=np.asarray(rng.uniform(low=-thr, high=thr,
                                                         size=shape), dtype=dtype),
                            name=name)
    else:
        raise ValueError("cannot understand `rng`")

    log.debug("%s (%s) = %s (%s)", str(var), str(var.get_value().shape),
              val, str(var.get_value().dtype))

    return var
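

# A minimal usage sketch of alloc_shared_weights_ (hypothetical shapes and
# names; the argument order is the one used from Layer.__init__): a randomly
# initialised weight matrix and a constant-initialised bias vector.
def _example_alloc_weights():  # illustration only; not called by the module
    rng = np.random.RandomState(1234)
    W = alloc_shared_weights_((100, 50), theano.config.floatX, "W_example", 100, rng)
    b = alloc_shared_weights_((50,), theano.config.floatX, "b_example", 50, 0.0)
    return W, b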


def adjust_lrate(err, lrmax):  # name assumed; original def line missing
    "adjust the learning rate for the next batch, given the recent error history"

    if len(err) < 2:
        return lrmax
    E = max( err[-2], err[-1] )
    if E == 0:
        return lrmax
    if (err[-2] - err[-1]) / E < -0.001:
        return float( np.array([lrmax * np.sqrt((len(err)-1) / float(len(err)))],
                               dtype=np.float32)[0] )
    return lrmax
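
# Illustration with made-up numbers: for lrmax = 0.1 and err = [0.30, 0.31] the
# relative change (0.30 - 0.31) / 0.31 ~ -0.03 < -0.001, i.e. the error went up,
# so the returned rate is 0.1 * sqrt(1/2) ~ 0.0707; with a flat or decreasing
# error history the function returns lrmax unchanged.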


@deprecated
def get_data(low_idx, high_idx, path, borrow=True, as_dtypeX=None, as_dtypeY=None):
    """load rasterized data into shared theano variables"""

    x, y = archive.get_target_dataset(path, low_idx, high_idx)

    SH = partial(theano.shared, borrow=borrow)

    if as_dtypeX:
        x = np.asarray(x, dtype=as_dtypeX)
    if as_dtypeY:
        y = np.asarray(y, dtype=as_dtypeY)

    return SH(x.reshape(x.shape[0], np.prod(x.shape[1:]))), SH(y)


def shift_to_zero_mean(x):  # name assumed; original def line missing
    """transform each component of X so that it has zero mean

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: (the shifted input, the mean of each input component as an
              array of shape (<tracks>, <genome position>))
    """

    m = np.empty( x.shape[1:] )
    for i in xrange(m.shape[0]):
        for j in xrange(m.shape[1]):
            m[i, j] = x.values[:, i, j].mean()
    for a in x:
        x[a] -= m
    return x, m


def scale_to_unit_variance(x):  # name assumed; original def line missing
    """transform each component of X so that it has unit variance

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: (the scaled input, the std of each input component as an
              array of shape (<tracks>, <genome position>))
    """

    m = np.empty( x.shape[1:] )
    for i in xrange(m.shape[0]):
        for j in xrange(m.shape[1]):
            m[i, j] = x.values[:, i, j].std()
    if np.any( m == 0 ):
        log.warning("zero variance components")
        m[ m == 0 ] = 1
    for a in x:
        x[a] /= m
    return x, m


def scale_to_unit_interval(x):  # name assumed; original def line missing
    """transform each component of X so that it fits in the interval [-1, 1]

    @param x: a pandas data panel of the form <anchors> X <tracks> X <genome position>
    @return: the scaled input
    """

    md = np.empty( x.shape[1:] )
    rg = np.empty( x.shape[1:] )
    for i in xrange(md.shape[0]):
        for j in xrange(md.shape[1]):
            md[i, j] = (np.max(x.values[:, i, j]) + np.min(x.values[:, i, j])) / 2
            rg[i, j] = (np.max(x.values[:, i, j]) - np.min(x.values[:, i, j]))
    if np.any( rg == 0 ):
        log.warning("zero variability components")
        rg[ rg == 0 ] = 1

    for a in x:
        x[a] = (x[a] - md) / rg
    return x
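

# Hypothetical end-to-end preprocessing sketch (the three function names above
# are reconstructions, and `panel` stands for a pandas Panel of the form
# <anchors> X <tracks> X <genome position>):
#
#   panel, mean = shift_to_zero_mean(panel)
#   panel, std  = scale_to_unit_variance(panel)
#   # or, alternatively, squash every component into [-1, 1]:
#   panel = scale_to_unit_interval(panel)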