1 """convolutional neural nets"""
2
3 import logging
4
5 import numpy as np
6
7 import theano.tensor as T
8 from theano.tensor.signal import downsample
9 from theano.tensor.nnet import sigmoid, softmax, conv
10
11 from . import Model, SpeedLayer
12
13 log = logging.getLogger(__name__)
16 """A logistic regression layer"""
17
18 - def __init__(self, X, n_in, n_out, rng, dtype):
19 """A logistic regression layer
20
21 the layer has a weight and a bias matrix as weights and parameters.
22 """
23
24 super(LogisticReg, self).__init__([(n_in, n_out), (n_out, )],
25 ["llW", "llb"], [rng, 0.0], [dtype, dtype])
26
27 self.input = X
28
29 self.p_y_given_x = self.activation()
30
31
32 self.y_hat = T.argmax(self.p_y_given_x, axis=1)
33
35 "softmax activation. XW + b (samples are rows)"
36
37 W, b = self.get_params()
38 return softmax( T.dot(self.input, W) + b )
39
41 return "[(LOG_REG) %dx%d]" % self.get_weights()[0].shape


class HiddenLayer(SpeedLayer):
    """Hidden layer of a feed-forward net"""

    def __init__(self, X, n_in, n_out, rng, dtype):
        super(HiddenLayer, self).__init__([(n_in, n_out), (n_out, )],
            ["hlW", "hlb"], [0.0, 0.0], [dtype, dtype])

        # Glorot-style uniform initialization scaled by fan-in plus fan-out
        if type(rng) == np.random.RandomState:
            thr = np.sqrt(6. / (n_in + n_out))
            self._weights_[0].set_value(
                np.asarray(rng.uniform(low=-thr, high=thr,
                    size=(n_in, n_out)), dtype=dtype))

        self.input = X

    def activation(self):
        """sigmoid activation: sigmoid(XW + b) (samples are rows)"""

        W, b = self.get_params()
        return sigmoid(T.dot(self.input, W) + b)

    def __repr__(self):
        return "[(HIDDEN) %dx%d]" % self.get_weights()[0].shape
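
# Sketch of the initialization bound above: for n_in = 784 and n_out = 500,
# thr = sqrt(6 / 1284) ~= 0.068, so the initial weights are drawn from
# U(-0.068, 0.068); the bias starts at zero.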


class ConvPoolLayer(SpeedLayer):
    """LeNet conv-pool layer"""

    def __init__(self, X, fshape, ishape, rng, poolsize, dtype):
        """LeCun convolutional layer

        fshape: (# filters, # in_feature_maps, width, height)
        ishape: (batch_size, # feature_maps, width, height)
        """
        assert fshape[1] == ishape[1], "nr. of input feature maps must match"

        super(ConvPoolLayer, self).__init__([fshape, (fshape[0], )],
            ["cpW", "cpb"], [rng, 0.0], [dtype, dtype])

        if type(rng) == np.random.RandomState:
            # uniform initialization scaled by the fan-in of one filter
            thr = np.sqrt(3. / np.prod(fshape[1:]))
            self._weights_[0].set_value(
                np.asarray(rng.uniform(low=-thr, high=thr,
                    size=fshape), dtype=dtype))

        self.input = X
        self.ishape = ishape
        self.fshape = fshape
        self.pshape = poolsize

    def activation(self):
        """sigmoid of the max-pooled valid convolution"""

        W, b = self.get_params()
        conved = conv.conv2d(self.input, W,
            filter_shape=self.fshape, image_shape=self.ishape)
        pooled = downsample.max_pool_2d(conved, self.pshape, ignore_border=True)
        # the bias is per feature map: broadcast over batch, width and height
        return sigmoid(pooled + b.dimshuffle('x', 0, 'x', 'x'))

    def __repr__(self):
        "in_feature_maps -> nr_of_kern (receptive_field_size (wxh) / pool_size (wxh))"

        (nk, ifm, fw, fh) = self.fshape
        (pw, ph) = self.pshape
        weights = "[(CONV_POOL) %d -> @%d (%dx%d) / %dx%d]" % (ifm, nk, fw, fh, pw, ph)
        state = "[%d/batch @%d (%dx%d) -> @%d (%dx%d)]" % (self.ishape + (self.fshape[0],
            (self.ishape[2] - self.fshape[2] + 1) / self.pshape[0],
            (self.ishape[3] - self.fshape[3] + 1) / self.pshape[1]))
        return weights + " " + state
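
# Shape sketch: a valid 5x5 convolution over a 28x28 input leaves
# 28 - 5 + 1 = 24 positions per side, and 2x2 max-pooling halves that to 12;
# so ishape (100, 1, 28, 28) with fshape (20, 1, 5, 5) and poolsize (2, 2)
# gives activations of shape (100, 20, 12, 12).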


# NOTE: "CNN" is an assumed name for this model class
class CNN(Model):
    """A stack of conv-pool layers followed by a hidden layer and a
    logistic regression layer.
    """

    def __init__(self, arch, lreg_size, inshape, nout, rng, xdtype, ydtype):
        self.X = T.dtensor4('X')
        self.Y = {"int32": T.ivector("Y"), "int64": T.lvector("Y")}[ydtype]

        (in_bs, in_fm, in_w, in_h) = inshape
        layers = []
        this_input = self.X.reshape(inshape)
        img_sh = inshape
        for (nkern, rf, ps) in zip(*arch):
            layers.append(ConvPoolLayer(this_input,
                (nkern, img_sh[1], rf[0], rf[1]), img_sh, rng, ps, xdtype))
            this_input = layers[-1].activation()
            # shape after a valid convolution and non-overlapping pooling
            img_sh = (in_bs, nkern, (img_sh[2] - rf[0] + 1) / ps[0],
                (img_sh[3] - rf[1] + 1) / ps[1])

        layers.append(HiddenLayer(this_input.flatten(2),
            nkern * img_sh[2] * img_sh[3], lreg_size, rng, xdtype))

        layers.append(LogisticReg(layers[-1].activation(),
            lreg_size, nout, rng, xdtype))

        Model.__init__(self, layers)

    @property
    def speeds(self):
        """
        @return: the speed (momentum) values of every layer
        """

        return map(lambda l: l.get_speeds(), self)

    def set_speeds(self, vlst):
        """set every layer's speeds from a list, one entry per layer"""

        for i, w in enumerate(vlst):
            self[i].set_speeds(w)
    def cost(self, l1, l2):
        """regularized cross entropy

        @param l1: L1 coefficient (float)
        @param l2: L2 coefficient (float)
        @return: cost expression"""

        l1_term = l1 * self.weight_norm("l1")
        l2_term = l2 * self.weight_norm("l2")
        # log-probability assigned to each sample's true class
        error = T.log(self[-1].p_y_given_x)[T.arange(self.Y.shape[0]), self.Y]
        return -T.mean(error) + l1_term + l2_term
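
    # Indexing sketch: if p_y_given_x is [[0.1, 0.9], [0.8, 0.2]] and
    # Y = [1, 0], then T.log(...)[T.arange(2), Y] selects
    # [log(0.9), log(0.8)] -- the log-probability of each true class.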

    def update_params(self, train_batches, gradient_f, momentum, lrate):
        """momentum gradient-descent steps over a whole epoch

        Steps against the gradient for a whole epoch and updates the
        model params in place. By definition speed is initialized to 0.

            new_speed = -rho * dE/dw + mu * speed
            new_weight = w + new_speed

        @param train_batches: indexes of batches (list)
        @param gradient_f: function mapping a batch index to the list of
            gradients for that batch
        @param momentum: mu
        @param lrate: rho
        @return: None
        """

        for batch_i in train_batches:
            all_grads = gradient_f(batch_i)
            for layer in self:
                # pop this layer's share of the flat gradient list
                l_grads = map(lambda i: all_grads.pop(0),
                    range(len(layer.get_params())))

                layer.speed_update(l_grads, momentum, lrate)
                layer.weight_update()
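
    # Numeric sketch of one update: with rho = 0.1, mu = 0.9, dE/dw = 2.0
    # and a previous speed of 0.5, new_speed = -0.1 * 2.0 + 0.9 * 0.5 = 0.25,
    # so the weight moves by +0.25.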
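

if __name__ == "__main__":
    # Minimal training sketch. The dataset, shapes and hyperparameters below
    # are illustrative assumptions; only the CNN/cost/update_params calls
    # mirror this module.
    import theano

    rng = np.random.RandomState(1234)
    batch = 100
    # two conv-pool layers: 20 then 50 kernels, 5x5 fields, 2x2 pooling
    arch = ([20, 50], [(5, 5), (5, 5)], [(2, 2), (2, 2)])
    net = CNN(arch, 500, (batch, 1, 28, 28), 10, rng, "float64", "int32")

    # placeholder data standing in for a real dataset of 1000 28x28 images
    data_x = theano.shared(np.zeros((1000, 28 * 28)))
    data_y = theano.shared(np.zeros(1000, dtype="int32"))

    cost = net.cost(0.0, 0.0001)
    params = [p for layer in net for p in layer.get_params()]
    i = T.lscalar("i")
    gradient_f = theano.function([i], T.grad(cost, params),
        givens={
            net.X: data_x[i * batch:(i + 1) * batch].reshape(
                (batch, 1, 28, 28)),
            net.Y: data_y[i * batch:(i + 1) * batch]})

    for epoch in range(5):
        net.update_params(range(10), gradient_f, momentum=0.9, lrate=0.01)
        log.info("epoch %d done", epoch)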