1 """convolutional neural nets"""
2
3 import logging
4
5 import numpy as np
6
7 import theano.tensor as T
8 from theano.tensor.signal import downsample
9 from theano.tensor.nnet import sigmoid, softmax, conv
10
11 from . import Model, SpeedLayer
12
13 log = logging.getLogger(__name__)
16 """A logistic regression layer"""
17
18 - def __init__(self, X, n_in, n_out, rng, dtype):
19 """A logistic regression layer
20
21 the layer has a weight and a bias matrix as weights and parameters.
22 """
23
24 super(LogisticReg, self).__init__([(n_in, n_out), (n_out, )],
25 ["llW", "llb"], [rng, 0.0], [dtype, dtype])
26
27 self.input = X
28
29 self.p_y_given_x = self.activation()
30
31
32 self.y_hat = T.argmax(self.p_y_given_x, axis=1)
33
35 "softmax activation. XW + b (samples are rows)"
36
37 W, b = self.get_params()
38 return softmax( T.dot(self.input, W) + b )
39
41 return "[(LOG_REG) %dx%d]" % self.get_weights()[0].shape


class HiddenLayer(SpeedLayer):
    """Hidden layer of a feed-forward net"""

    def __init__(self, X, n_in, n_out, rng, dtype):
        super(HiddenLayer, self).__init__([(n_in, n_out), (n_out, )],
            ["hlW", "hlb"], [0.0, 0.0], [dtype, dtype])

        # Glorot-style uniform initialization scaled by fan-in plus fan-out
        if type(rng) == np.random.RandomState:
            thr = np.sqrt(6. / (n_in + n_out))
            self._weights_[0].set_value(
                np.asarray(rng.uniform(low=-thr, high=thr,
                    size=(n_in, n_out)), dtype=dtype))

        self.input = X

    def activation(self):
        """sigmoid activation: sigmoid(XW + b) (samples are rows)"""

        W, b = self.get_params()
        return sigmoid(T.dot(self.input, W) + b)

    def __repr__(self):
        return "[(HIDDEN) %dx%d]" % self.get_weights()[0].shape
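
# Sketch of the initialization bound above: for n_in = 784 and n_out = 500,
# thr = sqrt(6 / 1284) ~= 0.068, so the initial weights are drawn from
# U(-0.068, 0.068); the bias starts at zero.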


class ConvPoolLayer(SpeedLayer):
    """LeNet conv-pool layer"""

    def __init__(self, X, fshape, ishape, rng, poolsize, dtype):
        """LeCun convolutional layer

        fshape: (# filters, # in_feature_maps, width, height)
        ishape: (batch_size, # feature_maps, width, height)
        """
        assert fshape[1] == ishape[1], "nr. of input feature maps must match"

        super(ConvPoolLayer, self).__init__([fshape, (fshape[0], )],
            ["cpW", "cpb"], [rng, 0.0], [dtype, dtype])

        if type(rng) == np.random.RandomState:
            # uniform initialization scaled by the fan-in of one filter
            thr = np.sqrt(3. / np.prod(fshape[1:]))
            self._weights_[0].set_value(
                np.asarray(rng.uniform(low=-thr, high=thr,
                    size=fshape), dtype=dtype))

        self.input = X
        self.ishape = ishape
        self.fshape = fshape
        self.pshape = poolsize

    def activation(self):
        """sigmoid of the max-pooled valid convolution"""

        W, b = self.get_params()
        conved = conv.conv2d(self.input, W,
            filter_shape=self.fshape, image_shape=self.ishape)
        pooled = downsample.max_pool_2d(conved, self.pshape, ignore_border=True)
        # the bias is per feature map: broadcast over batch, width and height
        return sigmoid(pooled + b.dimshuffle('x', 0, 'x', 'x'))

    def __repr__(self):
        "in_feature_maps -> nr_of_kern (receptive_field_size (wxh) / pool_size (wxh))"

        (nk, ifm, fw, fh) = self.fshape
        (pw, ph) = self.pshape
        weights = "[(CONV_POOL) %d -> @%d (%dx%d) / %dx%d]" % (ifm, nk, fw, fh, pw, ph)
        state = "[%d/batch @%d (%dx%d) -> @%d (%dx%d)]" % (self.ishape + (self.fshape[0],
            (self.ishape[2] - self.fshape[2] + 1) / self.pshape[0],
            (self.ishape[3] - self.fshape[3] + 1) / self.pshape[1]))
        return weights + " " + state
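
# Shape sketch: a valid 5x5 convolution over a 28x28 input leaves
# 28 - 5 + 1 = 24 positions per side, and 2x2 max-pooling halves that to 12;
# so ishape (100, 1, 28, 28) with fshape (20, 1, 5, 5) and poolsize (2, 2)
# gives activations of shape (100, 20, 12, 12).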


# NOTE: "CNN" is an assumed name for this model class
class CNN(Model):
    """A stack of conv-pool layers followed by a hidden layer and a
    logistic regression layer.
    """

    def __init__(self, arch, lreg_size, inshape, nout, rng, xdtype, ydtype):
        self.X = T.dtensor4('X')
        self.Y = {"int32": T.ivector("Y"), "int64": T.lvector("Y")}[ydtype]

        (in_bs, in_fm, in_w, in_h) = inshape
        layers = []
        this_input = self.X.reshape(inshape)
        img_sh = inshape
        for (nkern, rf, ps) in zip(*arch):
            layers.append(ConvPoolLayer(this_input,
                (nkern, img_sh[1], rf[0], rf[1]), img_sh, rng, ps, xdtype))
            this_input = layers[-1].activation()
            # shape after a valid convolution and non-overlapping pooling
            img_sh = (in_bs, nkern, (img_sh[2] - rf[0] + 1) / ps[0],
                (img_sh[3] - rf[1] + 1) / ps[1])

        layers.append(HiddenLayer(this_input.flatten(2),
            nkern * img_sh[2] * img_sh[3], lreg_size, rng, xdtype))

        layers.append(LogisticReg(layers[-1].activation(),
            lreg_size, nout, rng, xdtype))

        Model.__init__(self, layers)

    @property
    def speeds(self):
        """
        @return: the speed (momentum) values of every layer
        """

        return map(lambda l: l.get_speeds(), self)

    def set_speeds(self, vlst):
        """set every layer's speeds from a list, one entry per layer"""

        for i, w in enumerate(vlst):
            self[i].set_speeds(w)
    def cost(self, l1, l2):
        """regularized cross entropy

        @param l1: L1 coefficient (float)
        @param l2: L2 coefficient (float)
        @return: cost expression"""

        l1_term = l1 * self.weight_norm("l1")
        l2_term = l2 * self.weight_norm("l2")
        # log-probability assigned to each sample's true class
        error = T.log(self[-1].p_y_given_x)[T.arange(self.Y.shape[0]), self.Y]
        return -T.mean(error) + l1_term + l2_term
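
    # Indexing sketch: if p_y_given_x is [[0.1, 0.9], [0.8, 0.2]] and
    # Y = [1, 0], then T.log(...)[T.arange(2), Y] selects
    # [log(0.9), log(0.8)] -- the log-probability of each true class.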

    def update_params(self, train_batches, gradient_f, momentum, lrate):
        """momentum gradient-descent steps over a whole epoch

        Steps against the gradient for a whole epoch and updates the
        model params in place. By definition speed is initialized to 0.

            new_speed = -rho * dE/dw + mu * speed
            new_weight = w + new_speed

        @param train_batches: indexes of batches (list)
        @param gradient_f: function mapping a batch index to the list of
            gradients for that batch
        @param momentum: mu
        @param lrate: rho
        @return: None
        """

        for batch_i in train_batches:
            all_grads = gradient_f(batch_i)
            for layer in self:
                # pop this layer's share of the flat gradient list
                l_grads = map(lambda i: all_grads.pop(0),
                    range(len(layer.get_params())))

                layer.speed_update(l_grads, momentum, lrate)
                layer.weight_update()
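
    # Numeric sketch of one update: with rho = 0.1, mu = 0.9, dE/dw = 2.0
    # and a previous speed of 0.5, new_speed = -0.1 * 2.0 + 0.9 * 0.5 = 0.25,
    # so the weight moves by +0.25.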
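

if __name__ == "__main__":
    # Minimal training sketch. The dataset, shapes and hyperparameters below
    # are illustrative assumptions; only the CNN/cost/update_params calls
    # mirror this module.
    import theano

    rng = np.random.RandomState(1234)
    batch = 100
    # two conv-pool layers: 20 then 50 kernels, 5x5 fields, 2x2 pooling
    arch = ([20, 50], [(5, 5), (5, 5)], [(2, 2), (2, 2)])
    net = CNN(arch, 500, (batch, 1, 28, 28), 10, rng, "float64", "int32")

    # placeholder data standing in for a real dataset of 1000 28x28 images
    data_x = theano.shared(np.zeros((1000, 28 * 28)))
    data_y = theano.shared(np.zeros(1000, dtype="int32"))

    cost = net.cost(0.0, 0.0001)
    params = [p for layer in net for p in layer.get_params()]
    i = T.lscalar("i")
    gradient_f = theano.function([i], T.grad(cost, params),
        givens={
            net.X: data_x[i * batch:(i + 1) * batch].reshape(
                (batch, 1, 28, 28)),
            net.Y: data_y[i * batch:(i + 1) * batch]})

    for epoch in range(5):
        net.update_params(range(10), gradient_f, momentum=0.9, lrate=0.01)
        log.info("epoch %d done", epoch)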