1 """autoencoders"""
2
3 import logging
4
5 import numpy as np
6
7 import theano.tensor as T
8 from theano.tensor.nnet import sigmoid
9
10 from . import Model, SpeedLayer
11
12 log = logging.getLogger(__name__)
13
14
class AELayer(SpeedLayer):  # NOTE(review): header line was lost; base inferred from `from . import SpeedLayer` and the speed/weight-update API — confirm
    """An autoencoder layer.

    Holds the shared weights W, b, b_prime and builds the symbolic
    encoder/decoder graph over a (possibly corrupted) input.
    """

    def __init__(self, X, n_in, n_hidden, rng, thrng, dtype, clevel=0):
        """Initialize an AELayer instance.

        @param X: input (theano.tensor)
        @param n_in: input dimension (int)
        @param n_hidden: hidden dimension (int)
        @param rng: random state (np.random.RandomState)
        @param thrng: random state (theano.tensor.shared_randomstreams.RandomStreams)
        @param dtype: dtype of weights (np.dtype)
        @param clevel: keeps ``1-clevel`` entries of the inputs the same
            and zeroes out a randomly selected subset of size ``clevel``
        """

        # W is (n_in, n_hidden); b is the hidden bias, b_prime the visible
        # (reconstruction) bias.  The decoder reuses W transposed (tied weights).
        super(AELayer, self).__init__([(n_in, n_hidden), (n_hidden,), (n_in,)],
                                      ["W", "b", "b_prime"], [0.0, 0.0, 0.0],
                                      [dtype, dtype, dtype])

        # Only (re)initialize W when given a numpy RandomState; isinstance is
        # the idiomatic check (the original compared type() for equality).
        if isinstance(rng, np.random.RandomState):
            log.debug("AEW: rnd")
            # Glorot/Bengio-style uniform init bound for sigmoid units.
            thr = np.sqrt(6. / (n_in + n_hidden))
            self._weights_[0].set_value(np.asarray(
                rng.uniform(low=-thr, high=thr, size=(n_in, n_hidden)),
                dtype=dtype))

        self.theano_rng = thrng
        self.input = X
        self.corruption_level = clevel
        self.tilde_input = self.corrupt(self.input, clevel)

        w, b, bp = self.get_params()
        # Encode the *corrupted* input; decode with tied (transposed) weights.
        self.encoder = sigmoid(T.dot(self.tilde_input, w) + b)
        self.decoder = sigmoid(T.dot(self.encoder, w.T) + bp)

    def corrupt(self, X, corrupt_level):
        """Return a symbolic copy of ``X`` with binomial (masking) noise.

        ``theano_rng.binomial(size, n, p)`` draws an array of 0s and 1s of
        shape ``X.shape`` where each entry is 1 with probability
        ``1 - corrupt_level`` and 0 with probability ``corrupt_level``;
        multiplying by ``X`` zeroes out the selected entries.
        """

        return self.theano_rng.binomial(size=X.shape, n=1,
                                        p=1 - corrupt_level) * X

    def cost(self, l1, l2):
        """Mean per-example cross-entropy reconstruction cost (symbolic).

        @param l1: L1 penalty weight — accepted for interface parity but
            currently UNUSED by this layer
        @param l2: L2 penalty weight — accepted for interface parity but
            currently UNUSED by this layer
        @return: scalar theano expression, mean over the minibatch of the
            summed binary cross-entropy between input and reconstruction
        """

        z = self.decoder
        x = self.input

        # Sum the cross-entropy over feature axis 1 (one value per example).
        L = -T.sum(x * T.log(z) + (1 - x) * T.log(1 - z), axis=1)

        return T.mean(L)
82
85
class AutoEncoder(Model):  # NOTE(review): header line was lost; base inferred from `from . import Model` and the self[0] indexing — confirm
    r"""Denoising autoencoder

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by projecting it first in a latent space and reprojecting
    it afterwards back in the input space. Please refer to Vincent et al., 2008
    for more details. If x is the input then equation (1) computes a partially
    destroyed version of x by means of a stochastic mapping q_D. Equation (2)
    computes the projection of the input into the latent space. Equation (3)
    computes the reconstruction of the input, while equation (4) computes the
    reconstruction error.

    .. math::

        \tilde{x} \sim q_D(\tilde{x}|x)                                  (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y + b')                                                 (3)

        L(x,z) = -\sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]     (4)

    """

    def __init__(self, ins, hs, rng, theano_rng, wdtype, corruption_level):
        """Initialize the dA by specifying the number of visible units (the
        dimension d of the input), the number of hidden units (the dimension
        d' of the latent or hidden space) and the corruption level.

        @param ins: input dimension (int)
        @param hs: hidden dimension (int)
        @param rng: random state (np.random.RandomState)
        @param theano_rng: random state (theano.tensor.shared_randomstreams.RandomStreams)
        @param wdtype: dtype of weights (np.dtype)
        @param corruption_level: keeps ``1-corruption_level`` entries of the
            inputs the same and zeroes out a randomly selected subset of
            size ``corruption_level``
        """

        X = T.matrix("X")
        super(AutoEncoder, self).__init__([AELayer(X, ins, hs, rng, theano_rng,
                                                   wdtype, corruption_level)])

    def cost(self, l1, l2):
        """Delegate the cost to the single underlying AE layer."""
        return self[0].cost(l1, l2)

    def update_params(self, train_batches, gradient_f, momentum, lrate):
        """Step in the direction of the gradient for a whole epoch and
        update the model params in place.

        By definition speed is initialized to 0.
        new_speed = -rho * dE/dw + mu * speed
        new_weight = w + new_speed

        @param train_batches: indexes of batches (list)
        @param gradient_f: function that returns the list of gradients from
            the batch index
        @param momentum: mu
        @param lrate: rho
        @return: none
        """

        for batch_i in train_batches:
            all_grads = gradient_f(batch_i)
            for layer in self:
                # The pops must execute NOW, in layer order; a Python-3 `map`
                # is lazy, which would defer them and misalign the gradient
                # slices across layers.  Materialize eagerly instead.
                l_grads = [all_grads.pop(0)
                           for _ in range(len(layer.get_params()))]

                layer.speed_update(l_grads, momentum, lrate)
                layer.weight_update()
157
class AEStack(Model):  # NOTE(review): header line was lost; base inferred from `from . import Model` and the self[lidx] indexing — confirm
    r"""A stack of denoising autoencoders. Each layer is a denoising autoencoder.

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by projecting it first in a latent space and reprojecting
    it afterwards back in the input space. Please refer to Vincent et al., 2008
    for more details. If x is the input then equation (1) computes a partially
    destroyed version of x by means of a stochastic mapping q_D. Equation (2)
    computes the projection of the input into the latent space. Equation (3)
    computes the reconstruction of the input, while equation (4) computes the
    reconstruction error.

    .. math::

        \tilde{x} \sim q_D(\tilde{x}|x)                                  (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y + b')                                                 (3)

        L(x,z) = -\sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]     (4)

    This is a completely unsupervised model that concatenates autoencoders.
    """

    def __init__(self, ins, hs_lst, rng, theano_rng, wdtype, corruption_level):
        """Initialize a stack of autoencoders.

        Each layer's symbolic input is the previous layer's encoder output,
        so the stack forms a chain from the raw input matrix X.

        @param ins: input dimension (int)
        @param hs_lst: hidden dimension of each stacked layer (list of int)
        @param rng: random state (np.random.RandomState)
        @param theano_rng: random state (theano.tensor.shared_randomstreams.RandomStreams)
        @param wdtype: dtype of weights (np.dtype)
        @param corruption_level: keeps ``1-corruption_level`` entries of the
            inputs the same and zeroes out a randomly selected subset of
            size ``corruption_level``
        """

        X = T.matrix("X")
        layers = []
        for hs in hs_lst:
            ael = AELayer(X, ins, hs, rng, theano_rng,
                          wdtype, corruption_level)
            layers.append(ael)
            # Chain: the next layer encodes this layer's hidden representation.
            X = layers[-1].encoder
            ins = hs
        super(AEStack, self).__init__(layers)

    def update_params(self, train_batches, gradient_f, momentum, lrate, lidx):
        """Step in the direction of the gradient for a whole epoch and
        update ONE layer's params in place (greedy layer-wise training).

        By definition speed is initialized to 0.
        new_speed = -rho * dE/dw + mu * speed
        new_weight = w + new_speed

        @param train_batches: indexes of batches (list)
        @param gradient_f: function that returns the list of gradients from
            the batch index
        @param momentum: mu
        @param lrate: rho
        @param lidx: index of the single layer to update (int)
        @return: none
        """

        for batch_i in train_batches:
            all_grads = gradient_f(batch_i)
            layer = self[lidx]
            # Materialize eagerly: a Python-3 `map` is lazy and would defer
            # the pops; speed_update must receive a concrete, reusable list.
            l_grads = [all_grads.pop(0)
                       for _ in range(len(layer.get_params()))]

            layer.speed_update(l_grads, momentum, lrate)
            layer.weight_update()
232