diff --git a/breze/arch/construct/simple.py b/breze/arch/construct/simple.py
index f989a84..d2253a6 100644
--- a/breze/arch/construct/simple.py
+++ b/breze/arch/construct/simple.py
@@ -141,7 +141,9 @@ def _forward(self):
         self.output_in = conv.conv2d(
             self.inpt, self.weights,
             image_shape=(
-                self.n_samples, self.n_inpt, self.inpt_height, self.inpt_width),
+                self.n_samples, self.n_inpt,
+                self.inpt_height, self.inpt_width
+            ),
             subsample=self.subsample,
             border_mode='valid',
         )
@@ -182,3 +184,85 @@ def _forward(self):
         f = lookup(self.transfer, _transfer)
         self.output = f(self.output_in)
+
+
+class LocalResponseNormalization(Layer):
+    """
+    Class representing a local response normalization layer [D]_
+    (section 3.3).
+
+    With a^i_{x,y} the activity of a neuron computed by applying kernel i
+    at position (x, y) followed by the ReLU nonlinearity, the response-
+    normalized activation b^i_{x,y} is given by:
+
+    b^i_{x,y} = a^i_{x,y} / (k
+        + alpha * sum_{j=max(0, i - n/2)}^{min(N-1, i + n/2)} (a^j_{x,y})^2 )**beta
+
+
+    References
+    ----------
+    .. [D] Krizhevsky, A., Sutskever, I., & Hinton, G. E. (2012).
+       Imagenet classification with deep convolutional neural networks.
+       In Advances in neural information processing systems (pp. 1097-1105).
+
+
+    Attributes
+    ----------
+    N : int
+        Number of kernels (i.e. feature maps) in the input.
+
+    k : int
+        Additive constant in the denominator.
+
+    n : int
+        Number of adjacent kernel maps to sum over.
+
+    alpha : float
+        Scaling factor of the squared-activation sum.
+
+    beta : float
+        Exponent of the denominator.
+
+    """
+
+    def __init__(self, inpt, inpt_height, inpt_width,
+                 N, k, n, alpha, beta,
+                 transfer='identity',
+                 declare=None, name=None):
+        self.inpt = inpt
+        self.inpt_height = inpt_height
+        self.inpt_width = inpt_width
+        self.transfer = transfer
+
+        # Normalization leaves the spatial dimensions unchanged.
+        self.output_height = inpt_height
+        self.output_width = inpt_width
+
+        self.N = N
+        self.k = k
+        self.n = n
+        self.alpha = alpha
+        self.beta = beta
+
+        self.n_output = N
+
+        super(LocalResponseNormalization, self).__init__(declare=declare,
+                                                         name=name)
+
+    def _forward(self):
+        square = T.sqr(self.inpt)
+        scale = T.zeros_like(self.inpt)
+
+        for i in xrange(self.N):
+            # Sum the squared activations of the n kernel maps adjacent
+            # to map i.  The slice's upper bound is exclusive, hence the
+            # + 1 to include map min(N - 1, i + n // 2).  set_subtensor
+            # returns a new variable instead of modifying scale in place,
+            # so its result has to be reassigned.
+            scale = T.set_subtensor(
+                scale[:, i, :, :],
+                self.alpha * square[
+                    :,
+                    max(0, i - self.n // 2):min(self.N, i + self.n // 2 + 1),
+                    :,
+                    :].sum(axis=1)
+            )
+
+        # The whole denominator is raised to beta, per the docstring formula.
+        scale = (self.k + scale) ** self.beta
+
+        self.output_in = self.inpt / scale
+        f = lookup(self.transfer, _transfer)
+        self.output = f(self.output_in)
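
For reference, a plain NumPy transcription of the docstring formula; this is a hypothetical helper, not part of the patch, useful for checking the Theano version on small inputs. Note the inclusive upper bound min(N-1, i + n/2), which is why the Theano slice above needs the + 1. The defaults are the values reported in [D] (k=2, n=5, alpha=1e-4, beta=0.75).

    import numpy as np

    def lrn_reference(a, k=2, n=5, alpha=1e-4, beta=0.75):
        # a has shape (samples, N, height, width).
        N = a.shape[1]
        b = np.empty_like(a)
        for i in range(N):
            lo = max(0, i - n // 2)
            hi = min(N - 1, i + n // 2)   # inclusive bound, hence hi + 1 below
            s = (a[:, lo:hi + 1] ** 2).sum(axis=1)
            b[:, i] = a[:, i] / (k + alpha * s) ** beta
        return b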
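
A rough usage sketch, assuming the construct Layer base class wires up _forward on construction as the other layers in this module rely on; the shapes and hyperparameters below are made up for illustration:

    import numpy as np
    import theano
    import theano.tensor as T
    from breze.arch.construct.simple import LocalResponseNormalization

    inpt = T.tensor4('inpt')
    layer = LocalResponseNormalization(
        inpt, inpt_height=8, inpt_width=8,
        N=16, k=2, n=5, alpha=1e-4, beta=0.75)

    f = theano.function([inpt], layer.output)
    x = np.random.randn(10, 16, 8, 8).astype(theano.config.floatX)
    out = f(x)  # out.shape == (10, 16, 8, 8); normalization keeps the shape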