From c9cbe1a28444e39e29022783c0e589e0535fe2d8 Mon Sep 17 00:00:00 2001
From: Yurii Shevchuk
Date: Thu, 13 Dec 2018 16:58:02 +0100
Subject: [PATCH] fixed fan computation

---
 .../reinforcement_learning/vin/train_vin.py |  6 +----
 neupy/init.py                               | 24 +++++++++++++------
 neupy/layers/recurrent.py                   | 20 ++++++++--------
 tests/layers/test_init_methods.py           | 23 ++++++++++++++++++
 4 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/examples/reinforcement_learning/vin/train_vin.py b/examples/reinforcement_learning/vin/train_vin.py
index 8c86f49a..6eb52e94 100644
--- a/examples/reinforcement_learning/vin/train_vin.py
+++ b/examples/reinforcement_learning/vin/train_vin.py
@@ -23,11 +23,7 @@ def random_weight(shape):
     initializer = init.Normal()
     weight = initializer.sample(shape)
 
-    return tf.Variable(
-        asfloat(weight),
-        name='network/scalar-step',
-        dtype=tf.float32
-    )
+    return tf.Variable(asfloat(weight), dtype=tf.float32)
 
 
 class ChannelGlobalMaxPooling(layers.BaseLayer):
diff --git a/neupy/init.py b/neupy/init.py
index ee152974..626467e5 100644
--- a/neupy/init.py
+++ b/neupy/init.py
@@ -18,18 +18,29 @@ def identify_fans(shape):
     Parameters
     ----------
     shape : tuple or list
-        Matrix shape.
 
     Returns
     -------
     tuple
         Tuple that contains :math:`fan_{in}` and :math:`fan_{out}`.
     """
-    fan_in, fan_out = shape[0], 1
-    output_feature_shape = shape[1:]
+    n_dimensions = len(shape)
 
-    if output_feature_shape:
-        fan_out = np.prod(output_feature_shape).item(0)
+    if n_dimensions == 0:
+        raise ValueError("Cannot apply initializer when shape is unknown")
+
+    elif n_dimensions == 1:
+        fan_in, fan_out = shape[0], 1
+
+    elif n_dimensions == 2:
+        fan_in, fan_out = shape
+
+    else:
+        # By default we assume that weights with more than 2 dimensions
+        # are generated for convolutional layers.
+        receptive_field = np.prod(shape[:-2]).item(0)
+        fan_in = shape[-2] * receptive_field
+        fan_out = shape[-1] * receptive_field
 
     return fan_in, fan_out
 
@@ -122,8 +133,7 @@ def __repr__(self):
 
 class Normal(Initializer):
     """
-    Initialize parameter sampling from the normal
-    distribution.
+    Initialize parameter sampling from the normal distribution.
 
     Parameters
     ----------
diff --git a/neupy/layers/recurrent.py b/neupy/layers/recurrent.py
index 7cb7310e..e5861db6 100644
--- a/neupy/layers/recurrent.py
+++ b/neupy/layers/recurrent.py
@@ -147,16 +147,16 @@ class LSTM(BaseRNNLayer):
 
     input_weights : Initializer, ndarray
         Weight parameters for input connection.
-        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.
+        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.
 
     hidden_weights : Initializer, ndarray
         Weight parameters for hidden connection.
-        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.
+        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.
 
     cell_weights : Initializer, ndarray
         Weight parameters for cell connection. Require only when
         ``peepholes=True`` otherwise it will be ignored.
-        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.
+        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.
 
     bias : Initializer, ndarray
         Bias parameters for all gates.
@@ -252,9 +252,9 @@ class LSTM(BaseRNNLayer):
         ]
     )
     """
-    input_weights = ParameterProperty(default=init.XavierUniform())
-    hidden_weights = ParameterProperty(default=init.XavierUniform())
-    cell_weights = ParameterProperty(default=init.XavierUniform())
+    input_weights = ParameterProperty(default=init.HeNormal())
+    hidden_weights = ParameterProperty(default=init.HeNormal())
+    cell_weights = ParameterProperty(default=init.HeNormal())
     biases = ParameterProperty(default=init.Constant(0))
 
     activation_functions = MultiCallableProperty(
@@ -418,11 +418,11 @@ class GRU(BaseRNNLayer):
 
     input_weights : Initializer, ndarray
         Weight parameters for input connection.
-        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.
+        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.
 
     hidden_weights : Initializer, ndarray
         Weight parameters for hidden connection.
-        Defaults to :class:`XavierUniform() <neupy.init.XavierUniform>`.
+        Defaults to :class:`HeNormal() <neupy.init.HeNormal>`.
 
     bias : Initializer, ndarray
         Bias parameters for all gates.
@@ -504,8 +504,8 @@ class GRU(BaseRNNLayer):
         ]
     )
     """
-    input_weights = ParameterProperty(default=init.XavierUniform())
-    hidden_weights = ParameterProperty(default=init.XavierUniform())
+    input_weights = ParameterProperty(default=init.HeNormal())
+    hidden_weights = ParameterProperty(default=init.HeNormal())
     biases = ParameterProperty(default=init.Constant(0))
 
     activation_functions = MultiCallableProperty(
diff --git a/tests/layers/test_init_methods.py b/tests/layers/test_init_methods.py
index 81b91741..5217dcfe 100644
--- a/tests/layers/test_init_methods.py
+++ b/tests/layers/test_init_methods.py
@@ -4,6 +4,7 @@
 import numpy as np
 
 from neupy import init
+from neupy.init import identify_fans
 
 from base import BaseTestCase
 
@@ -18,6 +19,28 @@ def assertNormalyDistributed(self, value):
                          msg="Sampled distribution is not normal")
 
 
+class FanIdentifierTestCase(BaseInitializerTestCase):
+    def test_identify_fans_1d(self):
+        self.assertEqual((10, 1), identify_fans((10,)))
+        self.assertEqual((1, 1), identify_fans((1,)))
+
+    def test_identify_fans_2d(self):
+        self.assertEqual((10, 20), identify_fans((10, 20)))
+        self.assertEqual((20, 10), identify_fans((20, 10)))
+
+    def test_identify_fans_exceptions(self):
+        with self.assertRaisesRegexp(ValueError, "shape is unknown"):
+            identify_fans(tuple())
+
+    def test_identify_fans_conv(self):
+        self.assertEqual((9, 90), identify_fans((3, 3, 1, 10)))
+        self.assertEqual((250, 150), identify_fans((5, 5, 10, 6)))
+
+    def test_identify_fans_other_dim(self):
+        self.assertEqual((18, 180), identify_fans((3, 3, 2, 1, 10)))
+        self.assertEqual((48, 80), identify_fans((8, 6, 10)))
+
+
 class ConstantInitializationTestCase(BaseInitializerTestCase):
     def test_constant_initializer(self):
         const = init.Constant(value=0)
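
Note for reviewers: the snippet below is a minimal standalone sketch of the fan rules this patch introduces. Under the old code, a (3, 3, 1, 10) convolution kernel got fan_in = shape[0] = 3 and fan_out = prod(shape[1:]) = 30, i.e. the kernel height was treated as fan-in; the new rule folds the receptive field into both fans. The identify_fans helper is copied from the patched neupy/init.py above so it runs without neupy installed; the He-style scale at the end uses the textbook sqrt(2 / fan_in) formula as an illustration, not a verbatim copy of neupy's HeNormal internals.

import numpy as np

def identify_fans(shape):
    # Same logic as the patched neupy.init.identify_fans above.
    n_dimensions = len(shape)

    if n_dimensions == 0:
        raise ValueError("Cannot apply initializer when shape is unknown")
    elif n_dimensions == 1:
        fan_in, fan_out = shape[0], 1
    elif n_dimensions == 2:
        fan_in, fan_out = shape
    else:
        # Weights with more than 2 dimensions are assumed to belong to
        # convolutional layers: (spatial..., in_channels, out_channels).
        receptive_field = np.prod(shape[:-2]).item(0)
        fan_in = shape[-2] * receptive_field
        fan_out = shape[-1] * receptive_field

    return fan_in, fan_out

# 3x3 kernel, 1 input channel, 10 output channels: receptive_field = 9,
# so fan_in = 1 * 9 = 9 and fan_out = 10 * 9 = 90, matching the
# test_identify_fans_conv expectations above.
print(identify_fans((3, 3, 1, 10)))   # (9, 90)

# Fans feed the initializer's scale; a He-style normal initializer draws
# weights with std = sqrt(2 / fan_in) (general formula, for illustration).
fan_in, _ = identify_fans((3, 3, 1, 10))
weights = np.random.normal(0.0, np.sqrt(2.0 / fan_in), size=(3, 3, 1, 10))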