Makes create_slots automatically set up weights for swap_weights (#2195)

marksandler2 · web-flow · commit 0cb467443637 · 2020-10-08T14:40:04.000-07:00
* Enables the moving_average optimizer to support calling swap_weights
without first having to call shadow_copy.

* Update moving_average.py
diff --git a/tensorflow_addons/optimizers/moving_average.py b/tensorflow_addons/optimizers/moving_average.py
@@ -127,12 +127,13 @@ def get_config(self):
         return {**base_config, **config}
 
     def _create_slots(self, var_list):
-        self._optimizer._create_slots(
-            var_list=var_list
-        )  # pylint: disable=protected-access
+        self._optimizer._create_slots(var_list=var_list)
         for var in var_list:
             self.add_slot(var, "average", var.read_value())
 
+        self._average_weights = [self.get_slot(var, "average") for var in var_list]
+        self._model_weights = var_list
+
     def shadow_copy(self, model_weights):
         """Creates shadow variables for the given model weights."""
         for var in model_weights:
diff --git a/tensorflow_addons/optimizers/tests/moving_average_test.py b/tensorflow_addons/optimizers/tests/moving_average_test.py
@@ -228,6 +228,38 @@ def test_dynamic_decay():
     np.testing.assert_allclose(ema_var0.read_value(), [0.64, 1.64])
 
 
+@pytest.mark.usefixtures("maybe_run_functions_eagerly")
+@pytest.mark.with_device([tf.distribute.MirroredStrategy])
+def test_swap_weight_no_shadow_copy(device):
+    with device.scope():
+        var = tf.Variable([1.0, 2.0])
+        grads = tf.constant([0.1, 0.1])
+
+        opt = MovingAverage(tf.keras.optimizers.SGD(lr=2.0), average_decay=0.5)
+
+    @tf.function
+    def apply_gradients():
+        opt.apply_gradients([(grads, var)])
+
+    device.run(apply_gradients)
+
+    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
+    ema_var = opt.get_slot(var, "average")
+    np.testing.assert_allclose(ema_var.read_value(), [0.9, 1.9])
+
+    with device.scope():
+        opt.swap_weights()
+
+    np.testing.assert_allclose(ema_var.read_value(), [0.8, 1.8])
+    np.testing.assert_allclose(var.read_value(), [0.9, 1.9])
+
+    with device.scope():
+        opt.swap_weights()
+
+    np.testing.assert_allclose(var.read_value(), [0.8, 1.8])
+    np.testing.assert_allclose(ema_var.read_value(), [0.9, 1.9])
+
+
 @pytest.mark.usefixtures("maybe_run_functions_eagerly")
 @pytest.mark.with_device([tf.distribute.MirroredStrategy])
 def test_swap_weights(device):