@@ -59,7 +59,7 @@ def yogi_update_numpy(
59
59
beta1 = np .array (beta1 , dtype = param .dtype )
60
60
beta2 = np .array (beta2 , dtype = param .dtype )
61
61
62
- alpha_t = alpha * np .sqrt (1 - beta2 ** t ) / (1 - beta1 ** t )
62
+ alpha_t = alpha * np .sqrt (1 - beta2 ** t ) / (1 - beta1 ** t )
63
63
64
64
m_t = beta1 * m + (1 - beta1 ) * g_t
65
65
g2_t = g_t * g_t
@@ -128,8 +128,8 @@ def do_test_sparse(beta1=0.0, l1reg=0.0, l2reg=0.0):
128
128
# Run 3 steps of Yogi.
129
129
for t in range (1 , 4 ):
130
130
beta1_power , beta2_power = get_beta_accumulators (opt , dtype )
131
- test_utils .assert_allclose_according_to_type (beta1 ** t , beta1_power )
132
- test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
131
+ test_utils .assert_allclose_according_to_type (beta1 ** t , beta1_power )
132
+ test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
133
133
opt .apply_gradients (zip ([grads0 , grads1 ], [var0 , var1 ]))
134
134
135
135
var0_np , m0 , v0 = yogi_update_numpy (
@@ -224,8 +224,8 @@ def do_test_basic(beta1=0.0, l1reg=0.0, l2reg=0.0):
224
224
# Run 3 steps of Yogi.
225
225
for t in range (1 , 4 ):
226
226
beta1_power , beta2_power = get_beta_accumulators (opt , dtype )
227
- test_utils .assert_allclose_according_to_type (beta1 ** t , beta1_power )
228
- test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
227
+ test_utils .assert_allclose_according_to_type (beta1 ** t , beta1_power )
228
+ test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
229
229
230
230
opt .apply_gradients (zip ([grads0 , grads1 ], [var0 , var1 ]))
231
231
@@ -284,8 +284,8 @@ def test_tensor_learning_rate():
284
284
# Run 3 steps of Yogi.
285
285
for t in range (1 , 4 ):
286
286
beta1_power , beta2_power = get_beta_accumulators (opt , dtype )
287
- test_utils .assert_allclose_according_to_type (0.9 ** t , beta1_power )
288
- test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
287
+ test_utils .assert_allclose_according_to_type (0.9 ** t , beta1_power )
288
+ test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
289
289
290
290
opt .apply_gradients (zip ([grads0 , grads1 ], [var0 , var1 ]))
291
291
@@ -320,8 +320,8 @@ def test_sharing():
320
320
# Run 3 steps of intertwined Yogi1 and Yogi2.
321
321
for t in range (1 , 4 ):
322
322
beta1_power , beta2_power = get_beta_accumulators (opt , dtype )
323
- test_utils .assert_allclose_according_to_type (0.9 ** t , beta1_power )
324
- test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
323
+ test_utils .assert_allclose_according_to_type (0.9 ** t , beta1_power )
324
+ test_utils .assert_allclose_according_to_type (0.999 ** t , beta2_power )
325
325
opt .apply_gradients (zip ([grads0 , grads1 ], [var0 , var1 ]))
326
326
var0_np , m0 , v0 = yogi_update_numpy (var0_np , grads0_np , t , m0 , v0 )
327
327
var1_np , m1 , v1 = yogi_update_numpy (var1_np , grads1_np , t , m1 , v1 )
0 commit comments