@@ -72,17 +72,19 @@ def main():
         for t in range(update_window):
             if t != 0:
                 with tf.control_dependencies([opt_local]): #compute gradients only if the local opt was run
-                    grads, varss = zip(*loptimizer.compute_gradients(loss, var_list=tf.local_variables()))
+                    grads, varss = zip(*loptimizer.compute_gradients(loss,
+                                                                     var_list=tf.local_variables()))
             else:
-                grads, varss = zip(*loptimizer.compute_gradients(loss, var_list=tf.local_variables()))
+                grads, varss = zip(*loptimizer.compute_gradients(loss,
+                                                                 var_list=tf.local_variables()))
             grad_list.append(grads) #add gradients to the list
             opt_local = loptimizer.apply_gradients(zip(grads, varss),
                                                    global_step=local_step) #update local parameters
         grads = tf.reduce_mean(grad_list, axis=0)
         grads = tuple([grads[i] for i in range(len(varss))])
         opt = optimizer.apply_gradients(
-                zip(grads, [local_to_global[v] for v in varss])
-                , global_step=global_step) #apply the gradients to variables on ps
+            zip(grads, [local_to_global[v] for v in varss])
+            , global_step=global_step) #apply the gradients to variables on ps

         # Pull param from global server
         with tf.control_dependencies([opt]):
@@ -106,12 +108,12 @@ def main():

     #Monitored Training Session
     sess = tf.train.MonitoredTrainingSession(master=server.target,
-            is_chief=is_chief,
-            config=config,
-            scaffold=scaff,
-            hooks=hooks,
-            save_checkpoint_secs=1,
-            checkpoint_dir='logdir')
+                                             is_chief=is_chief,
+                                             config=config,
+                                             scaffold=scaff,
+                                             hooks=hooks,
+                                             save_checkpoint_secs=1,
+                                             checkpoint_dir='logdir')
     if is_chief:
         sess.run(assign_global) #Assigns chief's initial values to ps
         time.sleep(10) #grace period to wait on other workers before starting training
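
For context on the first hunk: each worker takes `update_window` local steps with a local optimizer, collects the gradients from every step, averages them, and applies the averaged gradients to the parameter-server copies through the `local_to_global` map. Below is a minimal, self-contained TF1-style sketch of that pattern, not the repository's code: the variables `w_local`/`w_global`, the toy loss, the learning rates, and the `tf.no_op()` stand-in for the missing first control dependency are illustrative assumptions; the real script builds `local_to_global`, `loss`, `local_step`, and `global_step` elsewhere.

```python
import tensorflow as tf  # TF1-style graph mode (tf.compat.v1 under TF2)

update_window = 4  # number of local steps before a global update (illustrative)

# One global (parameter-server) variable and its worker-local twin (assumed names).
w_global = tf.get_variable("w_global", shape=[2], initializer=tf.zeros_initializer())
w_local = tf.get_variable("w_local", shape=[2], initializer=tf.zeros_initializer(),
                          collections=[tf.GraphKeys.LOCAL_VARIABLES])
local_to_global = {w_local: w_global}  # maps each local variable to its ps copy

loss = tf.reduce_sum(tf.square(w_local - 1.0))        # toy loss for illustration
loptimizer = tf.train.GradientDescentOptimizer(0.1)   # local optimizer
optimizer = tf.train.GradientDescentOptimizer(0.1)    # global optimizer

grad_list = []
opt_local = tf.no_op()  # stands in for "no previous local update" at t == 0
for t in range(update_window):
    # Only compute gradients after the previous local update has run.
    with tf.control_dependencies([opt_local]):
        grads, varss = zip(*loptimizer.compute_gradients(loss, var_list=tf.local_variables()))
    grad_list.append(grads)                                    # remember this step's gradients
    opt_local = loptimizer.apply_gradients(zip(grads, varss))  # local parameter update

# Average each variable's gradients over the window and push them to the ps copy.
mean_grads = [tf.reduce_mean(tf.stack([g[i] for g in grad_list]), axis=0)
              for i in range(len(varss))]
opt = optimizer.apply_gradients(zip(mean_grads, [local_to_global[v] for v in varss]))

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    # Running opt forces the earlier local updates through the control
    # dependencies, then applies the averaged gradients to w_global.
    sess.run(opt)
    print(sess.run([w_local, w_global]))
```

Averaging per variable with `tf.stack` keeps the sketch valid even when the trainable variables have different shapes.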
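For the second hunk: `tf.train.MonitoredTrainingSession` handles session creation, initialization, checkpointing, and hooks across workers, and the chief then runs `assign_global` once and sleeps briefly so the other workers can connect before training starts. The following is a minimal single-process sketch of the same session arguments, with a hypothetical toy objective and stop hook; the distributed pieces (`master=server.target`, `config`, `scaffold`, `hooks`, `assign_global`) come from the real script and are omitted here.

```python
import tensorflow as tf  # TF1-style graph mode (tf.compat.v1 under TF2)

x = tf.get_variable("x", shape=[], initializer=tf.zeros_initializer())
global_step = tf.train.get_or_create_global_step()
train_op = tf.train.GradientDescentOptimizer(0.5).minimize(
    tf.square(x - 3.0), global_step=global_step)  # toy objective for illustration

# Single-process stand-in: no master/config/scaffold, chief by construction.
sess = tf.train.MonitoredTrainingSession(
    is_chief=True,
    checkpoint_dir='logdir',       # same directory as in the diff
    save_checkpoint_secs=1,        # checkpoint every second, as in the diff
    hooks=[tf.train.StopAtStepHook(last_step=100)])

with sess:
    while not sess.should_stop():  # StopAtStepHook ends the loop at step 100
        sess.run(train_op)
```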