
Commit 8edde58

Thomas Mulc committed: hopefully fixed all space issues
1 parent 086dec6 · commit 8edde58

File tree

10 files changed: +143 −142 lines


ADAG/ADAG.py

+1-1
@@ -51,7 +51,7 @@ def main():
 name='local_step',collections=['local_non_trainable'])

 with tf.device(tf.train.replica_device_setter(ps_tasks=n_pss,
- worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):
+ worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):
 global_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='global_step')
 target = tf.constant(100.,shape=[2],dtype=tf.float32)
 loss = tf.reduce_mean(tf.square(c-target))

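The hunk above only re-wraps the worker_device argument. For readers skimming the diff, here is a minimal, self-contained sketch of the device-placement pattern it touches (TensorFlow 1.x assumed; the literal values stand in for the script's n_pss and FLAGS):

import tensorflow as tf

# Sketch only (TF 1.x): one parameter server, worker task 0.
with tf.device(tf.train.replica_device_setter(
        ps_tasks=1,
        worker_device="/job:worker/task:0")):
    # Variables created inside this block are placed on the ps job;
    # other ops default to the worker device named above.
    global_step = tf.Variable(0, dtype=tf.int32, trainable=False,
                              name='global_step')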
DOWNPOUR-Easy/DOWNPOUR.py

+24-24
@@ -20,35 +20,35 @@ def main():

 # Server Setup
 cluster_spec = {'ps':['localhost:2222'],
- 'worker':['localhost:2223','localhost:2224']}
+ 'worker':['localhost:2223','localhost:2224']}
 n_pss = len(cluster_spec['ps']) #the number of parameter servers
 n_workers = len(cluster_spec['worker']) #the number of worker nodes
 cluster = tf.train.ClusterSpec(cluster_spec) #allows this node know about all other nodes

 if FLAGS.job_name == 'ps': #checks if parameter server
 server = tf.train.Server(cluster,
- job_name="ps",
- task_index=FLAGS.task_index,
- config=config)
+ job_name="ps",
+ task_index=FLAGS.task_index,
+ config=config)
 server.join()
 else: #it must be a worker server
 is_chief = (FLAGS.task_index == 0) #checks if this is the chief node
 server = tf.train.Server(cluster,
- job_name="worker",
- task_index=FLAGS.task_index,
- config=config)
+ job_name="worker",
+ task_index=FLAGS.task_index,
+ config=config)

 # Graph
 # Local operations
 with tf.device("/job:worker/replica:0/task:%d" % FLAGS.task_index):
 a = tf.Variable(tf.constant(0.,shape=[2]),dtype=tf.float32,
- collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ collections=[tf.GraphKeys.LOCAL_VARIABLES])
 b = tf.Variable(tf.constant(0.,shape=[2]),dtype=tf.float32,
- collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ collections=[tf.GraphKeys.LOCAL_VARIABLES])
 c=a+b

 local_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='local_step',
- collections=['local_non_trainable'])
+ collections=['local_non_trainable'])
 lr = .0001
 loptimizer = tf.train.GradientDescentOptimizer(lr*FLAGS.task_index) #local optimizer

@@ -62,20 +62,20 @@ def main():
 if t != 0:
 with tf.control_dependencies([opt_local]): #compute gradients only if the local opt was run
 grads, varss = zip(*loptimizer.compute_gradients(
- loss,var_list=tf.local_variables()))
+ loss,var_list=tf.local_variables()))
 else:
 grads, varss = zip(*loptimizer.compute_gradients(
- loss,var_list=tf.local_variables()))
+ loss,var_list=tf.local_variables()))
 grad_list.append(grads) #add gradients to the list
 opt_local = loptimizer.apply_gradients(zip(grads,varss),
- global_step=local_step) #update local parameters
+ global_step=local_step) #update local parameters

 grads = tf.reduce_sum(grad_list,axis=0) #sum updates before applying globally
 grads = tuple([grads[i]for i in range(len(varss))])


- with tf.device(tf.train.replica_device_setter(ps_tasks=n_pss\
- ,worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):
+ with tf.device(tf.train.replica_device_setter(ps_tasks=n_pss,
+ worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):

 global_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='global_step')

@@ -86,8 +86,8 @@ def main():
 # create global variables and/or references
 local_to_global, global_to_local = create_global_variables()
 opt = optimizer.apply_gradients(
- zip(grads,[local_to_global[v] for v in varss])
- ,global_step=global_step) #apply the gradients to variables on ps
+ zip(grads,[local_to_global[v] for v in varss])
+ ,global_step=global_step) #apply the gradients to variables on ps

 # Pull params from global server
 with tf.control_dependencies([opt]):

@@ -103,7 +103,7 @@ def main():
 # Init ops
 init = tf.global_variables_initializer() # for global variables
 init_local = tf.variables_initializer(tf.local_variables() \
- +tf.get_collection('local_non_trainable'))#for local variables
+ +tf.get_collection('local_non_trainable'))#for local variables

 # Session
 stop_hook = tf.train.StopAtStepHook(last_step=60)

@@ -112,12 +112,12 @@ def main():

 # Monitored Training Session
 sess = tf.train.MonitoredTrainingSession(master=server.target,
- is_chief=is_chief,
- config=config,
- scaffold=scaff,
- hooks=hooks,
- save_checkpoint_secs=1,
- checkpoint_dir='logdir')
+ is_chief=is_chief,
+ config=config,
+ scaffold=scaff,
+ hooks=hooks,
+ save_checkpoint_secs=1,
+ checkpoint_dir='logdir')

 if is_chief:
 sess.run(assign_global) #Assigns chief's initial values to ps

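These hunks re-indent the local-accumulation loop. As a reminder of what that loop does, here is a condensed, self-contained sketch of the same idea (TF 1.x assumed; the toy variable, loss, and the use of tf.add_n instead of tf.reduce_sum are illustrative, not the script's exact code):

import tensorflow as tf

# Condensed DOWNPOUR-style sketch: run a few local steps, remember each step's
# gradients, then sum them for a single global update.
a = tf.Variable(tf.zeros([2]), collections=[tf.GraphKeys.LOCAL_VARIABLES])
loss = tf.reduce_mean(tf.square(a - 1.0))
loptimizer = tf.train.GradientDescentOptimizer(0.0001)

grad_list = []
opt_local = tf.no_op()                       # nothing to wait on before step 0
for t in range(3):                           # local steps per global update
    with tf.control_dependencies([opt_local]):
        grads, varss = zip(*loptimizer.compute_gradients(
            loss, var_list=tf.local_variables()))
    grad_list.append(grads)                  # keep this step's gradients
    opt_local = loptimizer.apply_gradients(zip(grads, varss))

# Sum the per-step gradients; these would then be applied to the global copies.
summed_grads = [tf.add_n([step[i] for step in grad_list])
                for i in range(len(varss))]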
DOWNPOUR/DOWNPOUR.py

+26-27
@@ -22,35 +22,35 @@ def main():

 #Server Setup
 cluster_spec = {'ps':['localhost:2222'],
- 'worker':['localhost:2223','localhost:2224']}
+ 'worker':['localhost:2223','localhost:2224']}
 n_pss = len(cluster_spec['ps']) #the number of parameter servers
 n_workers = len(cluster_spec['worker']) #the number of worker nodes
 cluster = tf.train.ClusterSpec(cluster_spec) #allows this node know about all other nodes

 if FLAGS.job_name == 'ps': #checks if parameter server
 server = tf.train.Server(cluster,
- job_name="ps",
- task_index=FLAGS.task_index,
- config=config)
+ job_name="ps",
+ task_index=FLAGS.task_index,
+ config=config)
 server.join()
 else: #it must be a worker server
 is_chief = (FLAGS.task_index == 0) #checks if this is the chief node
 server = tf.train.Server(cluster,
- job_name="worker",
- task_index=FLAGS.task_index,
- config=config)
+ job_name="worker",
+ task_index=FLAGS.task_index,
+ config=config)

 # Graph
 # Local operations
 with tf.device("/job:worker/replica:0/task:%d" % FLAGS.task_index):
 a = tf.Variable(tf.constant(0.,shape=[2]),dtype=tf.float32,
- collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ collections=[tf.GraphKeys.LOCAL_VARIABLES])
 b = tf.Variable(tf.constant(0.,shape=[2]),dtype=tf.float32,
- collections=[tf.GraphKeys.LOCAL_VARIABLES])
+ collections=[tf.GraphKeys.LOCAL_VARIABLES])
 c=a+b

 local_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='local_step',
- collections=['local_non_trainable'])
+ collections=['local_non_trainable'])
 lr = .0001

 #loptimizer = tf.train.GradientDescentOptimizer(lr*FLAGS.task_index) #local optimizer

@@ -66,13 +66,13 @@ def main():
 if t != 0:
 with tf.control_dependencies([opt_local]): #compute gradients only if the local opt was run
 grads, varss = zip(*loptimizer.compute_gradients( \
- loss,var_list=tf.local_variables()))
+ loss,var_list=tf.local_variables()))
 else:
 grads, varss = zip(*loptimizer.compute_gradients( \
- loss,var_list=tf.local_variables()))
+ loss,var_list=tf.local_variables()))
 grad_list.append(grads) #add gradients to the list
 opt_local = loptimizer.apply_gradients(zip(grads,varss),
- global_step=local_step) #update local parameters
+ global_step=local_step) #update local parameters

 grads = tf.reduce_sum(grad_list,axis=0) #sum updates before applying globally
 grads = tuple([grads[i]for i in range(len(varss))])

@@ -83,9 +83,8 @@ def main():
 # delete the variables from the global collection
 clear_global_collection()

- with tf.device(tf.train.replica_device_setter(
- ps_tasks=n_pss,
- worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):
+ with tf.device(tf.train.replica_device_setter(ps_tasks=n_pss,
+ worker_device="/job:%s/task:%d" % (FLAGS.job_name,FLAGS.task_index))):
 global_step = tf.Variable(0,dtype=tf.int32,trainable=False,name='global_step')

 # all workers use the same learning rate and it is decided on by the task 0

@@ -95,8 +94,8 @@ def main():
 # create global variables and/or references
 local_to_global, global_to_local = create_global_variables(lopt_vars)
 opt = optimizer.apply_gradients(
- zip(grads,[local_to_global[v] for v in varss])
- ,global_step=global_step) #apply the gradients to variables on ps
+ zip(grads,[local_to_global[v] for v in varss])
+ ,global_step=global_step) #apply the gradients to variables on ps

 # Pull params from global server
 with tf.control_dependencies([opt]):

@@ -111,7 +110,7 @@ def main():
 # Init ops
 init = tf.global_variables_initializer() # for global variables
 init_local = tf.variables_initializer(tf.local_variables() \
- +tf.get_collection('local_non_trainable')) #for local variables
+ +tf.get_collection('local_non_trainable')) #for local variables

 # Session
 stop_hook = tf.train.StopAtStepHook(last_step=60)

@@ -120,12 +119,12 @@ def main():

 # Monitored Training Session
 sess = tf.train.MonitoredTrainingSession(master=server.target,
- is_chief=is_chief,
- config=config,
- scaffold=scaff,
- hooks=hooks,
- save_checkpoint_secs=1,
- checkpoint_dir='logdir')
+ is_chief=is_chief,
+ config=config,
+ scaffold=scaff,
+ hooks=hooks,
+ save_checkpoint_secs=1,
+ checkpoint_dir='logdir')

 if is_chief:
 sess.run(assign_global) #Assigns chief's initial values to ps

@@ -207,8 +206,8 @@ def create_global_variables(local_optimizer_vars = []):
 shape = v.shape,
 dtype = v.dtype,
 trainable=True,
- collections=[tf.GraphKeys.GLOBAL_VARIABLES, \
- tf.GraphKeys.TRAINABLE_VARIABLES])
+ collections=[tf.GraphKeys.GLOBAL_VARIABLES,
+ tf.GraphKeys.TRAINABLE_VARIABLES])
 local_to_global[v] = v_g
 global_to_local[v_g] = v
 return local_to_global,global_to_local

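The final hunk re-wraps the collections argument inside create_global_variables. For context, here is a hedged sketch of the local-to-global mirroring that function performs (TF 1.x assumed; the helper name and the 'global/' prefix are illustrative, not taken from the script):

import tensorflow as tf

# Sketch: for every worker-local variable, create a trainable twin on the
# parameter servers and keep dictionaries mapping between the two.
def create_global_mirrors(local_vars, ps_tasks=1):
    local_to_global, global_to_local = {}, {}
    with tf.device(tf.train.replica_device_setter(ps_tasks=ps_tasks)):
        for v in local_vars:
            v_g = tf.get_variable('global/' + v.op.name,
                                  shape=v.shape,
                                  dtype=v.dtype,
                                  trainable=True,
                                  collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                                               tf.GraphKeys.TRAINABLE_VARIABLES])
            local_to_global[v] = v_g
            global_to_local[v_g] = v
    return local_to_global, global_to_local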
Distributed-Setup/dist_setup.py

+10-9
@@ -17,19 +17,19 @@
 def main():
 # Distributed Baggage
 cluster = tf.train.ClusterSpec({
- 'ps':['localhost:2222'],
- 'worker':['localhost:2223']
- }) #lets this node know about all other nodes
+ 'ps':['localhost:2222'],
+ 'worker':['localhost:2223']
+ }) #lets this node know about all other nodes
 if FLAGS.job_name == 'ps': #checks if parameter server
 server = tf.train.Server(cluster,
- job_name="ps",
- task_index=FLAGS.task_index)
+ job_name="ps",
+ task_index=FLAGS.task_index)
 server.join()
 else:
 is_chief = (FLAGS.task_index == 0) #checks if this is the chief node
 server = tf.train.Server(cluster,
- job_name="worker",
- task_index=FLAGS.task_index)
+ job_name="worker",
+ task_index=FLAGS.task_index)

 # Graph
 with tf.device('/cpu:0'):

@@ -44,8 +44,9 @@ def main():

 # Session
 # Monitored Training Session
- sess = tf.train.MonitoredTrainingSession(master=server.target,
- is_chief=is_chief)
+ sess = tf.train.MonitoredTrainingSession(
+ master=server.target,
+ is_chief=is_chief)
 for i in range(1000):
 if sess.should_stop(): break
 sess.run(opt)

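The second hunk only re-wraps the MonitoredTrainingSession call. Here is a self-contained, single-process sketch of that session pattern (TF 1.x assumed, using an in-process server instead of the script's ps/worker cluster):

import tensorflow as tf

# Single-process stand-in for the worker side of the script above.
server = tf.train.Server.create_local_server()
w = tf.Variable(0.0)
opt = tf.train.GradientDescentOptimizer(0.01).minimize(tf.square(w - 1.0))

# MonitoredTrainingSession initializes variables and, for the chief,
# handles checkpointing and recovery.
with tf.train.MonitoredTrainingSession(master=server.target,
                                       is_chief=True) as sess:
    for _ in range(100):
        if sess.should_stop():
            break
        sess.run(opt)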
Distributed-Setup/dist_setup_sup.py

+5-5
@@ -17,9 +17,9 @@
 def main():
 # Distributed Baggage
 cluster = tf.train.ClusterSpec({
- 'ps':['localhost:2222'],
- 'worker':['localhost:2223']
- }) #lets this node know about all other nodes
+ 'ps':['localhost:2222'],
+ 'worker':['localhost:2223']
+ }) #lets this node know about all other nodes
 if FLAGS.job_name == 'ps': #checks if parameter server
 server = tf.train.Server(cluster,job_name="ps",task_index=FLAGS.task_index)
 server.join()

@@ -40,8 +40,8 @@ def main():
 # Session
 # Supervisor
 sv = tf.train.Supervisor(logdir=os.getcwd()+log_dir,
- is_chief=is_chief,
- save_model_secs=30)
+ is_chief=is_chief,
+ save_model_secs=30)
 sess = sv.prepare_or_wait_for_session(server.target)
 for i in range(1000):
 if sv.should_stop(): break

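For comparison with the MonitoredTrainingSession variant above, a self-contained sketch of the Supervisor flow this file uses (TF 1.x assumed; Supervisor was later deprecated in favor of MonitoredTrainingSession; the logdir path is illustrative):

import tensorflow as tf

# Supervisor builds the init op and saver, then hands back a managed session.
server = tf.train.Server.create_local_server()
w = tf.Variable(0.0)
opt = tf.train.GradientDescentOptimizer(0.01).minimize(tf.square(w - 1.0))

sv = tf.train.Supervisor(logdir='/tmp/sup_logdir',   # checkpoints/summaries go here
                         is_chief=True,
                         save_model_secs=30)         # checkpoint every 30 seconds
sess = sv.prepare_or_wait_for_session(server.target)
for _ in range(100):
    if sv.should_stop():
        break
    sess.run(opt)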
Hogwild/Hogwild.py

+9-9
@@ -18,19 +18,19 @@
 def main():
 # Server Setup
 cluster = tf.train.ClusterSpec({
- 'ps':['localhost:2222'],
- 'worker':['localhost:2223','localhost:2224']
- }) #allows this node know about all other nodes
+ 'ps':['localhost:2222'],
+ 'worker':['localhost:2223','localhost:2224']
+ }) #allows this node know about all other nodes
 if FLAGS.job_name == 'ps': #checks if parameter server
 server = tf.train.Server(cluster,
- job_name="ps",
- task_index=FLAGS.task_index)
+ job_name="ps",
+ task_index=FLAGS.task_index)
 server.join()
 else:
 is_chief = (FLAGS.task_index == 0) #checks if this is the chief node
 server = tf.train.Server(cluster,
- job_name="worker",
- task_index=FLAGS.task_index)
+ job_name="worker",
+ task_index=FLAGS.task_index)

 # Graph
 with tf.device('/cpu:0'):

@@ -45,8 +45,8 @@ def main():

 # Session
 sv = tf.train.Supervisor(logdir=os.getcwd()+log_dir,
- is_chief=is_chief,
- save_model_secs=30)
+ is_chief=is_chief,
+ save_model_secs=30)
 sess = sv.prepare_or_wait_for_session(server.target)
 for i in range(1000):
 if sv.should_stop(): break

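As a reminder of what makes this the Hogwild example: every worker applies its gradients to the shared parameters without any locking, so updates from the two workers interleave freely. A hedged sketch of that idea (TF 1.x assumed; the explicit ps/worker device strings here are illustrative, the script itself pins its small graph to /cpu:0):

import tensorflow as tf

# Shared parameters live on the parameter server...
with tf.device('/job:ps/task:0'):
    w = tf.Variable(tf.zeros([2]), name='w')

# ...and each worker builds the same training op against them.
with tf.device('/job:worker/task:0'):        # the other worker uses task:1
    loss = tf.reduce_mean(tf.square(w - 1.0))
    # use_locking=False (the default) gives lock-free, Hogwild-style updates.
    opt = tf.train.GradientDescentOptimizer(0.1, use_locking=False)
    train_op = opt.minimize(loss)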
Multiple-GPUs-Single-Machine/dist_mult_gpu_sing_mach.py

+11-11
@@ -14,19 +14,19 @@
 def main():
 # Server Setup
 cluster = tf.train.ClusterSpec({
- 'ps':['localhost:2222'],
- 'worker':['localhost:2223','localhost:2224']
- }) #allows this node know about all other nodes
+ 'ps':['localhost:2222'],
+ 'worker':['localhost:2223','localhost:2224']
+ }) #allows this node know about all other nodes
 if FLAGS.job_name == 'ps': #checks if parameter server
 with tf.device('/cpu:0'):
 server = tf.train.Server(cluster,
- job_name="ps",
- task_index=FLAGS.task_index)
+ job_name="ps",
+ task_index=FLAGS.task_index)
 server.join()
 else:
 is_chief = (FLAGS.task_index == 0) #checks if this is the chief node
 server = tf.train.Server(cluster,job_name="worker",
- task_index=FLAGS.task_index,config=config)
+ task_index=FLAGS.task_index,config=config)
 # Graph
 with tf.device('/gpu:0'):
 a = tf.Variable(tf.truncated_normal(shape=[2]),dtype=tf.float32)

@@ -40,13 +40,13 @@ def main():

 # Session
 sv = tf.train.Supervisor(logdir=os.getcwd()+log_dir,
- is_chief=is_chief,
- save_model_secs=30)
+ is_chief=is_chief,
+ save_model_secs=30)
 gpu_options = tf.GPUOptions(allow_growth=True,
- allocator_type="BFC",
- visible_device_list="%d"%FLAGS.task_index)
+ allocator_type="BFC",
+ visible_device_list="%d"%FLAGS.task_index)
 config = tf.ConfigProto(gpu_options=gpu_options,
- allow_soft_placement=True)
+ allow_soft_placement=True)
 sess = sv.prepare_or_wait_for_session(server.target,config=config)
 for i in range(1000):
 if sv.should_stop(): break

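The second hunk re-indents the per-worker GPU configuration. A short sketch of what those options do (TF 1.x assumed; task_index here is a stand-in for the parsed --task_index flag):

import tensorflow as tf

task_index = 0  # stand-in for FLAGS.task_index
gpu_options = tf.GPUOptions(
    allow_growth=True,                      # grab GPU memory as needed, not all at once
    allocator_type="BFC",                   # best-fit-with-coalescing allocator
    visible_device_list=str(task_index))    # each worker sees only its own GPU
config = tf.ConfigProto(gpu_options=gpu_options,
                        allow_soft_placement=True)  # fall back if a device is unavailable
sess = tf.Session(config=config)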