@@ -22,35 +22,35 @@ def main():

    #Server Setup
    cluster_spec = {'ps': ['localhost:2222'],
-                   'worker': ['localhost:2223', 'localhost:2224']}
+                   'worker': ['localhost:2223', 'localhost:2224']}
    n_pss = len(cluster_spec['ps'])  #the number of parameter servers
    n_workers = len(cluster_spec['worker'])  #the number of worker nodes
    cluster = tf.train.ClusterSpec(cluster_spec)  #allows this node to know about all other nodes

    if FLAGS.job_name == 'ps':  #checks if parameter server
        server = tf.train.Server(cluster,
-                                job_name="ps",
-                                task_index=FLAGS.task_index,
-                                config=config)
+                                job_name="ps",
+                                task_index=FLAGS.task_index,
+                                config=config)
        server.join()
    else:  #it must be a worker server
        is_chief = (FLAGS.task_index == 0)  #checks if this is the chief node
        server = tf.train.Server(cluster,
-                                job_name="worker",
-                                task_index=FLAGS.task_index,
-                                config=config)
+                                job_name="worker",
+                                task_index=FLAGS.task_index,
+                                config=config)

        # Graph
        # Local operations
        with tf.device("/job:worker/replica:0/task:%d" % FLAGS.task_index):
            a = tf.Variable(tf.constant(0., shape=[2]), dtype=tf.float32,
-                           collections=[tf.GraphKeys.LOCAL_VARIABLES])
+                           collections=[tf.GraphKeys.LOCAL_VARIABLES])
            b = tf.Variable(tf.constant(0., shape=[2]), dtype=tf.float32,
-                           collections=[tf.GraphKeys.LOCAL_VARIABLES])
+                           collections=[tf.GraphKeys.LOCAL_VARIABLES])
            c = a + b

            local_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='local_step',
-                                    collections=['local_non_trainable'])
+                                    collections=['local_non_trainable'])
            lr = .0001

            #loptimizer = tf.train.GradientDescentOptimizer(lr*FLAGS.task_index) #local optimizer
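For context, the server setup in this hunk reads FLAGS.job_name and FLAGS.task_index, whose definitions lie outside the diff. A minimal sketch of how such flags are commonly declared in TF 1.x follows; the defaults and help strings are assumptions for illustration, not taken from this commit.

# Illustrative only: flag definitions the hunk above assumes (not part of the diff).
import tensorflow as tf

tf.app.flags.DEFINE_string('job_name', '', "Either 'ps' or 'worker'")
tf.app.flags.DEFINE_integer('task_index', 0, 'Index of the task within its job')
FLAGS = tf.app.flags.FLAGS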
@@ -66,13 +66,13 @@ def main():
                if t != 0:
                    with tf.control_dependencies([opt_local]):  #compute gradients only if the local opt was run
                        grads, varss = zip(*loptimizer.compute_gradients( \
-                           loss, var_list=tf.local_variables()))
+                           loss, var_list=tf.local_variables()))
                else:
                    grads, varss = zip(*loptimizer.compute_gradients( \
-                       loss, var_list=tf.local_variables()))
+                       loss, var_list=tf.local_variables()))
                grad_list.append(grads)  #add gradients to the list
                opt_local = loptimizer.apply_gradients(zip(grads, varss),
-                                                      global_step=local_step)  #update local parameters
+                                                      global_step=local_step)  #update local parameters

            grads = tf.reduce_sum(grad_list, axis=0)  #sum updates before applying globally
            grads = tuple([grads[i] for i in range(len(varss))])
@@ -83,9 +83,8 @@ def main():
        # delete the variables from the global collection
        clear_global_collection()

-       with tf.device(tf.train.replica_device_setter(
-               ps_tasks=n_pss,
-               worker_device="/job:%s/task:%d" % (FLAGS.job_name, FLAGS.task_index))):
+       with tf.device(tf.train.replica_device_setter(ps_tasks=n_pss,
+               worker_device="/job:%s/task:%d" % (FLAGS.job_name, FLAGS.task_index))):
            global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            # all workers use the same learning rate, and it is decided by task 0
@@ -95,8 +94,8 @@ def main():
            # create global variables and/or references
            local_to_global, global_to_local = create_global_variables(lopt_vars)
            opt = optimizer.apply_gradients(
-                   zip(grads, [local_to_global[v] for v in varss])
-                   , global_step=global_step)  #apply the gradients to variables on ps
+                   zip(grads, [local_to_global[v] for v in varss])
+                   , global_step=global_step)  #apply the gradients to variables on ps

            # Pull params from global server
            with tf.control_dependencies([opt]):
@@ -111,7 +110,7 @@ def main():
        # Init ops
        init = tf.global_variables_initializer()  # for global variables
        init_local = tf.variables_initializer(tf.local_variables() \
-               + tf.get_collection('local_non_trainable'))  #for local variables
+               + tf.get_collection('local_non_trainable'))  #for local variables

        # Session
        stop_hook = tf.train.StopAtStepHook(last_step=60)
@@ -120,12 +119,12 @@ def main():

        # Monitored Training Session
        sess = tf.train.MonitoredTrainingSession(master=server.target,
-               is_chief=is_chief,
-               config=config,
-               scaffold=scaff,
-               hooks=hooks,
-               save_checkpoint_secs=1,
-               checkpoint_dir='logdir')
+               is_chief=is_chief,
+               config=config,
+               scaffold=scaff,
+               hooks=hooks,
+               save_checkpoint_secs=1,
+               checkpoint_dir='logdir')

        if is_chief:
            sess.run(assign_global)  #Assigns chief's initial values to ps
@@ -207,8 +206,8 @@ def create_global_variables(local_optimizer_vars = []):
                    shape=v.shape,
                    dtype=v.dtype,
                    trainable=True,
-                   collections=[tf.GraphKeys.GLOBAL_VARIABLES, \
-                                tf.GraphKeys.TRAINABLE_VARIABLES])
+                   collections=[tf.GraphKeys.GLOBAL_VARIABLES,
+                                tf.GraphKeys.TRAINABLE_VARIABLES])
                local_to_global[v] = v_g
                global_to_local[v_g] = v
    return local_to_global, global_to_local
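For reference, running this example takes one OS process per entry in cluster_spec (one ps and two workers, all on localhost). A minimal launch sketch follows; the filename dist_adag.py is hypothetical, and it assumes the script parses the --job_name / --task_index flags used above.

# Illustrative only: launching the ps and the two workers locally (hypothetical filename).
import subprocess, sys

procs = [subprocess.Popen([sys.executable, 'dist_adag.py', '--job_name=ps', '--task_index=0']),
         subprocess.Popen([sys.executable, 'dist_adag.py', '--job_name=worker', '--task_index=0']),  # chief
         subprocess.Popen([sys.executable, 'dist_adag.py', '--job_name=worker', '--task_index=1'])]
for p in procs:
    p.wait()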