SymbioticLab · fanlai0990 · Jan 25, 2023 · Dec 22, 2022 · Jan 5, 2023 · Jan 9, 2023
diff --git a/benchmark/configs/async_fl/async_fl.yml b/benchmark/configs/async_fl/async_fl.yml
diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml
@@ -35,7 +35,7 @@ job_conf:
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
     - eval_interval: 5                     # How many rounds to run a testing on the testing set
     - rounds: 600                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml
@@ -54,7 +54,7 @@ job_conf:
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml
@@ -58,7 +58,7 @@ job_conf:
     - device_conf_file: /FedScale/benchmark/dataset/data/device_info/client_device_capacity     # Path of the client trace
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo
+#    - model_zoo: fedscale-torch-zoo
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 20                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml
@@ -38,7 +38,7 @@ job_conf:
     - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity     # Path of the client trace
     - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo
+#    - model_zoo: fedscale-torch-zoo
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 1000                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml
@@ -36,7 +36,7 @@ job_conf:
     - data_set: cifar10                     # Dataset: openImg, google_speech, stackoverflow
     - data_dir: /FedScale/benchmark/dataset/data/    # Path of the dataset
     - model: shufflenet_v2_x2_0              # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
+#    - model_zoo: fedscale-torch-zoo              # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 0                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml
@@ -40,7 +40,7 @@ job_conf:
     - device_conf_file: /FedScale/benchmark/dataset/data/device_info/client_device_capacity     # Path of the client trace
     - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace
     - model: resnet18             # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs
-#    - model_zoo: fedscale-zoo
+#    - model_zoo: fedscale-torch-zoo
     - eval_interval: 10                     # How many rounds to run a testing on the testing set
     - rounds: 21                          # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
     - filter_less: 21                       # Remove clients w/ less than 21 samples

diff --git a/benchmark/configs/tensorflow_engine/tf-engine.yml b/benchmark/configs/tensorflow_engine/tf-engine.yml
diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml
@@ -0,0 +1,50 @@
+# Configuration file for running the CIFAR-10 job with the TensorFlow backend
+
+# ========== Cluster configuration ==========
+# ip address of the parameter server (need 1 GPU process)
+ps_ip: localhost
+
+# IP address of each worker, followed by the number of processes available on each GPU of that node
+# Note: if the ps and a worker are collocated on the same GPU, decrease the number of available processes on that GPU by 1
+# E.g., if the master node has 4 available processes, 1 is taken by the ps, so the worker entry should be set to: worker:3
+worker_ips:
+    - localhost:[1] # worker_ip: [(# processes on gpu) for gpu in available_gpus] eg. 10.0.0.2:[4,4,4,4] This node has 4 gpus, each gpu has 4 processes.
+
+exp_path: $FEDSCALE_HOME/fedscale/cloud
+
+# Entry-point scripts of the executor and aggregator, located under $exp_path
+executor_entry: execution/executor.py
+
+aggregator_entry: aggregation/aggregator.py
+
+auth:
+    ssh_user: ""
+    ssh_private_key: ~/.ssh/id_rsa
+
+# Commands to run, in order, before the FedScale job itself is launched
+setup_commands:
+    - source $HOME/anaconda3/bin/activate fedscale
+
+# ========== Additional job configuration ==========
+# Default parameters are specified in config_parser.py, where a fuller description of each parameter can be found
+
+job_conf:
+    - job_name: tf-cifar10               # Generate logs under this folder: log_path/job_name/time_stamp
+    - log_path: $FEDSCALE_HOME/benchmark # Path of log files
+    - num_participants: 4                # Number of participants per round, we use K=100 in our paper, large K will be much slower
+    - data_set: cifar10                  # Dataset: openImg, google_speech, stackoverflow
+    - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/    # Path of the dataset
+    - model: resnet50                    # Need to define the model in tf_aggregator.py
+    - model_zoo: fedscale-tensorflow-zoo
+    - eval_interval: 5000                # How many rounds to run a testing on the testing set
+    - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
+    - filter_less: 0                     # Remove clients w/ fewer samples than this threshold (0 keeps every client)
+    - num_loaders: 2
+    - local_steps: 20
+    - learning_rate: 0.001
+    - input_shape: 32 32 3
+    - batch_size: 32
+    - num_classes: 10
+    - test_bsz: 32
+    - use_cuda: False
+    - engine: 'tensorflow'
diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml
@@ -0,0 +1,50 @@
+# Configuration file for running the FEMNIST job with the TensorFlow backend
+
+# ========== Cluster configuration ==========
+# ip address of the parameter server (need 1 GPU process)
+ps_ip: localhost
+
+# IP address of each worker, followed by the number of processes available on each GPU of that node
+# Note: if the ps and a worker are collocated on the same GPU, decrease the number of available processes on that GPU by 1
+# E.g., if the master node has 4 available processes, 1 is taken by the ps, so the worker entry should be set to: worker:3
+worker_ips:
+    - localhost:[1] # worker_ip: [(# processes on gpu) for gpu in available_gpus] eg. 10.0.0.2:[4,4,4,4] This node has 4 gpus, each gpu has 4 processes.
+
+exp_path: $FEDSCALE_HOME/fedscale/cloud
+
+# Entry-point scripts of the executor and aggregator, located under $exp_path
+executor_entry: execution/executor.py
+
+aggregator_entry: aggregation/aggregator.py
+
+auth:
+    ssh_user: ""
+    ssh_private_key: ~/.ssh/id_rsa
+
+# Commands to run, in order, before the FedScale job itself is launched
+setup_commands:
+    - source $HOME/anaconda3/bin/activate fedscale
+
+# ========== Additional job configuration ==========
+# Default parameters are specified in config_parser.py, where a fuller description of each parameter can be found
+
+job_conf:
+    - job_name: tf-femnist               # Generate logs under this folder: log_path/job_name/time_stamp
+    - log_path: $FEDSCALE_HOME/benchmark # Path of log files
+    - num_participants: 4                # Number of participants per round, we use K=100 in our paper, large K will be much slower
+    - data_set: femnist                  # Dataset: openImg, google_speech, stackoverflow
+    - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist    # Path of the dataset
+    - model: resnet50                    # Need to define the model in tf_aggregator.py
+    - model_zoo: fedscale-tensorflow-zoo
+    - eval_interval: 5000                # How many rounds to run a testing on the testing set
+    - rounds: 200                        # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds
+    - filter_less: 0                     # Remove clients w/ fewer samples than this threshold (0 keeps every client)
+    - num_loaders: 2
+    - local_steps: 20
+    - learning_rate: 0.001
+    - batch_size: 32
+    - input_shape: 32 32 3
+    - num_classes: 62
+    - test_bsz: 32
+    - use_cuda: False
+    - engine: 'tensorflow'