|
| 1 | +# Configuration file of dry run experiment using Aggregator & Executor containers and k8s for container deployment |
| 2 | + |
| 3 | +# ========== Container configuration ========== |
| 4 | +# whether to use container deployment, and which backend to use (k8s in this config) |
| 5 | +use_container: k8s |
| 6 | + |
| 7 | +# containers need a data-path mount to facilitate dataset reuse |
| 8 | +# We assume the same data-path is used on all host machines |
| 9 | +data_path: $FEDSCALE_HOME/benchmark |
| 10 | + |
| 11 | +# ========== Cluster configuration ========== |
| 12 | +# k8s-specific |
| 13 | +# number of aggregators, right now we only support a single aggregator |
| 14 | +# placeholder for supporting hierarchical aggregator in the future |
| 15 | +num_aggregators: 1 |
| 16 | + |
| 17 | +# k8s-specific |
| 18 | +# number of executors |
| 19 | +num_executors: 2 |
| 20 | + |
| 21 | +auth: |
| 22 | + ssh_user: "" |
| 23 | + ssh_private_key: ~/.ssh/id_rsa |
| 24 | + |
| 25 | +# commands to run, in order, before FAR can be launched |
| 26 | +setup_commands: |
| 27 | + |
| 28 | + |
| 29 | +# ========== Additional job configuration ========== |
| 30 | +# Default parameters are specified in config_parser.py, wherein more description of the parameter can be found |
| 31 | + |
| 32 | +# We use fixed paths in job_conf as they will be accessed inside containers |
| 33 | +job_conf: |
| 34 | + - job_name: dryrun_k8s # Generate logs under this folder: log_path/job_name/time_stamp |
| 35 | + - log_path: /FedScale/benchmark # Path of log files |
| 36 | + - num_participants: 4 # Number of participants per round. We use K=100 in our paper; a large K will be much slower |
| 37 | + - data_set: cifar10 # Dataset: e.g., cifar10, openImg, google_speech, stackoverflow |
| 38 | + - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset |
| 39 | + - model: resnet18 # Models: e.g., shufflenet_v2_x2_0, mobilenet_v2, resnet34, albert-base-v2. Disabled option (commented out): - gradient_policy: yogi # {"fed-yogi", "fed-prox", "fed-avg"}, "fed-avg" by default |
| 40 | + - eval_interval: 10 # Number of rounds between evaluations on the testing set |
| 41 | + - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds |
| 42 | + - filter_less: 0 # Remove clients w/ fewer than this many samples |
| 43 | + - num_loaders: 2 |
| 44 | + - local_steps: 20 |
| 45 | + - learning_rate: 0.001 |
| 46 | + - batch_size: 32 |
| 47 | + - test_bsz: 32 |
| 48 | + - use_cuda: False |
0 commit comments