diff --git a/benchmark/configs/async_fl/async_fl.yml b/benchmark/configs/async_fl/async_fl.yml deleted file mode 100644 index 4d1448ef..00000000 --- a/benchmark/configs/async_fl/async_fl.yml +++ /dev/null @@ -1,61 +0,0 @@ -# Configuration file of FAR training experiment - -# ========== Cluster configuration ========== -# ip address of the parameter server (need 1 GPU process) -ps_ip: localhost - -# ip address of each worker:# of available gpus process on each gpu in this node -# Note that if we collocate ps and worker on same GPU, then we need to decrease this number of available processes on that GPU by 1 -# E.g., master node has 4 available processes, then 1 for the ps, and worker should be set to: worker:3 -worker_ips: - - localhost:[2,2,2,2] - -exp_path: $FEDSCALE_HOME/fedscale/cloud - -# Entry function of executor and aggregator under $exp_path -executor_entry: ../../examples/async_fl/async_executor.py - -aggregator_entry: ../../examples/async_fl/async_aggregator.py - -auth: - ssh_user: "" - ssh_private_key: ~/.ssh/id_rsa - -# cmd to run before we can indeed run FAR (in order) -setup_commands: - - source $HOME/anaconda3/bin/activate fedscale - -# ========== Additional job configuration ========== -# Default parameters are specified in config_parser.py, wherein more description of the parameter can be found - -# NOTE: We are supporting and improving the following implementation (Async FL) in FedScale: - # - "PAPAYA: Practical, Private, and Scalable Federated Learning", MLSys, 2022 - # - "Federated Learning with Buffered Asynchronous Aggregation", AISTATS, 2022 - -# We appreciate you to contribute and/or report bugs. Thank you! - -job_conf: - - job_name: async_femnist # Generate logs under this folder: log_path/job_name/time_stamp - - log_path: $FEDSCALE_HOME/benchmark # Path of log files - - data_set: femnist # Dataset: openImg, google_speech, stackoverflow - - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist # Path of the dataset - - data_map_file: $FEDSCALE_HOME/benchmark/dataset/data/femnist/client_data_mapping/train.csv # Allocation of data to each client, turn to iid setting if not provided - - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo - - eval_interval: 5 # How many rounds to run a testing on the testing set - - rounds: 1000 # Number of rounds to run this training. 
We use 1000 in our paper, while it may converge w/ ~400 rounds - - filter_less: 21 # Remove clients w/ less than 21 samples - - num_loaders: 2 - - local_steps: 5 - - learning_rate: 0.05 - - batch_size: 20 - - test_bsz: 20 - - ps_port: 12342 - - use_cuda: True - - overcommitment: 1.0 - - arrival_interval: 5 - - max_staleness: 5 - - max_concurrency: 100 - - async_buffer: 50 # Number of updates need to be aggregated before generating new model version diff --git a/benchmark/configs/cifar_cpu/cifar_cpu.yml b/benchmark/configs/cifar_cpu/cifar_cpu.yml index c361d1d3..007c8b6d 100644 --- a/benchmark/configs/cifar_cpu/cifar_cpu.yml +++ b/benchmark/configs/cifar_cpu/cifar_cpu.yml @@ -35,7 +35,7 @@ job_conf: - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well - eval_interval: 5 # How many rounds to run a testing on the testing set - rounds: 600 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml index 0106799a..86ec9678 100644 --- a/benchmark/configs/docker_deploy/cifar_cpu_docker.yml +++ b/benchmark/configs/docker_deploy/cifar_cpu_docker.yml @@ -54,7 +54,7 @@ job_conf: - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/docker_deploy/femnist_docker.yml b/benchmark/configs/docker_deploy/femnist_docker.yml index 87521a88..4069d362 100644 --- a/benchmark/configs/docker_deploy/femnist_docker.yml +++ b/benchmark/configs/docker_deploy/femnist_docker.yml @@ -58,7 +58,7 @@ job_conf: - device_conf_file: /FedScale/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo +# - model_zoo: fedscale-torch-zoo - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 20 # Number of rounds to run this training. 
We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/femnist/conf.yml b/benchmark/configs/femnist/conf.yml index aaa2f17f..e59f65b6 100644 --- a/benchmark/configs/femnist/conf.yml +++ b/benchmark/configs/femnist/conf.yml @@ -38,7 +38,7 @@ job_conf: - device_conf_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - device_avail_file: $FEDSCALE_HOME/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo +# - model_zoo: fedscale-torch-zoo - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 1000 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml index 57c9c94a..5ba3179e 100644 --- a/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml +++ b/benchmark/configs/k8s_deploy/cifar_cpu_k8s.yml @@ -36,7 +36,7 @@ job_conf: - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - data_dir: /FedScale/benchmark/dataset/data/ # Path of the dataset - model: shufflenet_v2_x2_0 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well +# - model_zoo: fedscale-torch-zoo # Default zoo (torchcv) uses the pytorchvision zoo, which can not support small images well - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 0 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/k8s_deploy/femnist_k8s.yml b/benchmark/configs/k8s_deploy/femnist_k8s.yml index 90c85b6e..f38ee48f 100644 --- a/benchmark/configs/k8s_deploy/femnist_k8s.yml +++ b/benchmark/configs/k8s_deploy/femnist_k8s.yml @@ -40,7 +40,7 @@ job_conf: - device_conf_file: /FedScale/benchmark/dataset/data/device_info/client_device_capacity # Path of the client trace - device_avail_file: /FedScale/benchmark/dataset/data/device_info/client_behave_trace - model: resnet18 # NOTE: Please refer to our model zoo README and use models for these small image (e.g., 32x32x3) inputs -# - model_zoo: fedscale-zoo +# - model_zoo: fedscale-torch-zoo - eval_interval: 10 # How many rounds to run a testing on the testing set - rounds: 21 # Number of rounds to run this training. 
We use 1000 in our paper, while it may converge w/ ~400 rounds - filter_less: 21 # Remove clients w/ less than 21 samples diff --git a/benchmark/configs/tensorflow_engine/tf-engine.yml b/benchmark/configs/tensorflow_engine/tf-engine.yml deleted file mode 100644 index 14031a71..00000000 --- a/benchmark/configs/tensorflow_engine/tf-engine.yml +++ /dev/null @@ -1,47 +0,0 @@ -# Configuration file of running tensorflow backend - -# ========== Cluster configuration ========== -# ip address of the parameter server (need 1 GPU process) -ps_ip: 10.0.0.1 - -# ip address of each worker:# of available gpus process on each gpu in this node -# Note that if we collocate ps and worker on same GPU, then we need to decrease this number of available processes on that GPU by 1 -# E.g., master node has 4 available processes, then 1 for the ps, and worker should be set to: worker:3 -worker_ips: - - 10.0.0.1:[1] # worker_ip: [(# processes on gpu) for gpu in available_gpus] eg. 10.0.0.2:[4,4,4,4] This node has 4 gpus, each gpu has 4 processes. - -exp_path: $FEDSCALE_HOME/fedscale/cloud - -# Entry function of executor and aggregator under $exp_path -executor_entry: $FEDSCALE_HOME/examples/tensorflow_engine/tf_executor.py - -aggregator_entry: $FEDSCALE_HOME/examples/tensorflow_engine/tf_aggregator.py - -auth: - ssh_user: "" - ssh_private_key: ~/.ssh/id_rsa - -# cmd to run before we can indeed run FAR (in order) -setup_commands: - - source $HOME/anaconda3/bin/activate fedscale - -# ========== Additional job configuration ========== -# Default parameters are specified in config_parser.py, wherein more description of the parameter can be found - -job_conf: - - job_name: tf-engine # Generate logs under this folder: log_path/job_name/time_stamp - - log_path: $FEDSCALE_HOME/benchmark # Path of log files - - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower - - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow - - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset - - model: resnet50 # Need to define the model in tf_aggregator.py - - eval_interval: 5000 # How many rounds to run a testing on the testing set - - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds - - filter_less: 0 # Remove clients w/ less than 21 samples - - num_loaders: 2 - - local_steps: 20 - - learning_rate: 0.001 - - batch_size: 32 - - test_bsz: 32 - - use_cuda: False - - engine: 'tensorflow' diff --git a/benchmark/configs/tf_cifar/tf_cifar.yml b/benchmark/configs/tf_cifar/tf_cifar.yml new file mode 100644 index 00000000..cd59bec5 --- /dev/null +++ b/benchmark/configs/tf_cifar/tf_cifar.yml @@ -0,0 +1,50 @@ +# Configuration file of running tensorflow backend + +# ========== Cluster configuration ========== +# ip address of the parameter server (need 1 GPU process) +ps_ip: localhost + +# ip address of each worker:# of available gpus process on each gpu in this node +# Note that if we collocate ps and worker on same GPU, then we need to decrease this number of available processes on that GPU by 1 +# E.g., master node has 4 available processes, then 1 for the ps, and worker should be set to: worker:3 +worker_ips: + - localhost:[1] # worker_ip: [(# processes on gpu) for gpu in available_gpus] eg. 10.0.0.2:[4,4,4,4] This node has 4 gpus, each gpu has 4 processes. 
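(For readers unfamiliar with the `worker_ips` syntax described in the comment above: a hedged sketch of how an entry such as `10.0.0.2:[4,4,4,4]` can be split into an IP and per-GPU process counts. `parse_worker_entry` is an illustrative helper, not FedScale's actual parser in `docker/driver.py`.)

```
# Illustrative only -- FedScale's real parsing lives in docker/driver.py.
def parse_worker_entry(entry: str):
    """Split 'ip:[n1,n2,...]' into (ip, per-GPU process counts)."""
    ip, counts = entry.split(':', 1)
    gpu_processes = [int(n) for n in counts.strip('[]').split(',')]
    return ip, gpu_processes

ip, procs = parse_worker_entry('10.0.0.2:[4,4,4,4]')
assert ip == '10.0.0.2' and sum(procs) == 16  # 4 GPUs x 4 processes each
```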
+ +exp_path: $FEDSCALE_HOME/fedscale/cloud + +# Entry function of executor and aggregator under $exp_path +executor_entry: execution/executor.py + +aggregator_entry: aggregation/aggregator.py + +auth: + ssh_user: "" + ssh_private_key: ~/.ssh/id_rsa + +# cmd to run before we can indeed run FAR (in order) +setup_commands: + - source $HOME/anaconda3/bin/activate fedscale + +# ========== Additional job configuration ========== +# Default parameters are specified in config_parser.py, wherein more description of the parameter can be found + +job_conf: + - job_name: tf-cifar10 # Generate logs under this folder: log_path/job_name/time_stamp + - log_path: $FEDSCALE_HOME/benchmark # Path of log files + - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower + - data_set: cifar10 # Dataset: openImg, google_speech, stackoverflow + - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/ # Path of the dataset + - model: resnet50 # Need to define the model in tf_aggregator.py + - model_zoo: fedscale-tensorflow-zoo + - eval_interval: 5000 # How many rounds to run a testing on the testing set + - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds + - filter_less: 0 # Remove clients w/ less than 21 samples + - num_loaders: 2 + - local_steps: 20 + - learning_rate: 0.001 + - input_shape: 32 32 3 + - batch_size: 32 + - num_classes: 10 + - test_bsz: 32 + - use_cuda: False + - engine: 'tensorflow' diff --git a/benchmark/configs/tf_femnist/tf_femnist.yml b/benchmark/configs/tf_femnist/tf_femnist.yml new file mode 100644 index 00000000..eb3b3c5a --- /dev/null +++ b/benchmark/configs/tf_femnist/tf_femnist.yml @@ -0,0 +1,50 @@ +# Configuration file of running tensorflow backend + +# ========== Cluster configuration ========== +# ip address of the parameter server (need 1 GPU process) +ps_ip: localhost + +# ip address of each worker:# of available gpus process on each gpu in this node +# Note that if we collocate ps and worker on same GPU, then we need to decrease this number of available processes on that GPU by 1 +# E.g., master node has 4 available processes, then 1 for the ps, and worker should be set to: worker:3 +worker_ips: + - localhost:[1] # worker_ip: [(# processes on gpu) for gpu in available_gpus] eg. 10.0.0.2:[4,4,4,4] This node has 4 gpus, each gpu has 4 processes. 
+ +exp_path: $FEDSCALE_HOME/fedscale/cloud + +# Entry function of executor and aggregator under $exp_path +executor_entry: execution/executor.py + +aggregator_entry: aggregation/aggregator.py + +auth: + ssh_user: "" + ssh_private_key: ~/.ssh/id_rsa + +# cmd to run before we can indeed run FAR (in order) +setup_commands: + - source $HOME/anaconda3/bin/activate fedscale + +# ========== Additional job configuration ========== +# Default parameters are specified in config_parser.py, wherein more description of the parameter can be found + +job_conf: + - job_name: tf-femnist # Generate logs under this folder: log_path/job_name/time_stamp + - log_path: $FEDSCALE_HOME/benchmark # Path of log files + - num_participants: 4 # Number of participants per round, we use K=100 in our paper, large K will be much slower + - data_set: femnist # Dataset: openImg, google_speech, stackoverflow + - data_dir: $FEDSCALE_HOME/benchmark/dataset/data/femnist # Path of the dataset + - model: resnet50 # Need to define the model in tf_aggregator.py + - model_zoo: fedscale-tensorflow-zoo + - eval_interval: 5000 # How many rounds to run a testing on the testing set + - rounds: 200 # Number of rounds to run this training. We use 1000 in our paper, while it may converge w/ ~400 rounds + - filter_less: 0 # Remove clients w/ less than 21 samples + - num_loaders: 2 + - local_steps: 20 + - learning_rate: 0.001 + - batch_size: 32 + - input_shape: 32 32 3 + - num_classes: 62 + - test_bsz: 32 + - use_cuda: False + - engine: 'tensorflow' diff --git a/docker/driver.py b/docker/driver.py index 94ce2fdd..8e6b7b99 100644 --- a/docker/driver.py +++ b/docker/driver.py @@ -53,9 +53,9 @@ def process_cmd(yaml_file, local=False): exit(1) else: use_container = "default" - - + + ps_ip = yaml_conf['ps_ip'] worker_ips, total_gpus = [], [] cmd_script_list = [] @@ -79,7 +79,7 @@ def process_cmd(yaml_file, local=False): for conf in yaml_conf['job_conf']: job_conf.update(conf) - + conf_script = '' setup_cmd = '' if yaml_conf['setup_commands'] is not None: @@ -90,7 +90,7 @@ def process_cmd(yaml_file, local=False): cmd_sufix = f" " for conf_name in job_conf: - conf_script = conf_script + f' --{conf_name}={job_conf[conf_name]}' + conf_script = conf_script + f' --{conf_name} {job_conf[conf_name]}' if conf_name == "job_name": job_name = job_conf[conf_name] if conf_name == "log_path": @@ -152,7 +152,7 @@ def process_cmd(yaml_file, local=False): "rank_id": rank_id, "cuda_id": cuda_id } - + worker_cmd = f" docker run -i --name fedscale-exec{rank_id}-{time_stamp} --network {yaml_conf['container_network']} -p {ports[rank_id]}:32000 --mount type=bind,source={yaml_conf['data_path']},target=/FedScale/benchmark fedscale/fedscale-exec" else: worker_cmd = f" python {yaml_conf['exp_path']}/{yaml_conf['executor_entry']} {conf_script} --this_rank={rank_id} --num_executors={total_gpu_processes} --cuda_device=cuda:{cuda_id} " @@ -236,7 +236,7 @@ def process_cmd(yaml_file, local=False): msg = json.dumps(msg) send_socket.sendall(msg.encode('utf-8')) send_socket.close() - break + break print(f"Submitted job, please check your logs {job_conf['log_path']}/logs/{job_conf['job_name']}/{time_stamp} for status") @@ -258,7 +258,7 @@ def terminate(job_name): print(f"Shutting down container {name} on {meta_dict['ip']}") with open(f"{job_name}_logging", 'a') as fout: subprocess.Popen(f'ssh {job_meta["user"]}{meta_dict["ip"]} "docker rm --force {name}"', - shell=True, stdout=fout, stderr=fout) + shell=True, stdout=fout, stderr=fout) elif job_meta['use_container'] == "k8s": # for 
now, assume we run in k8s admin mode, placeholder for client job submission in the future config.load_kube_config() @@ -266,11 +266,11 @@ def terminate(job_name): for name, meta_dict in job_meta['k8s_dict'].items(): if os.path.exists(meta_dict["yaml_path"]): os.remove(meta_dict["yaml_path"]) - + print(f"Shutting down container {name}...") core_api.delete_namespaced_pod(name, namespace="fedscale") - else: + else: for vm_ip in job_meta['vms']: print(f"Shutting down job on {vm_ip}") with open(f"{job_name}_logging", 'a') as fout: @@ -293,7 +293,7 @@ def submit_to_k8s(yaml_conf): namespace_config = client.V1Namespace( metadata=client.V1ObjectMeta(name="fedscale")) core_api.create_namespace(namespace_config) - + time_stamp = datetime.datetime.fromtimestamp( time.time()).strftime('%m%d_%H%M%S') running_vms = set() @@ -334,7 +334,7 @@ def submit_to_k8s(yaml_conf): "rank_id": 0, "yaml_path": aggr_yaml_path } - + print(f"Submitting aggregator container {aggr_name} to k8s...") # TODO: logging? @@ -380,7 +380,7 @@ def submit_to_k8s(yaml_conf): time.sleep(1) if aggr_ip == -1: print(f"Error: aggregator {name} not ready after maximum waiting time allowed, aborting...") - exit(1) + exit(1) meta_dict["ip"] = aggr_ip elif meta_dict['type'] == 'executor': print(f'Waiting executor container {name} to be ready...') @@ -404,7 +404,7 @@ def submit_to_k8s(yaml_conf): else: print(f"Error: unrecognized type {meta_dict['type']}!") exit(1) - + # TODO: make executors init multi-threaded to boost performance for name, meta_dict in k8s_dict.items(): @@ -456,7 +456,7 @@ def submit_to_k8s(yaml_conf): msg = json.dumps(msg) send_socket.sendall(msg.encode('utf-8')) send_socket.close() - break + break else: print(f"Error: unrecognized type {meta_dict['type']}!") exit(1) @@ -468,14 +468,14 @@ def submit_to_k8s(yaml_conf): def check_log(job_name): current_path = os.path.dirname(os.path.abspath(__file__)) - job_meta_path = os.path.join(current_path, job_name) + job_meta_path = os.path.join(current_path, job_name) if not os.path.isfile(job_meta_path): print(f"Error: fail to terminate {job_name}, as it does not exist") exit(1) with open(job_meta_path, 'rb') as fin: job_meta = pickle.load(fin) - + if job_meta['use_container'] == 'k8s': for name, meta_dict in job_meta['k8s_dict'].items(): if meta_dict['type'] != 'aggregator': @@ -488,7 +488,7 @@ def check_log(job_name): else: print("Error: only support checking job logs running in k8s mode!") exit(1) - + print_help: bool = False if len(sys.argv) > 1: diff --git a/docker/dryrun b/docker/dryrun new file mode 100644 index 00000000..5f2607c9 Binary files /dev/null and b/docker/dryrun differ diff --git a/docker/tf-cifar10 b/docker/tf-cifar10 new file mode 100644 index 00000000..5f2607c9 Binary files /dev/null and b/docker/tf-cifar10 differ diff --git a/environment.yml b/environment.yml index f1dbf326..388bb302 100644 --- a/environment.yml +++ b/environment.yml @@ -16,6 +16,7 @@ dependencies: - matplotlib==3.1.3 - torch_baidu_ctc==0.3.0 - tensorboardX==2.1 + - overrides==3.1.0 - python-levenshtein==0.12.0 - pandas==1.1.0 - PyYAML diff --git a/examples/README.md b/examples/README.md index 55024af6..e3af435a 100644 --- a/examples/README.md +++ b/examples/README.md @@ -79,7 +79,7 @@ In the function `round_weight_handler`, you can customize your aggregator optimi The following code segment shows how FedYoGi and FedAvg aggregate the participant gradients. 
``` -class ServerOptimizer(object): +class TorchServerOptimizer(object): def __init__(self, mode, args, device, sample_seed=233): self.mode = mode diff --git a/examples/async_fl/async_aggregator.py b/examples/async_fl/async_aggregator.py deleted file mode 100644 index 0125c197..00000000 --- a/examples/async_fl/async_aggregator.py +++ /dev/null @@ -1,509 +0,0 @@ -# -*- coding: utf-8 -*- -import collections -import os -import sys - -import torch - -import fedscale.cloud.config_parser as parser -from fedscale.cloud import commons -from fedscale.cloud.aggregation.aggregator import Aggregator -from fedscale.cloud.channels import job_api_pb2 -from fedscale.cloud.logger.aggragation import * - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from resource_manager import ResourceManager - -MAX_MESSAGE_LENGTH = 1 * 1024 * 1024 * 1024 # 1GB - -# NOTE: We are supporting and improving the following implementation (Async FL) in FedScale: - # - "PAPAYA: Practical, Private, and Scalable Federated Learning", MLSys, 2022 - # - "Federated Learning with Buffered Asynchronous Aggregation", AISTATS, 2022 - -# We appreciate you to contribute and/or report bugs. Thank you! - -class AsyncAggregator(Aggregator): - """This centralized aggregator collects training/testing feedbacks from executors""" - - def __init__(self, args): - Aggregator.__init__(self, args) - self.resource_manager = ResourceManager(self.experiment_mode) - self.async_buffer_size = args.async_buffer - self.max_concurrency = args.max_concurrency - self.client_round_duration = {} - self.client_start_time = collections.defaultdict(list) - self.round_stamp = [0] - self.client_model_version = collections.defaultdict(list) - self.virtual_client_clock = {} - self.weight_tensor_type = {} - - # We need to keep the test model for specific round to avoid async mismatch - self.test_model = None - self.aggregate_update = {} - self.importance_sum = 0 - self.client_end = [] - self.round_staleness = [] - self.round_tasks_issued = 0 - # self.model_concurrency = collections.defaultdict(int) - - def run(self): - """Start running the aggregator server by setting up execution - and communication environment, and monitoring the grpc message. - """ - self.setup_env() - self.init_control_communication() - self.init_data_communication() - - self.init_model() - self.save_last_param() - self.model_update_size = sys.getsizeof( - pickle.dumps(self.model)) / 1024.0 * 8. # kbits - self.client_profiles = self.load_client_profile( - file_path=self.args.device_conf_file) - - self.event_monitor() - - - def tictak_client_tasks(self, sampled_clients, num_clients_to_collect): - - if self.experiment_mode == commons.SIMULATION_MODE: - # NOTE: We try to remove dummy events as much as possible in simulations, - # by removing the stragglers/offline clients in overcommitment""" - sampledClientsReal = [] - startTimes = [] - endTimes = [] - completed_client_clock = {} - - start_time = self.global_virtual_clock - constant_checkin_period = self.args.arrival_interval - # 1. 
remove dummy clients that are not available to the end of training - concurreny_count = 0 - - end_list = [] - end_j = 0 - for client_to_run in sampled_clients: - client_cfg = self.client_conf.get(client_to_run, self.args) - exe_cost = self.client_manager.get_completion_time(client_to_run, - batch_size=client_cfg.batch_size, local_steps=client_cfg.local_steps, - upload_size=self.model_update_size, download_size=self.model_update_size) - - roundDuration = exe_cost['computation'] + \ - exe_cost['communication'] - # if the client is not active by the time of collection, we consider it is lost in this round - start_time += constant_checkin_period - end_time = roundDuration + start_time - end_list.append(end_time) - while start_time > end_list[end_j]: - concurreny_count -= 1 - end_j += 1 - if concurreny_count > self.max_concurrency: - end_list.pop() - continue - - if self.client_manager.isClientActive(client_to_run, end_time): - concurreny_count += 1 - sampledClientsReal.append(client_to_run) - completed_client_clock[client_to_run] = exe_cost - startTimes.append(start_time) - self.client_start_time[client_to_run].append(start_time) - self.client_round_duration[client_to_run] = roundDuration - endTimes.append(end_time) - - num_clients_to_collect = min( - num_clients_to_collect, len(sampledClientsReal)) - # 2. sort & execute clients based on completion time - sortedWorkersByCompletion = sorted( - range(len(endTimes)), key=lambda k: endTimes[k]) - top_k_index = sortedWorkersByCompletion[:num_clients_to_collect] - clients_to_run = [sampledClientsReal[k] for k in top_k_index] - endTimes = [endTimes[k] for k in top_k_index] - return (clients_to_run, - endTimes, - completed_client_clock) # dict : string the speed for each client - - else: - completed_client_clock = { - client: {'computation': 1, 'communication': 1} for client in sampled_clients} - completionTimes = [1 for c in sampled_clients] - return (sampled_clients, sampled_clients, completed_client_clock, - 1, completionTimes) - - def save_last_param(self): - """ Save the last model parameters - """ - self.last_gradient_weights = [ - p.data.clone() for p in self.model.parameters()] - self.model_weights = copy.deepcopy(self.model.state_dict()) - self.weight_tensor_type = {p: self.model_weights[p].data.dtype \ - for p in self.model_weights} - - def aggregate_client_weights(self, results): - """May aggregate client updates on the fly""" - """ - "PAPAYA: PRACTICAL, PRIVATE, AND SCALABLE FEDERATED LEARNING". 
MLSys, 2022 - """ - # Start to take the average of updates, and we do not keep updates to save memory - # Importance of each update is 1/staleness - client_staleness = self.round - self.client_model_version[results['clientId']].pop(0) - - importance = 1./(math.sqrt(1 + client_staleness)) - self.round_staleness.append(client_staleness) - - new_round_aggregation = (self.model_in_update == 1) - if new_round_aggregation: - self.importance_sum = 0 - self.importance_sum += importance - - for p in results['update_weight']: - # Different to cloud/executor, update_weight here is (train_model_weight - untrained) - param_weight = results['update_weight'][p] - - if isinstance(param_weight, list): - param_weight = np.asarray(param_weight, dtype=np.float32) - param_weight = torch.from_numpy( - param_weight).to(device=self.device) - - if new_round_aggregation: - self.aggregate_update[p] = param_weight * importance - else: - self.aggregate_update[p] += param_weight * importance - - if self.model_in_update == self.async_buffer_size: - for p in self.model_weights: - d_type = self.weight_tensor_type[p] - self.model_weights[p].data = ( - self.model_weights[p].data + self.aggregate_update[p] / float(self.importance_sum) # self.model_in_update - ).to(dtype=d_type) - - def round_completion_handler(self): - self.round += 1 - - logging.info(f"Round {self.round} average staleness {np.mean(self.round_staleness)}") - self.round_staleness = [] - self.global_virtual_clock = self.round_stamp[-1] - - if self.round % self.args.decay_round == 0: - self.args.learning_rate = max( - self.args.learning_rate * self.args.decay_factor, self.args.min_learning_rate) - - # handle the global update w/ current and last - self.round_weight_handler(self.last_gradient_weights) - - avg_loss = sum(self.loss_accumulator) / \ - max(1, len(self.loss_accumulator)) - logging.info(f"Wall clock: {round(self.global_virtual_clock)} s, round: {self.round}, asyn running participants: " + - f"{self.resource_manager.get_task_length()}, aggregating {len(self.stats_util_accumulator)} participants, " + - f"training loss: {avg_loss}") - - # dump round completion information to tensorboard - if len(self.loss_accumulator): - self.log_train_result(avg_loss) - - # update select participants - # NOTE: we simulate async, while have to sync every 10 rounds to avoid large division to trace - if self.resource_manager.get_task_length() < self.async_buffer_size * 5: - - self.sampled_participants = self.select_participants( - select_num_participants=self.async_buffer_size*10, overcommitment=self.args.overcommitment) - (clientsToRun, clientsEndTime, virtual_client_clock) = self.tictak_client_tasks( - self.sampled_participants, len(self.sampled_participants)) - - logging.info(f"{len(clientsToRun)} clients with constant arrival following the order: {clientsToRun}") - - # Issue requests to the resource manager; Tasks ordered by the completion time - self.resource_manager.register_tasks(clientsToRun, clientsEndTime) - self.virtual_client_clock.update(virtual_client_clock) - - # Update executors and participants - if self.experiment_mode == commons.SIMULATION_MODE: - self.sampled_executors = list(self.individual_client_events.keys()) - else: - self.sampled_executors = [str(c_id) - for c_id in self.sampled_participants] - - self.save_last_param() - #self.round_stragglers = round_stragglers - - self.model_in_update = 0 - self.test_result_accumulator = [] - self.stats_util_accumulator = [] - self.client_training_results = [] - self.loss_accumulator = [] - # 
self.round_stamp.append(self.global_virtual_clock) - - if self.round >= self.args.rounds: - self.broadcast_aggregator_events(commons.SHUT_DOWN) - elif self.round % self.args.eval_interval == 0: - self.test_model = copy.deepcopy(self.model) - self.broadcast_aggregator_events(commons.UPDATE_MODEL) - self.broadcast_aggregator_events(commons.MODEL_TEST) - else: - self.broadcast_aggregator_events(commons.UPDATE_MODEL) - self.broadcast_aggregator_events(commons.START_ROUND) - - def find_latest_model(self, start_time): - for i, time_stamp in enumerate(reversed(self.round_stamp)): - if start_time >= time_stamp: - return len(self.round_stamp) - i - return 1 - - def get_test_config(self, client_id): - """FL model testing on clients, developers can further define personalized client config here. - - Args: - client_id (int): The client id. - - Returns: - dictionary: The testing config for new task. - - """ - # Get the straggler round-id - client_tasks = self.resource_manager.client_run_queue - current_pending_length = min( - self.resource_manager.client_run_queue_idx, len(client_tasks)-1) - - current_pending_clients = client_tasks[current_pending_length:] - straggler_round = 1e10 - for client in current_pending_clients: - straggler_round = min( - self.find_latest_model(self.client_start_time[client][0]), straggler_round) - - return {'client_id': client_id, - 'straggler_round': straggler_round, - 'test_model': self.test_model} - - def get_client_conf(self, clientId): - """Training configurations that will be applied on clients""" - conf = { - 'learning_rate': self.args.learning_rate, - } - return conf - - def create_client_task(self, executorId): - """Issue a new client training task to the executor""" - - train_config = None - model = None - - # NOTE: in batch execution simulation (i.e., multiple executors), we need to stall task scheduling - # to ensure clients in current async_buffer_size completes ahead of other tasks - with self.update_lock: - logging.info(f"====self.round_tasks_issued ({executorId}) is {self.round_tasks_issued}, {self.async_buffer_size}") - if self.round_tasks_issued < self.async_buffer_size: - next_clientId = self.resource_manager.get_next_task(executorId) - config = self.get_client_conf(next_clientId) - start_time = self.client_start_time[next_clientId][0] - end_time = self.client_round_duration[next_clientId] + start_time - model_id = self.find_latest_model(start_time) - - self.client_model_version[next_clientId].append(model_id) - - # The executor has already received the model, thus sending id is enough - model = model_id - train_config = {'client_id': next_clientId, 'task_config': config, 'end_time': end_time} - logging.info( - f"Client {next_clientId} train on model {model_id} during {int(start_time)}-{int(end_time)}") - - self.round_tasks_issued += 1 - - - return train_config, model - - def log_train_result(self, avg_loss): - """Result will be post on TensorBoard""" - self.log_writer.add_scalar('Train/round_to_loss', avg_loss, self.round) - self.log_writer.add_scalar( - 'FAR/time_to_train_loss (min)', avg_loss, self.global_virtual_clock / 60.) 
- self.log_writer.add_scalar( - 'FAR/round_duration (min)', self.round_duration / 60., self.round) - - def client_completion_handler(self, results): - """We may need to keep all updates from clients, - if so, we need to append results to the cache - - Args: - results (dictionary): client's training result - - """ - # Format: - # -results = {'clientId':clientId, 'update_weight': model_param, 'moving_loss': round_train_loss, - # 'trained_size': count, 'wall_duration': time_cost, 'success': is_success 'utility': utility} - - if self.round - self.client_model_version[results['clientId']][0] > self.args.max_staleness: - logging.info(f"Warning: Ignore stale client {results['clientId']} with {self.round - self.client_model_version[results['clientId']][0]}") - self.client_model_version[results['clientId']].pop(0) - self.client_start_time[results['clientId']].pop(0) - with self.update_lock: - self.round_tasks_issued -= 1 - # self.individual_client_events['1'].append( commons.CLIENT_TRAIN) - return -1 - - # [ASYNC] New checkin clients ID would overlap with previous unfinished clients - logging.info( - f"Client {results['clientId']} completes from {self.client_start_time[results['clientId']][0]} " + - f"to {self.client_start_time[results['clientId']][0]+self.client_round_duration[results['clientId']]}") - - self.client_end.append(self.client_round_duration[results['clientId']] + self.client_start_time[results['clientId']].pop(0)) - - if self.args.gradient_policy in ['q-fedavg']: - self.client_training_results.append(results) - # Feed metrics to client sampler - self.stats_util_accumulator.append(results['utility']) - self.loss_accumulator.append(results['moving_loss']) - - self.client_manager.register_feedback(results['clientId'], results['utility'], - auxi=math.sqrt( - results['moving_loss']), - time_stamp=self.round, - duration=self.virtual_client_clock[results['clientId']]['computation'] + - self.virtual_client_clock[results['clientId']]['communication'] - ) - - # ================== Aggregate weights ====================== - with self.update_lock: - self.model_in_update += 1 - if self.using_group_params == True: - self.aggregate_client_group_weights(results) - else: - self.aggregate_client_weights(results) - - return 0 - - def CLIENT_EXECUTE_COMPLETION(self, request, context): - """FL clients complete the execution task. - - Args: - request (CompleteRequest): Complete request info from executor. - - Returns: - ServerResponse: Server response to job completion request - - """ - - executor_id, client_id, event = request.executor_id, request.client_id, request.event - execution_status, execution_msg = request.status, request.msg - meta_result, data_result = request.meta_result, request.data_result - - if event == commons.CLIENT_TRAIN: - # Training results may be uploaded in CLIENT_EXECUTE_RESULT request later, - # so we need to specify whether to ask client to do so (in case of straggler/timeout in real FL). 
- if execution_status is False: - logging.error(f"Executor {executor_id} fails to run client {client_id}, due to {execution_msg}") - - elif event in (commons.MODEL_TEST, commons.UPLOAD_MODEL): - self.add_event_handler( - executor_id, event, meta_result, data_result) - else: - logging.error(f"Received undefined event {event} from client {client_id}") - - # [ASYNC] Different from sync that only schedule tasks once previous training finish - if self.resource_manager.has_next_task(executor_id) and self.round_tasks_issued < self.async_buffer_size: - # NOTE: we do not pop the train immediately in simulation mode, - # since the executor may run multiple clients - if commons.CLIENT_TRAIN not in self.individual_client_events[executor_id] : - # if event in (commons.MODEL_TEST, commons.UPLOAD_MODEL): - self.individual_client_events[executor_id].append( - commons.CLIENT_TRAIN) - - return self.CLIENT_PING(request, context) - - def CLIENT_PING(self, request, context): - """Handle client ping requests - - Args: - request (PingRequest): Ping request info from executor. - - Returns: - ServerResponse: Server response to ping request - - """ - # NOTE: client_id = executor_id in deployment, - # while multiple client_id may use the same executor_id (VMs) in simulations - executor_id, client_id = request.executor_id, request.client_id - response_data = response_msg = commons.DUMMY_RESPONSE - if len(self.individual_client_events[executor_id]) == 0: - # send dummy response - current_event = commons.DUMMY_EVENT - response_data = response_msg = commons.DUMMY_RESPONSE - else: - logging.info(f"====event queue {executor_id}, {self.individual_client_events[executor_id]}") - current_event = self.individual_client_events[executor_id].popleft() - if current_event == commons.CLIENT_TRAIN: - response_msg, response_data = self.create_client_task( - executor_id) - if response_msg is None: - current_event = commons.DUMMY_EVENT - if self.experiment_mode != commons.SIMULATION_MODE: - self.individual_client_events[executor_id].append( - commons.CLIENT_TRAIN) - elif current_event == commons.MODEL_TEST: - response_msg = self.get_test_config(client_id) - elif current_event == commons.UPDATE_MODEL: - response_data = self.get_global_model() - elif current_event == commons.SHUT_DOWN: - response_msg = self.get_shutdown_config(executor_id) - - response_msg, response_data = self.serialize_response( - response_msg), self.serialize_response(response_data) - # NOTE: in simulation mode, response data is pickle for faster (de)serialization - response = job_api_pb2.ServerResponse(event=current_event, - meta=response_msg, data=response_data) - if current_event != commons.DUMMY_EVENT: - logging.info(f"Issue EVENT ({current_event}) to EXECUTOR ({executor_id})") - - return response - - - def event_monitor(self): - logging.info("Start monitoring events ...") - - while True: - # Broadcast events to clients - if len(self.broadcast_events_queue) > 0: - current_event = self.broadcast_events_queue.popleft() - - if current_event in (commons.UPDATE_MODEL, commons.MODEL_TEST): - self.dispatch_client_events(current_event) - - elif current_event == commons.START_ROUND: - self.dispatch_client_events(commons.CLIENT_TRAIN) - - elif current_event == commons.SHUT_DOWN: - self.dispatch_client_events(commons.SHUT_DOWN) - break - - # Handle events queued on the aggregator - elif len(self.sever_events_queue) > 0: - client_id, current_event, meta, data = self.sever_events_queue.popleft() - - if current_event == commons.UPLOAD_MODEL: - state = 
self.client_completion_handler( - self.deserialize_response(data)) - logging.info( - f"Executor ({client_id}) finish client {self.deserialize_response(data)['clientId']} in round {self.round} [{self.model_in_update}/{ self.async_buffer_size}] ") - if state == -1 : - self.individual_client_events[client_id].append(commons.CLIENT_TRAIN) - - elif self.model_in_update == self.async_buffer_size: - # clientID = self.deserialize_response(data)['clientId'] - - # [ASYNC] handle different completion order - self.round_stamp.append(max(self.client_end)) - self.client_end = [] - self.round_completion_handler() - with self.update_lock: self.round_tasks_issued = 0 - - elif current_event == commons.MODEL_TEST: - self.testing_completion_handler( - client_id, self.deserialize_response(data)) - - else: - logging.error(f"Event {current_event} is not defined") - - else: - # execute every 100 ms - time.sleep(0.1) - -if __name__ == "__main__": - aggregator = AsyncAggregator(parser.args) - aggregator.run() diff --git a/examples/async_fl/async_client.py b/examples/async_fl/async_client.py deleted file mode 100644 index 18e2394d..00000000 --- a/examples/async_fl/async_client.py +++ /dev/null @@ -1,66 +0,0 @@ -import copy -import logging -import math -import pickle - -import torch -from torch.autograd import Variable - -from fedscale.cloud.execution.client import Client -from fedscale.cloud.execution.optimizers import ClientOptimizer -from fedscale.dataloaders.nlp import mask_tokens - - -class Client(Client): - """Basic client component in Federated Learning""" - - def train(self, client_data, model, conf): - - clientId = conf.clientId - logging.info(f"Start to train (CLIENT: {clientId}) ...") - tokenizer, device = conf.tokenizer, conf.device - - model = model.to(device=device) - model.train() - - trained_unique_samples = min( - len(client_data.dataset), conf.local_steps * conf.batch_size) - - self.global_model = None - if conf.gradient_policy == 'fed-prox': - # could be move to optimizer - self.global_model = [param.data.clone() for param in model.parameters()] - - prev_model_dict = copy.deepcopy(model.state_dict()) - optimizer = self.get_optimizer(model, conf) - criterion = self.get_criterion(conf) - error_type = None - - # NOTE: One may hope to run fixed number of epochs, instead of iterations - # then replace the following with "while self.completed_steps < conf.local_steps * len(client_data)" - while self.completed_steps < conf.local_steps: - try: - self.train_step(client_data, conf, model, optimizer, criterion) - except Exception as ex: - error_type = ex - break - - state_dicts = model.state_dict() - # In async, we need the delta_weight only - model_param = {p: (state_dicts[p] - prev_model_dict[p]).data.cpu().numpy() - for p in state_dicts} - results = {'clientId': clientId, 'moving_loss': self.epoch_train_loss, - 'trained_size': self.completed_steps*conf.batch_size, - 'success': self.completed_steps == conf.local_steps} - - if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") - else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") - - results['utility'] = math.sqrt( - self.loss_squre)*float(trained_unique_samples) - results['update_weight'] = model_param - results['wall_duration'] = 0 - - return results diff --git a/examples/async_fl/async_executor.py b/examples/async_fl/async_executor.py deleted file mode 100644 index fea88dad..00000000 --- a/examples/async_fl/async_executor.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- 
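(Aside, before the deleted executor code continues: a worked sketch of the staleness weighting that `aggregate_client_weights` in the deleted `AsyncAggregator` above implements, following PAPAYA. `buffered_average` is a hypothetical standalone helper for illustration, not FedScale API.)

```
import math

def buffered_average(updates):
    """updates: list of (staleness, delta) pairs in one async buffer."""
    # Each update is scaled by 1/sqrt(1 + staleness), matching
    # `importance = 1./(math.sqrt(1 + client_staleness))` above,
    # then the buffered sum is normalized by the importance sum.
    importances = [1.0 / math.sqrt(1 + s) for s, _ in updates]
    weighted = sum(imp * delta for imp, (_, delta) in zip(importances, updates))
    return weighted / sum(importances)

# Staleness 0, 3, 8 -> weights 1, 1/2, 1/3: fresher updates count more.
print(buffered_average([(0, 1.0), (3, 2.0), (8, 3.0)]))
```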
-import pickle - -import fedscale.cloud.channels.job_api_pb2 as job_api_pb2 -import fedscale.cloud.config_parser as parser -from fedscale.cloud.execution.executor import Executor -from fedscale.cloud.execution.rlclient import RLClient -from fedscale.cloud.logger.execution import * -from fedscale.cloud import commons - -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from async_client import Client as CustomizedClient - -class AsyncExecutor(Executor): - """Each executor takes certain resource to run real training. - Each run simulates the execution of an individual client""" - - def __init__(self, args): - super().__init__(args) - self.temp_model_path_version = lambda round: os.path.join( - logDir, f'model_{self.this_rank}_{round}.pth.tar') - - def update_model_handler(self, model): - """Update the model copy on this executor""" - self.round += 1 - - # Dump latest model to disk - with open(self.temp_model_path_version(self.round), 'wb') as model_out: - logging.info( - f"Received latest model saved at {self.temp_model_path_version(self.round)}" - ) - pickle.dump(model, model_out) - - def load_global_model(self, round=None): - # load last global model - # logging.info(f"====Load global model with version {round}") - round = min(round, self.round) if round is not None else self.round - with open(self.temp_model_path_version(round), 'rb') as model_in: - model = pickle.load(model_in) - return model - - def get_client_trainer(self, conf): - return CustomizedClient(conf) - - def training_handler(self, clientId, conf, model=None): - """Train model given client ids""" - - # Here model is model_id - client_model = self.load_global_model(model) - - conf.clientId, conf.device = clientId, self.device - conf.tokenizer = tokenizer - if args.task == "rl": - client_data = self.training_sets - client = RLClient(conf) - train_res = client.train( - client_data=client_data, model=client_model, conf=conf) - else: - client_data = select_dataset(clientId, self.training_sets, - batch_size=conf.batch_size, args=self.args, - collate_fn=self.collate_fn - ) - - client = self.get_client_trainer(conf) - train_res = client.train( - client_data=client_data, model=client_model, conf=conf) - - return train_res - - def testing_handler(self, args, config=None): - - evalStart = time.time() - device = self.device - model = config['test_model'] - if self.task == 'rl': - client = RLClient(args) - test_res = client.test(args, self.this_rank, model, device=device) - _, _, _, testResults = test_res - else: - data_loader = select_dataset(self.this_rank, self.testing_sets, - batch_size=args.test_bsz, args=args, - isTest=True, collate_fn=self.collate_fn - ) - - if self.task == 'voice': - criterion = CTCLoss(reduction='mean').to(device=device) - else: - criterion = torch.nn.CrossEntropyLoss().to(device=device) - - if self.args.engine == commons.PYTORCH: - test_res = test_model(self.this_rank, model, data_loader, - device=device, criterion=criterion, tokenizer=tokenizer) - else: - raise Exception(f"Need customized implementation for model testing in {self.args.engine} engine") - - test_loss, acc, acc_5, testResults = test_res - logging.info("After aggregation round {}, CumulTime {}, eval_time {}, test_loss {}, test_accuracy {:.2f}%, test_5_accuracy {:.2f}% \n" - .format(self.round, round(time.time() - self.start_run_time, 4), round(time.time() - evalStart, 4), test_loss, acc*100., acc_5*100.)) - - gc.collect() - - return testResults - - def check_model_version(self, model_id): - return 
os.path.exists(self.temp_model_path_version(model_id)) - - def remove_stale_models(self, straggler_round): - """Remove useless models kept for async execution in the past""" - logging.info(f"Current straggler round is {straggler_round}") - stale_version = straggler_round-1 - while self.check_model_version(stale_version): - logging.info(f"Executor {self.this_rank} removes stale model version {stale_version}") - os.remove(self.temp_model_path_version(stale_version)) - stale_version -= 1 - - def event_monitor(self): - """Activate event handler once receiving new message - """ - logging.info("Start monitoring events ...") - self.client_register() - - while self.received_stop_request == False: - if len(self.event_queue) > 0: - request = self.event_queue.popleft() - current_event = request.event - - logging.info(f"====Poping event {current_event}") - if current_event == commons.CLIENT_TRAIN: - train_config = self.deserialize_response(request.meta) - train_model = self.deserialize_response(request.data) - if train_model is not None and not self.check_model_version(train_model): - # The executor may have not received the model due to async grpc - # TODO: server will lose track of scheduled but not executed task and remove the model - logging.error(f"Warning: Not receive model {train_model} for client {train_config['client_id'] }") - if self.round - train_model <= self.args.max_staleness: - self.event_queue.append(request) - time.sleep(1) - continue - - train_config['model'] = train_model - train_config['client_id'] = int(train_config['client_id']) - client_id, train_res = self.Train(train_config) - - # Upload model updates - future_call = self.aggregator_communicator.stub.CLIENT_EXECUTE_COMPLETION.future( - job_api_pb2.CompleteRequest(client_id=str(client_id), executor_id=self.executor_id, - event=commons.UPLOAD_MODEL, status=True, msg=None, - meta_result=None, data_result=self.serialize_response(train_res) - )) - future_call.add_done_callback(lambda _response: self.dispatch_worker_events(_response.result())) - - elif current_event == commons.MODEL_TEST: - test_configs = self.deserialize_response(request.meta) - self.remove_stale_models(test_configs['straggler_round']) - self.Test(test_configs) - - elif current_event == commons.UPDATE_MODEL: - broadcast_config = self.deserialize_response(request.data) - self.UpdateModel(broadcast_config) - - elif current_event == commons.SHUT_DOWN: - self.Stop() - - elif current_event == commons.DUMMY_EVENT: - pass - else: - time.sleep(1) - self.client_ping() - -if __name__ == "__main__": - executor = AsyncExecutor(parser.args) - executor.run() diff --git a/examples/async_fl/resource_manager.py b/examples/async_fl/resource_manager.py deleted file mode 100644 index 15f6908d..00000000 --- a/examples/async_fl/resource_manager.py +++ /dev/null @@ -1,53 +0,0 @@ -import threading - -from fedscale.cloud import commons -from fedscale.cloud.resource_manager import ResourceManager as DefaultManager -import numpy as np - -class ResourceManager(DefaultManager): - """Schedule training tasks across GPUs/CPUs""" - - def __init__(self, experiment_mode): - super().__init__(experiment_mode) - self.client_run_queue = [] - self.experiment_mode = experiment_mode - self.update_lock = threading.Lock() - self.client_end_queue = [] - - def get_task_length(self): - self.update_lock.acquire() - remaining_task_num: int = len(self.client_run_queue) - self.update_lock.release() - return remaining_task_num - - def register_tasks(self, clientsToRun, clientsEndTime): - self.client_end_queue += 
clientsEndTime.copy() - self.client_run_queue += clientsToRun.copy() - sortedClientIndex = np.argsort(self.client_end_queue) - - self.client_run_queue = [self.client_run_queue[k] for k in sortedClientIndex] - self.client_end_queue = [self.client_end_queue[k] for k in sortedClientIndex] - - def has_next_task(self, client_id=None): - exist_next_task = False - if self.experiment_mode == commons.SIMULATION_MODE: - exist_next_task = len(self.client_run_queue) > 0 - else: - exist_next_task = client_id in self.client_run_queue - return exist_next_task - - def get_next_task(self, client_id=None): - next_task_id = None - self.update_lock.acquire() - if self.experiment_mode == commons.SIMULATION_MODE: - if self.has_next_task(client_id): - next_task_id = self.client_run_queue[0] - self.client_run_queue.pop(0) - self.client_end_queue.pop(0) - else: - if client_id in self.client_run_queue: - next_task_id = client_id - self.client_run_queue.remove(next_task_id) - - self.update_lock.release() - return next_task_id diff --git a/examples/differential_privacy/customized_client.py b/examples/differential_privacy/customized_client.py index 970b992f..b3568dad 100644 --- a/examples/differential_privacy/customized_client.py +++ b/examples/differential_privacy/customized_client.py @@ -8,10 +8,10 @@ from clip_norm import clip_grad_norm_ from torch.autograd import Variable -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.torch_client import TorchClient -class Customized_Client(Client): +class Customized_Client(TorchClient): """ Basic client component in Federated Learning Local differential privacy @@ -19,8 +19,8 @@ class Customized_Client(Client): def train(self, client_data, model, conf): - clientId = conf.clientId - logging.info(f"Start to train (CLIENT: {clientId}) ...") + client_id = conf.client_id + logging.info(f"Start to train (CLIENT: {client_id}) ...") tokenizer, device = conf.tokenizer, conf.device last_model_params = [p.data.clone() for p in model.parameters()] @@ -65,15 +65,15 @@ def train(self, client_data, model, conf): torch.normal(mean=0, std=sigma, size=state_dicts[p].data.shape).cpu().numpy()) for p in state_dicts} - results = {'clientId': clientId, 'moving_loss': self.epoch_train_loss, + results = {'client_id': client_id, 'moving_loss': self.epoch_train_loss, 'trained_size': self.completed_steps*conf.batch_size, 'success': self.completed_steps > 0} results['utility'] = math.sqrt( - self.loss_squre)*float(trained_unique_samples) + self.loss_squared)*float(trained_unique_samples) if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}") else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {client_id}) failed as {error_type}") results['update_weight'] = model_param results['wall_duration'] = 0 diff --git a/examples/differential_privacy/customized_executor.py b/examples/differential_privacy/customized_executor.py index 94966bb0..e113ca79 100644 --- a/examples/differential_privacy/customized_executor.py +++ b/examples/differential_privacy/customized_executor.py @@ -8,7 +8,7 @@ from fedscale.cloud.execution.executor import Executor import fedscale.cloud.config_parser as parser -"""In this example, we only need to change the Client Component we need to import""" +"""In this example, we only need to change the TorchClient Component we need to import""" class Customized_Executor(Executor): 
"""Each executor takes certain resource to run real training. diff --git a/examples/dry_run/customized_client.py b/examples/dry_run/customized_client.py index cde31a10..4ead6fa6 100644 --- a/examples/dry_run/customized_client.py +++ b/examples/dry_run/customized_client.py @@ -7,17 +7,17 @@ import torch from torch.autograd import Variable -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.torch_client import TorchClient -class Customized_Client(Client): +class Customized_Client(TorchClient): """Basic client component in Federated Learning""" def train(self, client_data, model, conf): """We flip the label of the malicious client""" - clientId = conf.clientId + client_id = conf.client_id - logging.info(f"Start to train (CLIENT: {clientId}) ...") + logging.info(f"Start to train (CLIENT: {client_id}) ...") device = conf.device model = model.to(device=device) @@ -62,14 +62,14 @@ def train(self, client_data, model, conf): state_dicts = model.state_dict() model_param = {p:state_dicts[p].data.cpu().numpy() for p in state_dicts} - results = {'clientId':clientId, 'moving_loss': epoch_train_loss, + results = {'client_id':client_id, 'moving_loss': epoch_train_loss, 'trained_size': completed_steps*conf.batch_size, 'success': completed_steps > 0} results['utility'] = math.sqrt(epoch_train_loss)*float(trained_unique_samples) if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}") else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {client_id}) failed as {error_type}") results['update_weight'] = model_param results['wall_duration'] = 0 diff --git a/examples/dry_run/customized_executor.py b/examples/dry_run/customized_executor.py index 2741a0fb..bd936810 100644 --- a/examples/dry_run/customized_executor.py +++ b/examples/dry_run/customized_executor.py @@ -8,7 +8,7 @@ from fedscale.cloud.execution.executor import Executor import fedscale.cloud.config_parser as parser -"""In this example, we only need to change the Client Component we need to import""" +"""In this example, we only need to change the TorchClient Component we need to import""" class Customized_Executor(Executor): """Each executor takes certain resource to run real training. 
diff --git a/examples/heterofl/customized_aggregator.py b/examples/heterofl/customized_aggregator.py index ff493a1c..0747f8b4 100644 --- a/examples/heterofl/customized_aggregator.py +++ b/examples/heterofl/customized_aggregator.py @@ -1,13 +1,14 @@ -import os -import sys +import math +import random +from collections import OrderedDict + +import torch import config import customized_fllibs from customized_fllibs import make_param_idx - -import fedscale.cloud.config_parser as parser from fedscale.cloud.aggregation.aggregator import Aggregator -from fedscale.cloud.logger.aggragation import * +from fedscale.cloud.logger.aggregation_logging import * class Customized_Aggregator(Aggregator): @@ -55,14 +56,14 @@ def client_completion_handler(self, results): self.client_training_results.append(results) self.stats_util_accumulator.append(results['utility']) self.loss_accumulator.append(results['moving_loss']) - self.client_manager.registerScore(results['clientId'], results['utility'], auxi=math.sqrt(results['moving_loss']), + self.client_manager.registerScore(results['client_id'], results['utility'], auxi=math.sqrt(results['moving_loss']), time_stamp=self.epoch, - duration=self.virtual_client_clock[results['clientId']]['computation']+self.virtual_client_clock[results['clientId']]['communication'] + duration=self.virtual_client_clock[results['client_id']]['computation']+self.virtual_client_clock[results['client_id']]['communication'] ) self.update_lock.acquire() self.model_in_update += 1 - + if self.model_in_update == self.tasks_round: self.combine_models() @@ -115,9 +116,9 @@ def combine_models(self): count[k] += 1 tmp_v[count[k] > 0] = tmp_v[count[k] > 0].div_(count[k][count[k] > 0]) v[count[k] > 0] = tmp_v[count[k] > 0].to(v.dtype) - return - - + return + + if __name__ == "__main__": aggregator = Customized_Aggregator(parser.args) aggregator.run() \ No newline at end of file diff --git a/examples/heterofl/customized_client.py b/examples/heterofl/customized_client.py index 9f5a58a1..14e4ebf0 100644 --- a/examples/heterofl/customized_client.py +++ b/examples/heterofl/customized_client.py @@ -5,17 +5,17 @@ from customized_fllibs import split_model from resnet_heterofl import resnet18 -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.torch_client import TorchClient from fedscale.cloud.fllibs import Variable, logging, math, np, os, torch -class Customized_Client(Client): +class Customized_Client(TorchClient): def __init__(self, conf): super().__init__(conf) self.model_rate = None self.param_idx = None self.local_parameters = None - + def make_model_rate(self): """get the model scaling rate""" @@ -23,7 +23,7 @@ def make_model_rate(self): self.model_rate = np.random.choice(config.cfg['shrinkage']) elif config.cfg['model_split_mode'] == 'fix': for i in range(len(config.cfg['model_rate'])): - if self.clientId % sum(config.cfg['proportion_of_model']) < \ + if self.client_id % sum(config.cfg['proportion_of_model']) < \ sum(config.cfg['proportion_of_model'][:i+1]): self.model_rate = config.cfg['model_rate'][i] break @@ -31,13 +31,13 @@ def make_model_rate(self): def train(self, client_data, model, conf): - self.clientId = conf.clientId + self.client_id = conf.client_id self.make_model_rate() - logging.info(f"Start to split model (CLIENT: {self.clientId}, MODEL RATE: {self.model_rate}) ...") + logging.info(f"Start to split model (CLIENT: {self.client_id}, MODEL RATE: {self.model_rate}) ...") self.local_parameters = split_model(model, self.model_rate) self.local_model = 
resnet18(model_rate=self.model_rate) self.local_model.load_state_dict(self.local_parameters) - logging.info(f"Start to train (CLIENT: {self.clientId}) ...") + logging.info(f"Start to train (CLIENT: {self.client_id}) ...") device = conf.device self.local_model = self.local_model.to(device=device) self.local_model.train(True) @@ -47,7 +47,7 @@ def train(self, client_data, model, conf): epoch_train_loss = 1e-4 error_type = None completed_steps = 0 - loss_squre = 0 + loss_squared = 0 completed_steps = 0 while completed_steps < config.cfg['local_epochs']: try: @@ -63,7 +63,7 @@ def train(self, client_data, model, conf): loss_list = loss.tolist() loss = loss.mean() temp_loss = sum(loss_list)/float(len(loss_list)) - loss_squre = sum([l**2 for l in loss_list])/float(len(loss_list)) + loss_squared = sum([l**2 for l in loss_list])/float(len(loss_list)) if completed_steps < len(client_data): if epoch_train_loss == 1e-4: epoch_train_loss = temp_loss @@ -73,20 +73,20 @@ def train(self, client_data, model, conf): loss.backward() torch.nn.utils.clip_grad_norm_(self.local_model.parameters(), 1) optimizer.step() - logging.info(f"Client {self.clientId} completes local epoch: {completed_steps}, loss square: {loss_squre}") + logging.info(f"Client {self.client_id} completes local epoch: {completed_steps}, loss square: {loss_squared}") completed_steps += 1 except Exception as ex: error_type = ex break - results = {'clientId':self.clientId, 'moving_loss': epoch_train_loss, + results = {'client_id':self.client_id, 'moving_loss': epoch_train_loss, 'trained_size': completed_steps*conf.batch_size, 'success': completed_steps > 0} - results['utility'] = math.sqrt(loss_squre)*float(trained_unique_samples) + results['utility'] = math.sqrt(loss_squared)*float(trained_unique_samples) if error_type is None: - logging.info(f"Training of (CLIENT: {self.clientId}) completes, {results}") + logging.info(f"Training of (CLIENT: {self.client_id}) completes, {results}") else: - logging.info(f"Training of (CLIENT: {self.clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {self.client_id}) failed as {error_type}") results['wall_duration'] = 0 results['model_rate'] = self.model_rate diff --git a/examples/poisoning_setting/customized_client.py b/examples/poisoning_setting/customized_client.py index 032a3936..655c6abe 100644 --- a/examples/poisoning_setting/customized_client.py +++ b/examples/poisoning_setting/customized_client.py @@ -8,25 +8,25 @@ from clip_norm import clip_grad_norm_ from torch.autograd import Variable -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.torch_client import TorchClient -class Customized_Client(Client): +class Customized_Client(TorchClient): """Basic client component in Federated Learning""" def train(self, client_data, model, conf): """We flip the label of the malicious client""" - clientId = conf.clientId + client_id = conf.client_id """1 out of malicious_factor client is malicious""" - is_malicious = ((clientId+1) % conf.malicious_factor == 0) + is_malicious = ((client_id+1) % conf.malicious_factor == 0) if is_malicious: label_mapping = list(range(conf.num_class)) - np.random.seed(clientId) + np.random.seed(client_id) np.random.shuffle(label_mapping) - logging.info(f"Start to train (CLIENT: {clientId}) ...") + logging.info(f"Start to train (CLIENT: {client_id}) ...") device = conf.device last_model_params = [p.data.clone() for p in model.parameters()] @@ -96,14 +96,14 @@ def train(self, client_data, model, conf): state_dicts = model.state_dict() 
model_param = {p:state_dicts[p].data.cpu().numpy() for p in state_dicts} - results = {'clientId':clientId, 'moving_loss': epoch_train_loss, + results = {'client_id':client_id, 'moving_loss': epoch_train_loss, 'trained_size': completed_steps*conf.batch_size, 'success': completed_steps > 0} results['utility'] = math.sqrt(epoch_train_loss)*float(trained_unique_samples) if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}, is_malicious: {is_malicious}") + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}, is_malicious: {is_malicious}") else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {client_id}) failed as {error_type}") results['update_weight'] = model_param results['wall_duration'] = 0 diff --git a/examples/poisoning_setting/customized_executor.py b/examples/poisoning_setting/customized_executor.py index 15593403..800b1d0b 100644 --- a/examples/poisoning_setting/customized_executor.py +++ b/examples/poisoning_setting/customized_executor.py @@ -9,7 +9,7 @@ from fedscale.cloud.execution.executor import Executor -"""In this example, we only need to change the Client Component we need to import""" +"""In this example, we only need to change the TorchClient Component we need to import""" class Customized_Executor(Executor): """Each executor takes certain resource to run real training. diff --git a/examples/tensorflow_engine/tf_aggregator.py b/examples/tensorflow_engine/tf_aggregator.py deleted file mode 100644 index f3a2b64a..00000000 --- a/examples/tensorflow_engine/tf_aggregator.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import sys - -import tensorflow as tf - -import fedscale.cloud.config_parser as parser -from fedscale.cloud.aggregation.aggregator import Aggregator -from fedscale.cloud.logger.aggragation import * - - -class Customized_Aggregator(Aggregator): - """Feed aggregator using tensorflow models""" - def __init__(self, args): - super().__init__(args) - - def init_model(self): - """Load model""" - # CIFAR-10 as example - self.model = tf.keras.applications.resnet.ResNet50( - include_top=True, - weights=None, - input_tensor=None, - input_shape=[32, 32, 3], - pooling=None, - classes=10 - ) - # Initiate model parameters dictionary - self.model_weights = { - layer.name:[torch.from_numpy(p) for p in layer.get_weights()] for layer in self.model.layers - } - -if __name__ == "__main__": - aggregator = Customized_Aggregator(parser.args) - aggregator.run() \ No newline at end of file diff --git a/examples/tensorflow_engine/tf_client.py b/examples/tensorflow_engine/tf_client.py deleted file mode 100644 index 2556d8fb..00000000 --- a/examples/tensorflow_engine/tf_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import logging -import math -import os -import sys - -import numpy as np -import tensorflow as tf -import torch - -from fedscale.cloud.execution.client import Client - - -class Customized_Client(Client): - """Inherit default client to use tensorflow engine""" - def __init__(self, conf): - pass - - def train(self, client_data, model, conf): - - clientId = conf.clientId - logging.info(f"Start to train (CLIENT: {clientId}) ...") - train_len = len(client_data) - - def gen(): - while True: - for x, y in client_data: - # Convert torch tensor to tf tensor - nx, ny = tf.convert_to_tensor(x.swapaxes(1, 3).numpy()), tf.convert_to_tensor(y.numpy()) - yield nx, ny - - # Sample a batch to get tensor properties - temp_x, temp_y = next(gen()) - - tf_client_data = 
tf.data.Dataset.from_generator( - gen, - output_types=(temp_x.dtype, temp_y.dtype), - output_shapes=(temp_x.shape, temp_y.shape) - ) - - optimizer = tf.keras.optimizers.SGD(learning_rate=conf.learning_rate, momentum=0.9, - nesterov=False, name='SGD') - model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - history = model.fit(tf_client_data, epochs=1, steps_per_epoch=conf.local_steps, verbose=0) - - # Report the training results - results = {'clientId': clientId, - 'moving_loss': sum(history.history['loss'])/(len(history.history['loss'])+1e-4), - 'trained_size': conf.local_steps*train_len, 'success': True, 'utility': 1} - - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") - - - results['update_weight'] = {layer.name:layer.get_weights() for layer in model.layers} - results['wall_duration'] = 0 - - return results - - - def test(self, conf): - pass diff --git a/examples/tensorflow_engine/tf_executor.py b/examples/tensorflow_engine/tf_executor.py deleted file mode 100644 index 077b880d..00000000 --- a/examples/tensorflow_engine/tf_executor.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- - -import os -import sys - -import tensorflow as tf -from tf_client import Customized_Client - -import fedscale.cloud.config_parser as parser -from fedscale.cloud.execution.executor import Executor -from fedscale.cloud.logger.execution import * - -"""In this example, we only need to change the Client Component we need to import""" - -class Customized_Executor(Executor): - """Each executor takes certain resource to run real training. - Each run simulates the execution of an individual client""" - - def __init__(self, args): - super(Customized_Executor, self).__init__(args) - - def get_client_trainer(self, conf): - return Customized_Client(conf) - - def init_model(self): - """Return the model architecture used in training""" - model = tf.keras.applications.resnet.ResNet50( - include_top=True, - weights=None, - input_tensor=None, - input_shape=[32, 32, 3], - pooling=None, - classes=10 - ) - return model - - -if __name__ == "__main__": - executor = Customized_Executor(parser.args) - executor.run() - diff --git a/fedscale/cloud/aggregation/aggregator.py b/fedscale/cloud/aggregation/aggregator.py index 0534b530..635872ed 100755 --- a/fedscale/cloud/aggregation/aggregator.py +++ b/fedscale/cloud/aggregation/aggregator.py @@ -1,22 +1,29 @@ # -*- coding: utf-8 -*- - +import collections +import copy +import math import pickle +import random import threading +import time from concurrent import futures import grpc +import numpy as np import torch -from torch.utils.tensorboard import SummaryWriter import fedscale.cloud.channels.job_api_pb2_grpc as job_api_pb2_grpc -import fedscale.cloud.logger.aggragation as logger -import fedscale.cloud.config_parser as parser -from fedscale.cloud import commons +import fedscale.cloud.logger.aggregator_logging as logger +from fedscale.cloud.aggregation.optimizers import TorchServerOptimizer from fedscale.cloud.channels import job_api_pb2 +from fedscale.cloud.client_manager import ClientManager +from fedscale.cloud.internal.tensorflow_model_adapter import TensorflowModelAdapter +from fedscale.cloud.internal.torch_model_adapter import TorchModelAdapter from fedscale.cloud.resource_manager import ResourceManager from fedscale.cloud.fllibs import * +from torch.utils.tensorboard import SummaryWriter -MAX_MESSAGE_LENGTH = 1*1024*1024*1024 # 1GB +MAX_MESSAGE_LENGTH = 1 * 1024 * 1024 * 1024 # 1GB class 
Aggregator(job_api_pb2_grpc.JobServiceServicer): @@ -26,12 +33,12 @@ class Aggregator(job_api_pb2_grpc.JobServiceServicer): args (dictionary): Variable arguments for fedscale runtime config. defaults to the setup in arg_parser.py """ + def __init__(self, args): # init aggregator loger logger.initiate_aggregator_setting() logging.info(f"Job args {args}") - self.args = args self.experiment_mode = args.experiment_mode self.device = args.cuda_device if args.use_cuda else torch.device( @@ -45,16 +52,11 @@ def __init__(self, args): self.client_manager = self.init_client_manager(args=args) # ======== model and data ======== - self.model = None + self.model_wrapper = None self.model_in_update = 0 self.update_lock = threading.Lock() # all weights including bias/#_batch_tracked (e.g., state_dict) - self.model_weights = collections.OrderedDict() - self.last_gradient_weights = [] # only gradient variables - self.model_state_dict = None - # NOTE: if (e.g., model.parameters() in PyTorch), then False - # True, if (e.g., layer.get_weights() in Tensorflow) - self.using_group_params = self.args.engine == commons.TENSORFLOW + self.model_weights = None # ======== channels ======== self.connection_timeout = self.args.connection_timeout @@ -62,7 +64,7 @@ def __init__(self, args): self.grpc_server = None # ======== Event Queue ======= - self.individual_client_events = {} # Unicast + self.individual_client_events = {} # Unicast self.sever_events_queue = collections.deque() self.broadcast_events_queue = collections.deque() # Broadcast @@ -81,7 +83,6 @@ def __init__(self, args): self.model_update_size = 0. self.collate_fn = None - self.task = args.task self.round = 0 self.start_run_time = time.time() @@ -95,7 +96,8 @@ def __init__(self, args): self.registered_executor_info = set() self.test_result_accumulator = [] self.testing_history = {'data_set': args.data_set, 'model': args.model, 'sample_mode': args.sample_mode, - 'gradient_policy': args.gradient_policy, 'task': args.task, 'perf': collections.OrderedDict()} + 'gradient_policy': args.gradient_policy, 'task': args.task, + 'perf': collections.OrderedDict()} self.log_writer = SummaryWriter(log_dir=logger.logDir) # ======== Task specific ============ @@ -105,8 +107,6 @@ def setup_env(self): """Set up experiments environment and server optimizer """ self.setup_seed(seed=1) - self.optimizer = ServerOptimizer( - self.args.gradient_policy, self.args, self.device) def setup_seed(self, seed=1): """Set global random seed for better reproducibility @@ -160,14 +160,17 @@ def init_data_communication(self): pass def init_model(self): - """Load the model architecture - """ - assert self.args.engine == commons.PYTORCH, "Please define model for non-PyTorch models" - - self.model = init_model() - - # Initiate model parameters dictionary - self.model_weights = self.model.state_dict() + """Initialize the model""" + if self.args.engine == commons.TENSORFLOW: + self.model_wrapper = TensorflowModelAdapter(init_model()) + elif self.args.engine == commons.PYTORCH: + self.model_wrapper = TorchModelAdapter( + init_model(), + optimizer=TorchServerOptimizer( + self.args.gradient_policy, self.args, self.device)) + else: + raise ValueError(f"{self.args.engine} is not a supported engine.") + self.model_weights = self.model_wrapper.get_weights() def init_task_context(self): """Initiate execution context for specific tasks @@ -217,7 +220,7 @@ def load_client_profile(self, file_path): global_client_profile = {} if os.path.exists(file_path): with open(file_path, 'rb') as fin: - # {clientId: 
[computer, bandwidth]} + # {client_id: [computer, bandwidth]} global_client_profile = pickle.load(fin) return global_client_profile @@ -233,17 +236,17 @@ def client_register_handler(self, executorId, info): logging.info(f"Loading {len(info['size'])} client traces ...") for _size in info['size']: # since the worker rankId starts from 1, we also configure the initial dataId as 1 - mapped_id = (self.num_of_clients+1) % len( + mapped_id = (self.num_of_clients + 1) % len( self.client_profiles) if len(self.client_profiles) > 0 else 1 systemProfile = self.client_profiles.get( mapped_id, {'computation': 1.0, 'communication': 1.0}) - clientId = ( - self.num_of_clients+1) if self.experiment_mode == commons.SIMULATION_MODE else executorId + client_id = ( + self.num_of_clients + 1) if self.experiment_mode == commons.SIMULATION_MODE else executorId self.client_manager.register_client( - executorId, clientId, size=_size, speed=systemProfile) + executorId, client_id, size=_size, speed=systemProfile) self.client_manager.registerDuration( - clientId, + client_id, batch_size=self.args.batch_size, local_steps=self.args.local_steps, upload_size=self.model_update_size, @@ -264,7 +267,8 @@ def executor_info_handler(self, executorId, info): """ self.registered_executor_info.add(executorId) - logging.info(f"Received executor {executorId} information, {len(self.registered_executor_info)}/{len(self.executors)}") + logging.info( + f"Received executor {executorId} information, {len(self.registered_executor_info)}/{len(self.executors)}") # In this simulation, we run data split on each worker, so collecting info from one executor is enough # Waiting for data information from executors, or timeout @@ -303,11 +307,13 @@ def tictak_client_tasks(self, sampled_clients, num_clients_to_collect): client_cfg = self.client_conf.get(client_to_run, self.args) exe_cost = self.client_manager.get_completion_time(client_to_run, - batch_size=client_cfg.batch_size, local_steps=client_cfg.local_steps, - upload_size=self.model_update_size, download_size=self.model_update_size) + batch_size=client_cfg.batch_size, + local_steps=client_cfg.local_steps, + upload_size=self.model_update_size, + download_size=self.model_update_size) roundDuration = exe_cost['computation'] + \ - exe_cost['communication'] + exe_cost['communication'] # if the client is not active by the time of collection, we consider it is lost in this round if self.client_manager.isClientActive(client_to_run, roundDuration + self.global_virtual_clock): sampledClientsReal.append(client_to_run) @@ -317,13 +323,13 @@ def tictak_client_tasks(self, sampled_clients, num_clients_to_collect): num_clients_to_collect = min( num_clients_to_collect, len(completionTimes)) # 2. get the top-k completions to remove stragglers - sortedWorkersByCompletion = sorted( + workers_sorted_by_completion_time = sorted( range(len(completionTimes)), key=lambda k: completionTimes[k]) - top_k_index = sortedWorkersByCompletion[:num_clients_to_collect] + top_k_index = workers_sorted_by_completion_time[:num_clients_to_collect] clients_to_run = [sampledClientsReal[k] for k in top_k_index] dummy_clients = [sampledClientsReal[k] - for k in sortedWorkersByCompletion[num_clients_to_collect:]] + for k in workers_sorted_by_completion_time[num_clients_to_collect:]] round_duration = completionTimes[top_k_index[-1]] completionTimes.sort() @@ -346,9 +352,8 @@ def run(self): self.init_data_communication() self.init_model() - self.save_last_param() self.model_update_size = sys.getsizeof( - pickle.dumps(self.model))/1024.0*8. 
# kbits + pickle.dumps(self.model_wrapper)) / 1024.0 * 8. # kbits self.client_profiles = self.load_client_profile( file_path=self.args.device_conf_file) @@ -366,7 +371,7 @@ def select_participants(self, select_num_participants, overcommitment=1.3): """ return sorted(self.client_manager.select_participants( - int(select_num_participants*overcommitment), + int(select_num_participants * overcommitment), cur_time=self.global_virtual_clock), ) @@ -379,7 +384,7 @@ def client_completion_handler(self, results): """ # Format: - # -results = {'clientId':clientId, 'update_weight': model_param, 'moving_loss': round_train_loss, + # -results = {'client_id':client_id, 'update_weight': model_param, 'moving_loss': round_train_loss, # 'trained_size': count, 'wall_duration': time_cost, 'success': is_success 'utility': utility} if self.args.gradient_policy in ['q-fedavg']: @@ -388,124 +393,67 @@ def client_completion_handler(self, results): self.stats_util_accumulator.append(results['utility']) self.loss_accumulator.append(results['moving_loss']) - self.client_manager.register_feedback(results['clientId'], results['utility'], - auxi=math.sqrt( - results['moving_loss']), - time_stamp=self.round, - duration=self.virtual_client_clock[results['clientId']]['computation'] + - self.virtual_client_clock[results['clientId']]['communication'] - ) + self.client_manager.register_feedback(results['client_id'], results['utility'], + auxi=math.sqrt( + results['moving_loss']), + time_stamp=self.round, + duration=self.virtual_client_clock[results['client_id']]['computation'] + + self.virtual_client_clock[results['client_id']]['communication'] + ) # ================== Aggregate weights ====================== self.update_lock.acquire() self.model_in_update += 1 - if self.using_group_params == True: - self.aggregate_client_group_weights(results) - else: - self.aggregate_client_weights(results) + self.update_weight_aggregation(results['update_weight']) self.update_lock.release() - def aggregate_client_weights(self, results): - """May aggregate client updates on the fly - - Args: - results (dictionary): client's training result - - [FedAvg] "Communication-Efficient Learning of Deep Networks from Decentralized Data". - H. Brendan McMahan, Eider Moore, Daniel Ramage, Seth Hampson, Blaise Aguera y Arcas. AISTATS, 2017 - """ - # Start to take the average of updates, and we do not keep updates to save memory - # Importance of each update is 1/#_of_participants - # importance = 1./self.tasks_round - - for p in results['update_weight']: - param_weight = results['update_weight'][p] - if isinstance(param_weight, list): - param_weight = np.asarray(param_weight, dtype=np.float32) - param_weight = torch.from_numpy( - param_weight).to(device=self.device) - - if self.model_in_update == 1: - self.model_weights[p].data = param_weight - else: - self.model_weights[p].data += param_weight - - if self.model_in_update == self.tasks_round: - for p in self.model_weights: - d_type = self.model_weights[p].data.dtype - - self.model_weights[p].data = ( - self.model_weights[p]/float(self.tasks_round)).to(dtype=d_type) - - def aggregate_client_group_weights(self, results): - """Streaming weight aggregation. 
Similar to aggregate_client_weights, - but each key corresponds to a group of weights (e.g., for Tensorflow) - - Args: - results (dictionary): Client's training result - - """ - for p_g in results['update_weight']: - param_weights = results['update_weight'][p_g] - for idx, param_weight in enumerate(param_weights): - if isinstance(param_weight, list): - param_weight = np.asarray(param_weight, dtype=np.float32) - param_weight = torch.from_numpy( - param_weight).to(device=self.device) - - if self.model_in_update == 1: - self.model_weights[p_g][idx].data = param_weight - else: - self.model_weights[p_g][idx].data += param_weight - - if self.model_in_update == self.tasks_round: - for p in self.model_weights: - for idx in range(len(self.model_weights[p])): - d_type = self.model_weights[p][idx].data.dtype - - self.model_weights[p][idx].data = ( - self.model_weights[p][idx].data/float(self.tasks_round) - ).to(dtype=d_type) - - def save_last_param(self): - """ Save the last model parameters - """ - if self.args.engine == commons.TENSORFLOW: - self.last_gradient_weights = [ - layer.get_weights() for layer in self.model.layers] - self.model_weights = copy.deepcopy(self.model.state_dict()) + def update_weight_aggregation(self, update_weights): + if type(update_weights) is dict: + update_weights = [x for x in update_weights.values()] + if self.model_in_update == 1: + self.model_weights = update_weights else: - self.last_gradient_weights = [ - p.data.clone() for p in self.model.parameters()] - self.model_weights = copy.deepcopy(self.model.state_dict()) + self.model_weights = [weight + update_weights[i] for i, weight in enumerate(self.model_weights)] + if self.model_in_update == self.tasks_round: + self.model_weights = [np.divide(weight, self.tasks_round) for weight in self.model_weights] + self.model_wrapper.set_weights(copy.deepcopy(self.model_weights)) + + def aggregate_test_result(self): + accumulator = self.test_result_accumulator[0] + for i in range(1, len(self.test_result_accumulator)): + if self.args.task == "detection": + for key in accumulator: + if key == "boxes": + for j in range(596): + accumulator[key][j] = accumulator[key][j] + \ + self.test_result_accumulator[i][key][j] + else: + accumulator[key] += self.test_result_accumulator[i][key] + else: + for key in accumulator: + accumulator[key] += self.test_result_accumulator[i][key] + self.testing_history['perf'][self.round] = {'round': self.round, 'clock': self.global_virtual_clock} + for metric_name in accumulator.keys(): + if metric_name == 'test_loss': + self.testing_history['perf'][self.round]['loss'] = accumulator['test_loss'] \ + if self.args.task == "detection" else accumulator['test_loss'] / accumulator['test_len'] + elif metric_name not in ['test_len']: + self.testing_history['perf'][self.round][metric_name] \ + = accumulator[metric_name] / accumulator['test_len'] + + round_perf = self.testing_history['perf'][self.round] + logging.info( + "FL Testing in round: {}, virtual_clock: {}, results: {}" + .format(self.round, self.global_virtual_clock, round_perf)) def update_default_task_config(self): """Update the default task configuration after each round """ if self.round % self.args.decay_round == 0: self.args.learning_rate = max( - self.args.learning_rate*self.args.decay_factor, self.args.min_learning_rate) - - def round_weight_handler(self, last_model): - """Update model when the round completes - - Args: - last_model (list): A list of global model weight in last round. 
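The new `update_weight_aggregation` above replaces both per-parameter paths with a single running sum over incoming updates, dividing once the last update of the round arrives. A self-contained sketch of that incremental FedAvg, assuming each client update arrives as a list of numpy arrays (the convention the model adapters use):

```python
import numpy as np

# Standalone sketch of the running-average aggregation performed by
# update_weight_aggregation; `updates` stands in for the per-client
# 'update_weight' payloads (one np.ndarray per model tensor).
def fed_avg(updates):
    aggregate = [np.zeros_like(w) for w in updates[0]]
    for client_weights in updates:        # arrives one client at a time
        for i, w in enumerate(client_weights):
            aggregate[i] += w             # running sum, no history kept
    return [w / len(updates) for w in aggregate]

clients = [[np.ones((2, 2)), np.zeros(3)],
           [3 * np.ones((2, 2)), np.ones(3)]]
avg = fed_avg(clients)  # -> [2s in a 2x2 array, then [0.5, 0.5, 0.5]]
```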
- - """ - if self.round > 1: - if self.args.engine == commons.TENSORFLOW: - for layer in self.model.layers: - layer.set_weights([p.cpu().detach().numpy() - for p in self.model_weights[layer.name]]) - else: - self.model.load_state_dict(self.model_weights) - current_grad_weights = [param.data.clone() - for param in self.model.parameters()] - self.optimizer.update_round_gradient( - last_model, current_grad_weights, self.model) + self.args.learning_rate * self.args.decay_factor, self.args.min_learning_rate) def round_completion_handler(self): """Triggered upon the round completion, it registers the last round execution info, @@ -513,22 +461,18 @@ def round_completion_handler(self): """ self.global_virtual_clock += self.round_duration self.round += 1 - - # handle the global update w/ current and last - self.round_weight_handler(self.last_gradient_weights) - - avgUtilLastround = sum(self.stats_util_accumulator) / \ - max(1, len(self.stats_util_accumulator)) + last_round_avg_util = sum(self.stats_util_accumulator) / \ + max(1, len(self.stats_util_accumulator)) # assign avg reward to explored, but not ran workers - for clientId in self.round_stragglers: - self.client_manager.register_feedback(clientId, avgUtilLastround, - time_stamp=self.round, - duration=self.virtual_client_clock[clientId]['computation'] + - self.virtual_client_clock[clientId]['communication'], - success=False) + for client_id in self.round_stragglers: + self.client_manager.register_feedback(client_id, last_round_avg_util, + time_stamp=self.round, + duration=self.virtual_client_clock[client_id]['computation'] + + self.virtual_client_clock[client_id]['communication'], + success=False) avg_loss = sum(self.loss_accumulator) / \ - max(1, len(self.loss_accumulator)) + max(1, len(self.loss_accumulator)) logging.info(f"Wall clock: {round(self.global_virtual_clock)} s, round: {self.round}, Planned participants: " + f"{len(self.sampled_participants)}, Succeed participants: {len(self.stats_util_accumulator)}, Training loss: {avg_loss}") @@ -539,7 +483,8 @@ def round_completion_handler(self): # update select participants self.sampled_participants = self.select_participants( select_num_participants=self.args.num_participants, overcommitment=self.args.overcommitment) - (clientsToRun, round_stragglers, virtual_client_clock, round_duration, flatten_client_duration) = self.tictak_client_tasks( + (clientsToRun, round_stragglers, virtual_client_clock, round_duration, + flatten_client_duration) = self.tictak_client_tasks( self.sampled_participants, self.args.num_participants) logging.info(f"Selected participants to run: {clientsToRun}") @@ -555,11 +500,9 @@ def round_completion_handler(self): else: self.sampled_executors = [str(c_id) for c_id in self.sampled_participants] - - self.save_last_param() self.round_stragglers = round_stragglers self.virtual_client_clock = virtual_client_clock - self.flatten_client_duration = numpy.array(flatten_client_duration) + self.flatten_client_duration = np.array(flatten_client_duration) self.round_duration = round_duration self.model_in_update = 0 self.test_result_accumulator = [] @@ -570,7 +513,7 @@ def round_completion_handler(self): if self.round >= self.args.rounds: self.broadcast_aggregator_events(commons.SHUT_DOWN) - elif self.round % self.args.eval_interval == 0: + elif self.round % self.args.eval_interval == 0 or self.round == 1: self.broadcast_aggregator_events(commons.UPDATE_MODEL) self.broadcast_aggregator_events(commons.MODEL_TEST) else: @@ -582,9 +525,9 @@ def log_train_result(self, avg_loss): """ 
self.log_writer.add_scalar('Train/round_to_loss', avg_loss, self.round) self.log_writer.add_scalar( - 'FAR/time_to_train_loss (min)', avg_loss, self.global_virtual_clock/60.) + 'FAR/time_to_train_loss (min)', avg_loss, self.global_virtual_clock / 60.) self.log_writer.add_scalar( - 'FAR/round_duration (min)', self.round_duration/60., self.round) + 'FAR/round_duration (min)', self.round_duration / 60., self.round) self.log_writer.add_histogram( 'FAR/client_duration (min)', self.flatten_client_duration, self.round) @@ -596,9 +539,9 @@ def log_test_result(self): self.log_writer.add_scalar( 'Test/round_to_accuracy', self.testing_history['perf'][self.round]['top_1'], self.round) self.log_writer.add_scalar('FAR/time_to_test_loss (min)', self.testing_history['perf'][self.round]['loss'], - self.global_virtual_clock/60.) + self.global_virtual_clock / 60.) self.log_writer.add_scalar('FAR/time_to_test_accuracy (min)', self.testing_history['perf'][self.round]['top_1'], - self.global_virtual_clock/60.) + self.global_virtual_clock / 60.) def deserialize_response(self, responses): """Deserialize the response from executor @@ -641,9 +584,7 @@ def testing_completion_handler(self, client_id, results): if len(self.test_result_accumulator) == len(self.executors): - logger.aggregate_test_result( - self.test_result_accumulator, self.args.task, \ - self.round, self.global_virtual_clock, self.testing_history) + self.aggregate_test_result() # Dump the testing result with open(os.path.join(logger.logDir, 'testing_perf'), 'wb') as fout: pickle.dump(self.testing_history, fout) @@ -677,15 +618,15 @@ def dispatch_client_events(self, event, clients=None): for client_id in clients: self.individual_client_events[client_id].append(event) - def get_client_conf(self, clientId): + def get_client_conf(self, client_id): """Training configurations that will be applied on clients, developers can further define personalized client config here. Args: - clientId (int): The client id. + client_id (int): The client id. Returns: - dictionary: Client training config. + dictionary: TorchClient training config. """ conf = { @@ -703,14 +644,13 @@ def create_client_task(self, executorId): tuple: Training config for new task. (dictionary, PyTorch or TensorFlow module) """ - next_clientId = self.resource_manager.get_next_task(executorId) + next_client_id = self.resource_manager.get_next_task(executorId) train_config = None # NOTE: model = None then the executor will load the global model broadcasted in UPDATE_MODEL - model = None - if next_clientId != None: - config = self.get_client_conf(next_clientId) - train_config = {'client_id': next_clientId, 'task_config': config} - return train_config, model + if next_client_id != None: + config = self.get_client_conf(next_client_id) + train_config = {'client_id': next_client_id, 'task_config': config} + return train_config, self.model_wrapper.get_weights() def get_test_config(self, client_id): """FL model testing on clients, developers can further define personalized client config here. @@ -724,20 +664,11 @@ def get_test_config(self, client_id): """ return {'client_id': client_id} - def get_global_model(self): - """Get global model that would be used by all FL clients (in default FL) - - Returns: - PyTorch or TensorFlow module: Based on the executor's machine learning framework, initialize and return the model for training. - - """ - return self.model - def get_shutdown_config(self, client_id): """Shutdown config for client, developers can further define personalized client config here. 
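Around these handlers the control flow is ping-driven: each executor polls `CLIENT_PING`, and the aggregator replies with an event plus pickled meta/data, where `UPDATE_MODEL` now carries raw weights rather than a pickled model object (see the `CLIENT_PING` hunk below). A schematic, heavily condensed sketch of the executor-side dispatch under those assumptions; `executor` is an `Executor` and `response` a `job_api_pb2.ServerResponse`:

```python
import pickle

from fedscale.cloud import commons

# Schematic sketch of the executor side of the ping/dispatch protocol,
# condensed from Executor.event_monitor; not the literal wire code.
def dispatch(executor, response):
    event = response.event
    if event == commons.UPDATE_MODEL:
        # The aggregator now ships plain weights, not a pickled model.
        executor.UpdateModel(pickle.loads(response.data))
    elif event == commons.MODEL_TEST:
        executor.Test(pickle.loads(response.meta))
    elif event == commons.CLIENT_TRAIN:
        config = pickle.loads(response.meta)      # {'client_id', 'task_config'}
        config['model'] = pickle.loads(response.data)  # current round weights
        executor.Train(config)
    elif event == commons.SHUT_DOWN:
        executor.Stop()
```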
Args: client_id (int): Client id. Returns: dictionary: Shutdown config for new task. @@ -758,7 +689,7 @@ def add_event_handler(self, client_id, event, meta, data): self.sever_events_queue.append((client_id, event, meta, data)) def CLIENT_REGISTER(self, request, context): """FL Client register to the aggregator Args: request (RegisterRequest): Registeration request info from executor. @@ -813,11 +744,11 @@ def CLIENT_PING(self, request, context): current_event = commons.DUMMY_EVENT if self.experiment_mode != commons.SIMULATION_MODE: self.individual_client_events[executor_id].append( - commons.CLIENT_TRAIN) + commons.CLIENT_TRAIN) elif current_event == commons.MODEL_TEST: response_msg = self.get_test_config(client_id) elif current_event == commons.UPDATE_MODEL: - response_data = self.get_global_model() + response_data = self.model_wrapper.get_weights() elif current_event == commons.SHUT_DOWN: response_msg = self.get_shutdown_config(executor_id) @@ -825,7 +756,7 @@ response_msg), self.serialize_response(response_data) # NOTE: in simulation mode, response data is pickle for faster (de)serialization response = job_api_pb2.ServerResponse(event=current_event, - meta=response_msg, data=response_data) + meta=response_msg, data=response_data) if current_event != commons.DUMMY_EVENT: logging.info(f"Issue EVENT ({current_event}) to EXECUTOR ({executor_id})") @@ -852,7 +783,7 @@ def CLIENT_EXECUTE_COMPLETION(self, request, context): if execution_status is False: logging.error(f"Executor {executor_id} fails to run client {client_id}, due to {execution_msg}") - # TODO: whether we should schedule tasks when client_ping or client_complete + # TODO: whether we should schedule tasks when client_ping or client_complete if self.resource_manager.has_next_task(executor_id): # NOTE: we do not pop the train immediately in simulation mode, # since the executor may run multiple clients diff --git a/fedscale/cloud/aggregation/optimizers.py b/fedscale/cloud/aggregation/optimizers.py index 796c7a16..47580e2c 100644 --- a/fedscale/cloud/aggregation/optimizers.py +++ b/fedscale/cloud/aggregation/optimizers.py @@ -1,4 +1,4 @@ -class ServerOptimizer(object): +class TorchServerOptimizer(object): """This is a abstract server optimizer class Args: diff --git a/fedscale/cloud/channels/job_api_pb2_grpc.py b/fedscale/cloud/channels/job_api_pb2_grpc.py index b1a45fad..9f56cd0b 100644 --- a/fedscale/cloud/channels/job_api_pb2_grpc.py +++ b/fedscale/cloud/channels/job_api_pb2_grpc.py @@ -1,5 +1,5 @@ # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
"""Client and server classes corresponding to protobuf-defined services.""" import grpc import fedscale.cloud.channels.job_api_pb2 as job__api__pb2 diff --git a/fedscale/cloud/commons.py b/fedscale/cloud/commons.py index 89ec778e..3bb159ee 100644 --- a/fedscale/cloud/commons.py +++ b/fedscale/cloud/commons.py @@ -1,9 +1,8 @@ - # Define Basic Experiment Setup +from enum import Enum + SIMULATION_MODE = 'simulation' DEPLOYMENT_MODE = 'deployment' -TENSORFLOW = 'tensorflow' -PYTORCH = 'pytorch' # Define Basic FL Events UPDATE_MODEL = 'update_model' @@ -17,3 +16,7 @@ # PLACEHOLD DUMMY_RESPONSE = 'N' + + +TENSORFLOW = 'tensorflow' +PYTORCH = 'pytorch' diff --git a/fedscale/cloud/config_parser.py b/fedscale/cloud/config_parser.py index e0188e84..d5cf1476 100644 --- a/fedscale/cloud/config_parser.py +++ b/fedscale/cloud/config_parser.py @@ -9,7 +9,7 @@ # The basic configuration of the cluster parser.add_argument('--ps_ip', type=str, default='127.0.0.1') -parser.add_argument('--ps_port', type=str, default='29501') +parser.add_argument('--ps_port', type=str, default='29500') parser.add_argument('--this_rank', type=int, default=1) parser.add_argument('--connection_timeout', type=int, default=60) parser.add_argument('--experiment_mode', type=str, @@ -31,7 +31,8 @@ # The configuration of model and dataset parser.add_argument('--model_zoo', type=str, default='torchcv', - help="model zoo to load the models from", choices=["torchcv", "fedscale-zoo"]) + help="model zoo to load the models from", choices=["torchcv", "fedscale-torch-zoo", + "fedscale-tensorflow-zoo"]) parser.add_argument('--data_dir', type=str, default='~/cifar10/') parser.add_argument('--device_conf_file', type=str, default='/tmp/client.cfg') parser.add_argument('--model', type=str, default='shufflenet_v2_x2_0') @@ -50,7 +51,7 @@ parser.add_argument('--blacklist_max_len', type=float, default=0.3) parser.add_argument('--embedding_file', type=str, default='glove.840B.300d.txt') -parser.add_argument('--input_shape', type=tuple, default=(1, 3, 28, 28)) +parser.add_argument('--input_shape', type=int, nargs='+', default=[1, 3, 28, 28]) # The configuration of different hyper-parameters for training diff --git a/fedscale/cloud/execution/client_base.py b/fedscale/cloud/execution/client_base.py new file mode 100644 index 00000000..5163a7cc --- /dev/null +++ b/fedscale/cloud/execution/client_base.py @@ -0,0 +1,40 @@ +import abc + +from fedscale.cloud.internal.model_adapter_base import ModelAdapterBase + + +class ClientBase(abc.ABC): + """ + Represents a framework-agnostic FL client that can perform training and evaluation. + """ + + @abc.abstractmethod + def train(self, client_data, model, conf): + """ + Perform a training task. + :param client_data: client training dataset + :param model: the framework-specific model + :param conf: job config + :return: training results + """ + pass + + @abc.abstractmethod + def test(self, client_data, model, conf): + """ + Perform a testing task. + :param client_data: client evaluation dataset + :param model: the framework-specific model + :param conf: job config + :return: testing results + """ + pass + + @abc.abstractmethod + def get_model_adapter(self, model) -> ModelAdapterBase: + """ + Return framework-specific model adapter. 
+ :param model: the model + :return: a model adapter containing the model + """ + pass diff --git a/fedscale/cloud/execution/data_processor.py b/fedscale/cloud/execution/data_processor.py index 45532930..a280cd12 100644 --- a/fedscale/cloud/execution/data_processor.py +++ b/fedscale/cloud/execution/data_processor.py @@ -1,5 +1,4 @@ -import os - +import torch from torch.nn.utils.rnn import pad_sequence from fedscale.cloud.fllibs import * @@ -14,9 +13,6 @@ def collate(examples): def voice_collate_fn(batch): def func(p): return p[0].size(1) - - start_time = time.time() - batch = sorted(batch, key=lambda sample: sample[0].size(1), reverse=True) longest_sample = max(batch, key=func)[0] freq_size = longest_sample.size(0) @@ -36,7 +32,4 @@ def func(p): target_sizes[x] = len(target) targets.extend(target) targets = torch.IntTensor(targets) - - end_time = time.time() - return (inputs, targets, input_percentages, target_sizes), None diff --git a/fedscale/cloud/execution/executor.py b/fedscale/cloud/execution/executor.py index bbdedaed..f63dd397 100755 --- a/fedscale/cloud/execution/executor.py +++ b/fedscale/cloud/execution/executor.py @@ -2,19 +2,22 @@ import collections import gc import pickle +import random +import time from argparse import Namespace +import numpy as np import torch import fedscale.cloud.channels.job_api_pb2 as job_api_pb2 -import fedscale.cloud.logger.execution as logger -import fedscale.cloud.config_parser as parser -from fedscale.cloud import commons +import fedscale.cloud.logger.executor_logging as logger from fedscale.cloud.channels.channel_context import ClientConnections -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.tensorflow_client import TensorflowClient +from fedscale.cloud.execution.torch_client import TorchClient from fedscale.cloud.execution.data_processor import collate, voice_collate_fn -from fedscale.cloud.execution.rlclient import RLClient +from fedscale.cloud.execution.rl_client import RLClient from fedscale.cloud.fllibs import * +from fedscale.dataloaders.divide_data import DataPartitioner, select_dataset class Executor(object): @@ -24,13 +27,14 @@ class Executor(object): args (dictionary): Variable arguments for fedscale runtime config. defaults to the setup in arg_parser.py """ + def __init__(self, args): # initiate the executor log path, and executor ips logger.initiate_client_setting() + self.model_adapter = self.get_client_trainer(args).get_model_adapter(init_model()) + self.args = args - self.device = args.cuda_device if args.use_cuda else torch.device( - 'cpu') self.num_executors = args.num_executors # ======== env information ======== self.this_rank = args.this_rank @@ -38,8 +42,6 @@ def __init__(self, args): # ======== model and data ======== self.training_sets = self.test_dataset = None - self.temp_model_path = os.path.join( - logger.logDir, 'model_'+str(args.this_rank)+'.pth.tar') # ======== channels ======== self.aggregator_communicator = ClientConnections( @@ -47,7 +49,6 @@ def __init__(self, args): # ======== runtime information ======== self.collate_fn = None - self.task = args.task self.round = 0 self.start_run_time = time.time() self.received_stop_request = False @@ -75,10 +76,10 @@ def setup_seed(self, seed=1): """ torch.manual_seed(seed) + torch.backends.cudnn.deterministic = True torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) - torch.backends.cudnn.deterministic = True def init_control_communication(self): """Create communication channel between coordinator and executor. 
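`ClientBase` pins down a framework-agnostic contract, and `get_model_adapter` pairs each client with an adapter whose `get_weights`/`set_weights`/`get_model` calls the Executor and Aggregator rely on. A toy illustration of that contract follows; the real `TorchModelAdapter` also wraps a server optimizer, and the list-of-numpy-arrays convention is inferred from how the aggregator averages weights:

```python
import numpy as np
import torch

# Toy illustration of the adapter contract used by ClientBase/Executor:
# get_weights()/set_weights() move framework weights in and out as plain
# numpy arrays, while get_model() exposes the native framework object.
class ToyTorchAdapter:
    def __init__(self, model: torch.nn.Module):
        self.model = model

    def get_weights(self):
        return [p.cpu().numpy() for p in self.model.state_dict().values()]

    def set_weights(self, weights):
        state = {k: torch.from_numpy(np.asarray(w))
                 for k, w in zip(self.model.state_dict().keys(), weights)}
        self.model.load_state_dict(state)

    def get_model(self):
        return self.model


adapter = ToyTorchAdapter(torch.nn.Linear(4, 2))
round_trip = adapter.get_weights()
adapter.set_weights(round_trip)  # lossless round trip
```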
@@ -91,18 +92,6 @@ def init_data_communication(self): """ pass - def init_model(self): - """Get the model architecture used in training - - Returns: - PyTorch or TensorFlow module: Based on the executor's machine learning framework, initialize and return the model for training - - """ - assert self.args.engine == commons.PYTORCH, "Please override this function to define non-PyTorch models" - model = init_model() - model = model.to(device=self.device) - return model - def init_data(self): """Return the training and testing dataset @@ -111,9 +100,13 @@ def init_data(self): """ train_dataset, test_dataset = init_dataset() - if self.task == "rl": + if self.args.task == "rl": return train_dataset, test_dataset - # load data partitioner (entire_train_data) + if self.args.task == 'nlp': + self.collate_fn = collate + elif self.args.task == 'voice': + self.collate_fn = voice_collate_fn + # load data partitioner (entire_train_data) logging.info("Data partitioner starts ...") training_sets = DataPartitioner( @@ -127,11 +120,6 @@ def init_data(self): logging.info("Data partitioner completes ...") - if self.task == 'nlp': - self.collate_fn = collate - elif self.task == 'voice': - self.collate_fn = voice_collate_fn - return training_sets, testing_sets def run(self): @@ -144,10 +132,10 @@ def run(self): def dispatch_worker_events(self, request): """Add new events to worker queues - + Args: request (string): Add grpc request from server (e.g. MODEL_TEST, MODEL_TRAIN) to event_queue. - + """ self.event_queue.append(request) @@ -159,7 +147,7 @@ def deserialize_response(self, responses): Returns: ServerResponse defined at job_api.proto: The deserialized response object from server. - + """ return pickle.loads(responses) @@ -167,22 +155,23 @@ def serialize_response(self, responses): """Serialize the response to send to server upon assigned job completion Args: responses (string, bool, or bytes): Client responses after job completion. Returns: bytes stream: The serialized response object to server. - + """ return pickle.dumps(responses) - def UpdateModel(self, config): + def UpdateModel(self, model_weights): """Receive the broadcasted global model for current round Args: - config (PyTorch or TensorFlow model): The broadcasted global model config + model_weights (list): The broadcasted global model weights """ - self.update_model_handler(model=config) + self.round += 1 + self.model_adapter.set_weights(model_weights) def Train(self, config): """Load train config and data to start training on that client Args: config (dictionary): The client training config. - Returns: + Returns: tuple (int, dictionary): The client id and train result """ client_id, train_config = config['client_id'], config['task_config'] - model = None - if 'model' in config and config['model'] is not None: - model = config['model'] - + if 'model' not in config or not config['model']: + raise ValueError("The 'model' object must be a non-null value in the training config.") client_conf = self.override_conf(train_config) train_res = self.training_handler( - clientId=client_id, conf=client_conf, model=model) + client_id=client_id, conf=client_conf, model=config['model']) # Report execution completion meta information response = self.aggregator_communicator.stub.CLIENT_EXECUTE_COMPLETION( @@ -218,12 +205,12 @@ def Test(self, config): """Model Testing. 
By default, we test the accuracy on all data of clients in the test group Args: config (dictionary): The client testing config. - """ - test_res = self.testing_handler(args=self.args, config=config) + test_res = self.testing_handler() test_res = {'executorId': self.this_rank, 'results': test_res} # Report execution completion information @@ -251,30 +238,6 @@ def report_executor_info_handler(self): """ return self.training_sets.getSize() - def update_model_handler(self, model): - """Update the model copy on this executor - - Args: - config (PyTorch or TensorFlow model): The broadcasted global model - - """ - self.round += 1 - - # Dump latest model to disk - with open(self.temp_model_path, 'wb') as model_out: - pickle.dump(model, model_out) - - def load_global_model(self): - """ Load last global model - - Returns: - PyTorch or TensorFlow model: The lastest global model - - """ - with open(self.temp_model_path, 'rb') as model_in: - model = pickle.load(model_in) - return model - def override_conf(self, config): """ Override the variable arguments for different client @@ -293,53 +256,48 @@ return Namespace(**default_conf) def get_client_trainer(self, conf): - """A abstract base class for client with training handler, developer can redefine to this function to customize the client training: - - Args: - config (dictionary): The client runtime config. - - Returns: - Client: A abstract base client class with runtime config conf. - """ - return Client(conf) """ + Returns a framework-specific client that handles training and evaluation. + :param conf: job config + :return: framework-specific client instance """ + if conf.engine == commons.TENSORFLOW: + return TensorflowClient(conf) + elif conf.engine == commons.PYTORCH: + if conf.task == 'rl': + return RLClient(conf) + else: + return TorchClient(conf) + raise ValueError("Currently, FedScale supports tensorflow and pytorch.") - def training_handler(self, clientId, conf, model=None): + def training_handler(self, client_id, conf, model): """Train model given client id - Args: - clientId (int): The client id. + client_id (int): The client id. conf (dictionary): The client runtime config. Returns: dictionary: The train result - """ - # load last global model - client_model = self.load_global_model() if model is None else model - conf.clientId, conf.device = clientId, self.device + """ + self.model_adapter.set_weights(model) + conf.client_id = client_id conf.tokenizer = tokenizer - if self.args.task == "rl": - client_data = self.training_sets - client = RLClient(conf) - train_res = client.train( - client_data=client_data, model=client_model, conf=conf) - else: - client_data = select_dataset(clientId, self.training_sets, - batch_size=conf.batch_size, args=self.args, - collate_fn=self.collate_fn - ) - - client = self.get_client_trainer(conf) - train_res = client.train( - client_data=client_data, model=client_model, conf=conf) + client_data = self.training_sets if self.args.task == "rl" else \ select_dataset(client_id, self.training_sets, batch_size=conf.batch_size, args=self.args, collate_fn=self.collate_fn ) + client = self.get_client_trainer(self.args) + train_res = client.train( client_data=client_data, model=self.model_adapter.get_model(), conf=conf) return train_res - def testing_handler(self, args, config=None): + def testing_handler(self): """Test model - Args: args (dictionary): Variable arguments for fedscale runtime config. 
defaults to the setup in arg_parser.py config (dictionary): Variable arguments from coordinator. @@ -347,37 +305,21 @@ def testing_handler(self, args, config=None): dictionary: The test result """ - evalStart = time.time() - device = self.device - model = self.load_global_model() - if self.task == 'rl': - client = RLClient(args) - test_res = client.test(args, self.this_rank, model, device=device) - _, _, _, testResults = test_res - else: - data_loader = select_dataset(self.this_rank, self.testing_sets, - batch_size=args.test_bsz, args=args, - isTest=True, collate_fn=self.collate_fn - ) - - if self.task == 'voice': - criterion = CTCLoss(reduction='mean').to(device=device) - else: - criterion = torch.nn.CrossEntropyLoss().to(device=device) - - if self.args.engine == commons.PYTORCH: - test_res = test_model(self.this_rank, model, data_loader, - device=device, criterion=criterion, tokenizer=tokenizer) - else: - raise Exception(f"Need customized implementation for model testing in {self.args.engine} engine") - - test_loss, acc, acc_5, testResults = test_res - logging.info("After aggregation round {}, CumulTime {}, eval_time {}, test_loss {}, test_accuracy {:.2f}%, test_5_accuracy {:.2f}% \n" - .format(self.round, round(time.time() - self.start_run_time, 4), round(time.time() - evalStart, 4), test_loss, acc*100., acc_5*100.)) + test_config = self.override_conf({ + 'rank': self.this_rank, + 'memory_capacity': self.args.memory_capacity, + 'tokenizer': tokenizer + }) + client = self.get_client_trainer(test_config) + data_loader = select_dataset(self.this_rank, self.testing_sets, + batch_size=self.args.test_bsz, args=self.args, + isTest=True, collate_fn=self.collate_fn) + + test_results = client.test(data_loader, self.model_adapter.get_model(), test_config) gc.collect() - return testResults + return test_results def client_register(self): """Register the executor information to the aggregator @@ -414,7 +356,7 @@ def event_monitor(self): logging.info("Start monitoring events ...") self.client_register() - while self.received_stop_request == False: + while not self.received_stop_request: if len(self.event_queue) > 0: request = self.event_queue.popleft() current_event = request.event @@ -438,8 +380,8 @@ def event_monitor(self): self.Test(self.deserialize_response(request.meta)) elif current_event == commons.UPDATE_MODEL: - broadcast_config = self.deserialize_response(request.data) - self.UpdateModel(broadcast_config) + model_weights = self.deserialize_response(request.data) + self.UpdateModel(model_weights) elif current_event == commons.SHUT_DOWN: self.Stop() diff --git a/fedscale/cloud/execution/rlclient.py b/fedscale/cloud/execution/rl_client.py similarity index 85% rename from fedscale/cloud/execution/rlclient.py rename to fedscale/cloud/execution/rl_client.py index cfc9755a..16e43867 100644 --- a/fedscale/cloud/execution/rlclient.py +++ b/fedscale/cloud/execution/rl_client.py @@ -1,7 +1,7 @@ import logging import math -from fedscale.cloud.execution.client import Client +from fedscale.cloud.execution.torch_client import TorchClient from fedscale.cloud.execution.optimizers import ClientOptimizer import fedscale.cloud.config_parser as parser @@ -9,7 +9,7 @@ from fedscale.dataloaders.dqn import * -class RLClient(Client): +class RLClient(TorchClient): """Basic client component in Federated Learning""" def __init__(self, conf): @@ -19,9 +19,9 @@ def __init__(self, conf): def train(self, client_data, model, conf): - clientId = conf.clientId - logging.info(f"Start to train (CLIENT: {clientId}) ...") - 
device = conf.device + client_id = conf.client_id + logging.info(f"Start to train (CLIENT: {client_id}) ...") + device = self.device model = model.to(device=device) # self.dqn.eval_net = self.dqn.eval_net.to(device=device) # self.dqn.target_net = self.dqn.target_net.to(device=device) @@ -78,23 +78,23 @@ def train(self, client_data, model, conf): model.load_state_dict(self.dqn.target_net.state_dict()) model_param = [param.data.cpu().numpy() for param in model.parameters()] - results = {'clientId': clientId, 'moving_loss': epoch_train_loss, + results = {'client_id': client_id, 'moving_loss': epoch_train_loss, 'trained_size': completed_steps*conf.batch_size, 'success': completed_steps > 0} results['utility'] = math.sqrt( epoch_train_loss)*float(trained_unique_samples) if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}") else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {client_id}) failed as {error_type}") results['update_weight'] = model_param results['wall_duration'] = 0 return results - def test(self, args, rank, model, device): - model = model.to(device=device) + def test(self, client_data, model, conf): + model = model.to(device=self.device) self.dqn.target_net.load_state_dict(model.state_dict()) self.dqn.set_eval_mode() env = gym.make('CartPole-v0').unwrapped @@ -112,11 +112,11 @@ def test(self, client_data, model, conf): self.dqn.store_transition(s, a, new_r, s_) reward_sum += new_r s = s_ - if self.dqn.memory_counter > args.memory_capacity: + if self.dqn.memory_counter > conf.memory_capacity: test_loss += self.dqn.learn() if done: break logging.info('Rank {}: Test set: Average loss: {}, Reward: {}' - .format(rank, test_loss, reward_sum)) + .format(conf.rank, test_loss, reward_sum)) return 0, 0, 0, {'top_1': reward_sum, 'top_5': reward_sum, 'test_loss': test_loss, 'test_len': 1} diff --git a/fedscale/cloud/execution/tensorflow_client.py b/fedscale/cloud/execution/tensorflow_client.py new file mode 100644 index 00000000..2b9dd924 --- /dev/null +++ b/fedscale/cloud/execution/tensorflow_client.py @@ -0,0 +1,95 @@ +import logging +import tensorflow as tf +from overrides import overrides +from fedscale.cloud.execution.client_base import ClientBase +import numpy as np + +from fedscale.cloud.internal.tensorflow_model_adapter import TensorflowModelAdapter + + +class TensorflowClient(ClientBase): +    """Implements a TensorFlow-based client for training and evaluation.""" + +    def __init__(self, args): +        """ +        Initializes a tf client. +        :param args: Job args +        """ +        self.args = args + +    def _convert_np_to_tf_dataset(self, dataset): +        """ +        Converts the iterable numpy dataset to a tensorflow Dataset. 
+ :param dataset: numpy dataset + :return: tf.data.Dataset + """ + def gen(): + while True: + for x, y in dataset: + # Convert torch tensor to tf tensor + nx, ny = tf.convert_to_tensor(x.swapaxes(1, 3).numpy()), \ + tf.one_hot(tf.convert_to_tensor(y.numpy()), self.args.num_classes) + yield nx, ny + + # Sample a batch to get tensor properties + temp_x, temp_y = next(gen()) + x_shape, y_shape = temp_x.shape.as_list(), temp_y.shape.as_list() + x_shape[0], y_shape[0] = None, None + + return tf.data.Dataset.from_generator( + gen, + output_shapes=(tf.TensorShape(x_shape), tf.TensorShape(y_shape)), + output_types=(temp_x.dtype, temp_y.dtype), + ) + + @overrides + def train(self, client_data, model, conf): + """ + Perform a training task. + :param client_data: client training dataset + :param model: the framework-specific model + :param conf: job config + :return: training results + """ + client_id = conf.client_id + logging.info(f"Start to train (CLIENT: {client_id}) ...") + tf_dataset = self._convert_np_to_tf_dataset(client_data).take(conf.local_steps) + history = model.fit(tf_dataset, batch_size=conf.batch_size, verbose=1) + + # Report the training results + results = {'client_id': client_id, + 'moving_loss': sum(history.history['loss']) / (len(history.history['loss']) + 1e-4), + 'trained_size': history.history['row_count'], 'success': True, 'utility': 1} + + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}") + + results['update_weight'] = [np.asarray(layer.get_weights()) for layer in model.layers if layer.trainable] + results['wall_duration'] = 0 + + return results + + @overrides + def test(self, client_data, model, conf): + """ + Perform a testing task. + :param client_data: client evaluation dataset + :param model: the framework-specific model + :param conf: job config + :return: testing results + """ + results = model.evaluate(self._convert_np_to_tf_dataset(client_data), batch_size=conf.batch_size, + return_dict=True) + for key, value in results.items(): + if key != 'row_count': + results[key] = results['row_count'] * value + results['test_len'] = results['row_count'] + return results + + @overrides + def get_model_adapter(self, model) -> TensorflowModelAdapter: + """ + Return framework-specific model adapter. + :param model: the model + :return: a model adapter containing the model + """ + return TensorflowModelAdapter(model) diff --git a/fedscale/cloud/execution/client.py b/fedscale/cloud/execution/torch_client.py similarity index 63% rename from fedscale/cloud/execution/client.py rename to fedscale/cloud/execution/torch_client.py index ea60be29..0dd83993 100644 --- a/fedscale/cloud/execution/client.py +++ b/fedscale/cloud/execution/torch_client.py @@ -1,22 +1,32 @@ import logging import math -import pickle +import time + import torch from torch.autograd import Variable +from overrides import overrides +from torch.nn import CTCLoss +from fedscale.cloud.execution.client_base import ClientBase from fedscale.cloud.execution.optimizers import ClientOptimizer +from fedscale.cloud.internal.torch_model_adapter import TorchModelAdapter from fedscale.dataloaders.nlp import mask_tokens +from fedscale.utils.model_test_module import test_pytorch_model -class Client(object): - """Basic client component in Federated Learning""" +class TorchClient(ClientBase): + """Implements a PyTorch-based client for training and evaluation.""" - def __init__(self, conf): + def __init__(self, args): + """ + Initializes a torch client. 
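The TensorFlow client above bridges torch-style data loaders into `tf.data` through a generator, probing one batch for dtypes and shapes before building the dataset. A cut-down sketch of that bridge; the `(8, 32, 32, 3)` batch shape and 10 classes are placeholder assumptions:

```python
import numpy as np
import tensorflow as tf

# Cut-down sketch of the generator bridge in _convert_np_to_tf_dataset:
# wrap an iterable of (x, y) numpy batches as a tf.data.Dataset.
def to_tf_dataset(batches, num_classes=10):
    def gen():
        for x, y in batches:
            # one-hot the integer labels, as the TensorflowClient does
            yield x, np.eye(num_classes, dtype=np.float32)[y]

    x0, y0 = next(gen())  # probe one batch for dtypes and shapes
    return tf.data.Dataset.from_generator(
        gen,
        output_types=(x0.dtype, y0.dtype),
        output_shapes=(x0.shape, y0.shape),
    )


fake_loader = [(np.random.rand(8, 32, 32, 3).astype(np.float32),
                np.random.randint(0, 10, size=8)) for _ in range(3)]
dataset = to_tf_dataset(fake_loader)
for bx, by in dataset.take(2):
    print(bx.shape, by.shape)  # (8, 32, 32, 3) (8, 10)
```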
+ :param args: Job args + """ + self.args = args self.optimizer = ClientOptimizer() - self.init_task(conf) - - def init_task(self, conf): - if conf.task == "detection": + self.device = args.cuda_device if args.use_cuda else torch.device( + 'cpu') + if args.task == "detection": self.im_data = Variable(torch.FloatTensor(1).cuda()) self.im_info = Variable(torch.FloatTensor(1).cuda()) self.num_boxes = Variable(torch.LongTensor(1).cuda()) @@ -24,15 +34,22 @@ def init_task(self, conf): self.epoch_train_loss = 1e-4 self.completed_steps = 0 - self.loss_squre = 0 + self.loss_squared = 0 + @overrides def train(self, client_data, model, conf): - - clientId = conf.clientId - logging.info(f"Start to train (CLIENT: {clientId}) ...") - tokenizer, device = conf.tokenizer, conf.device - - model = model.to(device=device) + """ + Perform a training task. + :param client_data: client training dataset + :param model: the framework-specific model + :param conf: job config + :return: training results + """ + client_id = conf.client_id + logging.info(f"Start to train (CLIENT: {client_id}) ...") + tokenizer = conf.tokenizer + + model = model.to(device=self.device) model.train() trained_unique_samples = min( @@ -47,7 +64,7 @@ def train(self, client_data, model, conf): criterion = self.get_criterion(conf) error_type = None - # NOTE: If one may hope to run fixed number of epochs, instead of iterations, + # NOTE: If one may hope to run fixed number of epochs, instead of iterations, # use `while self.completed_steps < conf.local_steps * len(client_data)` instead while self.completed_steps < conf.local_steps: try: @@ -59,17 +76,17 @@ def train(self, client_data, model, conf): state_dicts = model.state_dict() model_param = {p: state_dicts[p].data.cpu().numpy() for p in state_dicts} - results = {'clientId': clientId, 'moving_loss': self.epoch_train_loss, - 'trained_size': self.completed_steps*conf.batch_size, + results = {'client_id': client_id, 'moving_loss': self.epoch_train_loss, + 'trained_size': self.completed_steps * conf.batch_size, 'success': self.completed_steps == conf.local_steps} if error_type is None: - logging.info(f"Training of (CLIENT: {clientId}) completes, {results}") + logging.info(f"Training of (CLIENT: {client_id}) completes, {results}") else: - logging.info(f"Training of (CLIENT: {clientId}) failed as {error_type}") + logging.info(f"Training of (CLIENT: {client_id}) failed as {error_type}") results['utility'] = math.sqrt( - self.loss_squre)*float(trained_unique_samples) + self.loss_squared) * float(trained_unique_samples) results['update_weight'] = model_param results['wall_duration'] = 0 @@ -83,10 +100,10 @@ def get_optimizer(self, model, conf): for key, value in dict(model.named_parameters()).items(): if value.requires_grad: if 'bias' in key: - params += [{'params': [value], 'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), + params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), 'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}] else: - params += [{'params': [value], 'lr':lr, + params += [{'params': [value], 'lr': lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}] optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM) @@ -116,10 +133,10 @@ def get_criterion(self, conf): criterion = None if conf.task == 'voice': from torch_baidu_ctc import CTCLoss - criterion = CTCLoss(reduction='none').to(device=conf.device) + criterion = CTCLoss(reduction='none').to(device=self.device) else: criterion = torch.nn.CrossEntropyLoss( - reduction='none').to(device=conf.device) + 
reduction='none').to(device=self.device) return criterion def train_step(self, client_data, conf, model, optimizer, criterion): @@ -128,10 +145,10 @@ def train_step(self, client_data, conf, model, optimizer, criterion): if conf.task == 'nlp': (data, _) = data_pair data, target = mask_tokens( - data, tokenizer, conf, device=conf.device) + data, tokenizer, conf, device=self.device) elif conf.task == 'voice': (data, target, input_percentages, - target_sizes), _ = data_pair + target_sizes), _ = data_pair input_sizes = input_percentages.mul_( int(data.size(3))).int() elif conf.task == 'detection': @@ -147,16 +164,16 @@ def train_step(self, client_data, conf, model, optimizer, criterion): self.gt_boxes.resize_(data[2].size()).copy_(data[2]) self.num_boxes.resize_(data[3].size()).copy_(data[3]) elif conf.task == 'speech': - data = torch.unsqueeze(data, 1).to(device=conf.device) + data = torch.unsqueeze(data, 1).to(device=self.device) elif conf.task == 'text_clf' and conf.model == 'albert-base-v2': (data, masks) = data data, masks = Variable(data).to( - device=conf.device), Variable(masks).to(device=conf.device) + device=self.device), Variable(masks).to(device=self.device) else: - data = Variable(data).to(device=conf.device) + data = Variable(data).to(device=self.device) - target = Variable(target).to(device=conf.device) + target = Variable(target).to(device=self.device) if conf.task == 'nlp': outputs = model(data, labels=target) @@ -173,19 +190,19 @@ def train_step(self, client_data, conf, model, optimizer, criterion): output = outputs.logits elif conf.task == "detection": rois, cls_prob, bbox_pred, \ - rpn_loss_cls, rpn_loss_box, \ - RCNN_loss_cls, RCNN_loss_bbox, \ - rois_label = model( - self.im_data, self.im_info, self.gt_boxes, self.num_boxes) + rpn_loss_cls, rpn_loss_box, \ + RCNN_loss_cls, RCNN_loss_bbox, \ + rois_label = model( + self.im_data, self.im_info, self.gt_boxes, self.num_boxes) loss = rpn_loss_cls + rpn_loss_box \ - + RCNN_loss_cls + RCNN_loss_bbox + + RCNN_loss_cls + RCNN_loss_bbox loss_rpn_cls = rpn_loss_cls.item() loss_rpn_box = rpn_loss_box.item() loss_rcnn_cls = RCNN_loss_cls.item() loss_rcnn_box = RCNN_loss_bbox.item() - + else: output = model(data) loss = criterion(output, target) @@ -202,16 +219,16 @@ def train_step(self, client_data, conf, model, optimizer, criterion): loss_list = loss.tolist() loss = loss.mean() - temp_loss = sum(loss_list)/float(len(loss_list)) - self.loss_squre = sum([l**2 for l in loss_list] - )/float(len(loss_list)) + temp_loss = sum(loss_list) / float(len(loss_list)) + self.loss_squared = sum([l ** 2 for l in loss_list] + ) / float(len(loss_list)) # only measure the loss of the first epoch if self.completed_steps < len(client_data): if self.epoch_train_loss == 1e-4: self.epoch_train_loss = temp_loss else: self.epoch_train_loss = ( - 1. - conf.loss_decay) * self.epoch_train_loss + conf.loss_decay * temp_loss + 1. - conf.loss_decay) * self.epoch_train_loss + conf.loss_decay * temp_loss # ========= Define the backward loss ============== optimizer.zero_grad() @@ -227,6 +244,34 @@ def train_step(self, client_data, conf, model, optimizer, criterion): if self.completed_steps == conf.local_steps: break - - def test(self, conf): - pass + @overrides + def test(self, client_data, model, conf): + """ + Perform a testing task. 
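For reference, a framework-free sketch of the step-bounded local-training contract that `train`/`train_step` above follow: `local_steps` counts optimizer steps, not epochs, and the loader is re-entered if it runs dry before the budget is spent. The SGD settings, the 0.05 loss-decay constant, and the synthetic loader are illustrative, not FedScale defaults.

```python
import torch

def local_train(model, loader, local_steps, lr=0.05, device="cpu"):
    """Run a fixed number of optimization steps, re-entering the data
    loader if it is exhausted before the step budget is spent."""
    model.to(device).train()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    completed_steps, moving_loss = 0, 0.0
    while completed_steps < local_steps:
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), target)
            loss.backward()
            optimizer.step()
            # exponential moving average of the loss, as in train_step above
            moving_loss = loss.item() if completed_steps == 0 \
                else 0.95 * moving_loss + 0.05 * loss.item()
            completed_steps += 1
            if completed_steps == local_steps:
                break
    return {"moving_loss": moving_loss, "trained_steps": completed_steps}

# Illustrative usage with a synthetic three-batch loader
loader = [(torch.randn(8, 10), torch.randint(0, 2, (8,))) for _ in range(3)]
print(local_train(torch.nn.Linear(10, 2), loader, local_steps=5))
```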
+ :param client_data: client evaluation dataset + :param model: the framework-specific model + :param conf: job config + :return: testing results + """ + evalStart = time.time() + if self.args.task == 'voice': + criterion = CTCLoss(reduction='mean').to(device=self.device) + else: + criterion = torch.nn.CrossEntropyLoss().to(device=self.device) + test_loss, acc, acc_5, test_results = test_pytorch_model(conf.rank, model, client_data, + device=self.device, criterion=criterion, + tokenizer=conf.tokenizer) + logging.info( + "Test results: Eval_time {}, test_loss {}, test_accuracy {:.2f}%, " + "test_5_accuracy {:.2f}% \n" + .format(round(time.time() - evalStart, 4), test_loss, acc * 100., acc_5 * 100.)) + return test_results + + @overrides + def get_model_adapter(self, model) -> TorchModelAdapter: + """ + Return framework-specific model adapter. + :param model: the model + :return: a model adapter containing the model + """ + return TorchModelAdapter(model) diff --git a/fedscale/cloud/fllibs.py b/fedscale/cloud/fllibs.py index 88b6bacc..de6729c2 100644 --- a/fedscale/cloud/fllibs.py +++ b/fedscale/cloud/fllibs.py @@ -1,45 +1,22 @@ # Standard libs -import collections -import copy -import datetime -import gc import json import logging -import math import os -import pickle -import random -import re -import socket import sys -import threading -import time -from collections import OrderedDict - -import numpy -import numpy as np -# PyTorch libs -import torch -import torch.distributed as dist import torchvision.models as tormodels -from torch.autograd import Variable -from torch.multiprocessing import Process, Queue -from torch.utils.data import DataLoader from torchvision import datasets, transforms -from fedscale.cloud.aggregation.optimizers import ServerOptimizer -from fedscale.cloud.client_manager import ClientManager - # libs from fedscale import fedscale.cloud.config_parser as parser -from fedscale.dataloaders.divide_data import DataPartitioner, select_dataset +from fedscale.cloud import commons from fedscale.dataloaders.utils_data import get_data_transform -from fedscale.utils.model_test_module import test_model # FedScale model libs -from fedscale.utils.models.model_provider import get_cv_model +from fedscale.utils.models.torch_model_provider import get_cv_model +from fedscale.utils.models.tensorflow_model_provider import get_tensorflow_model tokenizer = None + def import_libs(): global tokenizer @@ -47,35 +24,34 @@ def import_libs(): global AdamW, AlbertTokenizer, AutoConfig, AutoModelWithLMHead, AutoTokenizer, MobileBertForPreTraining, load_and_cache_examples, mask_tokens from transformers import (AdamW, AlbertTokenizer, AutoConfig, - AutoModelWithLMHead, AutoTokenizer, - MobileBertForPreTraining) + AutoModelWithLMHead, AutoTokenizer, + MobileBertForPreTraining) from fedscale.dataloaders.nlp import load_and_cache_examples, mask_tokens tokenizer = AlbertTokenizer.from_pretrained( 'albert-base-v2', do_lower_case=True) elif parser.args.task == 'speech': - global numba, SPEECH, BackgroundNoiseDataset, AddBackgroundNoiseOnSTFT, DeleteSTFT,FixSTFTDimension, StretchAudioOnSTFT, TimeshiftAudioOnSTFT, ToMelSpectrogramFromSTFT, ToSTFT, ChangeAmplitude, ChangeSpeedAndPitchAudio, FixAudioLength, LoadAudio, ToMelSpectrogram, ToTensor + global numba, SPEECH, BackgroundNoiseDataset, AddBackgroundNoiseOnSTFT, DeleteSTFT, FixSTFTDimension, StretchAudioOnSTFT, TimeshiftAudioOnSTFT, ToMelSpectrogramFromSTFT, ToSTFT, ChangeAmplitude, ChangeSpeedAndPitchAudio, FixAudioLength, LoadAudio, ToMelSpectrogram, ToTensor 
import numba from fedscale.dataloaders.speech import SPEECH, BackgroundNoiseDataset from fedscale.dataloaders.transforms_stft import (AddBackgroundNoiseOnSTFT, - DeleteSTFT, - FixSTFTDimension, - StretchAudioOnSTFT, - TimeshiftAudioOnSTFT, - ToMelSpectrogramFromSTFT, - ToSTFT) + DeleteSTFT, + FixSTFTDimension, + StretchAudioOnSTFT, + TimeshiftAudioOnSTFT, + ToMelSpectrogramFromSTFT, + ToSTFT) from fedscale.dataloaders.transforms_wav import (ChangeAmplitude, - ChangeSpeedAndPitchAudio, - FixAudioLength, LoadAudio, - ToMelSpectrogram, - ToTensor) + ChangeSpeedAndPitchAudio, + FixAudioLength, LoadAudio, + ToMelSpectrogram, + ToTensor) elif parser.args.task == 'detection': global pickle, get_imdb, readClass, resnet, nms, bbox_transform_inv, clip_boxes, cfg, cfg_from_file, cfg_from_list, get_output_dir, adjust_learning_rate, clip_gradient, load_net, save_checkpoint, save_net, weights_normal_init, roibatchLoader, combined_roidb import pickle - from fedscale.dataloaders.rcnn.lib.datasets.factory import get_imdb from fedscale.dataloaders.rcnn.lib.datasets.pascal_voc import readClass from fedscale.dataloaders.rcnn.lib.model.faster_rcnn.resnet import resnet @@ -102,6 +78,7 @@ def import_libs(): from fedscale.dataloaders.dqn import RLData, Net, DQN + # shared functions of aggregator and clients # initiate for nlp @@ -109,7 +86,6 @@ def import_libs(): os.environ['MASTER_ADDR'] = parser.args.ps_ip os.environ['MASTER_PORT'] = parser.args.ps_port - outputClass = {'Mnist': 10, 'cifar10': 10, "imagenet": 1000, 'emnist': 47, 'amazon': 5, 'openImg': 596, 'google_speech': 35, 'femnist': 62, 'yelp': 5, 'inaturalist': 1010 } @@ -124,7 +100,7 @@ def init_model(): if parser.args.task == 'nlp': config = AutoConfig.from_pretrained( - os.path.join(parser.args.data_dir, parser.args.model+'-config.json')) + os.path.join(parser.args.data_dir, parser.args.model + '-config.json')) model = AutoModelWithLMHead.from_config(config) tokenizer = AlbertTokenizer.from_pretrained( parser.args.model, do_lower_case=True) @@ -223,11 +199,14 @@ def init_model(): elif parser.args.model == 'svm': from fedscale.utils.models.simple.models import LinearSVM model = LinearSVM(parser.args.input_dim, outputClass[parser.args.data_set]) + elif parser.args.model_zoo == "fedscale-tensorflow-zoo": + assert parser.args.engine == commons.TENSORFLOW + model = get_tensorflow_model(parser.args.model, parser.args) else: - if parser.args.model_zoo == "fedscale-zoo": + if parser.args.model_zoo == "fedscale-torch-zoo": if parser.args.task == "cv": model = get_cv_model(name=parser.args.model, - num_classes=outputClass[parser.args.data_set]) + num_classes=outputClass[parser.args.data_set]) else: raise NameError(f"Model zoo {parser.args.model_zoo} does not exist") elif parser.args.model_zoo == "torchcv": @@ -239,7 +218,6 @@ def init_model(): def init_dataset(): - import_libs() if parser.args.task == "detection": @@ -249,7 +227,8 @@ def init_dataset(): imdb, roidb, ratio_list, ratio_index = combined_roidb( imdb_name, ['DATA_DIR', parser.args.data_dir], sizes=parser.args.train_size_file) train_dataset = roibatchLoader( - roidb, ratio_list, ratio_index, parser.args.batch_size, imdb.num_classes, imdb._image_index_temp, training=True) + roidb, ratio_list, ratio_index, parser.args.batch_size, imdb.num_classes, imdb._image_index_temp, + training=True) imdb_, roidb_, ratio_list_, ratio_index_ = combined_roidb( imdbval_name, ['DATA_DIR', parser.args.data_dir], sizes=parser.args.test_size_file, training=False) imdb_.competition_mode(on=True) @@ -350,7 +329,8 @@ def 
init_dataset(): elif parser.args.data_set == 'google_speech': bkg = '_background_noise_' data_aug_transform = transforms.Compose( - [ChangeAmplitude(), ChangeSpeedAndPitchAudio(), FixAudioLength(), ToSTFT(), StretchAudioOnSTFT(), TimeshiftAudioOnSTFT(), FixSTFTDimension()]) + [ChangeAmplitude(), ChangeSpeedAndPitchAudio(), FixAudioLength(), ToSTFT(), StretchAudioOnSTFT(), + TimeshiftAudioOnSTFT(), FixSTFTDimension()]) bg_dataset = BackgroundNoiseDataset( os.path.join(parser.args.data_dir, bkg), data_aug_transform) add_bg_noise = AddBackgroundNoiseOnSTFT(bg_dataset) diff --git a/fedscale/cloud/internal/model_adapter_base.py b/fedscale/cloud/internal/model_adapter_base.py new file mode 100644 index 00000000..067ef91c --- /dev/null +++ b/fedscale/cloud/internal/model_adapter_base.py @@ -0,0 +1,32 @@ +import abc +from typing import Any +import numpy as np + + +class ModelAdapterBase(abc.ABC): + """ + Represents an adapter that operates on a framework-specific model. + """ + @abc.abstractmethod + def set_weights(self, weights: np.ndarray): + """ + Set the model's weights to the numpy weights array. + :param weights: numpy weights array + """ + pass + + @abc.abstractmethod + def get_weights(self) -> np.ndarray: + """ + Get the model's weights as a numpy weights array. Note that it doesn't contain layer names. Rather, index 0 + contains the model's first layer weights, and index N contains the N+1 layer's weights. + :return: A numpy array + """ + pass + + @abc.abstractmethod + def get_model(self) -> Any: + """ + Get the instantiated framework specific model including the architecture. + """ + pass diff --git a/fedscale/cloud/internal/tensorflow_model_adapter.py b/fedscale/cloud/internal/tensorflow_model_adapter.py new file mode 100644 index 00000000..5c5785a5 --- /dev/null +++ b/fedscale/cloud/internal/tensorflow_model_adapter.py @@ -0,0 +1,22 @@ +from typing import List + +import numpy as np +import tensorflow as tf + +from fedscale.cloud.internal.model_adapter_base import ModelAdapterBase + + +class TensorflowModelAdapter(ModelAdapterBase): + def __init__(self, model: tf.keras.Model): + self.model = model + + def set_weights(self, weights: List[np.ndarray]): + for i, layer in enumerate(self.model.layers): + if layer.trainable: + layer.set_weights(weights[i]) + + def get_weights(self) -> List[np.ndarray]: + return [np.asarray(layer.get_weights()) for layer in self.model.layers if layer.trainable] + + def get_model(self): + return self.model diff --git a/fedscale/cloud/internal/torch_model_adapter.py b/fedscale/cloud/internal/torch_model_adapter.py new file mode 100644 index 00000000..44e2a950 --- /dev/null +++ b/fedscale/cloud/internal/torch_model_adapter.py @@ -0,0 +1,49 @@ +from typing import List + +import numpy as np +import torch + +from fedscale.cloud.aggregation.optimizers import TorchServerOptimizer +from fedscale.cloud.internal.model_adapter_base import ModelAdapterBase + + +class TorchModelAdapter(ModelAdapterBase): + """ + Adapts functions to pytorch models. + """ + def __init__(self, model: torch.nn.Module, optimizer: TorchServerOptimizer = None): + """ + Initializes a TorchModelAdapter. + :param model: the PyTorch model to adapt + :param optimizer: the optimizer to apply weights, when specified. + """ + self.model = model + self.optimizer = optimizer + + def set_weights(self, weights: List[np.ndarray]): + """ + Set the model's weights to the numpy weights array. 
+ :param weights: numpy weights array + """ + current_grad_weights = [param.data.clone() for param in self.model.state_dict().values()] + new_state_dict = { + name: torch.from_numpy(np.asarray(weights[i], dtype=np.float32)) + for i, name in enumerate(self.model.state_dict().keys()) + } + self.model.load_state_dict(new_state_dict) + if self.optimizer: + self.optimizer.update_round_gradient(weights, current_grad_weights, self.model) + + def get_weights(self) -> List[np.ndarray]: + """ + Get the model's weights as a numpy weights array. Note that it doesn't contain layer names. Rather, index 0 + contains the model's first layer weights, and index N contains the N+1 layer's weights. + :return: A numpy array + """ + return [params.data.clone() for params in self.model.state_dict().values()] + + def get_model(self): + """ + Get the instantiated framework specific model including the architecture. + """ + return self.model diff --git a/fedscale/cloud/logger/aggragation.py b/fedscale/cloud/logger/aggragation.py deleted file mode 100644 index 978d98d2..00000000 --- a/fedscale/cloud/logger/aggragation.py +++ /dev/null @@ -1,66 +0,0 @@ -# package for aggregator -from fedscale.cloud.fllibs import * -import fedscale.cloud.config_parser as parser - -logDir = None - - -def init_logging(): - global logDir - - logDir = os.path.join(parser.args.log_path, "logs", parser.args.job_name, - parser.args.time_stamp, 'aggregator') - logFile = os.path.join(logDir, 'log') - if not os.path.isdir(logDir): - os.makedirs(logDir, exist_ok=True) - - logging.basicConfig( - format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - datefmt='(%m-%d) %H:%M:%S', - level=logging.INFO, - handlers=[ - logging.FileHandler(logFile, mode='a'), - logging.StreamHandler() - ]) - - - -def initiate_aggregator_setting(): - init_logging() - -def aggregate_test_result(test_result_accumulator, task, round_num, global_virtual_clock, testing_history): - - accumulator = test_result_accumulator[0] - for i in range(1, len(test_result_accumulator)): - if task == "detection": - for key in accumulator: - if key == "boxes": - for j in range(596): - accumulator[key][j] = accumulator[key][j] + \ - test_result_accumulator[i][key][j] - else: - accumulator[key] += test_result_accumulator[i][key] - else: - for key in accumulator: - accumulator[key] += test_result_accumulator[i][key] - if task == "detection": - testing_history['perf'][round_num] = {'round': round_num, 'clock': global_virtual_clock, - 'top_1': round(accumulator['top_1']*100.0/len(test_result_accumulator), 4), - 'top_5': round(accumulator['top_5']*100.0/len(test_result_accumulator), 4), - 'loss': accumulator['test_loss'], - 'test_len': accumulator['test_len'] - } - else: - testing_history['perf'][round_num] = {'round': round_num, 'clock': global_virtual_clock, - 'top_1': round(accumulator['top_1']/accumulator['test_len']*100.0, 4), - 'top_5': round(accumulator['top_5']/accumulator['test_len']*100.0, 4), - 'loss': accumulator['test_loss']/accumulator['test_len'], - 'test_len': accumulator['test_len'] - } - - logging.info("FL Testing in round: {}, virtual_clock: {}, top_1: {} %, top_5: {} %, test loss: {:.4f}, test len: {}" - .format(round_num, global_virtual_clock, testing_history['perf'][round_num]['top_1'], - testing_history['perf'][round_num]['top_5'], testing_history['perf'][round_num]['loss'], - testing_history['perf'][round_num]['test_len'])) - - diff --git a/fedscale/cloud/logger/aggregator_logging.py b/fedscale/cloud/logger/aggregator_logging.py new file mode 100644 
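The ordering contract in `set_weights` above (index i must hold the i-th `state_dict()` entry) is easy to violate. Below is a minimal self-contained round-trip check of that contract; the two-layer model and the zeroed weights are illustrative.

```python
import numpy as np
import torch

# Round trip through state_dict-ordered numpy weights, the contract
# TorchModelAdapter relies on: index i holds the i-th state_dict entry.
model = torch.nn.Sequential(torch.nn.Linear(4, 3), torch.nn.Linear(3, 2))

weights = [p.detach().numpy().copy() for p in model.state_dict().values()]
zeroed = [np.zeros_like(w) for w in weights]

model.load_state_dict({
    name: torch.from_numpy(np.asarray(z, dtype=np.float32))
    for name, z in zip(model.state_dict().keys(), zeroed)
})
assert all(float(p.abs().sum()) == 0.0 for p in model.parameters())
```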
index 00000000..b2ac3742 --- /dev/null +++ b/fedscale/cloud/logger/aggregator_logging.py @@ -0,0 +1,27 @@ +from fedscale.cloud.fllibs import * +import fedscale.cloud.config_parser as parser + +logDir = None + + +def init_logging(): + global logDir + + logDir = os.path.join(parser.args.log_path, "logs", parser.args.job_name, + parser.args.time_stamp, 'aggregator') + logFile = os.path.join(logDir, 'log') + if not os.path.isdir(logDir): + os.makedirs(logDir, exist_ok=True) + + logging.basicConfig( + format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='(%m-%d) %H:%M:%S', + level=logging.INFO, + handlers=[ + logging.FileHandler(logFile, mode='a'), + logging.StreamHandler() + ]) + + +def initiate_aggregator_setting(): + init_logging() diff --git a/fedscale/cloud/logger/dummy_logger.py b/fedscale/cloud/logger/dummy_logger.py new file mode 100644 index 00000000..9728324e --- /dev/null +++ b/fedscale/cloud/logger/dummy_logger.py @@ -0,0 +1,6 @@ +class DummyLogger: + def add_scalar(self, name, value, step): + pass + + def add_histogram(self, name, dist, step): + pass diff --git a/fedscale/cloud/logger/execution.py b/fedscale/cloud/logger/executor_logging.py similarity index 88% rename from fedscale/cloud/logger/execution.py rename to fedscale/cloud/logger/executor_logging.py index b4d426a2..2500da59 100644 --- a/fedscale/cloud/logger/execution.py +++ b/fedscale/cloud/logger/executor_logging.py @@ -1,16 +1,14 @@ -# package for client -import os - from fedscale.cloud.fllibs import * import fedscale.cloud.config_parser as parser logDir = None + def init_logging(): global logDir logDir = os.path.join(parser.args.log_path, "logs", parser.args.job_name, - parser.args.time_stamp, 'executor') + parser.args.time_stamp, 'executor') logFile = os.path.join(logDir, 'log') if not os.path.isdir(logDir): os.makedirs(logDir, exist_ok=True) @@ -27,5 +25,3 @@ def init_logging(): def initiate_client_setting(): init_logging() - - diff --git a/fedscale/dataloaders/divide_data.py b/fedscale/dataloaders/divide_data.py index 43c90c0b..81d2139c 100755 --- a/fedscale/dataloaders/divide_data.py +++ b/fedscale/dataloaders/divide_data.py @@ -42,7 +42,6 @@ def __init__(self, data, args, numOfClass=0, seed=10, isTest=False): np.random.seed(seed) self.data_len = len(self.data) - self.task = args.task self.numOfLabels = numOfClass self.client_label_cnt = defaultdict(set) @@ -62,8 +61,8 @@ def trace_partition(self, data_map_file): """Read data mapping from data_map_file. 
Format: """ logging.info(f"Partitioning data by profile {data_map_file}...") - clientId_maps = {} - unique_clientIds = {} + client_id_maps = {} + unique_client_ids = {} # load meta data from the data_map_file with open(data_map_file) as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') @@ -77,19 +76,19 @@ def trace_partition(self, data_map_file): else: client_id = row[0] - if client_id not in unique_clientIds: - unique_clientIds[client_id] = len(unique_clientIds) + if client_id not in unique_client_ids: + unique_client_ids[client_id] = len(unique_client_ids) - clientId_maps[sample_id] = unique_clientIds[client_id] - self.client_label_cnt[unique_clientIds[client_id]].add( + client_id_maps[sample_id] = unique_client_ids[client_id] + self.client_label_cnt[unique_client_ids[client_id]].add( row[-1]) sample_id += 1 # Partition data given mapping - self.partitions = [[] for _ in range(len(unique_clientIds))] + self.partitions = [[] for _ in range(len(unique_client_ids))] for idx in range(sample_id): - self.partitions[clientId_maps[idx]].append(idx) + self.partitions[client_id_maps[idx]].append(idx) def partition_data_helper(self, num_clients, data_map_file=None): @@ -114,7 +113,7 @@ def uniform_partition(self, num_clients): indexes = indexes[part_len:] def use(self, partition, istest): - resultIndex = self.partitions[partition] + resultIndex = self.partitions[partition % len(self.partitions)] exeuteLength = len(resultIndex) if not istest else int( len(resultIndex) * self.args.test_ratio) diff --git a/fedscale/dataloaders/reddit.py b/fedscale/dataloaders/reddit.py index 86c0eddc..6691befc 100644 --- a/fedscale/dataloaders/reddit.py +++ b/fedscale/dataloaders/reddit.py @@ -155,7 +155,7 @@ def load_file(self, path, is_train): for client_data in client_data_list: client_list = client_data['users'] - for clientId, client in enumerate(client_list): + for client_id, client in enumerate(client_list): tokens_list = list(client_data['user_data'][client]['x']) for tokens in tokens_list: @@ -165,7 +165,7 @@ def load_file(self, path, is_train): if not tokens_list: continue - mapping_dict[count] = clientId + mapping_dict[count] = client_id text.append(tokens_list) count += 1 @@ -176,13 +176,13 @@ def load_file(self, path, is_train): #print("====In loading data, remains {} clients, may take {} sec".format(num_of_remains, (time.time() - start_time)/clientCount * num_of_remains)) # logging.info("====In loading data, remains {} clients".format(num_of_remains) - if clientId % 5000 == 0: + if client_id % 5000 == 0: # dump the cache with open(cache_path, 'wb') as fout: pickle.dump(text, fout) pickle.dump(mapping_dict, fout) - print("====Dump for {} clients".format(clientId)) + print("====Dump for {} clients".format(client_id)) # dump the cache with open(cache_path, 'wb') as fout: diff --git a/fedscale/dataloaders/stackoverflow.py b/fedscale/dataloaders/stackoverflow.py index 7252a833..7e5d2d75 100755 --- a/fedscale/dataloaders/stackoverflow.py +++ b/fedscale/dataloaders/stackoverflow.py @@ -199,7 +199,7 @@ def load_file(self, path, is_train): client_list = list(train_file['examples']) start_time = time.time() - for clientId, client in enumerate(client_list): + for client_id, client in enumerate(client_list): tags_list = list(train_file['examples'][client]['tags']) tokens_list = list(train_file['examples'][client]['tokens']) title_list = list(train_file['examples'][client]['title']) @@ -215,7 +215,7 @@ def load_file(self, path, is_train): if not tokens_list: continue - mapping_dict[count] = clientId + 
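For reference, a condensed sketch of the trace-driven partitioning above, assuming an illustrative `client_id,sample_path,label` header; the modulo lookup at the end mirrors the wraparound just added to `use()`.

```python
import csv
import io
from collections import defaultdict

def trace_partition(csv_text):
    """Group sample indices by client id, as trace_partition above does
    with a client_data_mapping CSV; the header used here is illustrative."""
    unique_ids, partitions = {}, []
    label_cnt = defaultdict(set)
    reader = csv.reader(io.StringIO(csv_text))
    next(reader)  # skip the header row
    for sample_id, row in enumerate(reader):
        cid = row[0]
        if cid not in unique_ids:
            unique_ids[cid] = len(unique_ids)
            partitions.append([])
        partitions[unique_ids[cid]].append(sample_id)
        label_cnt[unique_ids[cid]].add(row[-1])
    return partitions

parts = trace_partition("client_id,sample_path,label\n"
                        "a,x0.png,0\na,x1.png,1\nb,x2.png,0\n")
print(parts)                   # [[0, 1], [2]]
print(parts[5 % len(parts)])   # wraparound lookup, as added to use()
```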
mapping_dict[count] = client_id text.append(tokens_list) target_tags.append(tags_list) @@ -223,18 +223,18 @@ def load_file(self, path, is_train): clientCount += 1 - num_of_remains = len(client_list) - clientId + num_of_remains = len(client_list) - client_id #print("====In loading data, remains {} clients, may take {} sec".format(num_of_remains, (time.time() - start_time)/clientCount * num_of_remains)) # logging.info("====In loading data, remains {} clients".format(num_of_remains) - if clientId % 5000 == 0: + if client_id % 5000 == 0: # dump the cache with open(cache_path, 'wb') as fout: pickle.dump(text, fout) pickle.dump(target_tags, fout) pickle.dump(mapping_dict, fout) - #print("====Dump for {} clients".format(clientId)) + #print("====Dump for {} clients".format(client_id)) # dump the cache with open(cache_path, 'wb') as fout: diff --git a/fedscale/dataloaders/utils_data.py b/fedscale/dataloaders/utils_data.py index 7f7db93f..395e8cf4 100755 --- a/fedscale/dataloaders/utils_data.py +++ b/fedscale/dataloaders/utils_data.py @@ -7,7 +7,7 @@ def get_data_transform(data: str): if data == 'mnist': train_transform = transforms.Compose([ # transforms.Grayscale(num_output_channels=1), - transforms.Resize((28, 28)), + transforms.Resize((32, 32)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) @@ -15,7 +15,7 @@ def get_data_transform(data: str): test_transform = transforms.Compose([ # transforms.Grayscale(num_output_channels=1), - transforms.Resize((28, 28)), + transforms.Resize((32, 32)), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)) diff --git a/fedscale/edge/mnn/app/src/main/java/com/fedscale/android/executor/FLExecutor.java b/fedscale/edge/mnn/app/src/main/java/com/fedscale/android/executor/FLExecutor.java index dad51b91..77a9eb06 100644 --- a/fedscale/edge/mnn/app/src/main/java/com/fedscale/android/executor/FLExecutor.java +++ b/fedscale/edge/mnn/app/src/main/java/com/fedscale/android/executor/FLExecutor.java @@ -159,7 +159,7 @@ private String deserializeResponse(ByteString responses) throws IOException { /** * Serialize the response to send to server upon assigned job completion * - * @param responses Client responses after job completion. + * @param responses TorchClient responses after job completion. * @return The serialized response object to server. 
*/ private ByteString serializeResponse(String responses) throws IOException { diff --git a/fedscale/edge/pytorch/torch_client.py b/fedscale/edge/pytorch/torch_client.py index 569fc326..66af1cd5 100644 --- a/fedscale/edge/pytorch/torch_client.py +++ b/fedscale/edge/pytorch/torch_client.py @@ -1,8 +1,8 @@ -"""A skeleton for Pytorch Client""" -from fedscale.cloud.execution.client import Client +"""A skeleton for Pytorch TorchClient""" +from fedscale.cloud.execution.torch_client import TorchClient -class Torch_Client(Client): - """A class for PyTorch version of Client, directly inherited from fedscale/cloud/execution/client.py""" +class Torch_Client(TorchClient): + """A class for PyTorch version of TorchClient, directly inherited from fedscale/cloud/execution/torch_client.py""" pass diff --git a/fedscale/tests/cloud/aggregation/test_aggregator.py b/fedscale/tests/cloud/aggregation/test_aggregator.py new file mode 100644 index 00000000..e0e9b41f --- /dev/null +++ b/fedscale/tests/cloud/aggregation/test_aggregator.py @@ -0,0 +1,49 @@ +import copy +import numpy as np +import tensorflow as tf +import torch + +from fedscale.cloud.aggregation.aggregator import Aggregator +from fedscale.cloud.internal.tensorflow_model_adapter import TensorflowModelAdapter +from fedscale.cloud.internal.torch_model_adapter import TorchModelAdapter + + +class MockAggregator(Aggregator): + def __init__(self, model_wrapper): + self.model_weights = [] + self.model_in_update = 1 + self.tasks_round = 3 + self.model_wrapper = model_wrapper + + +def multiply_weights(weights, factor): + return [weights_group * factor for weights_group in weights] + + +class TestAggregator: + def test_update_weight_aggregation_for_keras_model(self): + x = tf.keras.Input(shape=(2,)) + y = tf.keras.layers.Dense(2, activation='softmax')( + tf.keras.layers.Dense(4, activation='softmax')(x)) + model = tf.keras.Model(x, y) + model_adapter = TensorflowModelAdapter(model) + aggregator = MockAggregator(model_adapter) + weights = copy.deepcopy(model_adapter.get_weights()) + aggregator.update_weight_aggregation(multiply_weights(weights, 2)) + aggregator.model_in_update += 1 + aggregator.update_weight_aggregation(multiply_weights(weights, 2)) + aggregator.model_in_update += 1 + aggregator.update_weight_aggregation(multiply_weights(weights, 5)) + np.array_equal(aggregator.model_wrapper.get_weights(), multiply_weights(weights, 3)) + + def test_update_weight_aggregation_for_torch_model(self): + model = torch.nn.Linear(3, 2) + model_adapter = TorchModelAdapter(model) + aggregator = MockAggregator(model_adapter) + weights = copy.deepcopy(model_adapter.get_weights()) + aggregator.update_weight_aggregation(multiply_weights(weights, 2)) + aggregator.model_in_update += 1 + aggregator.update_weight_aggregation(multiply_weights(weights, 2)) + aggregator.model_in_update += 1 + aggregator.update_weight_aggregation(multiply_weights(weights, 5)) + np.array_equal(aggregator.model_wrapper.get_weights(), multiply_weights(weights, 3)) diff --git a/fedscale/utils/model_test_module.py b/fedscale/utils/model_test_module.py index 8f74f5ae..b611f99b 100755 --- a/fedscale/utils/model_test_module.py +++ b/fedscale/utils/model_test_module.py @@ -24,6 +24,8 @@ from fedscale.dataloaders.rcnn.lib.model.utils.config import cfg from fedscale.dataloaders.rcnn.lib.roi_data_layer.roidb import \ combined_roidb +elif parser.args.task == 'voice': + from fedscale.dataloaders.decoder import GreedyDecoder def cal_accuracy(targets, outputs): @@ -50,7 +52,7 @@ def cal_accuracy(targets, outputs): 
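For reference, a framework-free sketch of the behavior the aggregator test above asserts: the aggregator accumulates `tasks_round` client updates and divides once the last one arrives, so updates of 2w, 2w, and 5w must average out to 3w. The function below is illustrative, not the `Aggregator` API.

```python
import numpy as np

# Incremental averaging: sum updates, divide when the last one arrives.
def update_weight_aggregation(state, update, model_in_update, tasks_round):
    if model_in_update == 1:
        state = [u.copy() for u in update]
    else:
        state = [s + u for s, u in zip(state, update)]
    if model_in_update == tasks_round:
        state = [s / tasks_round for s in state]
    return state

w = [np.ones((2, 2))]
state = update_weight_aggregation(None, [2 * x for x in w], 1, 3)
state = update_weight_aggregation(state, [2 * x for x in w], 2, 3)
state = update_weight_aggregation(state, [5 * x for x in w], 3, 3)
assert np.array_equal(state[0], 3 * w[0])  # (2w + 2w + 5w) / 3 == 3w
```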
return temp_acc, temp_all_or_false, temp_len -def test_model(rank, model, test_data, device='cpu', criterion=nn.NLLLoss(), tokenizer=None): +def test_pytorch_model(rank, model, test_data, device='cpu', criterion=nn.NLLLoss(), tokenizer=None): test_loss = 0 correct = 0 diff --git a/fedscale/utils/models/tensorflow_model_provider.py b/fedscale/utils/models/tensorflow_model_provider.py new file mode 100644 index 00000000..2e0fea53 --- /dev/null +++ b/fedscale/utils/models/tensorflow_model_provider.py @@ -0,0 +1,63 @@ +import tensorflow as tf + + +class RowCount(tf.keras.metrics.Metric): + def __init__(self, name='row_count', **kwargs): + super(RowCount, self).__init__(name=name, **kwargs) + self.count = self.add_weight('count', initializer='zeros') + + def update_state(self, y_true, y_pred, sample_weight=None): + self.count.assign_add(tf.cast(tf.shape(y_true)[0], self.dtype)) + + def reset_state(self): + self.count.assign(0) + + def result(self): + return self.count + + +def build_resnet50(args): + model = tf.keras.applications.resnet.ResNet50( + include_top=True, + weights=None, + input_tensor=None, + input_shape=args.input_shape, + pooling=None, + classes=args.num_classes + ) + optimizer = tf.keras.optimizers.SGD(learning_rate=args.learning_rate, momentum=0.9, + nesterov=False, name='SGD') + model.compile(optimizer=optimizer, loss='categorical_crossentropy', + metrics=['accuracy', RowCount()]) + return model + + +def build_mobilenet_v3_small(args): + model = tf.keras.applications.MobileNetV3Small( + input_shape=args.input_shape, + alpha=1.0, + minimalistic=False, + input_tensor=None, + weights=None, + classes=args.num_classes, + pooling=None, + dropout_rate=0.2, + include_preprocessing=True, + ) + optimizer = tf.keras.optimizers.SGD(learning_rate=args.learning_rate, momentum=0.9, + nesterov=False, name='SGD') + model.compile(optimizer=optimizer, loss='categorical_crossentropy', + metrics=['accuracy', RowCount()]) + return model + + +_models = { + 'resnet50': build_resnet50, + 'mobilenetv3_small': build_mobilenet_v3_small +} + + +def get_tensorflow_model(name: str, args): + if name not in _models: + raise ValueError(f"{name} is not a TensorFlow-supported model in FedScale. Please add an implementation to the _models dict.") + return _models[name](args) diff --git a/fedscale/utils/models/model_provider.py b/fedscale/utils/models/torch_model_provider.py similarity index 100% rename from fedscale/utils/models/model_provider.py rename to fedscale/utils/models/torch_model_provider.py diff --git a/thirdparty/oort/oort.py b/thirdparty/oort/oort.py index 9903fbc5..7f1346dd 100644 --- a/thirdparty/oort/oort.py +++ b/thirdparty/oort/oort.py @@ -152,17 +152,17 @@ def __init__(self, args, sample_seed=233): np2.random.seed(sample_seed) - def register_client(self, clientId, feedbacks): + def register_client(self, client_id, feedbacks): # Initiate the score for arms.
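For reference, a runnable sketch of how the `row_count` metric above surfaces through `evaluate(..., return_dict=True)`, which is what lets `TensorflowClient.test` convert per-batch metric means into sample-weighted totals. The tiny dense model and the random data are illustrative.

```python
import numpy as np
import tensorflow as tf

class RowCount(tf.keras.metrics.Metric):
    """Counts samples seen, as in tensorflow_model_provider above."""
    def __init__(self, name='row_count', **kwargs):
        super().__init__(name=name, **kwargs)
        self.count = self.add_weight('count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.count.assign_add(tf.cast(tf.shape(y_true)[0], self.dtype))

    def result(self):
        return self.count

model = tf.keras.Sequential(
    [tf.keras.layers.Dense(3, activation='softmax', input_shape=(4,))])
model.compile(optimizer='sgd', loss='categorical_crossentropy',
              metrics=['accuracy', RowCount()])

x = np.random.rand(10, 4).astype('float32')
y = tf.one_hot(np.random.randint(0, 3, size=10), 3).numpy()
results = model.evaluate(x, y, batch_size=4, return_dict=True)
print(results['row_count'])  # 10.0, the basis for sample-weighted metrics
```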
[score, time_stamp, # of trials, size of client, auxi, duration] - if clientId not in self.totalArms: - self.totalArms[clientId] = {} - self.totalArms[clientId]['reward'] = feedbacks['reward'] - self.totalArms[clientId]['duration'] = feedbacks['duration'] - self.totalArms[clientId]['time_stamp'] = self.training_round - self.totalArms[clientId]['count'] = 0 - self.totalArms[clientId]['status'] = True + if client_id not in self.totalArms: + self.totalArms[client_id] = {} + self.totalArms[client_id]['reward'] = feedbacks['reward'] + self.totalArms[client_id]['duration'] = feedbacks['duration'] + self.totalArms[client_id]['time_stamp'] = self.training_round + self.totalArms[client_id]['count'] = 0 + self.totalArms[client_id]['status'] = True - self.unexplored.add(clientId) + self.unexplored.add(client_id) def calculateSumUtil(self, clientList): cnt, cntUtil = 1e-4, 0 @@ -207,20 +207,20 @@ def pacer(self): logging.info("Training selector: Pacer {}: lastExploitationUtil {}, lastExplorationUtil {}, last_util_record {}". format(self.training_round, lastExploitationUtil, lastExplorationUtil, self.last_util_record)) - def update_client_util(self, clientId, feedbacks): + def update_client_util(self, client_id, feedbacks): ''' @ feedbacks['reward']: statistical utility @ feedbacks['duration']: system utility @ feedbacks['count']: times of involved ''' - self.totalArms[clientId]['reward'] = feedbacks['reward'] - self.totalArms[clientId]['duration'] = feedbacks['duration'] - self.totalArms[clientId]['time_stamp'] = feedbacks['time_stamp'] - self.totalArms[clientId]['count'] += 1 - self.totalArms[clientId]['status'] = feedbacks['status'] + self.totalArms[client_id]['reward'] = feedbacks['reward'] + self.totalArms[client_id]['duration'] = feedbacks['duration'] + self.totalArms[client_id]['time_stamp'] = feedbacks['time_stamp'] + self.totalArms[client_id]['count'] += 1 + self.totalArms[client_id]['status'] = feedbacks['status'] - self.unexplored.discard(clientId) - self.successfulClients.add(clientId) + self.unexplored.discard(client_id) + self.successfulClients.add(client_id) def get_blacklist(self): @@ -230,9 +230,9 @@ def get_blacklist(self): sorted_client_ids = sorted(list(self.totalArms), reverse=True, key=lambda k:self.totalArms[k]['count']) - for clientId in sorted_client_ids: - if self.totalArms[clientId]['count'] > self.args.blacklist_rounds: - blacklist.append(clientId) + for client_id in sorted_client_ids: + if self.totalArms[client_id]['count'] > self.args.blacklist_rounds: + blacklist.append(client_id) else: break @@ -252,9 +252,9 @@ def select_participant(self, num_of_clients, feasible_clients=None): viable_clients = feasible_clients if feasible_clients is not None else set([x for x in self.totalArms.keys() if self.totalArms[x]['status']]) return self.getTopK(num_of_clients, self.training_round+1, viable_clients) - def update_duration(self, clientId, duration): - if clientId in self.totalArms: - self.totalArms[clientId]['duration'] = duration + def update_duration(self, client_id, duration): + if client_id in self.totalArms: + self.totalArms[client_id]['duration'] = duration def getTopK(self, numOfSamples, cur_time, feasible_clients): self.training_round = cur_time @@ -279,11 +279,11 @@ def getTopK(self, numOfSamples, cur_time, feasible_clients): moving_reward, staleness, allloss = [], [], {} - for clientId in orderedKeys: - if self.totalArms[clientId]['reward'] > 0: - creward = self.totalArms[clientId]['reward'] + for client_id in orderedKeys: + if self.totalArms[client_id]['reward'] > 0: + 
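For reference, a condensed, framework-free sketch of the per-client "arm" bookkeeping that `register_client`/`update_client_util` above maintain. The `score` helper compresses the normalized-reward-plus-staleness ranking from `getTopK`; `alpha=0.3` and the normalization constants are illustrative, not Oort's configured values.

```python
# One record per client; selection ranks clients by normalized reward
# plus a staleness bonus (simplified relative to getTopK below).
total_arms = {}

def register_client(client_id, reward, duration, cur_round):
    total_arms.setdefault(client_id, {
        'reward': reward, 'duration': duration,
        'time_stamp': cur_round, 'count': 0, 'status': True})

def score(client_id, cur_round, min_reward, range_reward, alpha=0.3):
    arm = total_arms[client_id]
    base = (arm['reward'] - min_reward) / range_reward
    staleness_bonus = alpha * (cur_round - arm['time_stamp'])
    return base + staleness_bonus

register_client('a', reward=5.0, duration=10, cur_round=1)
register_client('b', reward=9.0, duration=12, cur_round=1)
print(sorted(total_arms, key=lambda c: -score(c, 3, 5.0, 4.0)))  # ['b', 'a']
```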
creward = self.totalArms[client_id]['reward'] moving_reward.append(creward) - staleness.append(cur_time - self.totalArms[clientId]['time_stamp']) + staleness.append(cur_time - self.totalArms[client_id]['time_stamp']) max_reward, min_reward, range_reward, avg_reward, clip_value = self.get_norm(moving_reward, self.args.clip_bound) @@ -325,11 +325,11 @@ def getTopK(self, numOfSamples, cur_time, feasible_clients): cut_off_util = scores[sortedClientUtil[exploitLen]] * self.args.cut_off_util tempPickedClients = [] - for clientId in sortedClientUtil: + for client_id in sortedClientUtil: # we want at least 10 times of clients for augmentation - if scores[clientId] < cut_off_util and len(tempPickedClients) > 10.*exploitLen: + if scores[client_id] < cut_off_util and len(tempPickedClients) > 10.*exploitLen: break - tempPickedClients.append(clientId) + tempPickedClients.append(client_id) augment_factor = len(tempPickedClients) @@ -364,10 +364,10 @@ def getTopK(self, numOfSamples, cur_time, feasible_clients): pickedClients = self.exploreClients + self.exploitClients top_k_score = [] for i in range(min(3, len(pickedClients))): - clientId = pickedClients[i] - _score = (self.totalArms[clientId]['reward'] - min_reward)/range_reward - _staleness = self.alpha*((cur_time-self.totalArms[clientId]['time_stamp']) - min_staleness)/float(range_staleness) #math.sqrt(0.1*math.log(cur_time)/max(1e-4, self.totalArms[clientId]['time_stamp'])) - top_k_score.append((self.totalArms[clientId], [_score, _staleness])) + client_id = pickedClients[i] + _score = (self.totalArms[client_id]['reward'] - min_reward)/range_reward + _staleness = self.alpha*((cur_time-self.totalArms[client_id]['time_stamp']) - min_staleness)/float(range_staleness) #math.sqrt(0.1*math.log(cur_time)/max(1e-4, self.totalArms[client_id]['time_stamp'])) + top_k_score.append((self.totalArms[client_id], [_score, _staleness])) logging.info("At round {}, UCB exploited {}, augment_factor {}, exploreLen {}, un-explored {}, exploration {}, round_threshold {}, sampled score is {}" .format(cur_time, numOfExploited, augment_factor/max(1e-4, exploitLen), exploreLen, len(self.unexplored), self.exploration, self.round_threshold, top_k_score)) diff --git a/thirdparty/oort/utils/lp.py b/thirdparty/oort/utils/lp.py index 22d7680f..0b7ee083 100644 --- a/thirdparty/oort/utils/lp.py +++ b/thirdparty/oort/utils/lp.py @@ -42,13 +42,13 @@ def select_by_sorted_num(raw_datas, pref, budget): top_k_indices = sorted(feasible_clients, reverse=True, key=lambda k:sum_of_cols[k]) - for idx, clientId in enumerate(top_k_indices): + for idx, client_id in enumerate(top_k_indices): # Take this client, and reduce the preference by the capacity of each class on this client tempTakenSamples = {} for cl in listOfInterest: - takenSamples = min(preference[cl], datas[clientId][cl]) + takenSamples = min(preference[cl], datas[client_id][cl]) preference[cl] -= takenSamples if preference[cl] == 0: @@ -57,8 +57,8 @@ def select_by_sorted_num(raw_datas, pref, budget): tempTakenSamples[cl] = takenSamples - datas[clientId, :] = 0 - clientsTaken[clientId] = tempTakenSamples + datas[client_id, :] = 0 + clientsTaken[client_id] = tempTakenSamples if interestChanged: break @@ -132,7 +132,7 @@ def run_select_by_category(request_list, data_distribution, client_info, budget, #logging.info(f"Testing Selector: Augmenting the cut_off_clients to {cut_off_clients} in heuristic") augTime = time.time() - start_time - #logging.info(f"Testing Selector: Client augmentation took {augTime:.2f} sec to pick {len(select_clients)} 
clients") + #logging.info(f"Testing Selector: TorchClient augmentation took {augTime:.2f} sec to pick {len(select_clients)} clients") select_client_list = list(select_clients.keys())