foolsgold.py
from aggregators.aggregator_utils import prepare_grad_updates, wrapup_aggregated_grads
from aggregators.aggregatorbase import AggregatorBase
import numpy as np
import copy
import sklearn.metrics.pairwise as smp
from aggregators import aggregator_registry
from fl.models.model_utils import ol_from_model


@aggregator_registry
class FoolsGold(AggregatorBase):
    """
    [The Limitations of Federated Learning in Sybil Settings](https://www.usenix.org/conference/raid2020/presentation/fung) - RAID '20
    FoolsGold computes the pairwise cosine similarity between the clients' accumulated (historical) updates, takes each client's maximum similarity to any other client, and re-weights the clients' updates accordingly: clients whose update histories are highly similar to one another (likely sybils) are down-weighted.
    """

    def __init__(self, args, **kwargs):
        super().__init__(args)
        """
        epsilon (float): a small value to avoid division by zero, taking the log of zero, etc.
        topk_ratio (float): ratio used to select the top-k largest (by absolute value) output-layer parameters of the last global model as indicative features
        """
        self.default_defense_params = {
            "epsilon": 1.0e-6, "topk_ratio": 0.1}
        self.update_and_set_attr()

        self.algorithm = "FedSGD"
        self.checkpoints = []

    def aggregate(self, updates, **kwargs):
        self.global_model = kwargs["last_global_model"]
        # get the gradient updates corresponding to the clients' model updates
        gradient_updates = prepare_grad_updates(
            self.args.algorithm, updates, self.global_model)
        feature_dim = len(gradient_updates[0])
        # re-weighting vector for the clients' updates
        wv = np.zeros((self.args.num_clients, 1), dtype=np.float32)

        # 1. record and accumulate the historical gradients;
        # normalize each client's update if its norm exceeds 1
        for cid in range(self.args.num_clients):
            cid_norm = np.linalg.norm(gradient_updates[cid])
            if cid_norm > 1:
                gradient_updates[cid] /= cid_norm
        self.checkpoints.append(copy.deepcopy(gradient_updates))
        summed_updates = np.sum(self.checkpoints, axis=0)

        # 2. build the indicative-feature mask from the top-k largest output-layer parameters of the last global model
        ol_last_global_model = ol_from_model(
            self.global_model, flatten=False, return_type='vector')
        indicative_mask = self.get_indicative_mask(
            ol_last_global_model, feature_dim)

        # 3. cosine similarity (cs) between the clients' accumulated updates on the indicative features,
        # with the diagonal (self-similarity) zeroed out
        cos_dist = smp.cosine_similarity(
            summed_updates[:, indicative_mask == 1]) - np.eye(self.args.num_clients, dtype=np.float32)
        wv = self.pardoning(cos_dist)  # per-client re-weighting vector

        agg_grad_updates = np.dot(gradient_updates.T, wv)
        return wrapup_aggregated_grads(agg_grad_updates, self.args.algorithm, self.global_model, aggregated=True)

    def pardoning(self, cos_dist):
        max_cs = np.max(cos_dist, axis=1) + self.epsilon
        # 4. pardoning: if client i looks less "sybil-like" than client j,
        # scale down its similarity to j so that honest clients are not penalized
        for i in range(self.args.num_clients):
            for j in range(self.args.num_clients):
                if i == j:
                    continue
                if max_cs[i] < max_cs[j]:
                    cos_dist[i][j] *= max_cs[i] / max_cs[j]
        # diverse (benign) clients have smaller cosine similarity scores and should be re-weighted to larger weights
        wv = 1 - np.max(cos_dist, axis=1)
        wv = np.clip(wv, 0, 1)
        wv /= np.max(wv)
        wv[wv == 1] = .99
        # logit function to spread the weights, then clip back to [0, 1]
        wv = np.log(wv / (1 - wv) + self.epsilon) + 0.5
        wv[np.isinf(wv) | (wv > 1)] = 1
        wv[wv < 0] = 0
        return wv

    def get_indicative_mask(self, ol_vec, feature_dim):
        class_dim, ol_feature_dim = ol_vec.shape[0], ol_vec.shape[1]
        ol_indicative_idx = np.zeros(
            (class_dim, ol_feature_dim), dtype=np.int64)  # index must be int or bool
        topk = int(class_dim * self.topk_ratio)
        for i in range(class_dim):  # class-wise top-k largest entries
            sig_features_idx = np.argpartition(ol_vec[i], -topk)[-topk:]
            ol_indicative_idx[i][sig_features_idx] = 1
        ol_indicative_idx = ol_indicative_idx.flatten()
        # left-pad with zeros up to feature_dim, so the mask lines up with the
        # output-layer parameters at the end of the flattened gradient vector
        indicative_mask = np.pad(ol_indicative_idx,
                                 (feature_dim - len(ol_indicative_idx), 0), 'constant')
        return indicative_mask
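

if __name__ == "__main__":
    # Illustrative sketch only (not part of the defense, and the numbers below are made up):
    # the re-weighting from pardoning() applied to a hand-crafted 3x3 cosine-similarity
    # matrix, to show that near-duplicate (sybil-like) histories are driven to weight 0.
    # In the real pipeline, cos_dist is built from the clients' accumulated gradients
    # inside FoolsGold.aggregate().
    eps = 1.0e-6
    num_clients = 3
    # clients 0 and 1 mimic sybils (nearly identical histories); client 2 is benign
    cos_dist = np.array([[0.00, 0.95, 0.10],
                         [0.95, 0.00, 0.12],
                         [0.10, 0.12, 0.00]], dtype=np.float32)
    max_cs = np.max(cos_dist, axis=1) + eps
    for i in range(num_clients):
        for j in range(num_clients):
            if i != j and max_cs[i] < max_cs[j]:
                cos_dist[i][j] *= max_cs[i] / max_cs[j]
    wv = 1 - np.max(cos_dist, axis=1)
    wv = np.clip(wv, 0, 1)
    wv /= np.max(wv)
    wv[wv == 1] = .99
    wv = np.log(wv / (1 - wv) + eps) + 0.5
    wv[np.isinf(wv) | (wv > 1)] = 1
    wv[wv < 0] = 0
    print("toy re-weighting vector:", wv)  # expected: approximately [0, 0, 1]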