-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathargs.py
202 lines (168 loc) · 9.91 KB
/
args.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import os
import argparse
import sys
# Name of the entry-point script; ARGS uses it below to infer the run mode
# (plain training vs. adversarial generation vs. paired training).
script_name = sys.argv[0]

parser = argparse.ArgumentParser()
# For training and testing
parser.add_argument('model_name', type=str, help='Give model name, this will name logs and checkpoints.')
parser.add_argument('--data_path', type=str, help='Path to the file with train, dev and test data', default='')
parser.add_argument('--test_only_data_path', type=str, help='Path to the file containing only test samples', default='')
parser.add_argument('--save_dir', type=str, help='Root directory where all models are saved', default='models')
parser.add_argument('--epoch', type=int, help='Max number of epochs to train', default=50)
parser.add_argument('--batch_size', type=int, help='Batch size to use for training', default=50)
parser.add_argument('--learning_rate', type=float, help='Learning rate to use', default=0.001)
parser.add_argument('--momentum', type=float, help='Momentum of SGD algorithm', default=0.8)
parser.add_argument('--first_n_samples', type=int, help='Only use first n samples of the training set', default=None)
parser.add_argument('--new_model_name', type=str, help='If given, will save the trained model as a new one', default='')
parser.add_argument('--use_reg_model', help='Use the model with regularization layers', action='store_true')
parser.add_argument('--patience', type=int, help='Number of epochs w/o improvements before stopping', default=5)
parser.add_argument('--is_rgb_data', help='Is the data file in RGB format?', action='store_true')
# For adversarial examples generation only
parser.add_argument('--adversarial_dir', type=str, help='Place to store adversarial examples', default='adversarial')
parser.add_argument('--attack_name', type=str, help='The attack to be performed', default='none',
                    choices=['none', 'fgsm', 'cw', 'colored'])
parser.add_argument('--test_data_only', help='Generate adv samples only for test set', action='store_true')
# For colored examples only
parser.add_argument('--bias_mode', help='How many of the labels will have bias?', choices=['none', 'partial', 'all'], default='none')
unbiased_data_mode_choices = ['pure', 'random_pure', 'mixture']
# BUG FIX: the choices list above was defined but never handed to argparse,
# so any string was silently accepted for --unbiased_data_mode.
parser.add_argument('--unbiased_data_mode', help='How do we make the unbiased samples?',
                    choices=unbiased_data_mode_choices, default='pure')
parser.add_argument('--ordered', help='Order biased samples by digits', action='store_true')
parser.add_argument('--clipped', help='Clip dataset to contain only biased digits', action='store_true')
augment_choices = ['none', 'basic', 'clipped', 'noise', 'noise_weak', 'noise_minor', 'random_pure', 'strips', 'mixture', 'pure_single']
parser.add_argument('--augment_mode', help='How to augment data with biased labels', choices=augment_choices, default='none')
test_mode_choices = ['pure', 'random_pure', 'noise', 'strips', 'mixture', 'pure_black', 'pure1', 'pure2', 'pure3', 'pure12', 'pure13', 'pure23', 'pure123', 'pure_special', 'pure_half_1', 'pure_half_2', 'pure_half_3', 'pure_half_12', 'pure_half_13', 'pure_half_23', 'pure_half_123']
parser.add_argument('--test_mode', help='How should we fill the background of the generated test set?', choices=test_mode_choices, default='pure')
# For paired training only
parser.add_argument('--paired_data_path', type=str, help='Path to the paired data to be used', default='')
# Note: --augment_data_mode deliberately has no choices= constraint — ARGS
# below also accepts pure-digit strings in addition to these named modes.
aug_mode_choices = ['pick_first', 'pick_random', 'one_digit']
parser.add_argument('--augment_data_mode', type=str, default='',
                    help='In which way do we use the augment data? Choose from {}'.format(aug_mode_choices))
parser.add_argument('--is_clipped_data', action='store_true', help='Is the augmented data clipped?')
# NOTE(review): this help text lists methods 1-5 but choices=[0, 2, 3, 5];
# options 1 and 4 are not actually selectable and 0 is undocumented — confirm
# the intended mapping before reconciling.
help_str = """
Select regularization method.
1: Gradient norm
2: KL divergence
3: L2 distance
4: ???
5: L1 distance
"""
parser.add_argument('--method', type=int, metavar='M', choices=[0, 2, 3, 5], help=help_str, default=3)
help_str = """
Select which object to be used as the input to calculate the regularization loss.
0: Logits of the cnn
1: Activation value of the second to last layer
"""
parser.add_argument('--reg_object', metavar='T', type=int, choices=[0, 1], help=help_str, default=0)
help_str = "Number of fc layers to use to produce the regularization loss"
parser.add_argument('--reg_layers', type=int, metavar='N', choices=[1, 2], help=help_str, default=1)
help_str = 'Whether to use dropout as regularization'
parser.add_argument('--use_dropout', type=int, metavar='D', choices=[0, 1], help=help_str, default=1)
parser.add_argument('--reg', type=float, metavar='R', help='Coeff for L1 and L2 Loss in method 3 and 5', default=1e-4)

# Parsed once at import time; the whole project reads the result through
# get_args() or the ARGS class below.
args = parser.parse_args()
def get_args():
    """Return the module-level argparse Namespace (parsed once at import time)."""
    return args
class ARGS:
    """Import-time snapshot of the parsed CLI arguments plus derived config.

    Class attributes are computed once when the module is imported: raw
    argparse values, derived paths, run-mode flags inferred from the entry
    script's name, argument cross-validation, and output-directory creation.
    Note the validation uses ``assert`` and is therefore skipped under
    ``python -O``.
    """
    # For training and testing
    model_name = args.model_name
    data_path = args.data_path
    new_model_name = args.new_model_name
    # Checkpoints live in <save_dir>/<model_name>/<model_name>.pt
    save_dir = os.path.join(args.save_dir, model_name)
    save_path = os.path.join(save_dir, model_name + '.pt')
    new_save_dir = os.path.join(args.save_dir, new_model_name)
    new_save_path = os.path.join(new_save_dir, new_model_name + '.pt')
    epoch = args.epoch
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    momentum = args.momentum
    first_n_samples = args.first_n_samples
    use_reg_model = args.use_reg_model
    patience = args.patience
    is_rgb_data = args.is_rgb_data
    # For dataset generation only
    attack_name = args.attack_name
    # Adversarial samples are grouped per attack: <adversarial_dir>/<attack>
    adversarial_dir = os.path.join(args.adversarial_dir, attack_name)
    test_data_only = args.test_data_only
    bias_mode = args.bias_mode
    unbiased_data_mode = args.unbiased_data_mode
    unbiased_data_mode_choices = unbiased_data_mode_choices
    ordered = args.ordered
    augment_mode = args.augment_mode
    augment_choices = augment_choices
    clipped = args.clipped
    test_mode = args.test_mode
    test_mode_choices = test_mode_choices
    # For evaluation
    test_only_data_path = args.test_only_data_path
    # For paired training only
    paired_data_path = args.paired_data_path
    method = args.method
    reg_object = args.reg_object
    reg_layers = args.reg_layers
    use_dropout = args.use_dropout
    reg = args.reg
    augment_data_mode = args.augment_data_mode
    is_clipped_data = args.is_clipped_data
    # Run modes, inferred from the entry script's file name.
    isGeneration = script_name.startswith('generate_adversarial')
    isAugmentation = isGeneration and augment_mode != 'none'
    saveAsNew = bool(args.new_model_name)
    isPairedTrain = script_name.startswith('train_paired')
    # ---- Cross-argument validation (fail fast at import) ----
    # FIX: the old message claimed "but not both", which this condition never
    # enforced — either path (or both) is acceptable.
    assert attack_name == 'colored' or bool(data_path) or bool(test_only_data_path), \
        "Require a data_path or a test_only_data_path argument, unless you are generating a colored adversarial dataset"
    assert not isPairedTrain or bool(paired_data_path), "Paired training requires a paired_data_path"
    assert not augment_data_mode or isPairedTrain, "Augment data can only be used in paired training"
    # FIX: this message was a copy-paste of the previous assert; the condition
    # actually validates the *value* of augment_data_mode.
    assert not augment_data_mode or augment_data_mode.isdigit() or augment_data_mode in aug_mode_choices, \
        "augment_data_mode must be a digit string or one of {}".format(aug_mode_choices)
    assert not is_clipped_data or augment_data_mode, "is_clipped_data is only for training paired with augmented data"
    # FIX: implicit string concatenation previously produced "data inaugmentation".
    assert not (clipped and isAugmentation), "'Clipped' arg is only for colored dataset generation. To clip data in " \
        "augmentation, use '--augment_mode clipped' instead."
    assert attack_name == 'colored' or bias_mode == 'none', "Partial bias is only supported by colored sample generation"
    assert attack_name == 'colored' or not ordered, "ordered field is only supported by colored sample generation"
    # ---- Check directories ----
    # makedirs(exist_ok=True) creates parents as needed and avoids the
    # check-then-create race of the former exists()/mkdir() pairs.
    os.makedirs(save_dir, exist_ok=True)
    if isGeneration:
        os.makedirs(adversarial_dir, exist_ok=True)
    # Pre-processing: FGSM generation processes one sample at a time.
    if attack_name == 'fgsm' and isGeneration:
        batch_size = 1

    @staticmethod
    def toString():
        """Return the printable form of the raw argparse Namespace."""
        return str(args)
# NOTE(review): removed two dead triple-quoted string blocks that used to sit
# here with no runtime effect: an obsolete argument-validation sketch (it read
# args.paired_training and args.adv_data_path, which are no longer defined
# above) and the original hard-coded configuration constants. Recover them
# from version history if ever needed.