36
36
37
37
@pytest .mark .long
38
38
@pytest .mark .parametrize ("batch_size" , [12 ])
39
- @pytest .mark .parametrize ("max_epochs" , [2 ])
40
- @pytest .mark .parametrize ("num_labels,atol" , [(120 , 1e-2 ), (1200 , 2e-2 )])
39
+ @pytest .mark .parametrize ("max_epochs" , [4 ])
40
+ @pytest .mark .parametrize ("num_labels,atol" , [(120 , 1e-2 ), (360 , 2e-2 )])
41
41
def test_metrics_is_similar_in_ddp (num_labels : int , atol : float , batch_size : int , max_epochs : int ) -> None :
42
42
devices = (1 , 2 , 3 )
43
43
# We will compare metrics from the same experiment run with different numbers of devices. For this we aggregate
@@ -48,18 +48,19 @@ def test_metrics_is_similar_in_ddp(num_labels: int, atol: float, batch_size: int
48
48
metric_topk2values = defaultdict (list )
49
49
50
50
for num_devices in devices :
51
- batch_size //= num_devices
51
+ batch_size_eff = batch_size // num_devices
52
+
52
53
params = (
53
54
f"--devices { num_devices } "
54
55
f"--max_epochs { max_epochs } "
55
56
f"--num_labels { num_labels } "
56
- f"--batch_size { batch_size } "
57
+ f"--batch_size { batch_size_eff } "
57
58
)
58
59
cmd = f"python { exp_file } " + params
59
60
subprocess .run (cmd , check = True , shell = True )
60
61
61
62
metrics_path = MetricValCallbackWithSaving .save_path_pattern .format (
62
- devices = num_devices , batch_size = batch_size , num_labels = num_labels
63
+ devices = num_devices , batch_size = batch_size_eff , num_labels = num_labels
63
64
)
64
65
metrics = torch .load (metrics_path )[OVERALL_CATEGORIES_KEY ]
65
66
Path (metrics_path ).unlink (missing_ok = True )
0 commit comments