forked from livingshade/Metis
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexample_tree5.txt
138 lines (133 loc) · 9.65 KB
/
example_tree5.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
Winner:
[(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
[[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]]
cost: 13735.255058029838
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
|
|-------[(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
| [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
| |
| [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
| [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]] (cost: 13735.255058029838)
| |
| |-------[(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
| [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]] (cost: 13847.537195481009)
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
inter_stage_plan: InterStagePlan(ns_idx=1, node_sequence=(<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), dg_idx=3, device_groups=[4, 4, 4, 8, 4, 4, 4], num_stage=7, batches=1, gbs=128)
NAIVE STRATEGY: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (8, 1)): [5752.74932384491, 5752.74932384491, 5752.74932384491, 5752.74932384491, 5844.893217086792, 5844.893217086792, 5844.893217086792, 5844.893217086792]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05562468255168519, 0.05562468255168519, 0.05562468255168519, 0.24906678555225004, 0.19468638893089815, 0.19468638893089815, 0.19468638893089815]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3, 4)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (5, 6)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (7,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 5, 7, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 995040.0009999999, 41460.001, 20730.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, -601824.0009999999, 286219.999, 306949.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(2): [0.07258544601230758, 0.0713290931633726, 0.07016580348843279, 0.0885827107898773, 0.24181039196665424, 0.22959305784627304, 0.22054318072006474]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0, 1)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') ()
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 2, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [59830.001, 0.001, 20730.001, 497520.001, 41460.001, 41460.001, 81390.001], memory_state: [5705.999000000003, 65535.999, 44805.998999999996, -104304.00099999999, 286219.999, 286219.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(3): [0.07463414759638551, 0.07318104575472333, 0.07184413152006872, 0.06301038193911154, 0.24714643507757697, 0.2333073127607782, 0.22320691460486516]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0, 1)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') ()
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 2, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [59830.001, 0.001, 20730.001, 497520.001, 41460.001, 41460.001, 81390.001], memory_state: [5705.999000000003, 65535.999, 44805.998999999996, -104304.00099999999, 286219.999, 286219.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(4): [0.07610449096290621, 0.07450622467794062, 0.07304198826013865, 0.04482035147389525, 0.2509464738945699, 0.2359343605720928, 0.22508515899710424]
layer_partition: None
TESTING NEW STRATEGY
curr_strategy: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
next_strategy: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (4, 2)): [6256.935596466064, 6256.935596466064, 6370.417356491089, 6370.417356491089]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05679155767217861, 0.05679155767217861, 0.05679155767217861, 0.2333139714255888, 0.1987704518526251, 0.1987704518526251, 0.1987704518526251]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3, 4)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (5, 6)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (7,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 5, 7, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 497520.001, 41460.001, 20730.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, -104304.00099999999, 286219.999, 306949.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(2): [0.06339046749643745, 0.06290165936130716, 0.06244905923618653, 0.16596017800955884, 0.21710494766191002, 0.21235155985950283, 0.20883053185771971]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 248760.001, 41460.001, 41460.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, 144455.999, 286219.999, 286219.999, 246289.999]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
node_sequence: (<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), device_group: [4, 4, 4, 8, 4, 4, 4], num_stage: 7, batches: 1, gbs: 128, strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)], layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
data loadbalancer: [14, 14, 50, 50]
execution_cost: 12020.796537399292, fb_sync_cost: 1674.325942993164, parameter_upate_costs: 4.385414123535156, dp_cost: 18.522121263586957, pp_cost: 16.250434782608696
cost: 13735.255058029838
NAIVE STRATEGY: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
TESTING NEW STRATEGY
curr_strategy: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
next_strategy: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (4, 2)): [6256.935596466064, 6256.935596466064, 6370.417356491089, 6370.417356491089]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05236865161451096, 0.05705786542895902, 0.05705786542895902, 0.23440803052350145, 0.19970252900135654, 0.19970252900135654, 0.19970252900135654]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 248760.001, 41460.001, 41460.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, 144455.999, 286219.999, 286219.999, 246289.999]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
node_sequence: (<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), device_group: [4, 4, 4, 8, 4, 4, 4], num_stage: 7, batches: 1, gbs: 128, strategies: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)], layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
data loadbalancer: [14, 14, 50, 50]
execution_cost: 12130.518674850464, fb_sync_cost: 1674.325942993164, parameter_upate_costs: 4.385414123535156, dp_cost: 18.522121263586957, pp_cost: 18.810434782608695
cost: 13847.537195481009