example_tree5.txt

Winner:
[(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
[[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]] 
cost: 13735.255058029838

[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
|
|-------[(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
|       [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
|       |
|       [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
|       [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]] (cost: 13735.255058029838)
|       |
|       |-------[(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
|                [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]] (cost: 13847.537195481009)
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
|
[(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
[[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]


inter_stage_plan: InterStagePlan(ns_idx=1, node_sequence=(<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), dg_idx=3, device_groups=[4, 4, 4, 8, 4, 4, 4], num_stage=7, batches=1, gbs=128)
NAIVE STRATEGY: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (8, 1)): [5752.74932384491, 5752.74932384491, 5752.74932384491, 5752.74932384491, 5844.893217086792, 5844.893217086792, 5844.893217086792, 5844.893217086792]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05562468255168519, 0.05562468255168519, 0.05562468255168519, 0.24906678555225004, 0.19468638893089815, 0.19468638893089815, 0.19468638893089815]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3, 4)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (5, 6)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (7,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 5, 7, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 995040.0009999999, 41460.001, 20730.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, -601824.0009999999, 286219.999, 306949.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(2): [0.07258544601230758, 0.0713290931633726, 0.07016580348843279, 0.0885827107898773, 0.24181039196665424, 0.22959305784627304, 0.22054318072006474]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0, 1)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') ()
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 2, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [59830.001, 0.001, 20730.001, 497520.001, 41460.001, 41460.001, 81390.001], memory_state: [5705.999000000003, 65535.999, 44805.998999999996, -104304.00099999999, 286219.999, 286219.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(3): [0.07463414759638551, 0.07318104575472333, 0.07184413152006872, 0.06301038193911154, 0.24714643507757697, 0.2333073127607782, 0.22320691460486516]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0, 1)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') ()
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 2, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [59830.001, 0.001, 20730.001, 497520.001, 41460.001, 41460.001, 81390.001], memory_state: [5705.999000000003, 65535.999, 44805.998999999996, -104304.00099999999, 286219.999, 286219.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0, 1], [], [2], [3], [4, 5], [6, 7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(4): [0.07610449096290621, 0.07450622467794062, 0.07304198826013865, 0.04482035147389525, 0.2509464738945699, 0.2359343605720928, 0.22508515899710424]
layer_partition: None
TESTING NEW STRATEGY
curr_strategy: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
next_strategy: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (4, 2)): [6256.935596466064, 6256.935596466064, 6370.417356491089, 6370.417356491089]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05679155767217861, 0.05679155767217861, 0.05679155767217861, 0.2333139714255888, 0.1987704518526251, 0.1987704518526251, 0.1987704518526251]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3, 4)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (5, 6)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (7,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 5, 7, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 497520.001, 41460.001, 20730.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, -104304.00099999999, 286219.999, 306949.999, 246289.999]
MEMORY EXCEEDED
TESTING NEW PARTITION
current partition: [[0], [1], [2], [3, 4], [5, 6], [7], [8, 9]]
MP strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
adj_stage_compute_performance(2): [0.06339046749643745, 0.06290165936130716, 0.06244905923618653, 0.16596017800955884, 0.21710494766191002, 0.21235155985950283, 0.20883053185771971]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
new partition: [[0], [1], [2], [3], [4, 5], [6, 7], [8, 9]]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 248760.001, 41460.001, 41460.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, 144455.999, 286219.999, 286219.999, 246289.999]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
node_sequence: (<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), device_group: [4, 4, 4, 8, 4, 4, 4], num_stage: 7, batches: 1, gbs: 128, strategies: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)], layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
data loadbalancer: [14, 14, 50, 50]
execution_cost: 12020.796537399292, fb_sync_cost: 1674.325942993164, parameter_upate_costs: 4.385414123535156, dp_cost: 18.522121263586957, pp_cost: 16.250434782608696
cost: 13735.255058029838
NAIVE STRATEGY: [(4, 1), (4, 1), (4, 1), (8, 1), (4, 1), (4, 1), (4, 1)]
TESTING NEW STRATEGY
curr_strategy: [(4, 1), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
next_strategy: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
valid_strategies: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)]
profile_cost of (8, (4, 2)): [6256.935596466064, 6256.935596466064, 6370.417356491089, 6370.417356491089]
stage_memory_capacity: [65536, 65536, 65536, 393216, 327680, 327680, 327680]
stage_compute_performance: [0.05236865161451096, 0.05705786542895902, 0.05705786542895902, 0.23440803052350145, 0.19970252900135654, 0.19970252900135654, 0.19970252900135654]
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (0,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (1,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100') (2,)
Testing Mapping: ('V100', 'V100', 'V100', 'V100', 'A100', 'A100', 'A100', 'A100') (3,)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (4, 5)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (6, 7)
Testing Mapping: ('A100', 'A100', 'A100', 'A100') (8, 9)
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
stage_memory_demand: [39100.001, 20730.001, 20730.001, 248760.001, 41460.001, 41460.001, 81390.001], memory_state: [26435.999000000003, 44805.998999999996, 44805.998999999996, 144455.999, 286219.999, 286219.999, 246289.999]
layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
node_sequence: (<DeviceType.V100: 'v100'>, <DeviceType.A100: 'a100'>), device_group: [4, 4, 4, 8, 4, 4, 4], num_stage: 7, batches: 1, gbs: 128, strategies: [(2, 2), (4, 1), (4, 1), (4, 2), (4, 1), (4, 1), (4, 1)], layer_partition: [0, 1, 2, 3, 4, 6, 8, 10]
data loadbalancer: [14, 14, 50, 50]
execution_cost: 12130.518674850464, fb_sync_cost: 1674.325942993164, parameter_upate_costs: 4.385414123535156, dp_cost: 18.522121263586957, pp_cost: 18.810434782608695
cost: 13847.537195481009