@@ -54,12 +54,12 @@ def test_llama3_8B_f16_decomposed(self):
54
54
f"--iree-hip-target={ self .iree_hip_target } " ,
55
55
f"--tensor-parallelism-size=1" ,
56
56
f"--attention-kernel=decomposed" ,
57
- f"--num-prompts={ self .bs } " ,
57
+ f"--num-prompts={ self .batch_size } " ,
58
58
]
59
59
)
60
60
61
61
baseline_mean_perplexity = round (
62
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
62
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
63
63
)
64
64
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
65
65
@@ -73,7 +73,7 @@ def test_llama3_8B_f16_decomposed(self):
73
73
)
74
74
75
75
@skipif_run_quick_llama_test
76
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
76
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
77
77
def test_llama3_8B_f16 (self ):
78
78
79
79
# Llama 3.1 8B non-decomposed
@@ -90,12 +90,12 @@ def test_llama3_8B_f16(self):
90
90
f"--iree-hip-target={ self .iree_hip_target } " ,
91
91
f"--tensor-parallelism-size=1" ,
92
92
f"--attention-kernel=torch_sdpa" ,
93
- f"--num-prompts={ self .bs } " ,
93
+ f"--num-prompts={ self .batch_size } " ,
94
94
]
95
95
)
96
96
97
97
baseline_mean_perplexity = round (
98
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
98
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
99
99
)
100
100
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
101
101
@@ -109,7 +109,7 @@ def test_llama3_8B_f16(self):
109
109
)
110
110
111
111
@skipif_run_quick_llama_test
112
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
112
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
113
113
def test_llama3_8B_fp8_decomposed (self ):
114
114
115
115
# Llama 3.1 8B decomposed
@@ -126,12 +126,12 @@ def test_llama3_8B_fp8_decomposed(self):
126
126
f"--iree-hip-target={ self .iree_hip_target } " ,
127
127
f"--tensor-parallelism-size=1" ,
128
128
f"--attention-kernel=decomposed" ,
129
- f"--num-prompts={ self .bs } " ,
129
+ f"--num-prompts={ self .batch_size } " ,
130
130
]
131
131
)
132
132
133
133
baseline_mean_perplexity = round (
134
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
134
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
135
135
)
136
136
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
137
137
@@ -145,7 +145,7 @@ def test_llama3_8B_fp8_decomposed(self):
145
145
)
146
146
147
147
@skipif_run_quick_llama_test
148
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
148
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
149
149
def test_llama3_8B_fp8 (self ):
150
150
151
151
# Llama 3.1 8B non-decomposed
@@ -162,12 +162,12 @@ def test_llama3_8B_fp8(self):
162
162
f"--iree-hip-target={ self .iree_hip_target } " ,
163
163
f"--tensor-parallelism-size=1" ,
164
164
f"--attention-kernel=torch_sdpa" ,
165
- f"--num-prompts={ self .bs } " ,
165
+ f"--num-prompts={ self .batch_size } " ,
166
166
]
167
167
)
168
168
169
169
baseline_mean_perplexity = round (
170
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
170
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
171
171
)
172
172
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
173
173
@@ -200,12 +200,12 @@ def test_llama3_405B_f16_decomposed(self):
200
200
f"--iree-hip-target={ self .iree_hip_target } " ,
201
201
f"--tensor-parallelism-size={ self .tensor_parallelism_size } " ,
202
202
f"--attention-kernel=decomposed" ,
203
- f"--num-prompts={ self .bs } " ,
203
+ f"--num-prompts={ self .batch_size } " ,
204
204
]
205
205
)
206
206
207
207
baseline_mean_perplexity = round (
208
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
208
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
209
209
)
210
210
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
211
211
@@ -219,7 +219,7 @@ def test_llama3_405B_f16_decomposed(self):
219
219
)
220
220
221
221
@skipif_run_quick_llama_test
222
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
222
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
223
223
def test_llama3_405B_f16 (self ):
224
224
225
225
# Llama 3.1 405B non-decomposed
@@ -236,12 +236,12 @@ def test_llama3_405B_f16(self):
236
236
f"--iree-hip-target={ self .iree_hip_target } " ,
237
237
f"--tensor-parallelism-size={ self .tensor_parallelism_size } " ,
238
238
f"--attention-kernel=torch_sdpa" ,
239
- f"--num-prompts={ self .bs } " ,
239
+ f"--num-prompts={ self .batch_size } " ,
240
240
]
241
241
)
242
242
243
243
baseline_mean_perplexity = round (
244
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
244
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
245
245
)
246
246
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
247
247
@@ -255,7 +255,7 @@ def test_llama3_405B_f16(self):
255
255
)
256
256
257
257
@skipif_run_quick_llama_test
258
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
258
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
259
259
def test_llama3_405B_fp8_decomposed (self ):
260
260
261
261
# Llama 3.1 405B decomposed
@@ -272,12 +272,12 @@ def test_llama3_405B_fp8_decomposed(self):
272
272
f"--iree-hip-target={ self .iree_hip_target } " ,
273
273
f"--tensor-parallelism-size={ self .tensor_parallelism_size } " ,
274
274
f"--attention-kernel=decomposed" ,
275
- f"--num-prompts={ self .bs } " ,
275
+ f"--num-prompts={ self .batch_size } " ,
276
276
]
277
277
)
278
278
279
279
baseline_mean_perplexity = round (
280
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
280
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
281
281
)
282
282
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
283
283
@@ -291,7 +291,7 @@ def test_llama3_405B_fp8_decomposed(self):
291
291
)
292
292
293
293
@skipif_run_quick_llama_test
294
- @pytest .mark .xfail (reason = "Compile Error" , strict = True , raises = IreeCompileException )
294
+ @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException )
295
295
def test_llama3_405B_fp8 (self ):
296
296
297
297
# Llama 3.1 405B non-decomposed
@@ -308,12 +308,12 @@ def test_llama3_405B_fp8(self):
308
308
f"--iree-hip-target={ self .iree_hip_target } " ,
309
309
f"--tensor-parallelism-size={ self .tensor_parallelism_size } " ,
310
310
f"--attention-kernel=torch_sdpa" ,
311
- f"--num-prompts={ self .bs } " ,
311
+ f"--num-prompts={ self .batch_size } " ,
312
312
]
313
313
)
314
314
315
315
baseline_mean_perplexity = round (
316
- np .mean (baseline_perplexity ["perplexities" ][0 : self .bs ]), 6
316
+ np .mean (baseline_perplexity ["perplexities" ][0 : self .batch_size ]), 6
317
317
)
318
318
current_mean_perplexity = round (current_perplexity ["mean_perplexity" ], 6 )
319
319
0 commit comments