@@ -54,12 +54,12 @@ def test_llama3_8B_f16_decomposed(self):
5454                f"--iree-hip-target={ self .iree_hip_target }  " ,
5555                f"--tensor-parallelism-size=1" ,
5656                f"--attention-kernel=decomposed" ,
57-                 f"--num-prompts={ self .bs }  " ,
57+                 f"--num-prompts={ self .batch_size }  " ,
5858            ]
5959        )
6060
6161        baseline_mean_perplexity  =  round (
62-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
62+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
6363        )
6464        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
6565
@@ -73,7 +73,7 @@ def test_llama3_8B_f16_decomposed(self):
7373        )
7474
7575    @skipif_run_quick_llama_test  
76-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
76+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
7777    def  test_llama3_8B_f16 (self ):
7878
7979        # Llama 3.1 8B non-decomposed 
@@ -90,12 +90,12 @@ def test_llama3_8B_f16(self):
9090                f"--iree-hip-target={ self .iree_hip_target }  " ,
9191                f"--tensor-parallelism-size=1" ,
9292                f"--attention-kernel=torch_sdpa" ,
93-                 f"--num-prompts={ self .bs }  " ,
93+                 f"--num-prompts={ self .batch_size }  " ,
9494            ]
9595        )
9696
9797        baseline_mean_perplexity  =  round (
98-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
98+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
9999        )
100100        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
101101
@@ -109,7 +109,7 @@ def test_llama3_8B_f16(self):
109109        )
110110
111111    @skipif_run_quick_llama_test  
112-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
112+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
113113    def  test_llama3_8B_fp8_decomposed (self ):
114114
115115        # Llama 3.1 8B decomposed 
@@ -126,12 +126,12 @@ def test_llama3_8B_fp8_decomposed(self):
126126                f"--iree-hip-target={ self .iree_hip_target }  " ,
127127                f"--tensor-parallelism-size=1" ,
128128                f"--attention-kernel=decomposed" ,
129-                 f"--num-prompts={ self .bs }  " ,
129+                 f"--num-prompts={ self .batch_size }  " ,
130130            ]
131131        )
132132
133133        baseline_mean_perplexity  =  round (
134-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
134+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
135135        )
136136        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
137137
@@ -145,7 +145,7 @@ def test_llama3_8B_fp8_decomposed(self):
145145        )
146146
147147    @skipif_run_quick_llama_test  
148-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
148+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
149149    def  test_llama3_8B_fp8 (self ):
150150
151151        # Llama 3.1 8B non-decomposed 
@@ -162,12 +162,12 @@ def test_llama3_8B_fp8(self):
162162                f"--iree-hip-target={ self .iree_hip_target }  " ,
163163                f"--tensor-parallelism-size=1" ,
164164                f"--attention-kernel=torch_sdpa" ,
165-                 f"--num-prompts={ self .bs }  " ,
165+                 f"--num-prompts={ self .batch_size }  " ,
166166            ]
167167        )
168168
169169        baseline_mean_perplexity  =  round (
170-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
170+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
171171        )
172172        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
173173
@@ -200,12 +200,12 @@ def test_llama3_405B_f16_decomposed(self):
200200                f"--iree-hip-target={ self .iree_hip_target }  " ,
201201                f"--tensor-parallelism-size={ self .tensor_parallelism_size }  " ,
202202                f"--attention-kernel=decomposed" ,
203-                 f"--num-prompts={ self .bs }  " ,
203+                 f"--num-prompts={ self .batch_size }  " ,
204204            ]
205205        )
206206
207207        baseline_mean_perplexity  =  round (
208-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
208+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
209209        )
210210        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
211211
@@ -219,7 +219,7 @@ def test_llama3_405B_f16_decomposed(self):
219219        )
220220
221221    @skipif_run_quick_llama_test  
222-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
222+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
223223    def  test_llama3_405B_f16 (self ):
224224
225225        # Llama 3.1 405B non-decomposed 
@@ -236,12 +236,12 @@ def test_llama3_405B_f16(self):
236236                f"--iree-hip-target={ self .iree_hip_target }  " ,
237237                f"--tensor-parallelism-size={ self .tensor_parallelism_size }  " ,
238238                f"--attention-kernel=torch_sdpa" ,
239-                 f"--num-prompts={ self .bs }  " ,
239+                 f"--num-prompts={ self .batch_size }  " ,
240240            ]
241241        )
242242
243243        baseline_mean_perplexity  =  round (
244-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
244+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
245245        )
246246        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
247247
@@ -255,7 +255,7 @@ def test_llama3_405B_f16(self):
255255        )
256256
257257    @skipif_run_quick_llama_test  
258-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
258+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
259259    def  test_llama3_405B_fp8_decomposed (self ):
260260
261261        # Llama 3.1 405B decomposed 
@@ -272,12 +272,12 @@ def test_llama3_405B_fp8_decomposed(self):
272272                f"--iree-hip-target={ self .iree_hip_target }  " ,
273273                f"--tensor-parallelism-size={ self .tensor_parallelism_size }  " ,
274274                f"--attention-kernel=decomposed" ,
275-                 f"--num-prompts={ self .bs }  " ,
275+                 f"--num-prompts={ self .batch_size }  " ,
276276            ]
277277        )
278278
279279        baseline_mean_perplexity  =  round (
280-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
280+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
281281        )
282282        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
283283
@@ -291,7 +291,7 @@ def test_llama3_405B_fp8_decomposed(self):
291291        )
292292
293293    @skipif_run_quick_llama_test  
294-     @pytest .mark .xfail (reason = "Compile Error" , strict = True ,  raises = IreeCompileException ) 
294+     @pytest .mark .xfail (reason = "Compile Error" , raises = IreeCompileException ) 
295295    def  test_llama3_405B_fp8 (self ):
296296
297297        # Llama 3.1 405B non-decomposed 
@@ -308,12 +308,12 @@ def test_llama3_405B_fp8(self):
308308                f"--iree-hip-target={ self .iree_hip_target }  " ,
309309                f"--tensor-parallelism-size={ self .tensor_parallelism_size }  " ,
310310                f"--attention-kernel=torch_sdpa" ,
311-                 f"--num-prompts={ self .bs }  " ,
311+                 f"--num-prompts={ self .batch_size }  " ,
312312            ]
313313        )
314314
315315        baseline_mean_perplexity  =  round (
316-             np .mean (baseline_perplexity ["perplexities" ][0  : self .bs ]), 6 
316+             np .mean (baseline_perplexity ["perplexities" ][0  : self .batch_size ]), 6 
317317        )
318318        current_mean_perplexity  =  round (current_perplexity ["mean_perplexity" ], 6 )
319319
0 commit comments