Correct bs to batch_size and drop strict=True from xfail markers in perplexity VMFB tests

archana-ramalingam · archana-ramalingam · commit 6626fa1271a2 · 2024-11-22T04:16:08.000Z
diff --git a/sharktank/tests/evaluate/perplexity_vmfb_test.py b/sharktank/tests/evaluate/perplexity_vmfb_test.py
@@ -54,12 +54,12 @@ def test_llama3_8B_f16_decomposed(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size=1",
                 f"--attention-kernel=decomposed",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -73,7 +73,7 @@ def test_llama3_8B_f16_decomposed(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_8B_f16(self):
 
         # Llama 3.1 8B non-decomposed
@@ -90,12 +90,12 @@ def test_llama3_8B_f16(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size=1",
                 f"--attention-kernel=torch_sdpa",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -109,7 +109,7 @@ def test_llama3_8B_f16(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_8B_fp8_decomposed(self):
 
         # Llama 3.1 8B decomposed
@@ -126,12 +126,12 @@ def test_llama3_8B_fp8_decomposed(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size=1",
                 f"--attention-kernel=decomposed",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -145,7 +145,7 @@ def test_llama3_8B_fp8_decomposed(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_8B_fp8(self):
 
         # Llama 3.1 8B non-decomposed
@@ -162,12 +162,12 @@ def test_llama3_8B_fp8(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size=1",
                 f"--attention-kernel=torch_sdpa",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -200,12 +200,12 @@ def test_llama3_405B_f16_decomposed(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size={self.tensor_parallelism_size}",
                 f"--attention-kernel=decomposed",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -219,7 +219,7 @@ def test_llama3_405B_f16_decomposed(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_405B_f16(self):
 
         # Llama 3.1 405B non-decomposed
@@ -236,12 +236,12 @@ def test_llama3_405B_f16(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size={self.tensor_parallelism_size}",
                 f"--attention-kernel=torch_sdpa",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -255,7 +255,7 @@ def test_llama3_405B_f16(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_405B_fp8_decomposed(self):
 
         # Llama 3.1 405B decomposed
@@ -272,12 +272,12 @@ def test_llama3_405B_fp8_decomposed(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size={self.tensor_parallelism_size}",
                 f"--attention-kernel=decomposed",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 
@@ -291,7 +291,7 @@ def test_llama3_405B_fp8_decomposed(self):
         )
 
     @skipif_run_quick_llama_test
-    @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)
+    @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)
     def test_llama3_405B_fp8(self):
 
         # Llama 3.1 405B non-decomposed
@@ -308,12 +308,12 @@ def test_llama3_405B_fp8(self):
                 f"--iree-hip-target={self.iree_hip_target}",
                 f"--tensor-parallelism-size={self.tensor_parallelism_size}",
                 f"--attention-kernel=torch_sdpa",
-                f"--num-prompts={self.bs}",
+                f"--num-prompts={self.batch_size}",
             ]
         )
 
         baseline_mean_perplexity = round(
-            np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6
+            np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6
         )
         current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)
 

Original file line number	Diff line number	Diff line change
`@@ -54,12 +54,12 @@ def test_llama3_8B_f16_decomposed(self):`
`54`	`54`	`f"--iree-hip-target={self.iree_hip_target}",`
`55`	`55`	`f"--tensor-parallelism-size=1",`
`56`	`56`	`f"--attention-kernel=decomposed",`
`57`		`- f"--num-prompts={self.bs}",`
	`57`	`+ f"--num-prompts={self.batch_size}",`
`58`	`58`	`]`
`59`	`59`	`)`
`60`	`60`
`61`	`61`	`baseline_mean_perplexity = round(`
`62`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`62`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`63`	`63`	`)`
`64`	`64`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`65`	`65`
`@@ -73,7 +73,7 @@ def test_llama3_8B_f16_decomposed(self):`
`73`	`73`	`)`
`74`	`74`
`75`	`75`	`@skipif_run_quick_llama_test`
`76`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`76`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`77`	`77`	`def test_llama3_8B_f16(self):`
`78`	`78`
`79`	`79`	`# Llama 3.1 8B non-decomposed`
`@@ -90,12 +90,12 @@ def test_llama3_8B_f16(self):`
`90`	`90`	`f"--iree-hip-target={self.iree_hip_target}",`
`91`	`91`	`f"--tensor-parallelism-size=1",`
`92`	`92`	`f"--attention-kernel=torch_sdpa",`
`93`		`- f"--num-prompts={self.bs}",`
	`93`	`+ f"--num-prompts={self.batch_size}",`
`94`	`94`	`]`
`95`	`95`	`)`
`96`	`96`
`97`	`97`	`baseline_mean_perplexity = round(`
`98`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`98`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`99`	`99`	`)`
`100`	`100`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`101`	`101`
`@@ -109,7 +109,7 @@ def test_llama3_8B_f16(self):`
`109`	`109`	`)`
`110`	`110`
`111`	`111`	`@skipif_run_quick_llama_test`
`112`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`112`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`113`	`113`	`def test_llama3_8B_fp8_decomposed(self):`
`114`	`114`
`115`	`115`	`# Llama 3.1 8B decomposed`
`@@ -126,12 +126,12 @@ def test_llama3_8B_fp8_decomposed(self):`
`126`	`126`	`f"--iree-hip-target={self.iree_hip_target}",`
`127`	`127`	`f"--tensor-parallelism-size=1",`
`128`	`128`	`f"--attention-kernel=decomposed",`
`129`		`- f"--num-prompts={self.bs}",`
	`129`	`+ f"--num-prompts={self.batch_size}",`
`130`	`130`	`]`
`131`	`131`	`)`
`132`	`132`
`133`	`133`	`baseline_mean_perplexity = round(`
`134`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`134`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`135`	`135`	`)`
`136`	`136`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`137`	`137`
`@@ -145,7 +145,7 @@ def test_llama3_8B_fp8_decomposed(self):`
`145`	`145`	`)`
`146`	`146`
`147`	`147`	`@skipif_run_quick_llama_test`
`148`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`148`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`149`	`149`	`def test_llama3_8B_fp8(self):`
`150`	`150`
`151`	`151`	`# Llama 3.1 8B non-decomposed`
`@@ -162,12 +162,12 @@ def test_llama3_8B_fp8(self):`
`162`	`162`	`f"--iree-hip-target={self.iree_hip_target}",`
`163`	`163`	`f"--tensor-parallelism-size=1",`
`164`	`164`	`f"--attention-kernel=torch_sdpa",`
`165`		`- f"--num-prompts={self.bs}",`
	`165`	`+ f"--num-prompts={self.batch_size}",`
`166`	`166`	`]`
`167`	`167`	`)`
`168`	`168`
`169`	`169`	`baseline_mean_perplexity = round(`
`170`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`170`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`171`	`171`	`)`
`172`	`172`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`173`	`173`
`@@ -200,12 +200,12 @@ def test_llama3_405B_f16_decomposed(self):`
`200`	`200`	`f"--iree-hip-target={self.iree_hip_target}",`
`201`	`201`	`f"--tensor-parallelism-size={self.tensor_parallelism_size}",`
`202`	`202`	`f"--attention-kernel=decomposed",`
`203`		`- f"--num-prompts={self.bs}",`
	`203`	`+ f"--num-prompts={self.batch_size}",`
`204`	`204`	`]`
`205`	`205`	`)`
`206`	`206`
`207`	`207`	`baseline_mean_perplexity = round(`
`208`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`208`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`209`	`209`	`)`
`210`	`210`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`211`	`211`
`@@ -219,7 +219,7 @@ def test_llama3_405B_f16_decomposed(self):`
`219`	`219`	`)`
`220`	`220`
`221`	`221`	`@skipif_run_quick_llama_test`
`222`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`222`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`223`	`223`	`def test_llama3_405B_f16(self):`
`224`	`224`
`225`	`225`	`# Llama 3.1 405B non-decomposed`
`@@ -236,12 +236,12 @@ def test_llama3_405B_f16(self):`
`236`	`236`	`f"--iree-hip-target={self.iree_hip_target}",`
`237`	`237`	`f"--tensor-parallelism-size={self.tensor_parallelism_size}",`
`238`	`238`	`f"--attention-kernel=torch_sdpa",`
`239`		`- f"--num-prompts={self.bs}",`
	`239`	`+ f"--num-prompts={self.batch_size}",`
`240`	`240`	`]`
`241`	`241`	`)`
`242`	`242`
`243`	`243`	`baseline_mean_perplexity = round(`
`244`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`244`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`245`	`245`	`)`
`246`	`246`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`247`	`247`
`@@ -255,7 +255,7 @@ def test_llama3_405B_f16(self):`
`255`	`255`	`)`
`256`	`256`
`257`	`257`	`@skipif_run_quick_llama_test`
`258`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`258`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`259`	`259`	`def test_llama3_405B_fp8_decomposed(self):`
`260`	`260`
`261`	`261`	`# Llama 3.1 405B decomposed`
`@@ -272,12 +272,12 @@ def test_llama3_405B_fp8_decomposed(self):`
`272`	`272`	`f"--iree-hip-target={self.iree_hip_target}",`
`273`	`273`	`f"--tensor-parallelism-size={self.tensor_parallelism_size}",`
`274`	`274`	`f"--attention-kernel=decomposed",`
`275`		`- f"--num-prompts={self.bs}",`
	`275`	`+ f"--num-prompts={self.batch_size}",`
`276`	`276`	`]`
`277`	`277`	`)`
`278`	`278`
`279`	`279`	`baseline_mean_perplexity = round(`
`280`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`280`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`281`	`281`	`)`
`282`	`282`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`283`	`283`
`@@ -291,7 +291,7 @@ def test_llama3_405B_fp8_decomposed(self):`
`291`	`291`	`)`
`292`	`292`
`293`	`293`	`@skipif_run_quick_llama_test`
`294`		`- @pytest.mark.xfail(reason="Compile Error", strict=True, raises=IreeCompileException)`
	`294`	`+ @pytest.mark.xfail(reason="Compile Error", raises=IreeCompileException)`
`295`	`295`	`def test_llama3_405B_fp8(self):`
`296`	`296`
`297`	`297`	`# Llama 3.1 405B non-decomposed`
`@@ -308,12 +308,12 @@ def test_llama3_405B_fp8(self):`
`308`	`308`	`f"--iree-hip-target={self.iree_hip_target}",`
`309`	`309`	`f"--tensor-parallelism-size={self.tensor_parallelism_size}",`
`310`	`310`	`f"--attention-kernel=torch_sdpa",`
`311`		`- f"--num-prompts={self.bs}",`
	`311`	`+ f"--num-prompts={self.batch_size}",`
`312`	`312`	`]`
`313`	`313`	`)`
`314`	`314`
`315`	`315`	`baseline_mean_perplexity = round(`
`316`		`- np.mean(baseline_perplexity["perplexities"][0 : self.bs]), 6`
	`316`	`+ np.mean(baseline_perplexity["perplexities"][0 : self.batch_size]), 6`
`317`	`317`	`)`
`318`	`318`	`current_mean_perplexity = round(current_perplexity["mean_perplexity"], 6)`
`319`	`319`