
Commit 3553732

[ghstack] Add support for more shapes
ghstack-source-id: bf520eeb30d3f7c86007459678d96c0dc84a3e21
ghstack-comment-id: 2779402838
Pull Request resolved: #2021

1 parent: 8b0184a

5 files changed: +119 −1 lines changed


benchmarks/microbenchmarks/README.md (+17)

```diff
@@ -46,6 +46,16 @@ model_params:
         [2048, 4096, 1024],
         [4096, 4096, 1024]
       ]
+    - name: "llama"
+    - name: "pow2"
+      min_power: 10 # Optional, default is 10 (1024)
+      max_power: 14 # Optional, default is 14 (16,384)
+    - name: "pow2_extended"
+      min_power: 10 # Optional, default is 10 (1024)
+      max_power: 14 # Optional, default is 14 (16,384)
+    - name: "sweep"
+      min_power: 8 # Optional, default is 8 (256)
+      max_power: 15 # Optional, default is 15 (32,768)
   high_precision_dtype: "torch.bfloat16"
   compile: "max-autotune" # Options: "default", "max-autotune", "false"
   device: "cuda" # Options: "cuda", "mps", "xpu", "cpu"
@@ -54,6 +64,13 @@ model_params:
 
 ## Configuration Options
 
+### Shape Generation Options
+- `custom`: Manually specify shapes as a list of [m, k, n] dimensions
+- `llama`: Use LLaMa 2 70B single-node weight shapes (assumes fused attn.wqkv and ffn.w13)
+- `pow2`: Generate shapes with dimensions that are powers of 2 (e.g., 1024, 2048, 4096, etc.)
+- `pow2_extended`: Generate shapes with dimensions that are powers of 2 and powers of 2 + half (e.g., 1024, 1536, 2048, 3072, etc.)
+- `sweep`: Generate a sweep of shapes with different powers of 2 for M, K, N dimensions
+
 ### Quantization Methods
 Currently, quantization string is in same format as the one being passed in llama/generate.py.
 - `baseline`: No quantization
```
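To make the `pow2_extended` series concrete, here is a minimal self-contained sketch of how those dimensions are derived. The helper name `pow2_extended_dims` is hypothetical and only mirrors the README's description; it is not part of the benchmark code.

```python
# Illustration only: derive the "power of 2 plus half" series described above.
def pow2_extended_dims(min_power: int = 10, max_power: int = 14) -> list[int]:
    dims = []
    for p in range(min_power, max_power + 1):
        dims.append(2**p)                 # the power of 2 itself, e.g. 1024
        dims.append(2**p + 2 ** (p - 1))  # plus half of it, e.g. 1536
    return dims

print(pow2_extended_dims(10, 11))  # [1024, 1536, 2048, 3072]
```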

benchmarks/microbenchmarks/benchmark_runner.py (+42 −1)

```diff
@@ -48,9 +48,50 @@ def get_shapes_for_config(
         name = shape_config["name"]
         if name == "custom":
             shapes.extend([(name, shape) for shape in shape_config["shapes"]])
+        elif name == "llama":
+            # LLaMa 2 70B single-node weight shapes
+            # assumes fused attn.wqkv and ffn.w13
+            bsz, seq_len = 4, 4096
+            M = bsz * seq_len
+            llama_shapes = {
+                "attn.wqkv": (M, 8192, 1280),
+                "attn.w0": (M, 1024, 8192),
+                "ffn.w13": (M, 8192, 7168),
+                "ffn.w2": (M, 3584, 8192),
+            }
+            shapes.extend([(f"{name}_{k}", v) for k, v in llama_shapes.items()])
+        elif name == "pow2":
+            # Generate shapes with dimensions that are powers of 2
+            min_power_of_2 = shape_config.get("min_power", 10)  # 1024
+            max_power_of_2 = shape_config.get("max_power", 14)  # 16,384
+            for idx, power_of_2 in enumerate(range(min_power_of_2, max_power_of_2 + 1)):
+                val = 2**power_of_2
+                shapes.append((f"{name}_{idx}", [val, val, val]))
+        elif name == "pow2_extended":
+            # Generate shapes with dimensions that are powers of 2 and powers of 2 + half
+            min_power_of_2 = shape_config.get("min_power", 10)  # 1024
+            max_power_of_2 = shape_config.get("max_power", 14)  # 16,384
+            for idx, power_of_2 in enumerate(range(min_power_of_2, max_power_of_2 + 1)):
+                val1 = 2**power_of_2
+                val2 = 2**power_of_2 + 2 ** (power_of_2 - 1)
+                shapes.append((f"{name}_{idx * 2}", [val1, val1, val1]))
+                shapes.append((f"{name}_{idx * 2 + 1}", [val2, val2, val2]))
+        elif name == "sweep":
+            # Generate a sweep of shapes with different powers of 2 for M, K, N
+            min_p2 = shape_config.get("min_power", 8)  # 256
+            max_p2 = shape_config.get("max_power", 15)  # 32,768
+            counter = 0
+            for M_p2 in range(min_p2, max_p2 + 1):
+                M = 2**M_p2
+                for K_p2 in range(min_p2, max_p2 + 1):
+                    K = 2**K_p2
+                    for N_p2 in range(min_p2, max_p2 + 1):
+                        N = 2**N_p2
+                        shapes.append((f"{name}_{counter}", [M, K, N]))
+                        counter += 1
         else:
             raise NotImplementedError(
-                f"Shape config {name} not supported. Currently only supports custom shapes."
+                f"Shape config {name} not supported. Supported options: custom, llama, pow2, pow2_extended, sweep."
             )
     return shapes
```
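A hedged usage sketch of the function as added above. The import path is assumed from the file's location in the repo, and the printed values follow from the defaults shown in the diff:

```python
# Exercise get_shapes_for_config as defined above (import path assumed).
from benchmarks.microbenchmarks.benchmark_runner import get_shapes_for_config

shapes = get_shapes_for_config(
    [
        {"name": "pow2", "min_power": 10, "max_power": 11},
        {"name": "llama"},
    ]
)
for shape_name, (m, k, n) in shapes:
    print(f"{shape_name}: M={m}, K={k}, N={n}")
# Expected, per the implementation above:
#   pow2_0: M=1024, K=1024, N=1024
#   pow2_1: M=2048, K=2048, N=2048
#   llama_attn.wqkv: M=16384, K=8192, N=1280  (M = bsz 4 * seq_len 4096)
#   ... (attn.w0, ffn.w13, ffn.w2 follow)
```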

benchmarks/microbenchmarks/test/benchmark_config.yml (+14)

```diff
@@ -31,6 +31,20 @@ model_params:
         [2048, 4096, 1024],
         # [4096, 4096, 1024]
       ]
+    # Example of using LLaMa shapes
+    - name: "llama"
+    # Example of using power of 2 shapes
+    - name: "pow2"
+      min_power: 10 # 1024
+      max_power: 12 # 4096
+    # Example of using extended power of 2 shapes
+    - name: "pow2_extended"
+      min_power: 10 # 1024
+      max_power: 11 # 2048
+    # Example of using sweep shapes (commented out as it generates many shapes)
+    # - name: "sweep"
+    #   min_power: 8 # 256
+    #   max_power: 9 # 512
   high_precision_dtype: "torch.bfloat16"
   use_torch_compile: true
   torch_compile_mode: "max-autotune"
```
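A back-of-the-envelope check of why `sweep` is left commented out in the test config: it emits one shape per (M, K, N) combination, so the shape count grows cubically with the power range. The helper below is illustrative only, not part of the repo:

```python
# Count the shapes the "sweep" generator would produce for a power range.
def sweep_count(min_power: int, max_power: int) -> int:
    n_powers = max_power - min_power + 1
    return n_powers**3  # one shape per (M, K, N) combination

print(sweep_count(8, 9))   # 8 shapes for the commented-out example range
print(sweep_count(8, 15))  # 512 shapes with the documented defaults
```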

benchmarks/microbenchmarks/test/test_benchmark_runner.py (+44)

```diff
@@ -57,11 +57,55 @@ def tearDown(self):
         shutil.rmtree(self.temp_dir)
 
     def test_get_shapes_for_config(self):
+        # Test custom shapes
         shapes = get_shapes_for_config(
             self.test_config["model_params"][0]["matrix_shapes"]
         )
         self.assertEqual(len(shapes), 1)
         self.assertEqual(shapes[0], ("custom", [1024, 1024, 1024]))
+
+        # Test llama shapes
+        llama_shapes = get_shapes_for_config([
+            {"name": "llama"}
+        ])
+        self.assertEqual(len(llama_shapes), 4)  # 4 LLaMa shapes
+        self.assertTrue(any(name.startswith("llama_attn.wqkv") for name, _ in llama_shapes))
+        self.assertTrue(any(name.startswith("llama_attn.w0") for name, _ in llama_shapes))
+        self.assertTrue(any(name.startswith("llama_ffn.w13") for name, _ in llama_shapes))
+        self.assertTrue(any(name.startswith("llama_ffn.w2") for name, _ in llama_shapes))
+
+        # Test pow2 shapes
+        pow2_shapes = get_shapes_for_config([
+            {"name": "pow2", "min_power": 10, "max_power": 12}
+        ])
+        self.assertEqual(len(pow2_shapes), 3)  # 3 powers of 2 (10, 11, 12)
+        self.assertEqual(pow2_shapes[0], ("pow2_0", [1024, 1024, 1024]))  # 2^10
+        self.assertEqual(pow2_shapes[1], ("pow2_1", [2048, 2048, 2048]))  # 2^11
+        self.assertEqual(pow2_shapes[2], ("pow2_2", [4096, 4096, 4096]))  # 2^12
+
+        # Test pow2_extended shapes
+        pow2_extended_shapes = get_shapes_for_config([
+            {"name": "pow2_extended", "min_power": 10, "max_power": 11}
+        ])
+        self.assertEqual(len(pow2_extended_shapes), 4)  # 2 powers of 2, each with 2 variants
+        self.assertEqual(pow2_extended_shapes[0], ("pow2_extended_0", [1024, 1024, 1024]))  # 2^10
+        self.assertEqual(pow2_extended_shapes[1], ("pow2_extended_1", [1536, 1536, 1536]))  # 2^10 + 2^9
+        self.assertEqual(pow2_extended_shapes[2], ("pow2_extended_2", [2048, 2048, 2048]))  # 2^11
+        self.assertEqual(pow2_extended_shapes[3], ("pow2_extended_3", [3072, 3072, 3072]))  # 2^11 + 2^10
+
+        # Test sweep shapes (limited to a small range for testing)
+        sweep_shapes = get_shapes_for_config([
+            {"name": "sweep", "min_power": 8, "max_power": 9}
+        ])
+        # For min_power=8, max_power=9, we should have 8 shapes (2^3 = 8 combinations)
+        self.assertEqual(len(sweep_shapes), 8)
+        # Check that all shapes have the expected format
+        for name, shape in sweep_shapes:
+            self.assertTrue(name.startswith("sweep_"))
+            self.assertEqual(len(shape), 3)  # [M, K, N]
+            # Check that all dimensions are powers of 2 between 2^8 and 2^9
+            for dim in shape:
+                self.assertTrue(dim in [256, 512])  # 2^8, 2^9
 
     def test_get_param_combinations(self):
         model_param = self.test_config["model_params"][0]
```
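One invariant the test does not spell out, shown here as a hedged standalone sketch: `pow2_extended` interleaves pure powers of 2 (even indices) with the +half variants (odd indices). The import path is assumed from the file location above:

```python
# Verify the pow2_extended naming interleaves pure powers with +half variants.
from benchmarks.microbenchmarks.benchmark_runner import get_shapes_for_config

for name, (m, _, _) in get_shapes_for_config(
    [{"name": "pow2_extended", "min_power": 10, "max_power": 11}]
):
    idx = int(name.rsplit("_", 1)[-1])          # trailing index in the name
    is_pure_power = (m & (m - 1)) == 0          # power-of-two bit trick
    assert is_pure_power == (idx % 2 == 0)      # even index <=> pure power of 2
```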

benchmarks/microbenchmarks/utils.py (+2)

```diff
@@ -753,6 +753,7 @@ def print_results(results: List[BenchmarkResult]):
             result.config.name,
             result.config.quantization or "baseline",
             result.config.sparsity or "none",
+            f"{result.config.shape_name} ({result.config.m}, {result.config.k}, {result.config.n})",
             f"{result.model_inference_time_in_ms:.2f}",
             str(result.config.enable_profiler),
             str(result.config.enable_memory_profile),
@@ -774,6 +775,7 @@ def print_results(results: List[BenchmarkResult]):
         "Name",
         "Quantization",
         "Sparsity",
+        "Shape",
         "Inference Time (ms)",
         "Profiler Enabled",
         "Memory Profiling Enabled",
```
