
Commit 5ee6b58

Updates

1 parent 33fa3ca · commit 5ee6b58

File tree

3 files changed: +11 −34 lines changed

benchmarks/microbenchmarks/benchmark_inference.py

Lines changed: 3 additions & 3 deletions
@@ -94,11 +94,11 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
         if config.enable_profiler:
             print("Running profiler...")
             try:
-                result.profiler_json_path, result.perfetto_url = generate_model_profile(
+                result.profiler_json_path = generate_model_profile(
                     m_copy, input_data, config.profiler_file_name
                 )
-            except Exception:
-                print(f"Error running profiler for {config.name}")
+            except Exception as e:
+                print(f"Error running profiler for {config.name} with error: {e}")

         return result
     except Exception as e:
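
For reference, a minimal sketch of what a profiler helper with this single-return-value shape could look like, built on torch.profiler. The function name matches the call site above, but the body, signature, and trace format are assumptions rather than torchao's actual implementation:

# Hypothetical sketch only: mirrors the single-value return used above.
import torch
from torch.profiler import ProfilerActivity, profile


def generate_model_profile(model, input_data, profile_file_path):
    # Profile a single forward pass on CPU and, if available, CUDA.
    activities = [ProfilerActivity.CPU]
    if torch.cuda.is_available():
        activities.append(ProfilerActivity.CUDA)

    with profile(activities=activities, record_shapes=True) as prof:
        with torch.no_grad():
            model(input_data)

    # Export a Chrome/Perfetto-compatible trace and return only its path.
    prof.export_chrome_trace(profile_file_path)
    return profile_file_path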

benchmarks/microbenchmarks/test/benchmark_config.yml

Lines changed: 6 additions & 30 deletions
@@ -2,51 +2,27 @@
 benchmark_mode: "inference"
 quantization_config_recipe_names:
   # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int4wo-32"
-  # - "marlin"
   - "int8wo"
+  - "int8dq"
+  - "float8dq"
+  - "float8wo"
 # sparsity_config_recipe_names:
 #   Will run a baseline inference for model by default, without sparsity for comparison
 #   - "semi-sparse"
 #   - "block"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
-  # - name: "small_bf16_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [1024, 1024, 1024], # [m, k, n]
-  #       ]
-  #   high_precision_dtype: "torch.bfloat16"
-  #   use_torch_compile: true
-  #   torch_compile_mode: "max-autotune"
-  #   device: "cuda"
-  #   model_type: "linear"
-  #   enable_profiler: true # Enable profiling for this model
-
-  - name: "large_bf16_ln_linear"
+  - name: "small_bf16_linear"
     matrix_shapes:
       - name: "custom"
         shapes: [
+          [1024, 1024, 1024], # [m, k, n]
           [2048, 4096, 1024],
-          # [4096, 4096, 1024]
+          [4096, 4096, 1024]
         ]
     high_precision_dtype: "torch.bfloat16"
     use_torch_compile: true
     torch_compile_mode: "max-autotune"
     device: "cuda"
     model_type: "linear"
     enable_profiler: true # Enable profiling for this model
-    enable_memory_profile: true # Enable memory profiling for this model
-
-  # - name: "cpu_fp32_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [4096, 4096, 1024]
-  #       ]
-  #   high_precision_dtype: "torch.float32"
-  #   use_torch_compile: false
-  #   device: "cpu"
-  #   model_type: "linear"
-  #   enable_profiler: true # Enable profiling for this model
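
With the trimmed config above, each quantization recipe (plus an unquantized baseline) is expected to run against every shape in the sweep. A rough sketch of how such a YAML could be expanded into per-run settings is below; the helper name, dict keys, and baseline handling are assumptions, not torchao's actual loader:

# Hypothetical loader sketch: expand the YAML into one run per
# (quantization recipe, matrix shape) pair. Not the torchao API.
import itertools

import yaml


def load_benchmark_matrix(path):
    with open(path) as f:
        cfg = yaml.safe_load(f)

    # None stands in for the unquantized baseline run.
    recipes = [None] + (cfg.get("quantization_config_recipe_names") or [])
    runs = []
    for params in cfg["model_params"]:
        # This sketch only reads the first shape set, e.g. [[1024, 1024, 1024], ...].
        shapes = params["matrix_shapes"][0]["shapes"]
        for recipe, (m, k, n) in itertools.product(recipes, shapes):
            runs.append(
                {
                    "name": params["name"],
                    "quantization": recipe,
                    "m": m,
                    "k": k,
                    "n": n,
                    "device": params.get("device", "cuda"),
                }
            )
    return runs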

benchmarks/microbenchmarks/utils.py

Lines changed: 2 additions & 1 deletion
@@ -473,7 +473,7 @@ def print_results(results: List[BenchmarkResult]):
             result.config.name,
             result.config.quantization or "baseline",
             result.config.sparsity or "none",
-            f"{result.config.shape_name} ({result.config.m}, {result.config.k}, {result.config.n})"
+            f"{result.config.shape_name} ({result.config.m}, {result.config.k}, {result.config.n})",
             f"{result.model_inference_time_in_ms:.2f}",
             str(result.config.enable_profiler),
         ]
@@ -485,6 +485,7 @@ def print_results(results: List[BenchmarkResult]):
         "Name",
         "Quantization",
         "Sparsity",
+        "Shape",
         "Inference Time (ms)",
         "Profiler Enabled",
     ]
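
The two changes in this file go together: without the trailing comma, Python implicitly concatenates the two adjacent f-strings into a single row entry, and without the new "Shape" header the row would carry one more column than the header list. A tiny sketch of the positional alignment, assuming a tabulate-style renderer (how torchao actually formats the table is not shown in this diff):

# Rows and headers must stay positionally aligned; "Shape" sits between
# "Sparsity" and "Inference Time (ms)". Row values are example data only,
# and the tabulate call is an assumption about how the table is rendered.
from tabulate import tabulate

headers = ["Name", "Quantization", "Sparsity", "Shape", "Inference Time (ms)", "Profiler Enabled"]
rows = [
    ["small_bf16_linear", "int8wo", "none", "custom (1024, 1024, 1024)", "1.23", "True"],
]
print(tabulate(rows, headers=headers, tablefmt="grid"))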
