Add Dynamic export_stacks Parameter to Benchmark Function (#3221)

SSYernar · facebook-github-bot · commit 06d7ea75279b · 2025-07-24T11:30:42.000-07:00
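Summary: Pull Request resolved: #3221 Introduced an `export_stacks` parameter (default `False`) to both `benchmark` and `benchmark_func`, and passed it through to the profiling call in place of the previously hard-coded values. This allows users to control the export of stack files for profiling. Note that `benchmark` previously hard-coded `export_stacks=True`, so callers that rely on stack files from `benchmark` must now pass `export_stacks=True` explicitly; `benchmark_func` previously hard-coded `export_stacks=False`, so its default behavior is unchanged. Reviewed By: aliafzal Differential Revision: D78748689 fbshipit-source-id: ae285b665e40f43e6d6ed2beae46145b4f9aa3e4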
diff --git a/torchrec/distributed/benchmark/benchmark_utils.py b/torchrec/distributed/benchmark/benchmark_utils.py
@@ -889,6 +889,7 @@ def benchmark(
     enable_logging: bool = True,
     device_type: str = "cuda",
     benchmark_unsharded_module: bool = False,
+    export_stacks: bool = False,
 ) -> BenchmarkResult:
     if enable_logging:
         logger.info(f" BENCHMARK_MODEL[{name}]:\n{model}")
@@ -920,7 +921,7 @@ def _profile_iter_fn(prof: torch.profiler.profile) -> None:
         device_type=device_type,
         output_dir=output_dir,
         pre_gpu_load=0,
-        export_stacks=True,
+        export_stacks=export_stacks,
         reset_accumulated_memory_stats=False,
     )
 
@@ -939,6 +940,7 @@ def benchmark_func(
     rank: int,
     device_type: str = "cuda",
     pre_gpu_load: int = 0,
+    export_stacks: bool = False,
 ) -> BenchmarkResult:
     if benchmark_func_kwargs is None:
         benchmark_func_kwargs = {}
@@ -963,7 +965,7 @@ def _profile_iter_fn(prof: torch.profiler.profile) -> None:
         device_type=device_type,
         output_dir=profile_dir,
         pre_gpu_load=pre_gpu_load,
-        export_stacks=False,
+        export_stacks=export_stacks,
         reset_accumulated_memory_stats=True,
     )