Skip to content

Commit 44f4bb5

Browse files
SSYernar authored and facebook-github-bot committed
Created run_pipeline API function to get benchmark results (#3237)
Summary: Pull Request resolved: #3237 Created `run_pipeline` API function that runs the pipeline on given configurations and returns the list of BenchmarkResult objects (each BenchmarkResult corresponds to a specific `rank` up to `world_size`). This change is needed for future ServiceLab integration since we will be collecting pipeline benchmarks on different settings. Reviewed By: jd7-tr Differential Revision: D78941384 fbshipit-source-id: 8fc26b8e1ca3a5130508f522bb0e9e3e8cbdc9a1
1 parent ebf6bad commit 44f4bb5

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

torchrec/distributed/benchmark/benchmark_train_pipeline.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,11 @@
4040
TestTowerCollectionSparseNNConfig,
4141
TestTowerSparseNNConfig,
4242
)
43-
from torchrec.distributed.benchmark.benchmark_utils import benchmark_func, cmd_conf
43+
from torchrec.distributed.benchmark.benchmark_utils import (
44+
benchmark_func,
45+
BenchmarkResult,
46+
cmd_conf,
47+
)
4448
from torchrec.distributed.comm import get_local_size
4549
from torchrec.distributed.embedding_types import EmbeddingComputeKernel
4650
from torchrec.distributed.planner import Topology
@@ -201,15 +205,7 @@ def main(
201205
table_config: EmbeddingTablesConfig,
202206
model_selection: ModelSelectionConfig,
203207
pipeline_config: PipelineConfig,
204-
model_config: Optional[
205-
Union[
206-
TestSparseNNConfig,
207-
TestTowerCollectionSparseNNConfig,
208-
TestTowerSparseNNConfig,
209-
DeepFMConfig,
210-
DLRMConfig,
211-
]
212-
] = None,
208+
model_config: Optional[BaseModelConfig] = None,
213209
) -> None:
214210
tables, weighted_tables = generate_tables(
215211
num_unweighted_features=table_config.num_unweighted_features,
@@ -254,6 +250,30 @@ def main(
254250
)
255251

256252

253+
def run_pipeline(
254+
run_option: RunOptions,
255+
table_config: EmbeddingTablesConfig,
256+
pipeline_config: PipelineConfig,
257+
model_config: BaseModelConfig,
258+
) -> List[BenchmarkResult]:
259+
260+
tables, weighted_tables = generate_tables(
261+
num_unweighted_features=table_config.num_unweighted_features,
262+
num_weighted_features=table_config.num_weighted_features,
263+
embedding_feature_dim=table_config.embedding_feature_dim,
264+
)
265+
266+
return run_multi_process_func(
267+
func=runner,
268+
world_size=run_option.world_size,
269+
tables=tables,
270+
weighted_tables=weighted_tables,
271+
run_option=run_option,
272+
model_config=model_config,
273+
pipeline_config=pipeline_config,
274+
)
275+
276+
257277
def runner(
258278
rank: int,
259279
world_size: int,
@@ -262,7 +282,7 @@ def runner(
262282
run_option: RunOptions,
263283
model_config: BaseModelConfig,
264284
pipeline_config: PipelineConfig,
265-
) -> None:
285+
) -> BenchmarkResult:
266286
# Ensure GPUs are available and we have enough of them
267287
assert (
268288
torch.cuda.is_available() and torch.cuda.device_count() >= world_size
@@ -383,6 +403,8 @@ def _func_to_benchmark(
383403
if rank == 0:
384404
print(result)
385405

406+
return result
407+
386408

387409
if __name__ == "__main__":
388410
main()

0 commit comments

Comments (0)