Skip to content

Commit f39c096

Browse files
Ahmed Shuaibi authored and meta-codesync[bot] committed
feat: add planning result logging (#4022)
Summary: Pull Request resolved: #4022. Log planning SUCCEEDED/FAILED to the `training_optimization_events` Scuba table from both `EmbeddingShardingPlanner` and `LinearProgrammingPlanner`. Reviewed By: hammad45. Differential Revision: D97581563.
1 parent: 9ce4faa; commit: f39c096

2 files changed

Lines changed: 27 additions & 0 deletions

File tree

torchrec/distributed/logging_handlers.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,4 +207,14 @@ def detect_technique(items: List) -> OptimizationTechnique: # type: ignore[type
207207
return OptimizationTechnique.NONE
208208

209209

210+
def log_planning_result(
211+
planner_type: str,
212+
technique: OptimizationTechnique = OptimizationTechnique.NONE,
213+
error_message: Optional[str] = None,
214+
**extra_metadata: str,
215+
) -> None:
216+
"""No-op OSS stub."""
217+
pass
218+
219+
210220
_log_handlers: dict[str, logging.Handler] = defaultdict(logging.NullHandler)

torchrec/distributed/planner/planners.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,9 @@ def decorator(func):
9393
return decorator
9494

9595

96+
from torchrec.distributed.logging_handlers import log_planning_result
97+
98+
9699
logger: logging.Logger = logging.getLogger(__name__)
97100

98101

@@ -720,6 +723,13 @@ def plan(
720723
)
721724

722725
validate_rank_assignment(sharding_plan, self._topology)
726+
727+
log_planning_result(
728+
planner_type=self.__class__.__name__,
729+
num_proposals=str(self._num_proposals),
730+
num_plans=str(self._num_plans),
731+
)
732+
723733
return sharding_plan
724734
else:
725735
global_storage_capacity = reduce(
@@ -779,6 +789,13 @@ def plan(
779789
debug=self._debug,
780790
)
781791

792+
log_planning_result(
793+
planner_type=self.__class__.__name__,
794+
error_message=str(last_planner_error),
795+
num_proposals=str(self._num_proposals),
796+
num_plans=str(self._num_plans),
797+
)
798+
782799
if not lowest_storage.fits_in(global_storage_constraints):
783800
raise PlannerError(
784801
error_type=PlannerErrorType.INSUFFICIENT_STORAGE,

0 commit comments

Comments
 (0)