diff --git a/tracking/translations_parser/cli/taskcluster_group.py b/tracking/translations_parser/cli/taskcluster_group.py index abd5c798c..53e394ea8 100644 --- a/tracking/translations_parser/cli/taskcluster_group.py +++ b/tracking/translations_parser/cli/taskcluster_group.py @@ -13,6 +13,7 @@ from collections import defaultdict from pathlib import Path +import wandb import yaml import taskcluster @@ -265,6 +266,16 @@ def publish_task_group(group_id: str) -> None: ) # Group and publish remaining metrics tasks via the logs publication + if ( + len( + wandb.Api().runs( + project_name, filters={"display_name": "group_logs", "group": group_name} + ) + ) + > 0 + ): + logger.warning("Skipping group_logs fake run publication as it already exists") + return with tempfile.TemporaryDirectory() as temp_dir: logs_folder = Path(temp_dir) / "logs" eval_folder = logs_folder / project_name / group_name / "eval" diff --git a/tracking/translations_parser/publishers.py b/tracking/translations_parser/publishers.py index 4243cb9cd..3715d72e6 100644 --- a/tracking/translations_parser/publishers.py +++ b/tracking/translations_parser/publishers.py @@ -101,6 +101,21 @@ def open(self, parser) -> None: self.parser = parser config = parser.config config.update(self.extra_kwargs.pop("config", {})) + + # Ensure no W&B run already exists + if name := self.extra_kwargs.get("name"): + existing_runs = list( + wandb.Api().runs( + self.project, + filters={"display_name": name, "group": self.extra_kwargs.get("group")}, + ) + ) + if len(existing_runs) > 0: + logger.warning( + f"This run already exists on W&B: {existing_runs}. No data will be published." + ) + return + # Start a W&B run try: self.wandb = wandb.init(