Skip to content

Commit 937be74

Browse files
authored
Add clearml logger
Add clearml logger
1 parent 8d7f38e commit 937be74

File tree

10 files changed

+147
-7
lines changed

10 files changed

+147
-7
lines changed

README.md

+10-1
Original file line numberDiff line numberDiff line change
@@ -392,7 +392,13 @@ from oml.miners.inbatch_all_tri import AllTripletsMiner
392392
from oml.models import ViTExtractor
393393
from oml.samplers.balance import BalanceSampler
394394
from oml.utils.download_mock_dataset import download_mock_dataset
395-
from oml.lightning.pipelines.logging import NeptunePipelineLogger, TensorBoardPipelineLogger, WandBPipelineLogger, MLFlowPipelineLogger
395+
from oml.lightning.pipelines.logging import (
396+
ClearMLPipelineLogger,
397+
MLFlowPipelineLogger,
398+
NeptunePipelineLogger,
399+
TensorBoardPipelineLogger,
400+
WandBPipelineLogger,
401+
)
396402

397403
dataset_root = "mock_dataset/"
398404
df_train, df_val = download_mock_dataset(dataset_root)
@@ -426,6 +432,9 @@ logger = TensorBoardPipelineLogger(".")
426432
# 4) Logging with MLFlow locally
427433
# logger = MLFlowPipelineLogger(experiment_name="exp", tracking_uri="file:./ml-runs")
428434

435+
# 5) Logging with ClearML
436+
# logger = ClearMLPipelineLogger(project_name="exp", task_name="test")
437+
429438
# run
430439
pl_model = ExtractorModule(extractor, criterion, optimizer)
431440
trainer = pl.Trainer(max_epochs=3, callbacks=[metric_callback], num_sanity_val_steps=0, logger=logger)

ci/requirements_optional.txt

+1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ jupyter>=1.0.0
33
neptune>=1.0.0, <1.10.1
44
wandb>=0.15.4
55
mlflow>=2.0.0
6+
clearml>=1.5.0
67

docs/readme/examples_source/extractor/train_val_pl.md

+10-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,13 @@ from oml.miners.inbatch_all_tri import AllTripletsMiner
1616
from oml.models import ViTExtractor
1717
from oml.samplers.balance import BalanceSampler
1818
from oml.utils.download_mock_dataset import download_mock_dataset
19-
from oml.lightning.pipelines.logging import NeptunePipelineLogger, TensorBoardPipelineLogger, WandBPipelineLogger, MLFlowPipelineLogger
19+
from oml.lightning.pipelines.logging import (
20+
ClearMLPipelineLogger,
21+
MLFlowPipelineLogger,
22+
NeptunePipelineLogger,
23+
TensorBoardPipelineLogger,
24+
WandBPipelineLogger,
25+
)
2026

2127
dataset_root = "mock_dataset/"
2228
df_train, df_val = download_mock_dataset(dataset_root)
@@ -50,6 +56,9 @@ logger = TensorBoardPipelineLogger(".")
5056
# 4) Logging with MLFlow locally
5157
# logger = MLFlowPipelineLogger(experiment_name="exp", tracking_uri="file:./ml-runs")
5258

59+
# 5) Logging with ClearML
60+
# logger = ClearMLPipelineLogger(project_name="exp", task_name="test")
61+
5362
# run
5463
pl_model = ExtractorModule(extractor, criterion, optimizer)
5564
trainer = pl.Trainer(max_epochs=3, callbacks=[metric_callback], num_sanity_val_steps=0, logger=logger)

docs/source/oml/logging.rst

+14
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ There are several loggers integrated with Pipelines. You can also `use your cust
6060
tracking_uri: "file:./ml-runs" # another way: export MLFLOW_TRACKING_URI=file:./ml-runs
6161
...
6262
63+
* ClearML
64+
65+
.. code-block:: yaml
66+
67+
...
68+
logger:
69+
name: clearml
70+
args:
71+
project_name: "test_project"
72+
task_name: "test"
73+
offline_mode: False # if True, logging is directed to a local directory
74+
...
75+
6376
6477
An example of logging via Neptune in the
6578
`feature extractor <https://github.com/OML-Team/open-metric-learning/tree/main/pipelines/features_extraction>`_
@@ -121,6 +134,7 @@ Take a look at the following example:
121134
`Training + Validation [Lightning and logging] <https://open-metric-learning.readthedocs.io/en/latest/feature_extraction/python_examples.html>`_.
122135
It shows how to use each of: `Tensorboard <https://pytorch.org/docs/stable/tensorboard.html>`_,
123136
`MLFlow <https://mlflow.org>`_,
137+
`ClearML <https://clear.ml/>`_,
124138
`Neptune <https://neptune.ai/>`_ or
125139
`WandB <https://wandb.ai/site>`_.
126140

oml/configs/logger/clearml.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
name: clearml
2+
args:
3+
project_name: "test_project"
4+
task_name: "test"

oml/lightning/pipelines/logging.py

+99-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
import warnings
2+
from argparse import Namespace
23
from pathlib import Path
3-
from typing import Any, Dict, List
4+
from typing import Any, Dict, List, Mapping, Optional, Union
45

56
import matplotlib.pyplot as plt
67
import numpy as np
8+
from lightning_fabric.utilities.logger import _flatten_dict
9+
from lightning_fabric.utilities.rank_zero import rank_zero_only
710
from pytorch_lightning.loggers import (
11+
Logger,
812
MLFlowLogger,
913
NeptuneLogger,
1014
TensorBoardLogger,
@@ -31,6 +35,66 @@ def prepare_tags(cfg: TCfg) -> List[str]:
3135
return tags
3236

3337

38+
class ClearMLLogger(Logger):
39+
def __init__(self, **kwargs: Any):
40+
try:
41+
from clearml import Task
42+
except ImportError as e:
43+
raise ModuleNotFoundError(
44+
"This contrib module requires clearml to be installed. "
45+
"You may install clearml using: \n pip install clearml \n"
46+
) from e
47+
48+
experiment_kwargs = {
49+
k: v for k, v in kwargs.items() if k not in ("project_name", "task_name", "task_type", "offline_mode")
50+
}
51+
52+
if kwargs.get("offline_mode", False):
53+
Task.set_offline(offline_mode=True)
54+
warnings.warn("ClearMLSaver: running in offline mode")
55+
56+
# Try to retrieve the current ClearML Task before trying to create a new one
57+
self.task = Task.current_task()
58+
if self.task is None:
59+
self.task = Task.init(
60+
project_name=kwargs.get("project_name"),
61+
task_name=kwargs.get("task_name"),
62+
task_type=kwargs.get("task_type", Task.TaskTypes.training),
63+
**experiment_kwargs,
64+
)
65+
66+
self.logger = self.task.get_logger()
67+
68+
@property
69+
def name(self) -> str:
70+
return "ClearMLLogger"
71+
72+
@property
73+
def version(self) -> Union[int, str]:
74+
return self.task.id
75+
76+
@rank_zero_only
77+
def finalize(self, status: str) -> None:
78+
self.logger.flush()
79+
80+
@rank_zero_only
81+
def log_hyperparams(self, params: Optional[Union[Dict[str, Any], Namespace]]) -> None:
82+
if isinstance(params, Namespace):
83+
params = vars(params)
84+
85+
if params is None:
86+
params = {}
87+
params = _flatten_dict(params)
88+
89+
self.task.connect(params)
90+
91+
@rank_zero_only
92+
def log_metrics(self, metrics: Mapping[str, float], step: Optional[int] = None) -> None:
93+
assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0" # type: ignore
94+
for k, v in metrics.items():
95+
self.logger.report_scalar(title=k, series=k, iteration=step, value=v)
96+
97+
3498
class NeptunePipelineLogger(NeptuneLogger, IPipelineLogger):
3599
def log_pipeline_info(self, cfg: TCfg) -> None:
36100
warnings.warn(
@@ -132,10 +196,44 @@ def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
132196
self.experiment.log_figure(figure=fig, artifact_file=f"{title}.png", run_id=self.run_id)
133197

134198

199+
class ClearMLPipelineLogger(ClearMLLogger, IPipelineLogger):
200+
def log_pipeline_info(self, cfg: TCfg) -> None:
201+
# log config
202+
self.log_hyperparams(prepare_config_to_logging(cfg))
203+
204+
# log tags
205+
self.task.add_tags(prepare_tags(cfg))
206+
207+
# log transforms as files
208+
names_files = save_transforms_as_files(cfg)
209+
if names_files:
210+
for name, transforms_file in names_files:
211+
self.task.upload_artifact(name=name, artifact_object=transforms_file)
212+
213+
# log code
214+
self.task.upload_artifact(name="code", artifact_object=OML_PATH)
215+
216+
# log dataframe
217+
self.task.upload_artifact(
218+
name="dataset",
219+
artifact_object=str(Path(cfg["dataset_root"]) / cfg["dataframe_name"]),
220+
)
221+
222+
def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
223+
self.logger.report_matplotlib_figure(
224+
title=title,
225+
series="",
226+
figure=fig,
227+
iteration=idx,
228+
report_image=True,
229+
)
230+
231+
135232
__all__ = [
136233
"IPipelineLogger",
137234
"TensorBoardPipelineLogger",
138235
"WandBPipelineLogger",
139236
"NeptunePipelineLogger",
140237
"MLFlowPipelineLogger",
238+
"ClearMLPipelineLogger",
141239
]

oml/registry/loggers.py

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from oml.const import TCfg
55
from oml.interfaces.loggers import IPipelineLogger
66
from oml.lightning.pipelines.logging import (
7+
ClearMLPipelineLogger,
78
MLFlowPipelineLogger,
89
NeptunePipelineLogger,
910
TensorBoardPipelineLogger,
@@ -16,6 +17,7 @@
1617
"neptune": NeptunePipelineLogger,
1718
"tensorboard": TensorBoardPipelineLogger,
1819
"mlflow": MLFlowPipelineLogger,
20+
"clearml": ClearMLPipelineLogger,
1921
}
2022

2123
CLOUD_TOKEN_NAMES = {"wandb": "WANDB_API_KEY", "neptune": "NEPTUNE_API_TOKEN"}

tests/test_imports.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from oml.const import PROJECT_ROOT
1111

12-
LIBS_TO_IGNORE = ["torch_xla", "pytorch_grad_cam", "wandb", "neptune", "IPython"]
12+
LIBS_TO_IGNORE = ["torch_xla", "pytorch_grad_cam", "wandb", "neptune", "clearml", "IPython"]
1313

1414
NEED_TO_TEST_NOTEBOOKS = True
1515

tests/test_runs/test_pipelines/configs/train_arcface_with_categories.yaml

+5-3
Original file line numberDiff line numberDiff line change
@@ -51,17 +51,19 @@ metric_args:
5151
return_only_overall_category: True
5252
visualize_only_overall_category: True
5353

54-
log_images: False
54+
log_images: True
5555

5656
metric_for_checkpointing: OVERALL/cmc/1
5757

5858
max_epochs: 2
5959
valid_period: 1
6060

6161
logger:
62-
name: tensorboard
62+
name: clearml
6363
args:
64-
save_dir: "."
64+
project_name: "test_project"
65+
task_name: "test"
66+
offline_mode: True
6567

6668
tags:
6769
- mock

tests/test_runs/test_pipelines/test_pipelines.py

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def test_train_with_categories(accelerator: str, devices: int) -> None:
8080

8181

8282
@pytest.mark.long
83+
@pytest.mark.needs_optional_dependency
8384
@pytest.mark.parametrize("accelerator, devices", accelerator_devices_pairs())
8485
def test_train_arcface_with_categories(accelerator: str, devices: int) -> None:
8586
run("train_arcface_with_categories.py", accelerator, devices)

0 commit comments

Comments
 (0)