Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored loggers #516

Merged
merged 6 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
- name: Running all tests
run: |
if [ "${{ matrix.python-version }}" == "3.8" ]; then
make run_all_tests WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} NEPTUNE_API_TOKEN=${{ secrets.NEPTUNE_API_TOKEN }} DOWNLOAD_ZOO_IN_TESTS=yes
make run_all_tests WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} NEPTUNE_API_TOKEN=${{ secrets.NEPTUNE_API_TOKEN }} DOWNLOAD_ZOO_IN_TESTS=yes TEST_CLOUD_LOGGERS=yes
else
make run_all_tests WANDB_API_KEY=${{ secrets.WANDB_API_KEY }} NEPTUNE_API_TOKEN=${{ secrets.NEPTUNE_API_TOKEN }}
fi
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ from oml.miners.inbatch_all_tri import AllTripletsMiner
from oml.models import ViTExtractor
from oml.samplers.balance import BalanceSampler
from oml.utils.download_mock_dataset import download_mock_dataset
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger, WandbLogger
from oml.lightning.pipelines.logging import NeptunePipelineLogger, TensorBoardPipelineLogger, WandBPipelineLogger

dataset_root = "mock_dataset/"
df_train, df_val = download_mock_dataset(dataset_root)
Expand All @@ -413,15 +413,15 @@ val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4)
metric_callback = MetricValCallback(metric=EmbeddingMetrics(extra_keys=[train_dataset.paths_key,]), log_images=True)

# 1) Logging with Tensorboard
logger = TensorBoardLogger(".")
logger = TensorBoardPipelineLogger(".")

# 2) Logging with Neptune
# logger = NeptuneLogger(api_key="", project="", log_model_checkpoints=False)
# logger = NeptunePipelineLogger(api_key="", project="", log_model_checkpoints=False)

# 3) Logging with Weights and Biases
# import os
# os.environ["WANDB_API_KEY"] = ""
# logger = WandbLogger(project="test_project", log_model=False)
# logger = WandBPipelineLogger(project="test_project", log_model=False)

# run
pl_model = ExtractorModule(extractor, criterion, optimizer)
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements_optional.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
grad-cam==1.4.6
jupyter>=1.0.0
neptune-client>=0.14.2, <1.0.0
neptune>=1.0.0, <1.10.1
wandb>=0.15.4
8 changes: 4 additions & 4 deletions docs/readme/examples_source/extractor/train_val_pl.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ from oml.miners.inbatch_all_tri import AllTripletsMiner
from oml.models import ViTExtractor
from oml.samplers.balance import BalanceSampler
from oml.utils.download_mock_dataset import download_mock_dataset
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger, WandbLogger
from oml.lightning.pipelines.logging import NeptunePipelineLogger, TensorBoardPipelineLogger, WandBPipelineLogger

dataset_root = "mock_dataset/"
df_train, df_val = download_mock_dataset(dataset_root)
Expand All @@ -37,15 +37,15 @@ val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4)
metric_callback = MetricValCallback(metric=EmbeddingMetrics(extra_keys=[train_dataset.paths_key,]), log_images=True)

# 1) Logging with Tensorboard
logger = TensorBoardLogger(".")
logger = TensorBoardPipelineLogger(".")

# 2) Logging with Neptune
# logger = NeptuneLogger(api_key="", project="", log_model_checkpoints=False)
# logger = NeptunePipelineLogger(api_key="", project="", log_model_checkpoints=False)

# 3) Logging with Weights and Biases
# import os
# os.environ["WANDB_API_KEY"] = ""
# logger = WandbLogger(project="test_project", log_model=False)
# logger = WandBPipelineLogger(project="test_project", log_model=False)

# run
pl_model = ExtractorModule(extractor, criterion, optimizer)
Expand Down
18 changes: 18 additions & 0 deletions oml/interfaces/loggers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from abc import abstractmethod

from matplotlib import pyplot as plt
from pytorch_lightning.loggers import Logger as LightningLogger

from oml.const import TCfg


class IFigureLogger:
    """Interface for loggers able to persist a matplotlib figure."""

    @abstractmethod
    def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
        """Log ``fig`` under the name ``title``; ``idx`` is a step/epoch index."""
        raise NotImplementedError()


class IPipelineLogger(LightningLogger, IFigureLogger):
    """A Lightning logger that can additionally log figures and pipeline-level metadata."""

    @abstractmethod
    def log_pipeline_info(self, cfg: TCfg) -> None:
        """Log pipeline-level information (config, tags, artifacts) taken from ``cfg``."""
        raise NotImplementedError()
28 changes: 10 additions & 18 deletions oml/lightning/callbacks/metric.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
import warnings
from math import ceil
from typing import Any, Optional

import matplotlib.pyplot as plt
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning import Callback
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger, WandbLogger
from pytorch_lightning.utilities.types import STEP_OUTPUT
from torch.utils.data import DataLoader

from oml.const import LOG_IMAGE_FOLDER
from oml.ddp.patching import check_loaders_is_patched, patch_dataloader_to_ddp
from oml.interfaces.loggers import IFigureLogger
from oml.interfaces.metrics import IBasicMetric, IMetricDDP, IMetricVisualisable
from oml.lightning.modules.ddp import ModuleDDP
from oml.utils.images.images import figure_to_nparray
from oml.utils.misc import flatten_dict


Expand Down Expand Up @@ -102,23 +101,16 @@ def _log_images(self, pl_module: pl.LightningModule) -> None:
if not isinstance(self.metric, IMetricVisualisable):
return

if not isinstance(pl_module.logger, IFigureLogger):
warnings.warn(
f"Unexpected logger {pl_module.logger}. Figures have not been saved. "
f"Please, use a child of {IFigureLogger}."
)
return

for fig, metric_log_str in zip(*self.metric.visualize()):
log_str = f"{LOG_IMAGE_FOLDER}/{metric_log_str}"
if isinstance(pl_module.logger, NeptuneLogger):
from neptune.new.types import File # this is the optional dependency

pl_module.logger.experiment[log_str].log(File.as_image(fig))
elif isinstance(pl_module.logger, WandbLogger):
fig_img = figure_to_nparray(fig)
pl_module.logger.log_image(images=[fig_img], key=metric_log_str)
elif isinstance(pl_module.logger, TensorBoardLogger):
fig_img = figure_to_nparray(fig)
pl_module.logger.experiment.add_image(
log_str, np.transpose(fig_img, (2, 0, 1)), pl_module.current_epoch
)
else:
raise ValueError(f"Logging with {type(pl_module.logger)} is not supported yet.")

pl_module.logger.log_figure(fig=fig, title=log_str, idx=pl_module.current_epoch)
plt.close(fig=fig)

def on_validation_epoch_end(self, trainer: pl.Trainer, pl_module: pl.LightningModule) -> None:
Expand Down
101 changes: 101 additions & 0 deletions oml/lightning/pipelines/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import warnings
from pathlib import Path
from typing import Any, Dict, List

import matplotlib.pyplot as plt
import numpy as np
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger, WandbLogger

from oml.const import OML_PATH, TCfg
from oml.interfaces.loggers import IPipelineLogger
from oml.registry.transforms import save_transforms_as_files
from oml.utils.images.images import figure_to_nparray
from oml.utils.misc import dictconfig_to_dict, flatten_dict


def prepare_config_to_logging(cfg: "TCfg") -> Dict[str, Any]:
    """Flatten the pipeline config (plus the current working dir) into a dict of hyperparameters."""
    merged = {**dictconfig_to_dict(cfg), "dir": Path.cwd().name}
    return flatten_dict(merged, sep="|")


def prepare_tags(cfg: "TCfg") -> List[str]:
    """Collect non-empty experiment tags: the config's tags, its postfix, and the working dir name."""
    candidates = [*cfg.get("tags", []), cfg.get("postfix", ""), Path.cwd().name]
    return [tag for tag in candidates if len(tag) > 0]


class NeptunePipelineLogger(NeptuneLogger, IPipelineLogger):
    """Neptune-backed pipeline logger: uploads config, tags, transforms, source code and the dataframe."""

    def log_pipeline_info(self, cfg: TCfg) -> None:
        """Push pipeline metadata from ``cfg`` to the Neptune run."""
        warnings.warn(
            "Unfortunately, in the case of using Neptune, you may experience that long experiments are "
            "stacked and not responding. It's not an issue on OML's side, so, we cannot fix it."
        )
        self.log_hyperparams(prepare_config_to_logging(cfg))

        self.run["sys/tags"].add(prepare_tags(cfg))

        # log transforms as files
        for key, transforms_file in save_transforms_as_files(cfg):
            self.run[key].upload(transforms_file)

        # log source code
        py_files = (str(f) for f in OML_PATH.glob("**/*.py"))
        yaml_files = (str(f) for f in OML_PATH.glob("**/*.yaml"))
        self.run["code"].upload_files([*py_files, *yaml_files])

        # log dataframe
        self.run["dataset"].upload(str(Path(cfg["dataset_root"]) / cfg["dataframe_name"]))

    def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
        """Log a matplotlib figure as an image attached to the Neptune experiment."""
        from neptune.types import File  # this is the optional dependency

        self.experiment[title].log(File.as_image(fig))


class WandBPipelineLogger(WandbLogger, IPipelineLogger):
    """W&B-backed pipeline logger: uploads config, tags, transforms, source code and the dataset."""

    def log_pipeline_info(self, cfg: TCfg) -> None:
        """Push pipeline metadata from ``cfg`` to the Weights & Biases run."""
        # this is the optional dependency
        import wandb

        self.log_hyperparams(prepare_config_to_logging(cfg))
        self.experiment.tags = prepare_tags(cfg)

        # log transforms as files
        keys_files = save_transforms_as_files(cfg)
        if keys_files:
            transforms_artifact = wandb.Artifact("transforms", type="transforms")
            for _, transforms_file in keys_files:
                transforms_artifact.add_file(transforms_file)
            self.experiment.log_artifact(transforms_artifact)

        # log source code
        code_artifact = wandb.Artifact("source_code", type="code")
        code_artifact.add_dir(OML_PATH, name="oml")
        self.experiment.log_artifact(code_artifact)

        # log dataset
        dataset_artifact = wandb.Artifact("dataset", type="dataset")
        dataset_artifact.add_file(str(Path(cfg["dataset_root"]) / cfg["dataframe_name"]))
        self.experiment.log_artifact(dataset_artifact)

    def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
        """Log a matplotlib figure as an image on the W&B run."""
        self.log_image(images=[figure_to_nparray(fig)], key=title)


class TensorBoardPipelineLogger(TensorBoardLogger, IPipelineLogger):
    """TensorBoard-backed pipeline logger; pipeline info logging is intentionally a no-op."""

    def log_pipeline_info(self, cfg: TCfg) -> None:
        """Nothing extra to store for the local TensorBoard backend."""
        pass

    def log_figure(self, fig: plt.Figure, title: str, idx: int) -> None:
        """Log a matplotlib figure as an image at step ``idx``."""
        image = figure_to_nparray(fig)
        # HWC -> CHW, the layout expected by SummaryWriter.add_image
        self.experiment.add_image(title, image.transpose(2, 0, 1), idx)


__all__ = ["IPipelineLogger", "TensorBoardPipelineLogger", "WandBPipelineLogger", "NeptunePipelineLogger"]
92 changes: 7 additions & 85 deletions oml/lightning/pipelines/parser.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
import warnings
from pathlib import Path
from typing import Any, Dict, Optional

import torch
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import NeptuneLogger, TensorBoardLogger, WandbLogger
from pytorch_lightning.loggers.logger import Logger
from pytorch_lightning.strategies import DDPStrategy

from oml.const import OML_PATH, TCfg
from oml.const import TCfg
from oml.datasets.base import DatasetWithLabels
from oml.interfaces.loggers import IPipelineLogger
from oml.interfaces.samplers import IBatchSampler
from oml.lightning.pipelines.logging import TensorBoardPipelineLogger
from oml.registry.loggers import get_logger_by_cfg
from oml.registry.samplers import SAMPLERS_CATEGORIES_BASED, get_sampler_by_cfg
from oml.registry.schedulers import get_scheduler_by_cfg
from oml.registry.transforms import save_transforms_as_files
from oml.utils.misc import dictconfig_to_dict, flatten_dict
from oml.utils.misc import dictconfig_to_dict


def parse_engine_params_from_config(cfg: TCfg) -> Dict[str, Any]:
Expand Down Expand Up @@ -59,82 +57,9 @@ def check_is_config_for_ddp(cfg: TCfg) -> bool:
return bool(cfg["strategy"])


def parse_logger_from_config(cfg: TCfg) -> Logger:
logger = TensorBoardLogger(".") if cfg.get("logger", None) is None else get_logger_by_cfg(cfg["logger"])
return logger


def initialize_logging(cfg: TCfg) -> Logger:
logger = parse_logger_from_config(cfg)
cwd = Path.cwd().name

dict_to_log = flatten_dict({**dictconfig_to_dict(cfg), **{"dir": cwd}}, sep="|")

tags = list(cfg.get("tags", [])) + [cfg.get("postfix", "")] + [cwd]

if isinstance(logger, NeptuneLogger):
warnings.warn(
"Unfortunately, in the case of using Neptune, you may experience that long experiments are"
"stacked and not responding. It's not an issue on OML's side, so, we cannot fix it."
)
logger.log_hyperparams(dict_to_log)
upload_files_to_neptune_cloud(logger, cfg)
logger.run["sys/tags"].add(tags)

elif isinstance(logger, WandbLogger):
logger.log_hyperparams(dict_to_log)
upload_files_to_wandb_cloud(logger, cfg)
logger.experiment.tags = list(filter(lambda x: len(x) > 0, tags)) # it fails in the case of empty tag

elif isinstance(logger, TensorBoardLogger):
pass

else:
raise ValueError(f"Unexpected logger {type(logger)}")

return logger


def upload_files_to_neptune_cloud(logger: NeptuneLogger, cfg: TCfg) -> None:
assert isinstance(logger, NeptuneLogger)

# log transforms as files
for key, transforms_file in save_transforms_as_files(cfg):
logger.run[key].upload(transforms_file)

# log source code
source_files = list(map(lambda x: str(x), OML_PATH.glob("**/*.py"))) + list(
map(lambda x: str(x), OML_PATH.glob("**/*.yaml"))
)
logger.run["code"].upload_files(source_files)

# log dataset
logger.run["dataset"].upload(str(Path(cfg["dataset_root"]) / cfg["dataframe_name"]))


def upload_files_to_wandb_cloud(logger: WandbLogger, cfg: TCfg) -> None:
# this is the optional dependency
import wandb

assert isinstance(logger, WandbLogger)

# log transforms as files
keys_files = save_transforms_as_files(cfg)
if keys_files:
transforms = wandb.Artifact("transforms", type="transforms")
for _, transforms_file in keys_files:
transforms.add_file(transforms_file)
logger.experiment.log_artifact(transforms)

# log source code
code = wandb.Artifact("source_code", type="code")
code.add_dir(OML_PATH, name="oml")
logger.experiment.log_artifact(code)

# log dataset
dataset = wandb.Artifact("dataset", type="dataset")
dataset.add_file(str(Path(cfg["dataset_root"]) / cfg["dataframe_name"]))
logger.experiment.log_artifact(dataset)
def parse_logger_from_config(cfg: TCfg) -> IPipelineLogger:
    """Build the pipeline logger described by ``cfg``; fall back to a local TensorBoard logger."""
    logger_cfg = cfg.get("logger", None)
    if logger_cfg is None:
        return TensorBoardPipelineLogger(".")
    return get_logger_by_cfg(logger_cfg)  # type: ignore


def parse_scheduler_from_config(cfg: TCfg, optimizer: torch.optim.Optimizer) -> Dict[str, Any]:
Expand Down Expand Up @@ -182,9 +107,6 @@ def parse_ckpt_callback_from_config(cfg: TCfg) -> ModelCheckpoint:
__all__ = [
"parse_engine_params_from_config",
"check_is_config_for_ddp",
"initialize_logging",
"upload_files_to_neptune_cloud",
"upload_files_to_wandb_cloud",
"parse_scheduler_from_config",
"parse_sampler_from_config",
"parse_ckpt_callback_from_config",
Expand Down
6 changes: 4 additions & 2 deletions oml/lightning/pipelines/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from oml.lightning.modules.extractor import ExtractorModule, ExtractorModuleDDP
from oml.lightning.pipelines.parser import (
check_is_config_for_ddp,
initialize_logging,
parse_ckpt_callback_from_config,
parse_engine_params_from_config,
parse_logger_from_config,
parse_sampler_from_config,
parse_scheduler_from_config,
)
Expand Down Expand Up @@ -70,7 +70,9 @@ def extractor_training_pipeline(cfg: TCfg) -> None:

cfg = dictconfig_to_dict(cfg)
pprint(cfg)
logger = initialize_logging(cfg)

logger = parse_logger_from_config(cfg)
logger.log_pipeline_info(cfg)

trainer_engine_params = parse_engine_params_from_config(cfg)
is_ddp = check_is_config_for_ddp(trainer_engine_params)
Expand Down
Loading
Loading