From b40afbc985aadbf6d350850af6d352912974213b Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 25 Oct 2024 14:29:28 +0100 Subject: [PATCH 1/5] fix: moved sklearn and gudhi to main dependency tree fix: wrong parser import in cli feat: device option in cli args feat: saving compute arguments in report dir feat: added preprocessing transform in image StandardTransform fix: changed from dino_vits8 to vit_b_32 as default image extractor fix: removed repeated code in imagenet --- notebooks/image_examples.ipynb | 7 ++++- pyproject.toml | 8 +++--- scripts/image_metrics.sh | 22 +++++++++++---- src/pymdma/cli.py | 15 +++++++++-- src/pymdma/image/input_layer.py | 6 ++++- src/pymdma/image/models/extractor.py | 16 ++++++++--- src/pymdma/image/models/features.py | 4 +-- src/pymdma/image/models/imagenet.py | 40 ---------------------------- 8 files changed, 59 insertions(+), 59 deletions(-) diff --git a/notebooks/image_examples.ipynb b/notebooks/image_examples.ipynb index 27578b3..86db6f4 100644 --- a/notebooks/image_examples.ipynb +++ b/notebooks/image_examples.ipynb @@ -477,6 +477,11 @@ } ], "metadata": { + "kernelspec": { + "display_name": ".venv-dev", + "language": "python", + "name": "python3" + }, "language_info": { "codemirror_mode": { "name": "ipython", @@ -487,7 +492,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.5" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 18efa2a..026841b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,8 @@ pot = {version = ">=0.9.4, <0.10.0"} pydantic = {version = ">=2.8.2, <2.9.0"} python-dotenv = {version = ">=1.0.0, <2.0.0"} torch = {version = ">=2.1.0, <2.5.0"} +gudhi = {version = ">=3.9.0, <=4.0.0"} +scikit-learn = {version = ">1.4.0"} # Image dependencies pydom = {git = "https://github.com/umang-singhal/pydom.git", rev = "2554af8d0", optional = true} @@ -59,11 +61,9 @@ statsmodels = {version = ">=0.14.4, <0.15.0", optional = true} # sentence-transformers = {version = "^2.2.2", optional = true} # python-multipart = {version = "0.0.6", optional = true} # Tabular Dependencies -gudhi = {version = ">=3.9.0, <=4.0.0", optional = true} numba = {version = ">=0.60.0, <0.80.0", optional = true} pandas = {version = ">=2.0.0, <3.0.0", optional = true} pycanon = {version = "1.0.1.post2", optional = true} -scikit-learn = {version = ">1.4.0", optional = true} scipy = {version = ">=1.6.0, <2.0.0", optional = true} spacy = {version = ">=3.7.4, <4.0.0", optional = true} transformers = {version = ">=4.43.2, <5.0.0", optional = true} @@ -73,12 +73,12 @@ word2number = {version = ">=1.1.0, <1.5.0", optional = true} [tool.poetry.extras] image = ["pydom", "torchvision", "torchmetrics", "pycocotools", "opencv-python", "torch-fidelity"] -tabular = ["gudhi", "numba", "pandas", "pycanon", "scikit-learn", "scipy", "spacy", "transformers", "umap-learn", "word2number", "statsmodels"] +tabular = ["numba", "pandas", "pycanon", "scipy", "spacy", "transformers", "umap-learn", "word2number", "statsmodels"] time_series = ["tsfel", "wfdb", "statsmodels"] # text = ["accelerate", "datasets", "nltk", "sentence-transformers", "transformers", "python-multipart"] all = [ "pydom", "torchvision", "torchmetrics", "pycocotools", "opencv-python", "torch-fidelity", -"gudhi", "numba", "pandas", "pycanon", "scikit-learn", "scipy", "spacy", "transformers", "umap-learn", "word2number", +"numba", "pandas", "pycanon", "scipy", "spacy", "transformers", "umap-learn", "word2number", "tsfel", "wfdb",
"statsmodels" # "accelerate", "datasets", "nltk", "sentence-transformers", "transformers", "python-multipart" ] diff --git a/scripts/image_metrics.sh b/scripts/image_metrics.sh index 98e4cde..f2603fd 100755 --- a/scripts/image_metrics.sh +++ b/scripts/image_metrics.sh @@ -1,17 +1,29 @@ #!/bin/bash +#SBATCH --job-name=sd20_eval +#SBATCH --output=logs/sd20_eval.log +#SBATCH --partition=gpu32 +#SBATCH --mem=32G +#SBATCH --time=8-24:00:00 +#SBATCH --cpus-per-task=40 +#SBATCH --gpus=1 +#SBATCH --mail-user=ivo.facoco@fraunhofer.pt +#SBATCH --mail-type=ALL pymdma --modality image \ --validation_type synth \ --reference_type dataset \ --evaluation_level dataset \ - --reference_data data/test/image/synthesis_val/reference \ - --target_data data/test/image/synthesis_val/dataset \ - --batch_size 3\ + --reference_data "$HOME/ldm_dataset_test.jsonl" \ + --target_data "$HOME/cdiffusion_dretinopathy/reports/figures/sd20_da_concepts" \ + --batch_size 30 \ --metric_group feature \ - --output_dir reports/image_metrics/ \ + --output_dir reports/sd20_da_concepts_vit32/ \ + --device cuda # --extractor_model_name inception_v3 +echo Done. + # python3 src/main.py \ # --modality image \ # --validation_type input \ @@ -24,4 +36,4 @@ pymdma --modality image \ # --metric_group quality \ # --annotation_file data/test/image/input_val/annotations/COCO_annotation_example_mask_exp.json # --extractor_model_name inception -# --reference_data data/test/image/synthesis_val/reference \ +# --reference_data data/test/image/synthesis_val/reference \ \ No newline at end of file diff --git a/src/pymdma/cli.py b/src/pymdma/cli.py index 5d456d3..a1dadab 100644 --- a/src/pymdma/cli.py +++ b/src/pymdma/cli.py @@ -123,6 +123,12 @@ def parse_args(): default=1, help="Number of workers to be used in the computation. Defaults to 1.", ) + parser.add_argument( + "--device", + type=str, + default="cpu", + help="Device to be used for computation. 
Defaults to 'cpu'.", + ) return parser.parse_args() @@ -138,7 +144,7 @@ def infer_data_source(data_modality: str, data_path: Path): return data_path # modality custom data parsers - module = import_module(f"{data_modality}.data.parsers") + module = import_module(f"pymdma.{data_modality}.data.parsers") if data_path.suffix == ".jsonl": return module.jsonl_files(data_path) @@ -208,6 +214,7 @@ def main() -> None: args.batch_size, args.output_dir if args.allow_feature_cache else None, annotation_file=args.annotation_file, + device=args.device, ) logger.info( @@ -253,7 +260,11 @@ def main() -> None: with open(args.output_dir / "output.json", "w") as f: f.write(json.dumps(output, indent=2)) - logger.info(f"Results saved to {args.output_dir / 'output.json'}") + with open(args.output_dir / "config.json", "w") as f: + args_vals = {key: str(val) for key, val in dict(vars(args)).items()} + json.dump(args_vals, f, indent=2) + + logger.info(f"Results saved to {args.output_dir}") if __name__ == "__main__": diff --git a/src/pymdma/image/input_layer.py b/src/pymdma/image/input_layer.py index 2f7644a..47e637e 100644 --- a/src/pymdma/image/input_layer.py +++ b/src/pymdma/image/input_layer.py @@ -13,6 +13,8 @@ from .models.features import ExtractorFactory from .utils.processing import batch_downsample_to_largest +from pymdma.image.data.transforms import RetinaCenterCrop + SUPPORTED_FILES = {".png", ".jpg", ".jpeg"} # TODO might want to add others @@ -208,9 +210,11 @@ def get_embeddings( extractor = ExtractorFactory.model_from_name(model_name) if extractor is None else extractor # extractor = model_instance if model_instance is not None else FeatureExtractor(model_name, device=self.device) + retina_center_crop = RetinaCenterCrop() reference_feats, _labels, _reference_ids = extractor.extract_features_dataloader( self.reference_loader, device=self.device, + preprocess_transform=retina_center_crop, ) synthetic_feats, _labels, synthetic_ids = extractor.extract_features_dataloader( self.target_loader, @@ -221,7 +225,7 @@ def get_embeddings( self.instance_ids = synthetic_ids.tolist() if offload_model: - extractor._model = extractor.to("cpu") + extractor = extractor.to("cpu") del extractor return reference_feats, synthetic_feats diff --git a/src/pymdma/image/models/extractor.py b/src/pymdma/image/models/extractor.py index b9a0517..75259d2 100644 --- a/src/pymdma/image/models/extractor.py +++ b/src/pymdma/image/models/extractor.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Tuple +from typing import Callable, List, Optional, Tuple import numpy as np import torch @@ -15,12 +15,19 @@ class StandardTransform: - def __init__(self, image_size: Tuple[int], interp: Image.Resampling = Image.Resampling.BILINEAR) -> None: + def __init__( + self, + image_size: Tuple[int], + interpolation: Image.Resampling = Image.Resampling.BILINEAR, + preprocess_transform: Optional[Callable] = None, + ) -> None: assert isinstance(image_size, tuple), "Image size must be a tuple." 
self.img_size = image_size - self.interp = interp + self.interp = interpolation + self.preprocess_transform = preprocess_transform def __call__(self, image: Image.Image) -> torch.Tensor: + image = self.preprocess_transform(image) if self.preprocess_transform is not None else image image = image.resize(self.img_size, self.interp) # bring image to the range [0, 1] and normalize to [-1, 1] image = np.array(image).astype(np.float32) / 255.0 @@ -79,6 +86,7 @@ def extract_features_dataloader( self, dataloader: DataLoader, device: str = "cpu", + preprocess_transform: Optional[Callable] = None, ) -> Tuple[np.ndarray, np.ndarray]: """Use selected model to extract features from all images in dataloader. @@ -94,7 +102,7 @@ def extract_features_dataloader( ids_array = [] self.extractor = self.extractor.to(device) - dataloader.dataset.transform = StandardTransform(self.input_size, self.interpolation) + dataloader.dataset.transform = StandardTransform(self.input_size, self.interpolation, preprocess_transform) for batch, labels, img_ids in tqdm(dataloader, total=len(dataloader)): batch = batch.to(device) batch = self.extractor(batch).detach().cpu().numpy() diff --git a/src/pymdma/image/models/features.py b/src/pymdma/image/models/features.py index e13a001..a12dea1 100644 --- a/src/pymdma/image/models/features.py +++ b/src/pymdma/image/models/features.py @@ -2,7 +2,7 @@ class ExtractorFactory: - default = "dino_vits8" + default = "vit_b_32" @staticmethod def model_from_name( @@ -19,7 +19,7 @@ def model_from_name( if name == "inception_v3": extractor = InceptionExtractor() elif "vgg" in name: - extractor = VGGExtractor(model_name=name) + extractor = VGGExtractor(name) elif "dino" in name: extractor = DinoExtractor(name) elif "vit" in name: diff --git a/src/pymdma/image/models/imagenet.py b/src/pymdma/image/models/imagenet.py index a806a1b..bf72124 100644 --- a/src/pymdma/image/models/imagenet.py +++ b/src/pymdma/image/models/imagenet.py @@ -1,51 +1,11 @@ - import torch import torch.multiprocessing import torchvision.models as tvmodels from PIL import Image -from pymdma.common.definitions import EmbedderInterface - from .extractor import BaseExtractor -class ExtractorFactory(EmbedderInterface): - default = "vit_b_32" - - def model_from_name( - self, - name: str, - device: str = "cpu", - **kwargs, - ): - """Initializes the feature extractor with the given parameters. - - Args: - name (str): identifier of the extractor to be used. - device (str): model device. Defaults to "cpu". 
- """ - name = self.default if name == "default" else name - super().__init__(name) - self.device = device - - if name == "inception_v3": - self.extractor = InceptionExtractor(**kwargs) - elif "vgg" in name: - self.extractor = VGGExtractor(model_name=name) - elif "dino" in name: - self.extractor = DinoExtractor(name) - elif "vit" in name: - self.extractor = ViTExtractor(name) - else: - raise ValueError(f"Model {name} not available.") - - self.extractor.eval() - - def get_transform(self): - return self.extractor.transform - - - class InceptionExtractor(BaseExtractor): def __init__(self): super().__init__( From 5faa574505a3114be34558608e43d98507397cb1 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Fri, 25 Oct 2024 14:39:11 +0100 Subject: [PATCH 2/5] fix: removing temporary transform from image input layer --- requirements/requirements-text.txt | 12 ++++++------ src/pymdma/image/input_layer.py | 4 ---- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/requirements/requirements-text.txt b/requirements/requirements-text.txt index a28dad6..47a8e5b 100644 --- a/requirements/requirements-text.txt +++ b/requirements/requirements-text.txt @@ -1,6 +1,6 @@ -accelerate>0.24.1, <0.25.0 ; python_version >= "3.9" and python_version < "3.13" -datasets==2.20.0 ; python_version >= "3.9" and python_version < "3.13" -nltk==3.8.1 ; python_version >= "3.9" and python_version < "3.13" -python-multipart==0.0.6 ; python_version >= "3.9" and python_version < "3.13" -sentence-transformers==2.7.0 ; python_version >= "3.9" and python_version < "3.13" -transformers==4.43.2 ; python_version >= "3.9" and python_version < "3.13" +# accelerate>0.24.1, <0.25.0 ; python_version >= "3.9" and python_version < "3.13" +# datasets==2.20.0 ; python_version >= "3.9" and python_version < "3.13" +# nltk==3.8.1 ; python_version >= "3.9" and python_version < "3.13" +# python-multipart==0.0.6 ; python_version >= "3.9" and python_version < "3.13" +# sentence-transformers==2.7.0 ; python_version >= "3.9" and python_version < "3.13" +# transformers==4.43.2 ; python_version >= "3.9" and python_version < "3.13" diff --git a/src/pymdma/image/input_layer.py b/src/pymdma/image/input_layer.py index 47e637e..2018e80 100644 --- a/src/pymdma/image/input_layer.py +++ b/src/pymdma/image/input_layer.py @@ -13,8 +13,6 @@ from .models.features import ExtractorFactory from .utils.processing import batch_downsample_to_largest -from pymdma.image.data.transforms import RetinaCenterCrop - SUPPORTED_FILES = {".png", ".jpg", ".jpeg"} # TODO might want to add others @@ -210,11 +208,9 @@ def get_embeddings( extractor = ExtractorFactory.model_from_name(model_name) if extractor is None else extractor # extractor = model_instance if model_instance is not None else FeatureExtractor(model_name, device=self.device) - retina_center_crop = RetinaCenterCrop() reference_feats, _labels, _reference_ids = extractor.extract_features_dataloader( self.reference_loader, device=self.device, - preprocess_transform=retina_center_crop, ) synthetic_feats, _labels, synthetic_ids = extractor.extract_features_dataloader( self.target_loader, From 40f0ab3006eac9cbb4b79fd63691023db70252cd Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Mon, 28 Oct 2024 12:20:28 +0000 Subject: [PATCH 3/5] Simplifying metric goal category aggregation fix: removed SyntheticFeature goals fix: removed InputQuality goals fix: updated metric goal cateogries in all metrics fix: wrong data casting in image feature extractor --- requirements/requirements-text.txt | 6 -- scripts/image_metrics.sh | 55 
++++++---------- scripts/tabular_metrics.sh | 18 +++-- scripts/time_series_metrics.sh | 20 +++--- src/pymdma/cli.py | 6 +- src/pymdma/common/compute.py | 3 +- src/pymdma/common/definitions.py | 5 +- src/pymdma/common/selection.py | 6 +- src/pymdma/constants.py | 65 +++---------------- src/pymdma/general/measures/external/piq.py | 8 +-- src/pymdma/general/measures/prd.py | 4 +- src/pymdma/general/measures/prdc.py | 12 ++-- .../image/measures/input_val/__init__.py | 4 +- .../measures/input_val/annotation/coco.py | 10 ++- .../input_val/{quality => data}/__init__.py | 0 .../{quality => data}/no_reference.py | 20 +++--- .../input_val/{quality => data}/reference.py | 8 +-- .../measures/synthesis_val/feature/giqa.py | 4 +- src/pymdma/image/models/extractor.py | 15 +++-- .../tabular/measures/input_val/__init__.py | 4 +- .../measures/input_val/data/__init__.py | 0 .../measures/input_val/{ => data}/privacy.py | 6 +- .../measures/input_val/{ => data}/quality.py | 18 ++--- .../measures/synthesis_val/data/similarity.py | 8 +-- .../measures/synthesis_val/feature/privacy.py | 6 +- .../measures/input_val/__init__.py | 2 +- .../measures/input_val/data/__init__.py | 0 .../measures/input_val/{ => data}/quality.py | 6 +- .../synthesis_val/feature/distance.py | 8 +-- 29 files changed, 127 insertions(+), 200 deletions(-) delete mode 100644 requirements/requirements-text.txt rename src/pymdma/image/measures/input_val/{quality => data}/__init__.py (100%) rename src/pymdma/image/measures/input_val/{quality => data}/no_reference.py (97%) rename src/pymdma/image/measures/input_val/{quality => data}/reference.py (97%) create mode 100644 src/pymdma/tabular/measures/input_val/data/__init__.py rename src/pymdma/tabular/measures/input_val/{ => data}/privacy.py (94%) rename src/pymdma/tabular/measures/input_val/{ => data}/quality.py (98%) create mode 100644 src/pymdma/time_series/measures/input_val/data/__init__.py rename src/pymdma/time_series/measures/input_val/{ => data}/quality.py (95%) diff --git a/requirements/requirements-text.txt b/requirements/requirements-text.txt deleted file mode 100644 index 47a8e5b..0000000 --- a/requirements/requirements-text.txt +++ /dev/null @@ -1,6 +0,0 @@ -# accelerate>0.24.1, <0.25.0 ; python_version >= "3.9" and python_version < "3.13" -# datasets==2.20.0 ; python_version >= "3.9" and python_version < "3.13" -# nltk==3.8.1 ; python_version >= "3.9" and python_version < "3.13" -# python-multipart==0.0.6 ; python_version >= "3.9" and python_version < "3.13" -# sentence-transformers==2.7.0 ; python_version >= "3.9" and python_version < "3.13" -# transformers==4.43.2 ; python_version >= "3.9" and python_version < "3.13" diff --git a/scripts/image_metrics.sh b/scripts/image_metrics.sh index f2603fd..a426470 100755 --- a/scripts/image_metrics.sh +++ b/scripts/image_metrics.sh @@ -1,39 +1,22 @@ #!/bin/bash -#SBATCH --job-name=sd20_eval -#SBATCH --output=logs/sd20_eval.log -#SBATCH --partition=gpu32 -#SBATCH --mem=32G -#SBATCH --time=8-24:00:00 -#SBATCH --cpus-per-task=40 -#SBATCH --gpus=1 -#SBATCH --mail-user=ivo.facoco@fraunhofer.pt -#SBATCH --mail-type=ALL +pymdma \ + --modality image \ + --validation_type input \ + --reference_type none \ + --evaluation_level instance \ + --target_data data/test/image/input_val/dataset \ + --reference_data data/test/image/input_val/reference \ + --batch_size 3\ + --output_dir reports/image_metrics/ \ + --annotation_file data/test/image/input_val/annotations/COCO_annotation_example_mask_exp.json -pymdma --modality image \ - --validation_type synth \ - 
--reference_type dataset \ - --evaluation_level dataset \ - --reference_data "$HOME/ldm_dataset_test.jsonl" \ - --target_data "$HOME/cdiffusion_dretinopathy/reports/figures/sd20_da_concepts" \ - --batch_size 30 \ - --metric_group feature \ - --output_dir reports/sd20_da_concepts_vit32/ \ - --device cuda - # --extractor_model_name inception_v3 - -echo Done. - -# python3 src/main.py \ - # --modality image \ - # --validation_type input \ - # --reference_type none \ - # --evaluation_level instance \ - # --target_data data/test/image/input_val/dataset \ - # --reference_data data/test/image/input_val/reference \ - # --batch_size 3\ - # --output_dir reports/image_metrics/ \ - # --metric_group quality \ - # --annotation_file data/test/image/input_val/annotations/COCO_annotation_example_mask_exp.json -# --extractor_model_name inception -# --reference_data data/test/image/synthesis_val/reference \ \ No newline at end of file +pymdma \ + --modality image \ + --validation_type synth \ + --reference_type dataset \ + --evaluation_level dataset \ + --target_data data/test/image/synthesis_val/dataset \ + --reference_data data/test/image/synthesis_val/reference \ + --batch_size 3\ + --output_dir reports/image_metrics/ \ diff --git a/scripts/tabular_metrics.sh b/scripts/tabular_metrics.sh index e175f72..61afc8e 100755 --- a/scripts/tabular_metrics.sh +++ b/scripts/tabular_metrics.sh @@ -7,14 +7,12 @@ pymdma --modality tabular \ --target_data data/test/tabular/input_val/dataset \ --batch_size 1\ --output_dir reports/tabular_metrics/ \ - --metric_group quality privacy -# pymdma --modality tabular \ -# --validation_type synth \ -# --evaluation_level dataset \ -# --reference_type dataset \ -# --reference_data data/test/tabular/synthesis_val/reference \ -# --target_data data/test/tabular/input_val/dataset \ -# --batch_size 1\ -# --output_dir reports/tabular_metrics/ \ -# --metric_group feature data +pymdma --modality tabular \ + --validation_type synth \ + --evaluation_level dataset \ + --reference_type dataset \ + --reference_data data/test/tabular/synthesis_val/reference \ + --target_data data/test/tabular/input_val/dataset \ + --batch_size 1\ + --output_dir reports/tabular_metrics/ \ diff --git a/scripts/time_series_metrics.sh b/scripts/time_series_metrics.sh index ab61cf7..f9459d5 100755 --- a/scripts/time_series_metrics.sh +++ b/scripts/time_series_metrics.sh @@ -1,19 +1,17 @@ #!/bin/bash -# pymdma --modality time_series \ -# --validation_type synth \ -# --evaluation_level dataset \ -# --reference_type dataset \ -# --target_data data/test/time_series/synthesis_val/dataset \ -# --reference_data data/test/time_series/synthesis_val/reference \ -# --batch_size 2\ -# --output_dir reports/tabular_metrics/ \ -# --metric_group feature +pymdma --modality time_series \ + --validation_type synth \ + --evaluation_level dataset \ + --reference_type dataset \ + --target_data data/test/time_series/synthesis_val/dataset \ + --reference_data data/test/time_series/synthesis_val/reference \ + --batch_size 2\ + --output_dir reports/tabular_metrics/ pymdma --modality time_series \ --validation_type input \ --evaluation_level instance \ --reference_type none \ --target_data data/test/time_series/input_val/dataset \ - --output_dir reports/time_series/ \ - --metric_group quality + --output_dir reports/time_series/ diff --git a/src/pymdma/cli.py b/src/pymdma/cli.py index a1dadab..253a8cf 100644 --- a/src/pymdma/cli.py +++ b/src/pymdma/cli.py @@ -16,6 +16,7 @@ DataModalities, EvaluationLevel, InputMetricGroups, + MetricGoal, 
ReferenceType, SyntheticMetricGroups, ValidationTypes, @@ -57,7 +58,7 @@ def parse_args(): type=str, nargs="+", default=None, - help="Metrics to be evaluated. E.g. feature, quality etc.", + help="Metrics to be evaluated. E.g. privacy, quality etc.", ) parser.add_argument( "--metric_goals", @@ -189,6 +190,9 @@ def main() -> None: metric_goals=None, ) + if args.annotation_file is None: + s_func.pop("annotation", None) + for eval_group in list(s_func.keys()): funcs = s_func[eval_group] if len(funcs) == 0: diff --git a/src/pymdma/common/compute.py b/src/pymdma/common/compute.py index cd44694..9b43aef 100644 --- a/src/pymdma/common/compute.py +++ b/src/pymdma/common/compute.py @@ -48,6 +48,7 @@ def __init__( self.n_workers = n_workers self.metrics = self._instanciate_metric_classes(group_classes) + self.global_context: Dict[str, any] = {} self.extractors = set() if pretrained_extractor_name is None else {pretrained_extractor_name} @@ -109,7 +110,7 @@ def _compute_and_reduce( def _compute_task(metric, metric_args): metric_name = metric.__class__.__name__ - logger.info(f"Extractor: {metric.extractor_model_name} | Metric: {metric_name}") + logger.info(f"Metric: {metric_name}") new_result = metric.compute(*metric_args, context=self.global_context) # merge metric with already compute one (batch calculation) diff --git a/src/pymdma/common/definitions.py b/src/pymdma/common/definitions.py index 624bc96..64ede30 100644 --- a/src/pymdma/common/definitions.py +++ b/src/pymdma/common/definitions.py @@ -1,8 +1,7 @@ from abc import ABC, abstractmethod from typing import Optional -from pymdma.constants import METRIC_GOALS as MetricGoals -from pymdma.constants import EvaluationLevel, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, ReferenceType class MetricClass: @@ -40,7 +39,7 @@ def get_embeddings(self, model_name: str, **kwargs): class Metric(ABC): # evaluation params evaluation_level: EvaluationLevel = EvaluationLevel.DATASET - metric_goal: MetricGoals + metric_goal: MetricGoal reference_type: ReferenceType = ReferenceType.NONE # metric specific diff --git a/src/pymdma/common/selection.py b/src/pymdma/common/selection.py index a0cc21f..91a936c 100644 --- a/src/pymdma/common/selection.py +++ b/src/pymdma/common/selection.py @@ -12,9 +12,8 @@ DataModalities, EvaluationLevel, InputMetricGroups, - InputQualityMetrics, + MetricGoal, ReferenceType, - SyntheticFeatureMetrics, SyntheticMetricGroups, ValidationTypes, ) @@ -93,7 +92,6 @@ def select_modality_input_layer( ) elif data_modality == "time_series": from pymdma.time_series.input_layer import TimeSeriesInputLayer - return TimeSeriesInputLayer( validation_type, @@ -204,7 +202,7 @@ def select_metric_functions( reference_type: ReferenceType, evaluation_level: Optional[EvaluationLevel] = None, metric_group: Optional[Union[SyntheticMetricGroups, InputMetricGroups]] = None, - metric_goals: Optional[List[Union[SyntheticFeatureMetrics, InputQualityMetrics]]] = None, + metric_goals: Optional[List[MetricGoal]] = None, ) -> Dict[str, List[Metric]]: """Helper function for selecting specific subset of measures. 
diff --git a/src/pymdma/constants.py b/src/pymdma/constants.py index c416a73..d37fc52 100644 --- a/src/pymdma/constants.py +++ b/src/pymdma/constants.py @@ -1,5 +1,4 @@ from enum import Enum -from typing import Union METRICS_PACKAGE_NAME = "measures" SEED = 42 @@ -29,6 +28,13 @@ class ValidationTypes(str, StrEnum): SYNTH = "synthesis_val" +class MetricGoal(str, StrEnum): + VALIDITY = "validity" + QUALITY = "quality" + PRIVACY = "privacy" + UTILITY = "utility" + + ################################################################ ####################### SYNTHETIC GROUP ######################## ################################################################ @@ -37,52 +43,12 @@ class SyntheticMetricGroups(str, StrEnum): DATA = "data" -# ============ Synthetic Metric Specific Goals ============ -class SyntheticFeatureMetrics(str, StrEnum): - FIDELITY = "fidelity" - DIVERSITY = "diversity" - AUTHENTICITY = "authenticity" # indicates privacy - PRIVACY = "privacy" - UTILITY = "utility" # TODO move utility to another group (not based on features) - QUALITY = "quality" # general quality - - ################################################################ ####################### INPUT GROUP ############################ ################################################################ class InputMetricGroups(str, StrEnum): - QUALITY = "quality" + QUALITY = "data" ANNOTATION = "annotation" - PRIVACY = "privacy" - - -# ============ Input Metric Specific Goals =============== -class InputQualityMetrics(str, StrEnum): - CONTRAST = "contrast" - BRIGHTNESS = "brightness" - COLORFULNESS = "colorfulness" - SHARPNESS = "sharpness" - PERCEPTUAL_QUALITY = "perceptual_quality" - NOISE = "noise" - SIMILARITY = "similarity" - UNIFORMITY = "uniformity" - UNIQUENESS = "uniqueness" - CONSISTENCY = "consistency" - DIMENSIONALITY = "dimensionality" - - -class InputPrivacyMetrics(str, StrEnum): - PRIVACY = "privacy" - ANONYMITY = "anonymity" - CONFIDENTIALITY = "confidentiality" - NON_REPUDIATION = "non_repudiation" - UNIQUENESS = "uniqueness" - - -class InputAnnotationMetrics(str, StrEnum): - COMPLETENESS = "completeness" - CORRECTNESS = "correctness" - UNIQUENESS = "uniqueness" # ============ Annotation Types =============== @@ -93,27 +59,15 @@ class AnnotationType(str, StrEnum): KEYPOINTS = "keypoints" -def valid_subclass(goal, subgoal): - dependency_map = { - InputMetricGroups.QUALITY: InputQualityMetrics, - InputMetricGroups.ANNOTATION: InputAnnotationMetrics, - InputMetricGroups.PRIVACY: InputPrivacyMetrics, - SyntheticMetricGroups.FEATURE: SyntheticFeatureMetrics, - } - return goal in dependency_map and dependency_map[goal].has_value(subgoal) - - # ===================== OTHER CONFIGS ===================== class EvaluationLevel(str, StrEnum): DATASET = "dataset" INSTANCE = "instance" - # FEATURE = "feature_wise" class ReferenceType(str, StrEnum): DATASET = "dataset" INSTANCE = "instance" - # FEATURE = "feature" NONE = "none" @@ -125,6 +79,3 @@ class OutputsTypes(str, StrEnum): STRING = "string" KEY_VAL = "key_value" # key-value pair (dict) str -> float | int | str KEY_ARRAY = "key_array" # key-values pair (dict) str -> list of float | int | str - - -METRIC_GOALS = Union[InputQualityMetrics, InputPrivacyMetrics, InputAnnotationMetrics, SyntheticFeatureMetrics] diff --git a/src/pymdma/general/measures/external/piq.py b/src/pymdma/general/measures/external/piq.py index 415e2a0..381a40f 100644 --- a/src/pymdma/general/measures/external/piq.py +++ b/src/pymdma/general/measures/external/piq.py @@ -8,7 +8,7 @@ from 
pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ...functional.ratio import dispersion_ratio, distance_ratio from ...utils.util import features_splitting, to_tensor @@ -54,7 +54,7 @@ class FrechetDistance(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = [SyntheticFeatureMetrics.DIVERSITY, SyntheticFeatureMetrics.FIDELITY] + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -185,7 +185,7 @@ class GeometryScore(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = [SyntheticFeatureMetrics.DIVERSITY, SyntheticFeatureMetrics.FIDELITY] + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -280,7 +280,7 @@ class MultiScaleIntrinsicDistance(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = [SyntheticFeatureMetrics.DIVERSITY, SyntheticFeatureMetrics.FIDELITY] + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 diff --git a/src/pymdma/general/measures/prd.py b/src/pymdma/general/measures/prd.py index 8f1a634..b153329 100644 --- a/src/pymdma/general/measures/prd.py +++ b/src/pymdma/general/measures/prd.py @@ -2,7 +2,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ..utils.util import cluster_into_bins @@ -181,7 +181,7 @@ class PrecisionRecallDistribution(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = [SyntheticFeatureMetrics.FIDELITY, SyntheticFeatureMetrics.DIVERSITY] + metric_goal = MetricGoal.QUALITY def __init__( self, diff --git a/src/pymdma/general/measures/prdc.py b/src/pymdma/general/measures/prdc.py index 78e3a97..9938d09 100644 --- a/src/pymdma/general/measures/prdc.py +++ b/src/pymdma/general/measures/prdc.py @@ -2,7 +2,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ..utils.util import compute_nearest_neighbour_distances, compute_pairwise_distance @@ -43,7 +43,7 @@ class ImprovedPrecision(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -136,7 +136,7 @@ class ImprovedRecall(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - metric_goal = SyntheticFeatureMetrics.DIVERSITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -230,7 +230,7 @@ class Density(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - 
metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -320,7 +320,7 @@ class Coverage(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - metric_goal = SyntheticFeatureMetrics.DIVERSITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -421,7 +421,7 @@ class Authenticity(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - metric_goal = SyntheticFeatureMetrics.AUTHENTICITY + metric_goal = MetricGoal.PRIVACY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/image/measures/input_val/__init__.py b/src/pymdma/image/measures/input_val/__init__.py index 89c4703..f3d493c 100644 --- a/src/pymdma/image/measures/input_val/__init__.py +++ b/src/pymdma/image/measures/input_val/__init__.py @@ -1,4 +1,4 @@ -from pymdma.image.measures.input_val.quality.no_reference import ( +from pymdma.image.measures.input_val.data.no_reference import ( BRISQUE, CLIPIQA, DOM, @@ -9,7 +9,7 @@ Tenengrad, TenengradRelative, ) -from pymdma.image.measures.input_val.quality.reference import MSSIM, PSNR, SSIM +from pymdma.image.measures.input_val.data.reference import MSSIM, PSNR, SSIM __all__ = [ "BRISQUE", diff --git a/src/pymdma/image/measures/input_val/annotation/coco.py b/src/pymdma/image/measures/input_val/annotation/coco.py index 97ba788..21beb8c 100644 --- a/src/pymdma/image/measures/input_val/annotation/coco.py +++ b/src/pymdma/image/measures/input_val/annotation/coco.py @@ -5,9 +5,7 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import AnnotationType, EvaluationLevel -from pymdma.constants import InputAnnotationMetrics as AnnotationMetrics -from pymdma.constants import OutputsTypes, ReferenceType +from pymdma.constants import AnnotationType, EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType _SUPPORTED_ANNOT_TYPES = { "segmentation": AnnotationType.MASK, @@ -62,7 +60,7 @@ class DatasetCompletness(Metric): reference_type = ReferenceType.NONE evaluation_level = [EvaluationLevel.DATASET, EvaluationLevel.INSTANCE] - metric_goal = AnnotationMetrics.COMPLETENESS + metric_goal = MetricGoal.VALIDITY annotation_type = [AnnotationType.LABEL, AnnotationType.BBOX, AnnotationType.MASK, AnnotationType.KEYPOINTS] def __init__( @@ -189,7 +187,7 @@ class AnnotationCorrectness(Metric): reference_type = ReferenceType.NONE evaluation_level = [EvaluationLevel.DATASET, EvaluationLevel.INSTANCE] - metric_goal = AnnotationMetrics.CORRECTNESS + metric_goal = MetricGoal.VALIDITY annotation_type = [AnnotationType.LABEL, AnnotationType.BBOX, AnnotationType.MASK, AnnotationType.KEYPOINTS] def __init__( @@ -373,7 +371,7 @@ class AnnotationUniqueness(Metric): reference_type = ReferenceType.NONE evaluation_level = [EvaluationLevel.DATASET, EvaluationLevel.INSTANCE] - metric_goal = AnnotationMetrics.UNIQUENESS + metric_goal = MetricGoal.VALIDITY annotation_type = [AnnotationType.BBOX, AnnotationType.MASK, AnnotationType.KEYPOINTS] def __init__( diff --git a/src/pymdma/image/measures/input_val/quality/__init__.py b/src/pymdma/image/measures/input_val/data/__init__.py similarity index 100% rename from src/pymdma/image/measures/input_val/quality/__init__.py rename to src/pymdma/image/measures/input_val/data/__init__.py diff --git 
a/src/pymdma/image/measures/input_val/quality/no_reference.py b/src/pymdma/image/measures/input_val/data/no_reference.py similarity index 97% rename from src/pymdma/image/measures/input_val/quality/no_reference.py rename to src/pymdma/image/measures/input_val/data/no_reference.py index cba2a84..2e08c08 100644 --- a/src/pymdma/image/measures/input_val/quality/no_reference.py +++ b/src/pymdma/image/measures/input_val/data/no_reference.py @@ -10,7 +10,7 @@ from pymdma.common.definitions import Metric from pymdma.common.output import DistributionResult, MetricResult -from pymdma.constants import EvaluationLevel, InputQualityMetrics, OutputsTypes, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ....utils.processing import image_resize @@ -50,7 +50,7 @@ class DOM(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.SHARPNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -126,7 +126,7 @@ class Tenengrad(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.SHARPNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -214,7 +214,7 @@ class TenengradRelative(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.SHARPNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -303,7 +303,7 @@ class EME(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.CONTRAST + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -387,7 +387,7 @@ class ExposureBrightness(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.BRIGHTNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -454,7 +454,7 @@ class Brightness(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.BRIGHTNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -527,7 +527,7 @@ class Colorfulness(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.BRIGHTNESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -612,7 +612,7 @@ class CLIPIQA(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.PERCEPTUAL_QUALITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -714,7 +714,7 @@ class BRISQUE(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.PERCEPTUAL_QUALITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 diff --git a/src/pymdma/image/measures/input_val/quality/reference.py b/src/pymdma/image/measures/input_val/data/reference.py similarity index 97% rename from src/pymdma/image/measures/input_val/quality/reference.py rename to src/pymdma/image/measures/input_val/data/reference.py index 6a65c54..9669370 100644 --- a/src/pymdma/image/measures/input_val/quality/reference.py +++ 
b/src/pymdma/image/measures/input_val/data/reference.py @@ -8,7 +8,7 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, InputQualityMetrics, OutputsTypes, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType # TODO review documentations and attributes @@ -36,7 +36,7 @@ class PSNR(Metric): reference_type = ReferenceType.INSTANCE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.NOISE + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -122,7 +122,7 @@ class SSIM(Metric): reference_type = ReferenceType.INSTANCE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.SIMILARITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -217,7 +217,7 @@ class MSSIM(Metric): reference_type = ReferenceType.INSTANCE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.SIMILARITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/image/measures/synthesis_val/feature/giqa.py b/src/pymdma/image/measures/synthesis_val/feature/giqa.py index 2cb524a..1341a0d 100644 --- a/src/pymdma/image/measures/synthesis_val/feature/giqa.py +++ b/src/pymdma/image/measures/synthesis_val/feature/giqa.py @@ -5,7 +5,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from pymdma.general.utils.util import min_max_scaling @@ -44,7 +44,7 @@ class GIQA(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = [EvaluationLevel.INSTANCE, EvaluationLevel.DATASET] - metric_goal = SyntheticFeatureMetrics.QUALITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/image/models/extractor.py b/src/pymdma/image/models/extractor.py index 75259d2..16dba60 100644 --- a/src/pymdma/image/models/extractor.py +++ b/src/pymdma/image/models/extractor.py @@ -30,12 +30,15 @@ def __call__(self, image: Image.Image) -> torch.Tensor: image = self.preprocess_transform(image) if self.preprocess_transform is not None else image image = image.resize(self.img_size, self.interp) # bring image to the range [0, 1] and normalize to [-1, 1] - image = np.array(image).astype(np.float32) / 255.0 - image = image * 2.0 - 1.0 - return torch.from_numpy(image).permute(2, 0, 1) + image = (np.array(image).astype(np.float32) - 128.0) / 128.0 + # image = np.array(image).astype(np.float32) / 255.0 + # image = image * 2.0 - 1.0 + return torch.from_numpy(image).permute(2, 0, 1).to(torch.float32) class BaseExtractor(torch.nn.Module, EmbedderInterface): + extractor: torch.nn.Module + def __init__( self, input_size: Tuple[int], @@ -68,7 +71,7 @@ def extract_features_from_files(self, files: List[Path], batch_size: int = 50, d batch_sizes.append(len(files) % batch_size) transform = StandardTransform(self.input_size, self.interpolation) - self.extractor = self.extractor.to(device) + self.extractor = self.extractor.to(device, dtype=torch.float32) act_array = [] start, end = 0, 0 @@ -101,10 +104,10 @@ def extract_features_dataloader( labels_array = [] ids_array = [] - self.extractor = self.extractor.to(device) + self.extractor =
self.extractor.to(device, dtype=torch.float32) dataloader.dataset.transform = StandardTransform(self.input_size, self.interpolation, preprocess_transform) for batch, labels, img_ids in tqdm(dataloader, total=len(dataloader)): - batch = batch.to(device) + batch = batch.to(device, dtype=torch.float32) batch = self.extractor(batch).detach().cpu().numpy() act_array.append(batch) labels_array.append(labels) diff --git a/src/pymdma/tabular/measures/input_val/__init__.py b/src/pymdma/tabular/measures/input_val/__init__.py index ae636ae..9cbf540 100644 --- a/src/pymdma/tabular/measures/input_val/__init__.py +++ b/src/pymdma/tabular/measures/input_val/__init__.py @@ -1,5 +1,5 @@ -from pymdma.tabular.measures.input_val.privacy import KAnonymityScore -from pymdma.tabular.measures.input_val.quality import ( +from pymdma.tabular.measures.input_val.data.privacy import KAnonymityScore +from pymdma.tabular.measures.input_val.data.quality import ( CorrelationScore, DimCurseScore, MissingScore, diff --git a/src/pymdma/tabular/measures/input_val/data/__init__.py b/src/pymdma/tabular/measures/input_val/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pymdma/tabular/measures/input_val/privacy.py b/src/pymdma/tabular/measures/input_val/data/privacy.py similarity index 94% rename from src/pymdma/tabular/measures/input_val/privacy.py rename to src/pymdma/tabular/measures/input_val/data/privacy.py index 5e41e36..8fb5bff 100644 --- a/src/pymdma/tabular/measures/input_val/privacy.py +++ b/src/pymdma/tabular/measures/input_val/data/privacy.py @@ -4,9 +4,9 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, InputPrivacyMetrics, OutputsTypes, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType -from ..utils_inp import compute_k_anonymity +from ...utils_inp import compute_k_anonymity class KAnonymityScore(Metric): @@ -61,7 +61,7 @@ class KAnonymityScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputPrivacyMetrics.ANONYMITY + metric_goal = MetricGoal.PRIVACY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/tabular/measures/input_val/quality.py b/src/pymdma/tabular/measures/input_val/data/quality.py similarity index 98% rename from src/pymdma/tabular/measures/input_val/quality.py rename to src/pymdma/tabular/measures/input_val/data/quality.py index c0b7c0d..ca41ed8 100644 --- a/src/pymdma/tabular/measures/input_val/quality.py +++ b/src/pymdma/tabular/measures/input_val/data/quality.py @@ -4,9 +4,9 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, InputQualityMetrics, OutputsTypes, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType -from ..utils_inp import ( # proximity_score, +from ...utils_inp import ( # proximity_score, compute_vif, corr_matrix, corr_strong, @@ -56,7 +56,7 @@ class CorrelationScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.SIMILARITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -158,7 +158,7 @@ class UniquenessScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.UNIQUENESS + metric_goal = MetricGoal.QUALITY 
higher_is_better: bool = False min_value: float = 0.0 @@ -300,7 +300,7 @@ class UniformityScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.UNIFORMITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -429,7 +429,7 @@ class OutlierScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.UNIFORMITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -547,7 +547,7 @@ class MissingScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.CONSISTENCY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -633,7 +633,7 @@ class DimCurseScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.DIMENSIONALITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -709,7 +709,7 @@ class VIFactorScore(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.DATASET - metric_goal = InputQualityMetrics.SIMILARITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 diff --git a/src/pymdma/tabular/measures/synthesis_val/data/similarity.py b/src/pymdma/tabular/measures/synthesis_val/data/similarity.py index c119346..fa89f33 100644 --- a/src/pymdma/tabular/measures/synthesis_val/data/similarity.py +++ b/src/pymdma/tabular/measures/synthesis_val/data/similarity.py @@ -5,7 +5,7 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ...utils_syn import _get_js_divergence, _get_kl_divergence, _get_ks_similarity, _get_nn_pdf, _get_tv_similarity @@ -77,7 +77,7 @@ class StatisticalSimScore(Metric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 @@ -239,7 +239,7 @@ class StatisiticalDivergenceScore(Metric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = -np.inf @@ -422,7 +422,7 @@ class CoherenceScore(Metric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/tabular/measures/synthesis_val/feature/privacy.py b/src/pymdma/tabular/measures/synthesis_val/feature/privacy.py index e73dbdf..cfd0ec4 100644 --- a/src/pymdma/tabular/measures/synthesis_val/feature/privacy.py +++ b/src/pymdma/tabular/measures/synthesis_val/feature/privacy.py @@ -2,7 +2,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from ...utils_syn import 
_get_nn_distances, _get_nn_pdf, _get_pp_metrics @@ -37,7 +37,7 @@ class NNDRPrivacy(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.PRIVACY + metric_goal = MetricGoal.PRIVACY higher_is_better: bool = True min_value: float = 0.0 @@ -136,7 +136,7 @@ class DCRPrivacy(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.PRIVACY + metric_goal = MetricGoal.PRIVACY higher_is_better: bool = True min_value: float = 0.0 diff --git a/src/pymdma/time_series/measures/input_val/__init__.py b/src/pymdma/time_series/measures/input_val/__init__.py index cb8ca83..3795111 100644 --- a/src/pymdma/time_series/measures/input_val/__init__.py +++ b/src/pymdma/time_series/measures/input_val/__init__.py @@ -1,3 +1,3 @@ -from pymdma.time_series.measures.input_val.quality import SNR, Uniqueness +from pymdma.time_series.measures.input_val.data.quality import SNR, Uniqueness __all__ = ["SNR", "Uniqueness"] diff --git a/src/pymdma/time_series/measures/input_val/data/__init__.py b/src/pymdma/time_series/measures/input_val/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pymdma/time_series/measures/input_val/quality.py b/src/pymdma/time_series/measures/input_val/data/quality.py similarity index 95% rename from src/pymdma/time_series/measures/input_val/quality.py rename to src/pymdma/time_series/measures/input_val/data/quality.py index 13f9941..714a77f 100644 --- a/src/pymdma/time_series/measures/input_val/quality.py +++ b/src/pymdma/time_series/measures/input_val/data/quality.py @@ -4,7 +4,7 @@ from pymdma.common.definitions import Metric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, InputQualityMetrics, OutputsTypes, ReferenceType +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType class Uniqueness(Metric): @@ -29,7 +29,7 @@ class Uniqueness(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.UNIQUENESS + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -89,7 +89,7 @@ class SNR(Metric): reference_type = ReferenceType.NONE evaluation_level = EvaluationLevel.INSTANCE - metric_goal = InputQualityMetrics.NOISE + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 diff --git a/src/pymdma/time_series/measures/synthesis_val/feature/distance.py b/src/pymdma/time_series/measures/synthesis_val/feature/distance.py index 0c9b1c0..fced924 100644 --- a/src/pymdma/time_series/measures/synthesis_val/feature/distance.py +++ b/src/pymdma/time_series/measures/synthesis_val/feature/distance.py @@ -4,7 +4,7 @@ from pymdma.common.definitions import FeatureMetric from pymdma.common.output import MetricResult -from pymdma.constants import EvaluationLevel, OutputsTypes, ReferenceType, SyntheticFeatureMetrics +from pymdma.constants import EvaluationLevel, MetricGoal, OutputsTypes, ReferenceType from pymdma.general.functional.distance import cos_sim_2d, fast_mmd_linear, mk_mmd, mmd_kernel, wasserstein from pymdma.general.functional.ratio import dispersion_ratio, distance_ratio from pymdma.general.utils.util import features_splitting @@ -31,7 +31,7 @@ class WassersteinDistance(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = 
[SyntheticFeatureMetrics.FIDELITY, SyntheticFeatureMetrics.DIVERSITY] + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -161,7 +161,7 @@ class MMD(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = [SyntheticFeatureMetrics.FIDELITY, SyntheticFeatureMetrics.DIVERSITY] + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 @@ -299,7 +299,7 @@ class CosineSimilarity(FeatureMetric): reference_type = ReferenceType.DATASET evaluation_level = EvaluationLevel.DATASET - metric_goal = SyntheticFeatureMetrics.FIDELITY + metric_goal = MetricGoal.QUALITY higher_is_better: bool = False min_value: float = 0.0 From 1c70abf9e2fd046cae95c340526b39ed1eca618f Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Mon, 28 Oct 2024 16:54:37 +0000 Subject: [PATCH 4/5] Minor patch 0.1.2 --- CHANGELOG.md | 16 ++++++++++++++++ VERSION | 2 +- pyproject.toml | 2 +- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65fb6e7..2ce720c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,22 @@ All notable changes to this project will be documented in this file. This format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.1.2] - 2024-10-28 ### Added + - CLIP device specification + - CLI compute arguments in reports dir + - Preprocessing transform in image datasets when using the API + +### Fixed + - Moved `sklearn` and `gudhi` dependencies to the main dependency tree + +### Changed + - Default image feature extractor is now `vit_b_32` + - Confusing synthetic and input metric goals were aggregated to `quality`, `privacy`, `annotation` and `utility` categories + + + ## [0.1.1] - 2024-10-24 ### Fixed - Fixed project configuration conflict between setup.py and pyproject.toml by reverting to poetry as main build engine diff --git a/VERSION b/VERSION index 6da28dd..8294c18 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.1 \ No newline at end of file +0.1.2 \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 026841b..c92aeeb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ # https://github.com/microsoft/vscode-python/blob/master/CHANGELOG.md#enhancements-1 [tool.poetry] name = "pymdma" -version = "0.1.1" +version = "0.1.2" description = "Multimodal Data Metrics for Auditing real and synthetic data" authors = ["Fraunhofer AICOS "] maintainers = [ From 50dab9958c19b03538050f4a2a625f36bd3599e5 Mon Sep 17 00:00:00 2001 From: Ivo Facoco Date: Mon, 28 Oct 2024 16:56:57 +0000 Subject: [PATCH 5/5] Changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2ce720c..9a340f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ This format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [0.1.2] - 2024-10-28 ### Added - - CLIP device specification + - CLI device specification - CLI compute arguments in reports dir - Preprocessing transform in image datasets when using the API ### Fixed - Moved `sklearn` and `gudhi` dependencies to the main dependency tree ### Changed - Default image feature extractor is now `vit_b_32` - Confusing synthetic and input metric goals were aggregated to `quality`, `privacy`, `annotation` and `utility` categories - + - Moved metrics to specific folders
based on `metric_group` (feature-based, data-based) ## [0.1.1] - 2024-10-24
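
Reviewer note on the preprocessing hook introduced in PATCH 1/5 (and reworked in PATCH 3/5): StandardTransform now accepts an optional preprocess_transform callable that is applied to the PIL image before the resize and normalization steps, and BaseExtractor.extract_features_dataloader forwards it to the transform it installs on the dataloader. Below is a minimal usage sketch of that hook; CenterSquareCrop, the image size, and the file path are hypothetical examples for illustration, not part of this series:

    from PIL import Image

    from pymdma.image.models.extractor import StandardTransform


    class CenterSquareCrop:
        """Hypothetical preprocess_transform: crop the largest centered square."""

        def __call__(self, image: Image.Image) -> Image.Image:
            width, height = image.size
            side = min(width, height)
            left, top = (width - side) // 2, (height - side) // 2
            return image.crop((left, top, left + side, top + side))


    # The crop runs first, then the resize to (224, 224) and the (x - 128) / 128
    # normalization, yielding a CHW float32 tensor in roughly [-1, 1].
    transform = StandardTransform((224, 224), preprocess_transform=CenterSquareCrop())
    tensor = transform(Image.open("example.png").convert("RGB"))
    print(tensor.shape)  # torch.Size([3, 224, 224])

The same callable can be passed as extract_features_dataloader(dataloader, device="cuda", preprocess_transform=CenterSquareCrop()), which mirrors how PATCH 1/5 temporarily wired in RetinaCenterCrop before PATCH 2/5 removed that hard-coded instance again.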