diff --git a/oml/functional/metrics.py b/oml/functional/metrics.py
index ed725c2a9..02f7ce828 100644
--- a/oml/functional/metrics.py
+++ b/oml/functional/metrics.py
@@ -109,15 +109,15 @@ def calc_retrieval_metrics(
     return metrics


-def calc_topological_metrics(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> TMetricsDict:
+def calc_topological_metrics(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> TMetricsDict:
     """
     Function to evaluate different topological metrics.

     Args:
         embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
-        pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+        pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
             of variance that needs to be explained is greater than the percentage specified
-            by ``pfc_variance``.
+            by ``pcf_variance``.

     Returns:
         Metrics dictionary.

@@ -125,9 +125,9 @@ def calc_topological_metrics(embeddings: Tensor, pfc_variance: Tuple[float, ...]
     """
     metrics: TMetricsDict = dict()

-    if pfc_variance:
-        main_components = calc_pcf(embeddings, pfc_variance)
-        metrics["pcf"] = dict(zip(pfc_variance, main_components))
+    if pcf_variance:
+        main_components = calc_pcf(embeddings, pcf_variance)
+        metrics["pcf"] = dict(zip(pcf_variance, main_components))

     return metrics

@@ -484,16 +484,16 @@ def calc_fnmr_at_fmr(pos_dist: Tensor, neg_dist: Tensor, fmr_vals: Tuple[float,
     return fnmr_at_fmr


-def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor]:
+def calc_pcf(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> List[Tensor]:
     """
     Function estimates the Principal Components Fraction (PCF) of embeddings using Principal Component Analysis.
     The metric is defined as a fraction of components needed to explain the required variance in data.

     Args:
         embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
-        pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+        pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
             of variance that needs to be explained is greater than the fraction specified
-            by ``pfc_variance``.
+            by ``pcf_variance``.

     Returns:
         List of linear dimensions as a fractions of the embeddings dimension.

@@ -527,22 +527,22 @@ def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor
     because the number of principal axes is superior to the desired explained variance threshold).

     >>> embeddings = torch.eye(4, 10, dtype=torch.float)
-    >>> calc_pcf(embeddings, pfc_variance=(0.5, 1))
+    >>> calc_pcf(embeddings, pcf_variance=(0.5, 1))
     tensor([0.2000, 0.5000])

     """
     # The code below mirrors code from scikit-learn repository:
     # https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/decomposition/_pca.py#L491
-    _check_if_in_range(pfc_variance, 0, 1, "pfc_variance")
+    _check_if_in_range(pcf_variance, 0, 1, "pcf_variance")

     try:
         pca = PCA(embeddings)
-        n_components = pca.calc_principal_axes_number(pfc_variance).to(embeddings)
+        n_components = pca.calc_principal_axes_number(pcf_variance).to(embeddings)
         metric = n_components / embeddings.shape[1]
     except Exception:
         # Mostly we handle the following error here:
         # >>> The algorithm failed to converge because the input matrix is ill-conditioned
         # >>> or has too many repeated singular values
-        metric = [torch.tensor(float("nan"))] * len(pfc_variance)
+        metric = [torch.tensor(float("nan"))] * len(pcf_variance)

     return metric
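For intuition, a minimal self-contained sketch of the quantity that ``calc_pcf`` reports. The helper name ``pcf_sketch`` is hypothetical; the real function above delegates to the ``PCA`` helper and additionally returns NaNs when the SVD fails to converge:

import torch

# Hypothetical illustration, not part of this change: the Principal
# Components Fraction computed by hand via an SVD of the centered data.
def pcf_sketch(embeddings: torch.Tensor, pcf_variance: tuple) -> torch.Tensor:
    centered = embeddings - embeddings.mean(dim=0)
    # Squared singular values are proportional to per-axis explained variance.
    explained = torch.linalg.svdvals(centered) ** 2
    ratio_cumsum = torch.cumsum(explained / explained.sum(), dim=0)
    # Smallest number of axes whose cumulative ratio strictly exceeds each threshold...
    n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
    # ...reported as a fraction of the embedding dimension.
    return n_components / embeddings.shape[1]

embeddings = torch.eye(4, 10, dtype=torch.float)
print(pcf_sketch(embeddings, (0.5, 1.0)))  # tensor([0.2000, 0.5000]), matching the doctest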
diff --git a/oml/metrics/embeddings.py b/oml/metrics/embeddings.py
index d281d0275..53367a0a8 100644
--- a/oml/metrics/embeddings.py
+++ b/oml/metrics/embeddings.py
@@ -76,7 +76,7 @@ def __init__(
         precision_top_k: Tuple[int, ...] = (5,),
         map_top_k: Tuple[int, ...] = (5,),
         fmr_vals: Tuple[float, ...] = tuple(),
-        pfc_variance: Tuple[float, ...] = (0.5,),
+        pcf_variance: Tuple[float, ...] = (0.5,),
         categories_key: Optional[str] = None,
         sequence_key: Optional[str] = None,
         postprocessor: Optional[IDistancesPostprocessor] = None,
@@ -102,9 +102,9 @@ def __init__(
                 and ``fnmr@fmr=0.4``. Note, computing this metric requires additional memory overhead,
                 that is why it's turned off by default.
-            pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+            pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                 of variance that needs to be explained is greater than the percentage specified
-                by ``pfc_variance``.
+                by ``pcf_variance``.
             categories_key: Key to take the samples' categories from the batches (if you have ones)
             sequence_key: Key to take sequence ids from the batches (if you have ones)
             postprocessor: Postprocessor which applies some techniques like query reranking
@@ -124,7 +124,7 @@ def __init__(
         self.precision_top_k = precision_top_k
         self.map_top_k = map_top_k
         self.fmr_vals = fmr_vals
-        self.pfc_variance = pfc_variance
+        self.pcf_variance = pcf_variance

         self.categories_key = categories_key
         self.sequence_key = sequence_key
@@ -205,7 +205,7 @@ def compute_metrics(self) -> TMetricsDict_ByLabels:  # type: ignore
             "map_top_k": self.map_top_k,
             "fmr_vals": self.fmr_vals,
         }
-        args_topological_metrics = {"pfc_variance": self.pfc_variance}
+        args_topological_metrics = {"pcf_variance": self.pcf_variance}

         metrics: TMetricsDict_ByLabels = dict()
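Downstream, the renamed keyword flows from ``EmbeddingMetrics`` into ``calc_topological_metrics`` via ``args_topological_metrics``. A usage sketch, assuming the remaining constructor arguments keep the defaults shown above:

from oml.metrics.embeddings import EmbeddingMetrics

# Sketch only: relies on the defaults visible in the diff; the usual
# setup/update cycle that feeds embeddings into the calculator is omitted.
calculator = EmbeddingMetrics(pcf_variance=(0.5, 0.9, 0.99))
# compute_metrics() then reports pcf@0.5, pcf@0.9 and pcf@0.99.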
diff --git a/oml/utils/misc_torch.py b/oml/utils/misc_torch.py
index 8f019e2e5..022a85b46 100644
--- a/oml/utils/misc_torch.py
+++ b/oml/utils/misc_torch.py
@@ -406,15 +406,15 @@ def inverse_transform(self, embeddings: torch.Tensor) -> torch.Tensor:
         self._check_dimensions(n_components)
         return torch.matmul(embeddings, self.components[:n_components, :]) + self.mean

-    def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.Tensor:
+    def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.Tensor:
         """
         Function estimates the number of principal axes that are required to explain the
         `explained_variance_ths` variance.

         Args:
-            pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+            pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                 of variance that needs to be explained is greater than the fraction specified
-                by ``pfc_variance``.
+                by ``pcf_variance``.

         Returns:
             List of amount of principal axes.
@@ -437,12 +437,12 @@ def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.T

         >>> embeddings = torch.eye(4, 10, dtype=torch.float)
         >>> pca = PCA(embeddings)
-        >>> pca.calc_principal_axes_number(pfc_variance=(0.5, 1))
+        >>> pca.calc_principal_axes_number(pcf_variance=(0.5, 1))
         tensor([2, 5])

         """
         ratio_cumsum = torch.cumsum(self.explained_variance_ratio, dim=0)
-        n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pfc_variance), side="right") + 1
+        n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
         return n_components

     def _check_dimensions(self, n_components: int) -> None:
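The counting step in ``calc_principal_axes_number`` is a binary search over the cumulative explained-variance curve. A small illustration with hypothetical variance ratios:

import torch

# Hypothetical numbers: four principal axes explaining 40/30/20/10 percent of the variance.
explained_variance_ratio = torch.tensor([0.4, 0.3, 0.2, 0.1])
ratio_cumsum = torch.cumsum(explained_variance_ratio, dim=0)  # ~[0.4, 0.7, 0.9, 1.0]

# side="right" returns, for each threshold, the index of the first axis whose
# cumulative ratio strictly exceeds it; +1 turns that index into a count.
pcf_variance = (0.5, 0.95)
n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
print(n_components)  # tensor([2, 4]): two axes exceed 0.5, all four are needed for 0.95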
diff --git a/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml b/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml
index 7858888bd..b8a91dfa1 100644
--- a/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml
+++ b/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml
@@ -38,7 +38,7 @@ metric_args:
   metrics_to_exclude_from_visualization: [cmc,]
   cmc_top_k: [1]
   map_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml b/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml
index e089c1af8..afd9ada61 100644
--- a/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml
+++ b/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: [1]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_cars/train_cars.yaml b/pipelines/features_extraction/extractor_cars/train_cars.yaml
index 750d460e1..f18b18e4e 100644
--- a/pipelines/features_extraction/extractor_cars/train_cars.yaml
+++ b/pipelines/features_extraction/extractor_cars/train_cars.yaml
@@ -41,7 +41,7 @@ metric_args:
   cmc_top_k: [1]  # to calculate cmc@1
   map_top_k: [5]  # wo calculate map@5
   fmr_vals: [0.01]  # to calculate fmr@0.01
-  pfc_variance: [0.5, 0.9, 0.99]  # to calculate pfc@0.5, pfc@0.9, pfc@0.99
+  pcf_variance: [0.5, 0.9, 0.99]  # to calculate pcf@0.5, pcf@0.9, pcf@0.99
   return_only_overall_category: True  # set False if you want to see metric graphs for all the categories (doesn't matter for CARS, since it contains no categories)
   visualize_only_overall_category: True  # set False to see images where the model performed worse for each separated category (doesn't matter for CARS, since it contains no categories)

diff --git a/pipelines/features_extraction/extractor_cars/val_cars.yaml b/pipelines/features_extraction/extractor_cars/val_cars.yaml
index 88047b572..88a2c2b3b 100644
--- a/pipelines/features_extraction/extractor_cars/val_cars.yaml
+++ b/pipelines/features_extraction/extractor_cars/val_cars.yaml
@@ -27,7 +27,7 @@ metric_args:
   map_top_k: [5]
   precision_top_k: [5]
   fmr_vals: [0.01]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_cub/train_cub.yaml b/pipelines/features_extraction/extractor_cub/train_cub.yaml
index 89a6937f7..4891259c6 100644
--- a/pipelines/features_extraction/extractor_cub/train_cub.yaml
+++ b/pipelines/features_extraction/extractor_cub/train_cub.yaml
@@ -39,7 +39,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: [0.01]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_cub/val_cub.yaml b/pipelines/features_extraction/extractor_cub/val_cub.yaml
index e47b5cc62..ead335dfd 100644
--- a/pipelines/features_extraction/extractor_cub/val_cub.yaml
+++ b/pipelines/features_extraction/extractor_cub/val_cub.yaml
@@ -28,7 +28,7 @@ metric_args:
   map_top_k: [5]
   fmr_vals: [0.01]
   precision_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_inshop/train_inshop.yaml b/pipelines/features_extraction/extractor_inshop/train_inshop.yaml
index 941508b80..629d20f9f 100644
--- a/pipelines/features_extraction/extractor_inshop/train_inshop.yaml
+++ b/pipelines/features_extraction/extractor_inshop/train_inshop.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: []  # Since InShop is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_inshop/val_inshop.yaml b/pipelines/features_extraction/extractor_inshop/val_inshop.yaml
index cbb064087..66c35e463 100644
--- a/pipelines/features_extraction/extractor_inshop/val_inshop.yaml
+++ b/pipelines/features_extraction/extractor_inshop/val_inshop.yaml
@@ -28,7 +28,7 @@ metric_args:
   cmc_top_k: [1, 5]
   map_top_k: [5]
   precision_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_sop/train_sop.yaml b/pipelines/features_extraction/extractor_sop/train_sop.yaml
index d35b254ee..b68df5bf8 100644
--- a/pipelines/features_extraction/extractor_sop/train_sop.yaml
+++ b/pipelines/features_extraction/extractor_sop/train_sop.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: []  # Since SOP is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/features_extraction/extractor_sop/val_sop.yaml b/pipelines/features_extraction/extractor_sop/val_sop.yaml
index 8a03c09e0..9701f8573 100644
--- a/pipelines/features_extraction/extractor_sop/val_sop.yaml
+++ b/pipelines/features_extraction/extractor_sop/val_sop.yaml
@@ -26,7 +26,7 @@ metric_args:
   metrics_to_exclude_from_visualization: [cmc,]
   cmc_top_k: [1, 5]
   map_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True
diff --git a/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml b/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml
index 3b94711ab..20994b534 100644
--- a/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml
+++ b/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml
@@ -40,7 +40,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   fmr_vals: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml b/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml
index 85ffbbe38..9c0e7ad66 100644
--- a/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml
+++ b/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml
@@ -29,7 +29,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   precision_top_k: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml
index 99f2bdf56..e57305d61 100644
--- a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml
+++ b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml
@@ -105,7 +105,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   fmr_vals: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml
index 53c12e999..0ed09cbe7 100644
--- a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml
+++ b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml
@@ -58,7 +58,7 @@ metric_args:
   map_top_k: [5, 10]
   fmr_vals: []
   precision_top_k: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True

diff --git a/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py b/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py
index 4d9df94a8..c504ab0af 100644
--- a/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py
+++ b/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py
@@ -19,7 +19,7 @@ def eye_case() -> Tuple[torch.Tensor, TMetricsDict]:

 def test_calc_topological_metrics(eye_case: Tuple[torch.Tensor, TMetricsDict]) -> None:
     embeddings, metrics_expected = eye_case
-    args = {"pfc_variance": tuple(metrics_expected["pcf"].keys())}
+    args = {"pcf_variance": tuple(metrics_expected["pcf"].keys())}
     metrics_evaluated = calc_topological_metrics(embeddings, **args)
     compare_dicts_recursively(metrics_evaluated, metrics_expected)

@@ -31,12 +31,12 @@ def test_calc_functions(
     metric_func: Callable[[torch.Tensor, Tuple[int, ...]], torch.Tensor],
 ) -> None:
     embeddings, metrics_expected = eye_case
-    pfc_variance = tuple(metrics_expected[metric_name].keys())
-    kwargs = {"embeddings": embeddings, "pfc_variance": pfc_variance}
+    pcf_variance = tuple(metrics_expected[metric_name].keys())
+    kwargs = {"embeddings": embeddings, "pcf_variance": pcf_variance}
     kwargs = remove_unused_kwargs(kwargs, metric_func)
     main_components_percentage = metric_func(**kwargs)  # type: ignore
-    metrics_calculated = dict(zip(pfc_variance, main_components_percentage))
+    metrics_calculated = dict(zip(pcf_variance, main_components_percentage))
     for p in metrics_expected[metric_name].keys():
         values_expected = metrics_expected[metric_name][p]
         values_calculated = metrics_calculated[p]

diff --git a/tests/test_oml/test_metrics/test_embedding_metrics.py b/tests/test_oml/test_metrics/test_embedding_metrics.py
index 8f6c7faec..89a2c3bc4 100644
--- a/tests/test_oml/test_metrics/test_embedding_metrics.py
+++ b/tests/test_oml/test_metrics/test_embedding_metrics.py
@@ -163,7 +163,7 @@ def run_retrieval_metrics(case) -> None:  # type: ignore
         precision_top_k=tuple(),
         map_top_k=tuple(),
         fmr_vals=tuple(),
-        pfc_variance=tuple(),
+        pcf_variance=tuple(),
         postprocessor=get_trivial_postprocessor(top_n=2),
     )

@@ -199,7 +199,7 @@ def run_across_epochs(case1, case2) -> None:  # type: ignore
         precision_top_k=tuple(),
         map_top_k=tuple(),
         fmr_vals=tuple(),
-        pfc_variance=tuple(),
+        pcf_variance=tuple(),
         postprocessor=get_trivial_postprocessor(top_n=3),
     )
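Since the keyword is renamed without a ``pfc_variance`` alias, external callers must be updated in the same step. A sketch of the caller-side change:

import torch
from oml.functional.metrics import calc_topological_metrics

embeddings = torch.randn(128, 16)

# Before this change (now raises TypeError: unexpected keyword argument):
#   calc_topological_metrics(embeddings, pfc_variance=(0.5, 0.9))
# After:
metrics = calc_topological_metrics(embeddings, pcf_variance=(0.5, 0.9))
# As in the function body above, the result maps each threshold to a fraction:
# {"pcf": {0.5: tensor(...), 0.9: tensor(...)}}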