Rename `pfc_variance` to `pcf_variance` (PCF = Principal Components Fraction)

alekseysh · alekseysh · commit 909bedc828f7 · 2024-01-28T23:25:16.000+07:00
diff --git a/oml/functional/metrics.py b/oml/functional/metrics.py
@@ -109,25 +109,25 @@ def calc_retrieval_metrics(
     return metrics
 
 
-def calc_topological_metrics(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> TMetricsDict:
+def calc_topological_metrics(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> TMetricsDict:
     """
     Function to evaluate different topological metrics.
 
     Args:
         embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
-        pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+        pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                       of variance that needs to be explained is greater than the percentage specified
-                      by ``pfc_variance``.
+                      by ``pcf_variance``.
 
     Returns:
         Metrics dictionary.
 
     """
     metrics: TMetricsDict = dict()
 
-    if pfc_variance:
-        main_components = calc_pcf(embeddings, pfc_variance)
-        metrics["pcf"] = dict(zip(pfc_variance, main_components))
+    if pcf_variance:
+        main_components = calc_pcf(embeddings, pcf_variance)
+        metrics["pcf"] = dict(zip(pcf_variance, main_components))
 
     return metrics
 
@@ -484,16 +484,16 @@ def calc_fnmr_at_fmr(pos_dist: Tensor, neg_dist: Tensor, fmr_vals: Tuple[float,
     return fnmr_at_fmr
 
 
-def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor]:
+def calc_pcf(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> List[Tensor]:
     """
     Function estimates the Principal Components Fraction (PCF) of embeddings using Principal Component Analysis.
     The metric is defined as a fraction of components needed to explain the required variance in data.
 
     Args:
         embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
-        pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+        pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                       of variance that needs to be explained is greater than the fraction specified
-                      by ``pfc_variance``.
+                      by ``pcf_variance``.
     Returns:
         List of linear dimensions as a fractions of the embeddings dimension.
 
@@ -527,22 +527,22 @@ def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor
         because the number of principal axes is superior to the desired explained variance threshold).
 
         >>> embeddings = torch.eye(4, 10, dtype=torch.float)
-        >>> calc_pcf(embeddings, pfc_variance=(0.5, 1))
+        >>> calc_pcf(embeddings, pcf_variance=(0.5, 1))
         tensor([0.2000, 0.5000])
 
     """
     # The code below mirrors code from scikit-learn repository:
     # https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/decomposition/_pca.py#L491
-    _check_if_in_range(pfc_variance, 0, 1, "pfc_variance")
+    _check_if_in_range(pcf_variance, 0, 1, "pcf_variance")
     try:
         pca = PCA(embeddings)
-        n_components = pca.calc_principal_axes_number(pfc_variance).to(embeddings)
+        n_components = pca.calc_principal_axes_number(pcf_variance).to(embeddings)
         metric = n_components / embeddings.shape[1]
     except Exception:
         # Mostly we handle the following error here:
         # >>> The algorithm failed to converge because the input matrix is ill-conditioned
         # >>> or has too many repeated singular values
-        metric = [torch.tensor(float("nan"))] * len(pfc_variance)
+        metric = [torch.tensor(float("nan"))] * len(pcf_variance)
 
     return metric
 
diff --git a/oml/metrics/embeddings.py b/oml/metrics/embeddings.py
@@ -76,7 +76,7 @@ def __init__(
         precision_top_k: Tuple[int, ...] = (5,),
         map_top_k: Tuple[int, ...] = (5,),
         fmr_vals: Tuple[float, ...] = tuple(),
-        pfc_variance: Tuple[float, ...] = (0.5,),
+        pcf_variance: Tuple[float, ...] = (0.5,),
         categories_key: Optional[str] = None,
         sequence_key: Optional[str] = None,
         postprocessor: Optional[IDistancesPostprocessor] = None,
@@ -102,9 +102,9 @@ def __init__(
                       and ``fnmr@fmr=0.4``.
                       Note, computing this metric requires additional memory overhead,
                       that is why it's turned off by default.
-            pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+            pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                           of variance that needs to be explained is greater than the percentage specified
-                          by ``pfc_variance``.
+                          by ``pcf_variance``.
             categories_key: Key to take the samples' categories from the batches (if you have ones)
             sequence_key: Key to take sequence ids from the batches (if you have ones)
             postprocessor: Postprocessor which applies some techniques like query reranking
@@ -124,7 +124,7 @@ def __init__(
         self.precision_top_k = precision_top_k
         self.map_top_k = map_top_k
         self.fmr_vals = fmr_vals
-        self.pfc_variance = pfc_variance
+        self.pcf_variance = pcf_variance
 
         self.categories_key = categories_key
         self.sequence_key = sequence_key
@@ -205,7 +205,7 @@ def compute_metrics(self) -> TMetricsDict_ByLabels:  # type: ignore
             "map_top_k": self.map_top_k,
             "fmr_vals": self.fmr_vals,
         }
-        args_topological_metrics = {"pfc_variance": self.pfc_variance}
+        args_topological_metrics = {"pcf_variance": self.pcf_variance}
 
         metrics: TMetricsDict_ByLabels = dict()
 
diff --git a/oml/utils/misc_torch.py b/oml/utils/misc_torch.py
@@ -406,15 +406,15 @@ def inverse_transform(self, embeddings: torch.Tensor) -> torch.Tensor:
         self._check_dimensions(n_components)
         return torch.matmul(embeddings, self.components[:n_components, :]) + self.mean
 
-    def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.Tensor:
+    def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.Tensor:
         """
         Function estimates the number of principal axes that are required to explain the `explained_variance_ths`
         variance.
 
         Args:
-            pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
+            pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
                           of variance that needs to be explained is greater than the fraction specified
-                          by ``pfc_variance``.
+                          by ``pcf_variance``.
         Returns:
             List of amount of principal axes.
 
@@ -437,12 +437,12 @@ def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.T
 
             >>> embeddings = torch.eye(4, 10, dtype=torch.float)
             >>> pca = PCA(embeddings)
-            >>> pca.calc_principal_axes_number(pfc_variance=(0.5, 1))
+            >>> pca.calc_principal_axes_number(pcf_variance=(0.5, 1))
             tensor([2, 5])
 
         """
         ratio_cumsum = torch.cumsum(self.explained_variance_ratio, dim=0)
-        n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pfc_variance), side="right") + 1
+        n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
         return n_components
 
     def _check_dimensions(self, n_components: int) -> None:
diff --git a/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml b/pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml
@@ -38,7 +38,7 @@ metric_args:
   metrics_to_exclude_from_visualization: [cmc,]
   cmc_top_k: [1]
   map_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml b/pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: [1]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_cars/train_cars.yaml b/pipelines/features_extraction/extractor_cars/train_cars.yaml
@@ -41,7 +41,7 @@ metric_args:
   cmc_top_k: [1]  # to calculate cmc@1
   map_top_k: [5]  # wo calculate map@5
   fmr_vals: [0.01]  # to calculate fmr@0.01
-  pfc_variance: [0.5, 0.9, 0.99]  # to calculate pfc@0.5, pfc@0.9, pfc@0.99
+  pcf_variance: [0.5, 0.9, 0.99]  # to calculate pcf@0.5, pcf@0.9, pcf@0.99
   return_only_overall_category: True  # set False if you want to see metric graphs for all the categories (doesn't matter for CARS, since it contains no categories)
   visualize_only_overall_category: True  # set False to see images where the model performed worse for each separated category (doesn't matter for CARS, since it contains no categories)
 
diff --git a/pipelines/features_extraction/extractor_cars/val_cars.yaml b/pipelines/features_extraction/extractor_cars/val_cars.yaml
@@ -27,7 +27,7 @@ metric_args:
   map_top_k: [5]
   precision_top_k: [5]
   fmr_vals: [0.01]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_cub/train_cub.yaml b/pipelines/features_extraction/extractor_cub/train_cub.yaml
@@ -39,7 +39,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: [0.01]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_cub/val_cub.yaml b/pipelines/features_extraction/extractor_cub/val_cub.yaml
@@ -28,7 +28,7 @@ metric_args:
   map_top_k: [5]
   fmr_vals: [0.01]
   precision_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_inshop/train_inshop.yaml b/pipelines/features_extraction/extractor_inshop/train_inshop.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: []  # Since InShop is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_inshop/val_inshop.yaml b/pipelines/features_extraction/extractor_inshop/val_inshop.yaml
@@ -28,7 +28,7 @@ metric_args:
   cmc_top_k: [1, 5]
   map_top_k: [5]
   precision_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_sop/train_sop.yaml b/pipelines/features_extraction/extractor_sop/train_sop.yaml
@@ -43,7 +43,7 @@ metric_args:
   cmc_top_k: [1]
   map_top_k: [5]
   fmr_vals: []  # Since SOP is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/features_extraction/extractor_sop/val_sop.yaml b/pipelines/features_extraction/extractor_sop/val_sop.yaml
@@ -26,7 +26,7 @@ metric_args:
   metrics_to_exclude_from_visualization: [cmc,]
   cmc_top_k: [1, 5]
   map_top_k: [5]
-  pfc_variance: [0.5, 0.9, 0.99]
+  pcf_variance: [0.5, 0.9, 0.99]
   return_only_overall_category: False
   visualize_only_overall_category: True
 
diff --git a/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml b/pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml
@@ -40,7 +40,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   fmr_vals: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml b/pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml
@@ -29,7 +29,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   precision_top_k: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml
@@ -105,7 +105,7 @@ metric_args:
   cmc_top_k: [1, 10, 20, 30, 100]
   map_top_k: [5, 10]
   fmr_vals: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml b/pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml
@@ -58,7 +58,7 @@ metric_args:
   map_top_k: [5, 10]
   fmr_vals: []
   precision_top_k: []
-  pfc_variance: []
+  pcf_variance: []
   return_only_overall_category: True
   visualize_only_overall_category: True
 
diff --git a/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py b/tests/test_oml/test_functional/test_metrics/test_topological_metrics.py
@@ -19,7 +19,7 @@ def eye_case() -> Tuple[torch.Tensor, TMetricsDict]:
 
 def test_calc_topological_metrics(eye_case: Tuple[torch.Tensor, TMetricsDict]) -> None:
     embeddings, metrics_expected = eye_case
-    args = {"pfc_variance": tuple(metrics_expected["pcf"].keys())}
+    args = {"pcf_variance": tuple(metrics_expected["pcf"].keys())}
     metrics_evaluated = calc_topological_metrics(embeddings, **args)
     compare_dicts_recursively(metrics_evaluated, metrics_expected)
 
@@ -31,12 +31,12 @@ def test_calc_functions(
     metric_func: Callable[[torch.Tensor, Tuple[int, ...]], torch.Tensor],
 ) -> None:
     embeddings, metrics_expected = eye_case
-    pfc_variance = tuple(metrics_expected[metric_name].keys())
-    kwargs = {"embeddings": embeddings, "pfc_variance": pfc_variance}
+    pcf_variance = tuple(metrics_expected[metric_name].keys())
+    kwargs = {"embeddings": embeddings, "pcf_variance": pcf_variance}
 
     kwargs = remove_unused_kwargs(kwargs, metric_func)
     main_components_percentage = metric_func(**kwargs)  # type: ignore
-    metrics_calculated = dict(zip(pfc_variance, main_components_percentage))
+    metrics_calculated = dict(zip(pcf_variance, main_components_percentage))
     for p in metrics_expected[metric_name].keys():
         values_expected = metrics_expected[metric_name][p]
         values_calculated = metrics_calculated[p]
diff --git a/tests/test_oml/test_metrics/test_embedding_metrics.py b/tests/test_oml/test_metrics/test_embedding_metrics.py
@@ -163,7 +163,7 @@ def run_retrieval_metrics(case) -> None:  # type: ignore
         precision_top_k=tuple(),
         map_top_k=tuple(),
         fmr_vals=tuple(),
-        pfc_variance=tuple(),
+        pcf_variance=tuple(),
         postprocessor=get_trivial_postprocessor(top_n=2),
     )
 
@@ -199,7 +199,7 @@ def run_across_epochs(case1, case2) -> None:  # type: ignore
         precision_top_k=tuple(),
         map_top_k=tuple(),
         fmr_vals=tuple(),
-        pfc_variance=tuple(),
+        pcf_variance=tuple(),
         postprocessor=get_trivial_postprocessor(top_n=3),
     )