Skip to content

Commit 909bedc

Browse files
author
alekseysh
committed
pfc to pcf
1 parent 417d5f3 commit 909bedc

File tree

19 files changed

+43
-43
lines changed

19 files changed

+43
-43
lines changed

oml/functional/metrics.py

+13-13
Original file line numberDiff line numberDiff line change
@@ -109,25 +109,25 @@ def calc_retrieval_metrics(
109109
return metrics
110110

111111

112-
def calc_topological_metrics(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> TMetricsDict:
112+
def calc_topological_metrics(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> TMetricsDict:
113113
"""
114114
Function to evaluate different topological metrics.
115115
116116
Args:
117117
embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
118-
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
118+
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
119119
of variance that needs to be explained is greater than the fraction specified
120-
by ``pfc_variance``.
120+
by ``pcf_variance``.
121121
122122
Returns:
123123
Metrics dictionary.
124124
125125
"""
126126
metrics: TMetricsDict = dict()
127127

128-
if pfc_variance:
129-
main_components = calc_pcf(embeddings, pfc_variance)
130-
metrics["pcf"] = dict(zip(pfc_variance, main_components))
128+
if pcf_variance:
129+
main_components = calc_pcf(embeddings, pcf_variance)
130+
metrics["pcf"] = dict(zip(pcf_variance, main_components))
131131

132132
return metrics
133133

@@ -484,16 +484,16 @@ def calc_fnmr_at_fmr(pos_dist: Tensor, neg_dist: Tensor, fmr_vals: Tuple[float,
484484
return fnmr_at_fmr
485485

486486

487-
def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor]:
487+
def calc_pcf(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> List[Tensor]:
488488
"""
489489
Function estimates the Principal Components Fraction (PCF) of embeddings using Principal Component Analysis.
490490
The metric is defined as a fraction of components needed to explain the required variance in data.
491491
492492
Args:
493493
embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
494-
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
494+
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
495495
of variance that needs to be explained is greater than the fraction specified
496-
by ``pfc_variance``.
496+
by ``pcf_variance``.
497497
Returns:
498498
List of linear dimensions as fractions of the embeddings dimension.
499499
@@ -527,22 +527,22 @@ def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor
527527
because the number of principal axes is superior to the desired explained variance threshold).
528528
529529
>>> embeddings = torch.eye(4, 10, dtype=torch.float)
530-
>>> calc_pcf(embeddings, pfc_variance=(0.5, 1))
530+
>>> calc_pcf(embeddings, pcf_variance=(0.5, 1))
531531
tensor([0.2000, 0.5000])
532532
533533
"""
534534
# The code below mirrors code from scikit-learn repository:
535535
# https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/decomposition/_pca.py#L491
536-
_check_if_in_range(pfc_variance, 0, 1, "pfc_variance")
536+
_check_if_in_range(pcf_variance, 0, 1, "pcf_variance")
537537
try:
538538
pca = PCA(embeddings)
539-
n_components = pca.calc_principal_axes_number(pfc_variance).to(embeddings)
539+
n_components = pca.calc_principal_axes_number(pcf_variance).to(embeddings)
540540
metric = n_components / embeddings.shape[1]
541541
except Exception:
542542
# Mostly we handle the following error here:
543543
# >>> The algorithm failed to converge because the input matrix is ill-conditioned
544544
# >>> or has too many repeated singular values
545-
metric = [torch.tensor(float("nan"))] * len(pfc_variance)
545+
metric = [torch.tensor(float("nan"))] * len(pcf_variance)
546546

547547
return metric
548548

oml/metrics/embeddings.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def __init__(
7676
precision_top_k: Tuple[int, ...] = (5,),
7777
map_top_k: Tuple[int, ...] = (5,),
7878
fmr_vals: Tuple[float, ...] = tuple(),
79-
pfc_variance: Tuple[float, ...] = (0.5,),
79+
pcf_variance: Tuple[float, ...] = (0.5,),
8080
categories_key: Optional[str] = None,
8181
sequence_key: Optional[str] = None,
8282
postprocessor: Optional[IDistancesPostprocessor] = None,
@@ -102,9 +102,9 @@ def __init__(
102102
and ``fnmr@fmr=0.4``.
103103
Note, computing this metric requires additional memory overhead,
104104
that is why it's turned off by default.
105-
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
105+
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
106106
of variance that needs to be explained is greater than the fraction specified
107-
by ``pfc_variance``.
107+
by ``pcf_variance``.
108108
categories_key: Key to take the samples' categories from the batches (if you have ones)
109109
sequence_key: Key to take sequence ids from the batches (if you have ones)
110110
postprocessor: Postprocessor which applies some techniques like query reranking
@@ -124,7 +124,7 @@ def __init__(
124124
self.precision_top_k = precision_top_k
125125
self.map_top_k = map_top_k
126126
self.fmr_vals = fmr_vals
127-
self.pfc_variance = pfc_variance
127+
self.pcf_variance = pcf_variance
128128

129129
self.categories_key = categories_key
130130
self.sequence_key = sequence_key
@@ -205,7 +205,7 @@ def compute_metrics(self) -> TMetricsDict_ByLabels: # type: ignore
205205
"map_top_k": self.map_top_k,
206206
"fmr_vals": self.fmr_vals,
207207
}
208-
args_topological_metrics = {"pfc_variance": self.pfc_variance}
208+
args_topological_metrics = {"pcf_variance": self.pcf_variance}
209209

210210
metrics: TMetricsDict_ByLabels = dict()
211211

oml/utils/misc_torch.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -406,15 +406,15 @@ def inverse_transform(self, embeddings: torch.Tensor) -> torch.Tensor:
406406
self._check_dimensions(n_components)
407407
return torch.matmul(embeddings, self.components[:n_components, :]) + self.mean
408408

409-
def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.Tensor:
409+
def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.Tensor:
410410
"""
411411
Function estimates the number of principal axes that are required to explain the ``pcf_variance`` fraction of
412412
variance.
413413
414414
Args:
415-
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
415+
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
416416
of variance that needs to be explained is greater than the fraction specified
417-
by ``pfc_variance``.
417+
by ``pcf_variance``.
418418
Returns:
419419
List of amount of principal axes.
420420
@@ -437,12 +437,12 @@ def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.T
437437
438438
>>> embeddings = torch.eye(4, 10, dtype=torch.float)
439439
>>> pca = PCA(embeddings)
440-
>>> pca.calc_principal_axes_number(pfc_variance=(0.5, 1))
440+
>>> pca.calc_principal_axes_number(pcf_variance=(0.5, 1))
441441
tensor([2, 5])
442442
443443
"""
444444
ratio_cumsum = torch.cumsum(self.explained_variance_ratio, dim=0)
445-
n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pfc_variance), side="right") + 1
445+
n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
446446
return n_components
447447

448448
def _check_dimensions(self, n_components: int) -> None:

pipelines/features_extraction/configs_experimental/train_inshop_arcface.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ metric_args:
3838
metrics_to_exclude_from_visualization: [cmc,]
3939
cmc_top_k: [1]
4040
map_top_k: [5]
41-
pfc_variance: [0.5, 0.9, 0.99]
41+
pcf_variance: [0.5, 0.9, 0.99]
4242
return_only_overall_category: True
4343
visualize_only_overall_category: True
4444

pipelines/features_extraction/configs_experimental/train_inshop_with_mlp.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ metric_args:
4343
cmc_top_k: [1]
4444
map_top_k: [5]
4545
fmr_vals: [1]
46-
pfc_variance: [0.5, 0.9, 0.99]
46+
pcf_variance: [0.5, 0.9, 0.99]
4747
return_only_overall_category: True
4848
visualize_only_overall_category: True
4949

pipelines/features_extraction/extractor_cars/train_cars.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ metric_args:
4141
cmc_top_k: [1] # to calculate cmc@1
4242
map_top_k: [5] # to calculate map@5
4343
fmr_vals: [0.01] # to calculate fnmr@fmr=0.01
44-
pfc_variance: [0.5, 0.9, 0.99] # to calculate pfc@0.5, pfc@0.9, pfc@0.99
44+
pcf_variance: [0.5, 0.9, 0.99] # to calculate pcf@0.5, pcf@0.9, pcf@0.99
4545
return_only_overall_category: True # set False if you want to see metric graphs for all the categories (doesn't matter for CARS, since it contains no categories)
4646
visualize_only_overall_category: True # set False to see images where the model performed worse for each separated category (doesn't matter for CARS, since it contains no categories)
4747

pipelines/features_extraction/extractor_cars/val_cars.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ metric_args:
2727
map_top_k: [5]
2828
precision_top_k: [5]
2929
fmr_vals: [0.01]
30-
pfc_variance: [0.5, 0.9, 0.99]
30+
pcf_variance: [0.5, 0.9, 0.99]
3131
return_only_overall_category: False
3232
visualize_only_overall_category: True
3333

pipelines/features_extraction/extractor_cub/train_cub.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ metric_args:
3939
cmc_top_k: [1]
4040
map_top_k: [5]
4141
fmr_vals: [0.01]
42-
pfc_variance: [0.5, 0.9, 0.99]
42+
pcf_variance: [0.5, 0.9, 0.99]
4343
return_only_overall_category: True
4444
visualize_only_overall_category: True
4545

pipelines/features_extraction/extractor_cub/val_cub.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ metric_args:
2828
map_top_k: [5]
2929
fmr_vals: [0.01]
3030
precision_top_k: [5]
31-
pfc_variance: [0.5, 0.9, 0.99]
31+
pcf_variance: [0.5, 0.9, 0.99]
3232
return_only_overall_category: False
3333
visualize_only_overall_category: True
3434

pipelines/features_extraction/extractor_inshop/train_inshop.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ metric_args:
4343
cmc_top_k: [1]
4444
map_top_k: [5]
4545
fmr_vals: [] # Since InShop is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
46-
pfc_variance: [0.5, 0.9, 0.99]
46+
pcf_variance: [0.5, 0.9, 0.99]
4747
return_only_overall_category: True
4848
visualize_only_overall_category: True
4949

pipelines/features_extraction/extractor_inshop/val_inshop.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ metric_args:
2828
cmc_top_k: [1, 5]
2929
map_top_k: [5]
3030
precision_top_k: [5]
31-
pfc_variance: [0.5, 0.9, 0.99]
31+
pcf_variance: [0.5, 0.9, 0.99]
3232
return_only_overall_category: True
3333
visualize_only_overall_category: True
3434

pipelines/features_extraction/extractor_sop/train_sop.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ metric_args:
4343
cmc_top_k: [1]
4444
map_top_k: [5]
4545
fmr_vals: [] # Since SOP is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
46-
pfc_variance: [0.5, 0.9, 0.99]
46+
pcf_variance: [0.5, 0.9, 0.99]
4747
return_only_overall_category: True
4848
visualize_only_overall_category: True
4949

pipelines/features_extraction/extractor_sop/val_sop.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ metric_args:
2626
metrics_to_exclude_from_visualization: [cmc,]
2727
cmc_top_k: [1, 5]
2828
map_top_k: [5]
29-
pfc_variance: [0.5, 0.9, 0.99]
29+
pcf_variance: [0.5, 0.9, 0.99]
3030
return_only_overall_category: False
3131
visualize_only_overall_category: True
3232

pipelines/postprocessing/pairwise_postprocessing/extractor_train.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ metric_args:
4040
cmc_top_k: [1, 10, 20, 30, 100]
4141
map_top_k: [5, 10]
4242
fmr_vals: []
43-
pfc_variance: []
43+
pcf_variance: []
4444
return_only_overall_category: True
4545
visualize_only_overall_category: True
4646

pipelines/postprocessing/pairwise_postprocessing/extractor_validate.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ metric_args:
2929
cmc_top_k: [1, 10, 20, 30, 100]
3030
map_top_k: [5, 10]
3131
precision_top_k: []
32-
pfc_variance: []
32+
pcf_variance: []
3333
return_only_overall_category: True
3434
visualize_only_overall_category: True
3535

pipelines/postprocessing/pairwise_postprocessing/postprocessor_train.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ metric_args:
105105
cmc_top_k: [1, 10, 20, 30, 100]
106106
map_top_k: [5, 10]
107107
fmr_vals: []
108-
pfc_variance: []
108+
pcf_variance: []
109109
return_only_overall_category: True
110110
visualize_only_overall_category: True
111111

pipelines/postprocessing/pairwise_postprocessing/postprocessor_validate.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ metric_args:
5858
map_top_k: [5, 10]
5959
fmr_vals: []
6060
precision_top_k: []
61-
pfc_variance: []
61+
pcf_variance: []
6262
return_only_overall_category: True
6363
visualize_only_overall_category: True
6464

tests/test_oml/test_functional/test_metrics/test_topological_metrics.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def eye_case() -> Tuple[torch.Tensor, TMetricsDict]:
1919

2020
def test_calc_topological_metrics(eye_case: Tuple[torch.Tensor, TMetricsDict]) -> None:
2121
embeddings, metrics_expected = eye_case
22-
args = {"pfc_variance": tuple(metrics_expected["pcf"].keys())}
22+
args = {"pcf_variance": tuple(metrics_expected["pcf"].keys())}
2323
metrics_evaluated = calc_topological_metrics(embeddings, **args)
2424
compare_dicts_recursively(metrics_evaluated, metrics_expected)
2525

@@ -31,12 +31,12 @@ def test_calc_functions(
3131
metric_func: Callable[[torch.Tensor, Tuple[int, ...]], torch.Tensor],
3232
) -> None:
3333
embeddings, metrics_expected = eye_case
34-
pfc_variance = tuple(metrics_expected[metric_name].keys())
35-
kwargs = {"embeddings": embeddings, "pfc_variance": pfc_variance}
34+
pcf_variance = tuple(metrics_expected[metric_name].keys())
35+
kwargs = {"embeddings": embeddings, "pcf_variance": pcf_variance}
3636

3737
kwargs = remove_unused_kwargs(kwargs, metric_func)
3838
main_components_percentage = metric_func(**kwargs) # type: ignore
39-
metrics_calculated = dict(zip(pfc_variance, main_components_percentage))
39+
metrics_calculated = dict(zip(pcf_variance, main_components_percentage))
4040
for p in metrics_expected[metric_name].keys():
4141
values_expected = metrics_expected[metric_name][p]
4242
values_calculated = metrics_calculated[p]

tests/test_oml/test_metrics/test_embedding_metrics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ def run_retrieval_metrics(case) -> None: # type: ignore
163163
precision_top_k=tuple(),
164164
map_top_k=tuple(),
165165
fmr_vals=tuple(),
166-
pfc_variance=tuple(),
166+
pcf_variance=tuple(),
167167
postprocessor=get_trivial_postprocessor(top_n=2),
168168
)
169169

@@ -199,7 +199,7 @@ def run_across_epochs(case1, case2) -> None: # type: ignore
199199
precision_top_k=tuple(),
200200
map_top_k=tuple(),
201201
fmr_vals=tuple(),
202-
pfc_variance=tuple(),
202+
pcf_variance=tuple(),
203203
postprocessor=get_trivial_postprocessor(top_n=3),
204204
)
205205

0 commit comments

Comments
 (0)