Skip to content

Commit

Permalink
naming fix: pfc to pcf
Browse files Browse the repository at this point in the history
naming fix: pfc to pcf
  • Loading branch information
AlekseySh authored Jan 28, 2024
1 parent 417d5f3 commit 4f43b5f
Show file tree
Hide file tree
Showing 19 changed files with 43 additions and 43 deletions.
26 changes: 13 additions & 13 deletions oml/functional/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,25 +109,25 @@ def calc_retrieval_metrics(
return metrics


def calc_topological_metrics(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> TMetricsDict:
def calc_topological_metrics(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> TMetricsDict:
"""
Function to evaluate different topological metrics.
Args:
embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
of variance that needs to be explained is greater than the percentage specified
by ``pfc_variance``.
by ``pcf_variance``.
Returns:
Metrics dictionary.
"""
metrics: TMetricsDict = dict()

if pfc_variance:
main_components = calc_pcf(embeddings, pfc_variance)
metrics["pcf"] = dict(zip(pfc_variance, main_components))
if pcf_variance:
main_components = calc_pcf(embeddings, pcf_variance)
metrics["pcf"] = dict(zip(pcf_variance, main_components))

return metrics

Expand Down Expand Up @@ -484,16 +484,16 @@ def calc_fnmr_at_fmr(pos_dist: Tensor, neg_dist: Tensor, fmr_vals: Tuple[float,
return fnmr_at_fmr


def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor]:
def calc_pcf(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> List[Tensor]:
"""
Function estimates the Principal Components Fraction (PCF) of embeddings using Principal Component Analysis.
The metric is defined as a fraction of components needed to explain the required variance in data.
Args:
embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
of variance that needs to be explained is greater than the fraction specified
by ``pfc_variance``.
by ``pcf_variance``.
Returns:
List of linear dimensions as fractions of the embeddings dimension.
Expand Down Expand Up @@ -527,22 +527,22 @@ def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor
because the number of principal axes is superior to the desired explained variance threshold).
>>> embeddings = torch.eye(4, 10, dtype=torch.float)
>>> calc_pcf(embeddings, pfc_variance=(0.5, 1))
>>> calc_pcf(embeddings, pcf_variance=(0.5, 1))
tensor([0.2000, 0.5000])
"""
# The code below mirrors code from scikit-learn repository:
# https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/decomposition/_pca.py#L491
_check_if_in_range(pfc_variance, 0, 1, "pfc_variance")
_check_if_in_range(pcf_variance, 0, 1, "pcf_variance")
try:
pca = PCA(embeddings)
n_components = pca.calc_principal_axes_number(pfc_variance).to(embeddings)
n_components = pca.calc_principal_axes_number(pcf_variance).to(embeddings)
metric = n_components / embeddings.shape[1]
except Exception:
# Mostly we handle the following error here:
# >>> The algorithm failed to converge because the input matrix is ill-conditioned
# >>> or has too many repeated singular values
metric = [torch.tensor(float("nan"))] * len(pfc_variance)
metric = [torch.tensor(float("nan"))] * len(pcf_variance)

return metric

Expand Down
10 changes: 5 additions & 5 deletions oml/metrics/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(
precision_top_k: Tuple[int, ...] = (5,),
map_top_k: Tuple[int, ...] = (5,),
fmr_vals: Tuple[float, ...] = tuple(),
pfc_variance: Tuple[float, ...] = (0.5,),
pcf_variance: Tuple[float, ...] = (0.5,),
categories_key: Optional[str] = None,
sequence_key: Optional[str] = None,
postprocessor: Optional[IDistancesPostprocessor] = None,
Expand All @@ -102,9 +102,9 @@ def __init__(
and ``fnmr@fmr=0.4``.
Note, computing this metric requires additional memory overhead,
that is why it's turned off by default.
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
of variance that needs to be explained is greater than the percentage specified
by ``pfc_variance``.
by ``pcf_variance``.
categories_key: Key to take the samples' categories from the batches (if you have ones)
sequence_key: Key to take sequence ids from the batches (if you have ones)
postprocessor: Postprocessor which applies some techniques like query reranking
Expand All @@ -124,7 +124,7 @@ def __init__(
self.precision_top_k = precision_top_k
self.map_top_k = map_top_k
self.fmr_vals = fmr_vals
self.pfc_variance = pfc_variance
self.pcf_variance = pcf_variance

self.categories_key = categories_key
self.sequence_key = sequence_key
Expand Down Expand Up @@ -205,7 +205,7 @@ def compute_metrics(self) -> TMetricsDict_ByLabels: # type: ignore
"map_top_k": self.map_top_k,
"fmr_vals": self.fmr_vals,
}
args_topological_metrics = {"pfc_variance": self.pfc_variance}
args_topological_metrics = {"pcf_variance": self.pcf_variance}

metrics: TMetricsDict_ByLabels = dict()

Expand Down
10 changes: 5 additions & 5 deletions oml/utils/misc_torch.py
Original file line number Diff line number Diff line change
Expand Up @@ -406,15 +406,15 @@ def inverse_transform(self, embeddings: torch.Tensor) -> torch.Tensor:
self._check_dimensions(n_components)
return torch.matmul(embeddings, self.components[:n_components, :]) + self.mean

def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.Tensor:
def calc_principal_axes_number(self, pcf_variance: Tuple[float, ...]) -> torch.Tensor:
"""
Function estimates the number of principal axes that are required to explain the fraction of variance
given by each value of ``pcf_variance``.
Args:
pfc_variance: Values in range [0, 1]. Find the number of components such that the amount
pcf_variance: Values in range [0, 1]. Find the number of components such that the amount
of variance that needs to be explained is greater than the fraction specified
by ``pfc_variance``.
by ``pcf_variance``.
Returns:
List of amount of principal axes.
Expand All @@ -437,12 +437,12 @@ def calc_principal_axes_number(self, pfc_variance: Tuple[float, ...]) -> torch.T
>>> embeddings = torch.eye(4, 10, dtype=torch.float)
>>> pca = PCA(embeddings)
>>> pca.calc_principal_axes_number(pfc_variance=(0.5, 1))
>>> pca.calc_principal_axes_number(pcf_variance=(0.5, 1))
tensor([2, 5])
"""
ratio_cumsum = torch.cumsum(self.explained_variance_ratio, dim=0)
n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pfc_variance), side="right") + 1
n_components = torch.searchsorted(ratio_cumsum, torch.tensor(pcf_variance), side="right") + 1
return n_components

def _check_dimensions(self, n_components: int) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ metric_args:
metrics_to_exclude_from_visualization: [cmc,]
cmc_top_k: [1]
map_top_k: [5]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ metric_args:
cmc_top_k: [1]
map_top_k: [5]
fmr_vals: [1]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ metric_args:
cmc_top_k: [1] # to calculate cmc@1
map_top_k: [5] # to calculate map@5
fmr_vals: [0.01] # to calculate fnmr@0.01
pfc_variance: [0.5, 0.9, 0.99] # to calculate pfc@0.5, pfc@0.9, pfc@0.99
pcf_variance: [0.5, 0.9, 0.99] # to calculate pcf@0.5, pcf@0.9, pcf@0.99
return_only_overall_category: True # set False if you want to see metric graphs for all the categories (doesn't matter for CARS, since it contains no categories)
visualize_only_overall_category: True # set False to see images where the model performed worse for each separated category (doesn't matter for CARS, since it contains no categories)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/features_extraction/extractor_cars/val_cars.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ metric_args:
map_top_k: [5]
precision_top_k: [5]
fmr_vals: [0.01]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: False
visualize_only_overall_category: True

Expand Down
2 changes: 1 addition & 1 deletion pipelines/features_extraction/extractor_cub/train_cub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ metric_args:
cmc_top_k: [1]
map_top_k: [5]
fmr_vals: [0.01]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
2 changes: 1 addition & 1 deletion pipelines/features_extraction/extractor_cub/val_cub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ metric_args:
map_top_k: [5]
fmr_vals: [0.01]
precision_top_k: [5]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: False
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ metric_args:
cmc_top_k: [1]
map_top_k: [5]
fmr_vals: [] # Since InShop is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ metric_args:
cmc_top_k: [1, 5]
map_top_k: [5]
precision_top_k: [5]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
2 changes: 1 addition & 1 deletion pipelines/features_extraction/extractor_sop/train_sop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ metric_args:
cmc_top_k: [1]
map_top_k: [5]
fmr_vals: [] # Since SOP is a big dataset you should be careful with increasing of the memory footprint, which is needed to calculate fmr
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
2 changes: 1 addition & 1 deletion pipelines/features_extraction/extractor_sop/val_sop.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ metric_args:
metrics_to_exclude_from_visualization: [cmc,]
cmc_top_k: [1, 5]
map_top_k: [5]
pfc_variance: [0.5, 0.9, 0.99]
pcf_variance: [0.5, 0.9, 0.99]
return_only_overall_category: False
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ metric_args:
cmc_top_k: [1, 10, 20, 30, 100]
map_top_k: [5, 10]
fmr_vals: []
pfc_variance: []
pcf_variance: []
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ metric_args:
cmc_top_k: [1, 10, 20, 30, 100]
map_top_k: [5, 10]
precision_top_k: []
pfc_variance: []
pcf_variance: []
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ metric_args:
cmc_top_k: [1, 10, 20, 30, 100]
map_top_k: [5, 10]
fmr_vals: []
pfc_variance: []
pcf_variance: []
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ metric_args:
map_top_k: [5, 10]
fmr_vals: []
precision_top_k: []
pfc_variance: []
pcf_variance: []
return_only_overall_category: True
visualize_only_overall_category: True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def eye_case() -> Tuple[torch.Tensor, TMetricsDict]:

def test_calc_topological_metrics(eye_case: Tuple[torch.Tensor, TMetricsDict]) -> None:
embeddings, metrics_expected = eye_case
args = {"pfc_variance": tuple(metrics_expected["pcf"].keys())}
args = {"pcf_variance": tuple(metrics_expected["pcf"].keys())}
metrics_evaluated = calc_topological_metrics(embeddings, **args)
compare_dicts_recursively(metrics_evaluated, metrics_expected)

Expand All @@ -31,12 +31,12 @@ def test_calc_functions(
metric_func: Callable[[torch.Tensor, Tuple[int, ...]], torch.Tensor],
) -> None:
embeddings, metrics_expected = eye_case
pfc_variance = tuple(metrics_expected[metric_name].keys())
kwargs = {"embeddings": embeddings, "pfc_variance": pfc_variance}
pcf_variance = tuple(metrics_expected[metric_name].keys())
kwargs = {"embeddings": embeddings, "pcf_variance": pcf_variance}

kwargs = remove_unused_kwargs(kwargs, metric_func)
main_components_percentage = metric_func(**kwargs) # type: ignore
metrics_calculated = dict(zip(pfc_variance, main_components_percentage))
metrics_calculated = dict(zip(pcf_variance, main_components_percentage))
for p in metrics_expected[metric_name].keys():
values_expected = metrics_expected[metric_name][p]
values_calculated = metrics_calculated[p]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_oml/test_metrics/test_embedding_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def run_retrieval_metrics(case) -> None: # type: ignore
precision_top_k=tuple(),
map_top_k=tuple(),
fmr_vals=tuple(),
pfc_variance=tuple(),
pcf_variance=tuple(),
postprocessor=get_trivial_postprocessor(top_n=2),
)

Expand Down Expand Up @@ -199,7 +199,7 @@ def run_across_epochs(case1, case2) -> None: # type: ignore
precision_top_k=tuple(),
map_top_k=tuple(),
fmr_vals=tuple(),
pfc_variance=tuple(),
pcf_variance=tuple(),
postprocessor=get_trivial_postprocessor(top_n=3),
)

Expand Down

0 comments on commit 4f43b5f

Please sign in to comment.