@@ -109,25 +109,25 @@ def calc_retrieval_metrics(
109
109
return metrics
110
110
111
111
112
def calc_topological_metrics(embeddings: Tensor, pcf_variance: Tuple[float, ...]) -> TMetricsDict:
    """
    Function to compute the topological metrics of the embeddings.

    Args:
        embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
        pcf_variance: Values in range [0, 1]. For each value, find the number of components
            such that the amount of explained variance is greater than the fraction
            specified by ``pcf_variance``.

    Returns:
        Metrics dictionary. It contains the ``"pcf"`` key (mapping each requested variance
        value to its result) only if ``pcf_variance`` is not empty.

    """
    topological: TMetricsDict = {}

    # Nothing was requested, so there is nothing to compute.
    if not pcf_variance:
        return topological

    fractions = calc_pcf(embeddings, pcf_variance)
    # Pair every requested variance threshold with the corresponding result.
    topological["pcf"] = {threshold: fraction for threshold, fraction in zip(pcf_variance, fractions)}

    return topological
133
133
@@ -484,16 +484,16 @@ def calc_fnmr_at_fmr(pos_dist: Tensor, neg_dist: Tensor, fmr_vals: Tuple[float,
484
484
return fnmr_at_fmr
485
485
486
486
487
- def calc_pcf (embeddings : Tensor , pfc_variance : Tuple [float , ...]) -> List [Tensor ]:
487
+ def calc_pcf (embeddings : Tensor , pcf_variance : Tuple [float , ...]) -> List [Tensor ]:
488
488
"""
489
489
Function estimates the Principal Components Fraction (PCF) of embeddings using Principal Component Analysis.
490
490
The metric is defined as a fraction of components needed to explain the required variance in data.
491
491
492
492
Args:
493
493
embeddings: Embeddings matrix with the shape of ``[n_embeddings, embeddings_dim]``.
494
- pfc_variance : Values in range [0, 1]. Find the number of components such that the amount
494
+ pcf_variance : Values in range [0, 1]. Find the number of components such that the amount
495
495
of variance that needs to be explained is greater than the fraction specified
496
- by ``pfc_variance ``.
496
+ by ``pcf_variance ``.
497
497
Returns:
498
498
List of linear dimensions as fractions of the embeddings dimension.
499
499
@@ -527,22 +527,22 @@ def calc_pcf(embeddings: Tensor, pfc_variance: Tuple[float, ...]) -> List[Tensor
527
527
because the number of principal axes is superior to the desired explained variance threshold).
528
528
529
529
>>> embeddings = torch.eye(4, 10, dtype=torch.float)
530
- >>> calc_pcf(embeddings, pfc_variance =(0.5, 1))
530
+ >>> calc_pcf(embeddings, pcf_variance =(0.5, 1))
531
531
tensor([0.2000, 0.5000])
532
532
533
533
"""
534
534
# The code below mirrors code from scikit-learn repository:
535
535
# https://github.com/scikit-learn/scikit-learn/blob/f3f51f9b6/sklearn/decomposition/_pca.py#L491
536
- _check_if_in_range (pfc_variance , 0 , 1 , "pfc_variance " )
536
+ _check_if_in_range (pcf_variance , 0 , 1 , "pcf_variance " )
537
537
try :
538
538
pca = PCA (embeddings )
539
- n_components = pca .calc_principal_axes_number (pfc_variance ).to (embeddings )
539
+ n_components = pca .calc_principal_axes_number (pcf_variance ).to (embeddings )
540
540
metric = n_components / embeddings .shape [1 ]
541
541
except Exception :
542
542
# Mostly we handle the following error here:
543
543
# >>> The algorithm failed to converge because the input matrix is ill-conditioned
544
544
# >>> or has too many repeated singular values
545
- metric = [torch .tensor (float ("nan" ))] * len (pfc_variance )
545
+ metric = [torch .tensor (float ("nan" ))] * len (pcf_variance )
546
546
547
547
return metric
548
548
0 commit comments