fixed buggy behaviour for n_comp = 1

comane · comane · commit 3c7cfdc92414 · 2025-02-09T12:57:56.000Z
diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py
@@ -1,7 +1,10 @@
 """
-closuretest/results.py
+closuretest/closure_results.py
+
+Module containing actions to calculate single closure test estimators.
+This is useful for quickly checking the
+bias of a fit without having to run the full multiclosure analysis.
 
-underlying actions to calculate closure test estimators plus some table actions
 """
 from collections import namedtuple
 
diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py
@@ -199,7 +199,7 @@ class RegularizedMulticlosureLoader(MulticlosureLoader):
 @check_multifit_replicas
 def regularized_multiclosure_dataset_loader(
     multiclosure_dataset_loader: MulticlosureLoader,
-    explained_variance_ratio=0.99,
+    explained_variance_ratio=0.95,
     _internal_max_reps=None,
     _internal_min_reps=20,
 ) -> RegularizedMulticlosureLoader:
@@ -211,7 +211,7 @@ def regularized_multiclosure_dataset_loader(
     Parameters
     ----------
     multiclosure_dataset_loader: MulticlosureLoader
-    explained_variance_ratio: float, default is 0.99
+    explained_variance_ratio: float, default is 0.95
     _internal_max_reps: int, default is None
         Maximum number of replicas used in the fits
         this is needed to check that the number of replicas is the same for all fits
@@ -236,7 +236,7 @@ def regularized_multiclosure_dataset_loader(
             n_comp=1,
             reg_covmat_reps_mean=covmat_reps_mean,
             sqrt_reg_covmat_reps_mean=np.sqrt(covmat_reps_mean),
-            std_covmat_reps=np.sqrt(np.diag(covmat_reps_mean)),
+            std_covmat_reps=np.sqrt(covmat_reps_mean),
         )
 
     # diagonalize the mean covariance matrix and only keep the principal components
@@ -286,7 +286,7 @@ def regularized_multiclosure_dataset_loader(
 @check_multifit_replicas
 def regularized_multiclosure_data_loader(
     multiclosure_data_loader: MulticlosureLoader,
-    explained_variance_ratio=0.99,
+    explained_variance_ratio=0.95,
     _internal_max_reps=None,
     _internal_min_reps=20,
 ):
@@ -298,7 +298,7 @@ def regularized_multiclosure_data_loader(
     Parameters
     ----------
     multiclosure_data_loader: MulticlosureLoader
-    explained_variance_ratio: float, default is 0.99
+    explained_variance_ratio: float, default is 0.95
     _internal_max_reps: int, default is None
         Maximum number of replicas used in the fits
         this is needed to check that the number of replicas is the same for all fits
@@ -394,7 +394,6 @@ def compute_normalized_bias(
     np.array
         Array of shape len(fits) containing the normalized bias for each fit.
     """
-    # TODO
     closure_theories = regularized_multiclosure_loader.closure_theories
     law_theory = regularized_multiclosure_loader.law_theory
     n_comp = regularized_multiclosure_loader.n_comp
@@ -408,10 +407,6 @@ def compute_normalized_bias(
     delta_bias = reps.mean(axis=2).T - law_theory.central_value[:, np.newaxis]
 
     if n_comp == 1:
-        # TODO: this bit still needs to be tested
-        import IPython
-
-        IPython.embed()
         delta_bias = pc_basis * delta_bias
         if corrmat:
             delta_bias /= std_covmat_reps
@@ -424,7 +419,7 @@ def compute_normalized_bias(
             delta_bias = pc_basis.T @ delta_bias
         biases = calc_chi2(sqrt_reg_covmat_reps_mean, delta_bias)
 
-    return biases
+    return biases / n_comp
 
 
 def bias_dataset(regularized_multiclosure_dataset_loader):
@@ -444,7 +439,7 @@ def bias_dataset(regularized_multiclosure_dataset_loader):
     """
     bias_fits = compute_normalized_bias(regularized_multiclosure_dataset_loader, corrmat=False)
     n_comp = regularized_multiclosure_dataset_loader.n_comp
-    return bias_fits / n_comp, n_comp
+    return bias_fits, n_comp
 
 
 """
@@ -459,7 +454,7 @@ def bias_data(regularized_multiclosure_data_loader):
     """
     bias_fits = compute_normalized_bias(regularized_multiclosure_data_loader, corrmat=True)
     n_comp = regularized_multiclosure_data_loader.n_comp
-    return bias_fits / n_comp, n_comp
+    return bias_fits, n_comp
 
 
 def normalized_delta_bias_data(
@@ -496,7 +491,6 @@ def normalized_delta_bias_data(
     # compute bias diff and project it onto space spanned by PCs
     delta_bias = reps.mean(axis=2).T - law_th.central_value[:, np.newaxis]
 
-    # TODO: need to understand the n_comp case
     if n_comp == 1:
         # For full data we regularize the correlation matrix
         delta_bias = pc_basis.T @ (delta_bias / std_covmat_reps)
diff --git a/validphys2/src/validphys/closuretest/multiclosure_bootstrap.py b/validphys2/src/validphys/closuretest/multiclosure_bootstrap.py
@@ -171,7 +171,7 @@ def bootstrapped_regularized_multiclosure_dataset_loader(
     n_rep: int,
     n_boot_multiclosure: int,
     use_repeats: bool = True,
-    explained_variance_ratio: float = 0.99,
+    explained_variance_ratio: float = 0.95,
     _internal_max_reps=None,
     _internal_min_reps=20,
 ) -> tuple:
@@ -212,7 +212,7 @@ def bootstrapped_regularized_multiclosure_data_loader(
     n_rep: int,
     n_boot_multiclosure: int,
     use_repeats: bool = True,
-    explained_variance_ratio: float = 0.99,
+    explained_variance_ratio: float = 0.95,
     _internal_max_reps=None,
     _internal_min_reps=20,
 ) -> tuple:
@@ -264,7 +264,7 @@ def bootstrapped_bias_dataset(bootstrapped_regularized_multiclosure_dataset_load
 
 
 """
-TODO
+Collect `bootstrapped_bias_dataset` over all datasets.
 """
 bootstrapped_bias_datasets = collect("bootstrapped_bias_dataset", ("data",))
 
@@ -300,8 +300,7 @@ def bootstrapped_bias_data(bootstrapped_regularized_multiclosure_data_loader):
 def bootstrapped_normalized_delta_bias_data(bootstrapped_regularized_multiclosure_data_loader):
     """
     Compute the normalized deltas for each bootstrap sample.
-
-    TODO: Add more details on what deltas are
+    Note: delta is the bias in the diagonal basis.
 
     Parameters
     ----------
diff --git a/validphys2/src/validphys/closuretest/multiclosure_pdf.py b/validphys2/src/validphys/closuretest/multiclosure_pdf.py
@@ -7,16 +7,19 @@
 ``multiclosure_pdf_output.py``
 
 """
+
 import numpy as np
 import scipy.linalg as la
 import scipy.special
 
 from reportengine import collect
 from validphys.calcutils import calc_chi2
-from validphys.closuretest.multiclosure import DEFAULT_SEED
 from validphys.core import PDF
 from validphys.pdfgrids import xplotting_grid
 
+
+DEFAULT_SEED = 1234
+
 # Define the NN31IC basis with the charm PDF excluded. It is excluded because
 # the exercises carried out with this module are intended to be done in the
 # data region and at the fitting scale, where the charm is noisy. Results
@@ -65,19 +68,11 @@ def xi_pdfgrids(pdf: PDF, Q: (float, int), internal_singlet_gluon_xgrid, interna
     """
     # NOTE: Could we hardcode Q to the initial scale/infer from fits?
     singlet_gluon_grid = xplotting_grid(
-        pdf,
-        Q,
-        xgrid=internal_singlet_gluon_xgrid,
-        basis="NN31IC",
-        flavours=XI_FLAVOURS[:2],
+        pdf, Q, xgrid=internal_singlet_gluon_xgrid, basis="NN31IC", flavours=XI_FLAVOURS[:2]
     )
 
     nonsinglet_grid = xplotting_grid(
-        pdf,
-        Q,
-        xgrid=internal_nonsinglet_xgrid,
-        basis="NN31IC",
-        flavours=XI_FLAVOURS[2:],
+        pdf, Q, xgrid=internal_nonsinglet_xgrid, basis="NN31IC", flavours=XI_FLAVOURS[2:]
     )
     return singlet_gluon_grid, nonsinglet_grid
 
@@ -102,10 +97,7 @@ def underlying_xi_grid_values(
     from a set of fits
     """
     underlying_grid = xi_pdfgrids(
-        multiclosure_underlyinglaw,
-        Q,
-        internal_singlet_gluon_xgrid,
-        internal_nonsinglet_xgrid,
+        multiclosure_underlyinglaw, Q, internal_singlet_gluon_xgrid, internal_nonsinglet_xgrid
     )
     return xi_grid_values(underlying_grid)
 
@@ -376,10 +368,7 @@ def fits_bootstrap_pdf_ratio(
     for _ in range(n_boot):
         # perform single bootstrap
         boot_central_diff, boot_rep_diff = bootstrap_pdf_differences(
-            fits_xi_grid_values,
-            underlying_xi_grid_values,
-            multiclosure_underlyinglaw,
-            rng,
+            fits_xi_grid_values, underlying_xi_grid_values, multiclosure_underlyinglaw, rng
         )
         # need various dependencies for ratio actions
         flav_cov = fits_covariance_matrix_by_flavour(boot_rep_diff)
diff --git a/validphys2/src/validphys/closuretest/multiclosure_pdf_output.py b/validphys2/src/validphys/closuretest/multiclosure_pdf_output.py
@@ -5,6 +5,7 @@
 PDF space.
 
 """
+
 from matplotlib.ticker import MaxNLocator
 import numpy as np
 import pandas as pd
@@ -14,8 +15,8 @@
 from reportengine.figure import figure, figuregen
 from reportengine.table import table
 from validphys import plotutils
-from validphys.closuretest.multiclosure import DEFAULT_SEED
 from validphys.closuretest.multiclosure_pdf import (
+    DEFAULT_SEED,
     XI_FLAVOURS,
     bootstrap_pdf_differences,
     fits_covariance_matrix_by_flavour,
@@ -104,12 +105,7 @@ def plot_pdf_central_diff_histogram(replica_and_central_diff_totalpdf):
     ax.set_xlim(xlim)
 
     x = np.linspace(*xlim, 100)
-    ax.plot(
-        x,
-        scipy.stats.norm.pdf(x),
-        "-k",
-        label="Normal distribution",
-    )
+    ax.plot(x, scipy.stats.norm.pdf(x), "-k", label="Normal distribution")
     ax.legend()
     ax.set_xlabel("Difference to input PDF")
     return fig
@@ -192,10 +188,7 @@ def fits_bootstrap_pdf_xi_table(
     for _ in range(n_boot):
         # perform single bootstrap
         boot_central_diff, boot_rep_diff = bootstrap_pdf_differences(
-            fits_xi_grid_values,
-            underlying_xi_grid_values,
-            multiclosure_underlyinglaw,
-            rng,
+            fits_xi_grid_values, underlying_xi_grid_values, multiclosure_underlyinglaw, rng
         )
 
         flav_cov = fits_covariance_matrix_by_flavour(boot_rep_diff)
@@ -210,11 +203,7 @@ def fits_bootstrap_pdf_xi_table(
     # construct table in this action, since bootstrap rawdata isn't required elsewhere
     index = pd.Index([f"${XI_FLAVOURS[0]}$", *XI_FLAVOURS[1:], "Total"], name="flavour")
     res = np.concatenate(
-        (
-            np.mean(xi_boot, axis=0)[:, np.newaxis],
-            np.std(xi_boot, axis=0)[:, np.newaxis],
-        ),
-        axis=1,
+        (np.mean(xi_boot, axis=0)[:, np.newaxis], np.std(xi_boot, axis=0)[:, np.newaxis]), axis=1
     )
     return pd.DataFrame(
         res,
@@ -241,9 +230,7 @@ def fits_bootstrap_pdf_sqrt_ratio_table(fits_bootstrap_pdf_sqrt_ratio):
         axis=1,
     )
     return pd.DataFrame(
-        res,
-        columns=[r"bootstrap mean sqrt ratio", r"bootstrap std. dev. sqrt ratio"],
-        index=index,
+        res, columns=[r"bootstrap mean sqrt ratio", r"bootstrap std. dev. sqrt ratio"], index=index
     )
 
 
diff --git a/validphys2/src/validphys/compareclosuretemplates/comparecard.yaml b/validphys2/src/validphys/compareclosuretemplates/comparecard.yaml
@@ -69,4 +69,4 @@ pdfscalespecs:
 Q: 1.651
 
 actions_:
-  - report(main=true)
+  - bias_data #report(main=true)