Skip to content

Commit 3c7cfdc

Browse files
committed
fixed buggy behaviour for n_comp = 1
1 parent 7f55cde commit 3c7cfdc

File tree

6 files changed

+32
-60
lines changed

6 files changed

+32
-60
lines changed

validphys2/src/validphys/closuretest/closure_results.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
"""
2-
closuretest/results.py
2+
closuretest/closure_results.py
3+
4+
Module containing actiosn to calculate sigle closure test estimators.
5+
This is useful for quickly checking the
6+
bias of a fit without having to run the full multiclosure analysis.
37
4-
underlying actions to calculate closure test estimators plus some table actions
58
"""
69
from collections import namedtuple
710

validphys2/src/validphys/closuretest/multiclosure.py

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ class RegularizedMulticlosureLoader(MulticlosureLoader):
199199
@check_multifit_replicas
200200
def regularized_multiclosure_dataset_loader(
201201
multiclosure_dataset_loader: MulticlosureLoader,
202-
explained_variance_ratio=0.99,
202+
explained_variance_ratio=0.95,
203203
_internal_max_reps=None,
204204
_internal_min_reps=20,
205205
) -> RegularizedMulticlosureLoader:
@@ -211,7 +211,7 @@ def regularized_multiclosure_dataset_loader(
211211
Parameters
212212
----------
213213
multiclosure_dataset_loader: MulticlosureLoader
214-
explained_variance_ratio: float, default is 0.99
214+
explained_variance_ratio: float, default is 0.95
215215
_internal_max_reps: int, default is None
216216
Maximum number of replicas used in the fits
217217
this is needed to check that the number of replicas is the same for all fits
@@ -236,7 +236,7 @@ def regularized_multiclosure_dataset_loader(
236236
n_comp=1,
237237
reg_covmat_reps_mean=covmat_reps_mean,
238238
sqrt_reg_covmat_reps_mean=np.sqrt(covmat_reps_mean),
239-
std_covmat_reps=np.sqrt(np.diag(covmat_reps_mean)),
239+
std_covmat_reps=np.sqrt(covmat_reps_mean),
240240
)
241241

242242
# diagonalize the mean covariance matrix and only keep the principal components
@@ -286,7 +286,7 @@ def regularized_multiclosure_dataset_loader(
286286
@check_multifit_replicas
287287
def regularized_multiclosure_data_loader(
288288
multiclosure_data_loader: MulticlosureLoader,
289-
explained_variance_ratio=0.99,
289+
explained_variance_ratio=0.95,
290290
_internal_max_reps=None,
291291
_internal_min_reps=20,
292292
):
@@ -298,7 +298,7 @@ def regularized_multiclosure_data_loader(
298298
Parameters
299299
----------
300300
multiclosure_data_loader: MulticlosureLoader
301-
explained_variance_ratio: float, default is 0.99
301+
explained_variance_ratio: float, default is 0.95
302302
_internal_max_reps: int, default is None
303303
Maximum number of replicas used in the fits
304304
this is needed to check that the number of replicas is the same for all fits
@@ -394,7 +394,6 @@ def compute_normalized_bias(
394394
np.array
395395
Array of shape len(fits) containing the normalized bias for each fit.
396396
"""
397-
# TODO
398397
closure_theories = regularized_multiclosure_loader.closure_theories
399398
law_theory = regularized_multiclosure_loader.law_theory
400399
n_comp = regularized_multiclosure_loader.n_comp
@@ -408,10 +407,6 @@ def compute_normalized_bias(
408407
delta_bias = reps.mean(axis=2).T - law_theory.central_value[:, np.newaxis]
409408

410409
if n_comp == 1:
411-
# TODO: this bit still needs to be tested
412-
import IPython
413-
414-
IPython.embed()
415410
delta_bias = pc_basis * delta_bias
416411
if corrmat:
417412
delta_bias /= std_covmat_reps
@@ -424,7 +419,7 @@ def compute_normalized_bias(
424419
delta_bias = pc_basis.T @ delta_bias
425420
biases = calc_chi2(sqrt_reg_covmat_reps_mean, delta_bias)
426421

427-
return biases
422+
return biases / n_comp
428423

429424

430425
def bias_dataset(regularized_multiclosure_dataset_loader):
@@ -444,7 +439,7 @@ def bias_dataset(regularized_multiclosure_dataset_loader):
444439
"""
445440
bias_fits = compute_normalized_bias(regularized_multiclosure_dataset_loader, corrmat=False)
446441
n_comp = regularized_multiclosure_dataset_loader.n_comp
447-
return bias_fits / n_comp, n_comp
442+
return bias_fits, n_comp
448443

449444

450445
"""
@@ -459,7 +454,7 @@ def bias_data(regularized_multiclosure_data_loader):
459454
"""
460455
bias_fits = compute_normalized_bias(regularized_multiclosure_data_loader, corrmat=True)
461456
n_comp = regularized_multiclosure_data_loader.n_comp
462-
return bias_fits / n_comp, n_comp
457+
return bias_fits, n_comp
463458

464459

465460
def normalized_delta_bias_data(
@@ -496,7 +491,6 @@ def normalized_delta_bias_data(
496491
# compute bias diff and project it onto space spanned by PCs
497492
delta_bias = reps.mean(axis=2).T - law_th.central_value[:, np.newaxis]
498493

499-
# TODO: need to understand the n_comp case
500494
if n_comp == 1:
501495
# For full data we regularize the correlation matrix
502496
delta_bias = pc_basis.T @ (delta_bias / std_covmat_reps)

validphys2/src/validphys/closuretest/multiclosure_bootstrap.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def bootstrapped_regularized_multiclosure_dataset_loader(
171171
n_rep: int,
172172
n_boot_multiclosure: int,
173173
use_repeats: bool = True,
174-
explained_variance_ratio: float = 0.99,
174+
explained_variance_ratio: float = 0.95,
175175
_internal_max_reps=None,
176176
_internal_min_reps=20,
177177
) -> tuple:
@@ -212,7 +212,7 @@ def bootstrapped_regularized_multiclosure_data_loader(
212212
n_rep: int,
213213
n_boot_multiclosure: int,
214214
use_repeats: bool = True,
215-
explained_variance_ratio: float = 0.99,
215+
explained_variance_ratio: float = 0.95,
216216
_internal_max_reps=None,
217217
_internal_min_reps=20,
218218
) -> tuple:
@@ -264,7 +264,7 @@ def bootstrapped_bias_dataset(bootstrapped_regularized_multiclosure_dataset_load
264264

265265

266266
"""
267-
TODO
267+
Collect `bootstrapped_bias_dataset` over all datasets.
268268
"""
269269
bootstrapped_bias_datasets = collect("bootstrapped_bias_dataset", ("data",))
270270

@@ -300,8 +300,7 @@ def bootstrapped_bias_data(bootstrapped_regularized_multiclosure_data_loader):
300300
def bootstrapped_normalized_delta_bias_data(bootstrapped_regularized_multiclosure_data_loader):
301301
"""
302302
Compute the normalized deltas for each bootstrap sample.
303-
304-
TODO: Add more details on what deltas are
303+
Note: delta is the bias in the diagonal basis.
305304
306305
Parameters
307306
----------

validphys2/src/validphys/closuretest/multiclosure_pdf.py

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,19 @@
77
``multiclosure_pdf_output.py``
88
99
"""
10+
1011
import numpy as np
1112
import scipy.linalg as la
1213
import scipy.special
1314

1415
from reportengine import collect
1516
from validphys.calcutils import calc_chi2
16-
from validphys.closuretest.multiclosure import DEFAULT_SEED
1717
from validphys.core import PDF
1818
from validphys.pdfgrids import xplotting_grid
1919

20+
21+
DEFAULT_SEED = 1234
22+
2023
# Define the NN31IC basis with the charm PDF excluded. It is excluded because
2124
# the exercises carried out with this module are intended to be done in the
2225
# data region and at the fitting scale, where the charm is noisy. Results
@@ -65,19 +68,11 @@ def xi_pdfgrids(pdf: PDF, Q: (float, int), internal_singlet_gluon_xgrid, interna
6568
"""
6669
# NOTE: Could we hardcode Q to the initial scale/infer from fits?
6770
singlet_gluon_grid = xplotting_grid(
68-
pdf,
69-
Q,
70-
xgrid=internal_singlet_gluon_xgrid,
71-
basis="NN31IC",
72-
flavours=XI_FLAVOURS[:2],
71+
pdf, Q, xgrid=internal_singlet_gluon_xgrid, basis="NN31IC", flavours=XI_FLAVOURS[:2]
7372
)
7473

7574
nonsinglet_grid = xplotting_grid(
76-
pdf,
77-
Q,
78-
xgrid=internal_nonsinglet_xgrid,
79-
basis="NN31IC",
80-
flavours=XI_FLAVOURS[2:],
75+
pdf, Q, xgrid=internal_nonsinglet_xgrid, basis="NN31IC", flavours=XI_FLAVOURS[2:]
8176
)
8277
return singlet_gluon_grid, nonsinglet_grid
8378

@@ -102,10 +97,7 @@ def underlying_xi_grid_values(
10297
from a set of fits
10398
"""
10499
underlying_grid = xi_pdfgrids(
105-
multiclosure_underlyinglaw,
106-
Q,
107-
internal_singlet_gluon_xgrid,
108-
internal_nonsinglet_xgrid,
100+
multiclosure_underlyinglaw, Q, internal_singlet_gluon_xgrid, internal_nonsinglet_xgrid
109101
)
110102
return xi_grid_values(underlying_grid)
111103

@@ -376,10 +368,7 @@ def fits_bootstrap_pdf_ratio(
376368
for _ in range(n_boot):
377369
# perform single bootstrap
378370
boot_central_diff, boot_rep_diff = bootstrap_pdf_differences(
379-
fits_xi_grid_values,
380-
underlying_xi_grid_values,
381-
multiclosure_underlyinglaw,
382-
rng,
371+
fits_xi_grid_values, underlying_xi_grid_values, multiclosure_underlyinglaw, rng
383372
)
384373
# need various dependencies for ratio actions
385374
flav_cov = fits_covariance_matrix_by_flavour(boot_rep_diff)

validphys2/src/validphys/closuretest/multiclosure_pdf_output.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
PDF space.
66
77
"""
8+
89
from matplotlib.ticker import MaxNLocator
910
import numpy as np
1011
import pandas as pd
@@ -14,8 +15,8 @@
1415
from reportengine.figure import figure, figuregen
1516
from reportengine.table import table
1617
from validphys import plotutils
17-
from validphys.closuretest.multiclosure import DEFAULT_SEED
1818
from validphys.closuretest.multiclosure_pdf import (
19+
DEFAULT_SEED,
1920
XI_FLAVOURS,
2021
bootstrap_pdf_differences,
2122
fits_covariance_matrix_by_flavour,
@@ -104,12 +105,7 @@ def plot_pdf_central_diff_histogram(replica_and_central_diff_totalpdf):
104105
ax.set_xlim(xlim)
105106

106107
x = np.linspace(*xlim, 100)
107-
ax.plot(
108-
x,
109-
scipy.stats.norm.pdf(x),
110-
"-k",
111-
label="Normal distribution",
112-
)
108+
ax.plot(x, scipy.stats.norm.pdf(x), "-k", label="Normal distribution")
113109
ax.legend()
114110
ax.set_xlabel("Difference to input PDF")
115111
return fig
@@ -192,10 +188,7 @@ def fits_bootstrap_pdf_xi_table(
192188
for _ in range(n_boot):
193189
# perform single bootstrap
194190
boot_central_diff, boot_rep_diff = bootstrap_pdf_differences(
195-
fits_xi_grid_values,
196-
underlying_xi_grid_values,
197-
multiclosure_underlyinglaw,
198-
rng,
191+
fits_xi_grid_values, underlying_xi_grid_values, multiclosure_underlyinglaw, rng
199192
)
200193

201194
flav_cov = fits_covariance_matrix_by_flavour(boot_rep_diff)
@@ -210,11 +203,7 @@ def fits_bootstrap_pdf_xi_table(
210203
# construct table in this action, since bootstrap rawdata isn't required elsewhere
211204
index = pd.Index([f"${XI_FLAVOURS[0]}$", *XI_FLAVOURS[1:], "Total"], name="flavour")
212205
res = np.concatenate(
213-
(
214-
np.mean(xi_boot, axis=0)[:, np.newaxis],
215-
np.std(xi_boot, axis=0)[:, np.newaxis],
216-
),
217-
axis=1,
206+
(np.mean(xi_boot, axis=0)[:, np.newaxis], np.std(xi_boot, axis=0)[:, np.newaxis]), axis=1
218207
)
219208
return pd.DataFrame(
220209
res,
@@ -241,9 +230,7 @@ def fits_bootstrap_pdf_sqrt_ratio_table(fits_bootstrap_pdf_sqrt_ratio):
241230
axis=1,
242231
)
243232
return pd.DataFrame(
244-
res,
245-
columns=[r"bootstrap mean sqrt ratio", r"bootstrap std. dev. sqrt ratio"],
246-
index=index,
233+
res, columns=[r"bootstrap mean sqrt ratio", r"bootstrap std. dev. sqrt ratio"], index=index
247234
)
248235

249236

validphys2/src/validphys/compareclosuretemplates/comparecard.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,4 @@ pdfscalespecs:
6969
Q: 1.651
7070

7171
actions_:
72-
- report(main=true)
72+
- bias_data #report(main=true)

0 commit comments

Comments
 (0)