Commit 06eb011

polished closure results, plots and reports from all closure estimators (bias and variance) that were computed using the experimental covmat rather than the regularized PDF covmat
1 parent: 3c7cfdc

5 files changed, +13 -374 lines
validphys2/src/validphys/closuretest/plots.py

@@ -1,28 +1,14 @@
 """
 closuretest/plots.py
 
-Plots of statistical estimators for closure tests
+Plots of statistical estimators for single closure test.
+See multiclosure module for more estimators and plots.
 """
+
 from reportengine.figure import figure
 from validphys import plotutils
 
 
-@figure
-def plot_biases(biases_table):
-    """
-    Plot the bias of each experiment for all fits with bars. For information on
-    how biases is calculated see `bias_experiment`
-    """
-    fig, ax = plotutils.barplot(
-        biases_table.values.T,
-        collabels=biases_table.index.values,
-        datalabels=biases_table.columns.droplevel(1).values,
-    )
-    ax.set_title("Biases per experiment for each fit")
-    ax.legend()
-    return fig
-
-
 @figure
 def plot_delta_chi2(delta_chi2_bootstrap, fits):
     """Plots distributions of delta chi2 for each fit in `fits`.

@@ -45,32 +31,6 @@ def errorbar_figure_from_table(df):
     """Given a table with even columns as central values as odd columns as errors
     plot an errorbar plot"""
     fig, ax = plotutils.plot_horizontal_errorbars(
-        df.values[:, ::2].T,
-        df.values[:, 1::2].T,
-        df.index.values,
-        df.columns.unique(0),
-        xlim=0,
+        df.values[:, ::2].T, df.values[:, 1::2].T, df.index.values, df.columns.unique(0), xlim=0
     )
     return fig, ax
-
-
-@figure
-def plot_fits_bootstrap_variance(fits_bootstrap_variance_table):
-    """Plot variance as error bars, with mean and central value calculated
-    from bootstrap sample
-    """
-    fig, ax = errorbar_figure_from_table(fits_bootstrap_variance_table)
-    ax.set_title("Variance by experiment for closure fits")
-    return fig
-
-
-@figure
-def plot_fits_bootstrap_bias(fits_bootstrap_bias_table):
-    """Plot the bias for each experiment for all `fits` as a point with an error bar,
-    where the error bar is given by bootstrapping the bias across replicas
-
-    The number of bootstrap samples can be controlled by the parameter `bootstrap_samples`
-    """
-    fig, ax = errorbar_figure_from_table(fits_bootstrap_bias_table)
-    ax.set_title("Bias by experiment for closure fits")
-    return fig

validphys2/src/validphys/closuretest/closure_results.py (+7 -256)
@@ -2,19 +2,16 @@
 closuretest/closure_results.py
 
 Module containing actiosn to calculate sigle closure test estimators.
-This is useful for quickly checking the
-bias of a fit without having to run the full multiclosure analysis.
+This is useful for quickly checking the bias of a fit without having
+to run the full multiclosure analysis.
 
 """
-from collections import namedtuple
 
 import numpy as np
 import pandas as pd
 
 from reportengine import collect
 from reportengine.table import table
-from validphys.calcutils import bootstrap_values, calc_chi2
-from validphys.checks import check_pdf_is_montecarlo
 from validphys.closuretest.closure_checks import (
     check_fit_isclosure,
     check_fits_areclosures,

@@ -23,261 +20,20 @@
     check_use_fitcommondata,
 )
 
-BiasData = namedtuple("BiasData", ("bias", "ndata"))
 
-underlying_results = collect("results", ("fitunderlyinglaw",))
-
-
-@check_fit_isclosure
-@check_use_fitcommondata
-def bias_dataset(results, underlying_results, fit, use_fitcommondata):
-    """Calculate the bias for a given dataset and fit. The bias is defined as
-    chi2 between the prediction from the underlying PDF (which was used to
-    generate the closure pseudodata), also known as level zero closure data, and
-    the central prediction calculated from the fitted PDF.
-
-    we require that use_fitcommondata is true because the generated closure data
-    is used to generate the multiplicative contributions to the covariance
-    matrix
-    """
-    dt_ct, th_ct = results
-    # does collect need to collect a list even with one element?
-    ((_, th_ul),) = underlying_results
-    central_diff = th_ct.central_value - th_ul.central_value
-    bias_out = calc_chi2(dt_ct.sqrtcovmat, central_diff)  # unnormalised
-    return BiasData(bias_out, len(th_ct))
-
-
-underlying_dataset_inputs_results = collect("dataset_inputs_results", ("fitunderlyinglaw",))
-
-
-@check_fit_isclosure
-@check_use_fitcommondata
-def bias_experiment(
-    dataset_inputs_results, underlying_dataset_inputs_results, fit, use_fitcommondata
-):
-    """Like `bias_dataset` but for a whole experiment."""
-    return bias_dataset(
-        dataset_inputs_results,
-        underlying_dataset_inputs_results,
-        fit,
-        use_fitcommondata,
-    )
-
-
-experiments_bias = collect("bias_experiment", ("group_dataset_inputs_by_experiment",))
-fits_experiments_bias = collect("experiments_bias", ("fits", "fitcontext"))
 fits_experiments = collect("experiments_data", ("fits", "fitcontext"))
 
 
-@table
-@check_fits_same_filterseed
-@check_fits_underlying_law_match
-def biases_table(fits_experiments, fits_experiments_bias, fits, show_total: bool = False):
-    """Creates a table with fits as the columns and the experiments from both
-    fits as the row index.
-    """
-    col = ["bias"]
-    dfs = []
-    for fit, experiments, biases in zip(fits, fits_experiments, fits_experiments_bias):
-        total = 0
-        total_points = 0
-        records = []
-        for biasres, experiment in zip(biases, experiments):
-            records.append(
-                dict(
-                    experiment=str(experiment),
-                    bias=biasres.bias / biasres.ndata,  # normalised bias
-                )
-            )
-            if show_total:
-                total += biasres.bias
-                total_points += biasres.ndata
-        if show_total:
-            total /= total_points
-            records.append(dict(experiment="Total", bias=total))
-
-        df = pd.DataFrame.from_records(
-            records, columns=("experiment", "bias"), index=("experiment")
-        )
-        df.columns = pd.MultiIndex.from_product(([str(fit)], col))
-        dfs.append(df)
-    return pd.concat(dfs, axis=1, sort=True)
-
-
-@check_pdf_is_montecarlo
-@check_fit_isclosure
-def bootstrap_bias_experiment(
-    dataset_inputs_results, underlying_dataset_inputs_results, bootstrap_samples=500
-):
-    """Calculates bias as per `bias_experiment` but performs bootstrap sample
-    across replicas. note that bias_experiment returns a named tuple like
-    (unnormalised_bias, ndata) whereas this actions simply returns an array
-    `boostrap_bias` with length bootstrap_samples. Each element of
-    returned array is bias/n_data (bias normalised by number of datapoints)
-    """
-    dt_ct, th_ct = dataset_inputs_results
-    ((_, th_ul),) = underlying_dataset_inputs_results
-    th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples)
-    boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
-    boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
-    return boot_bias
-
-
-experiments_bootstrap_bias = collect(
-    "bootstrap_bias_experiment", ("group_dataset_inputs_by_experiment",)
-)
-fits_experiments_bootstrap_bias = collect("experiments_bootstrap_bias", ("fits", "fitcontext"))
-
-
-@table
-@check_use_fitcommondata
-@check_fits_areclosures
-@check_fits_same_filterseed
-@check_fits_underlying_law_match
-def fits_bootstrap_bias_table(
-    fits_experiments_bootstrap_bias,
-    fits_name_with_covmat_label,
-    fits_experiments,
-    fits,
-    use_fitcommondata,
-):
-    """Produce a table with bias for each experiment for each fit, along with
-    variance calculated from doing a bootstrap sample
-    """
-    col = ["bias", "bias std. dev."]
-    dfs = []
-    for fit, experiments, biases in zip(
-        fits_name_with_covmat_label, fits_experiments, fits_experiments_bootstrap_bias
-    ):
-        records = []
-        for biasres, experiment in zip(biases, experiments):
-            records.append(
-                dict(
-                    experiment=str(experiment),
-                    bias=biasres.mean(),
-                    bias_std=biasres.std(),
-                )
-            )
-        df = pd.DataFrame.from_records(
-            records, columns=("experiment", "bias", "bias_std"), index=("experiment")
-        )
-        df.columns = pd.MultiIndex.from_product(([str(fit)], col))
-        dfs.append(df)
-    return pd.concat(dfs, axis=1, sort=True)
-
-
-VarianceData = namedtuple("VarianceData", ("variance", "ndata"))
-
-
-@check_fit_isclosure
-@check_use_fitcommondata
-def variance_dataset(results, fit, use_fitcommondata):
-    """calculate the variance for a given dataset, which is the spread of
-    replicas measured in the space of the covariance matrix. Given by:
-
-    E_rep [ (T - E_rep[T])_i C^{-1}_ij (T - E_rep[T])_j ]
-
-    where E_rep is the expectation value across replicas. The quantity is the
-    same as squaring `phi_data`, however it is redefined here in a way which can
-    be made fully independent of the closure data. This is useful when checking
-    the variance of data which was not included in the fit.
-
-    """
-    dt, th = results
-    diff = th.central_value[:, np.newaxis] - th.error_members
-    var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
-    return VarianceData(var_unnorm, len(th))
-
-
-@check_fit_isclosure
-@check_use_fitcommondata
-def variance_experiment(dataset_inputs_results, fit, use_fitcommondata):
-    """Like variance_dataset but for a whole experiment"""
-    return variance_dataset(dataset_inputs_results, fit, use_fitcommondata)
-
-
-@check_fit_isclosure
-def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500):
-    """Calculate the variance as in `variance_experiment` but performs bootstrap
-    sample of the estimator. Returns an array of variance for each resample,
-    normalised to the number of data in the experiment.
-    """
-    dt_ct, th_ct = dataset_inputs_results
-    diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members
-    var_unnorm_boot = bootstrap_values(
-        diff,
-        bootstrap_samples,
-        apply_func=(lambda x, y: calc_chi2(y, x)),
-        args=[dt_ct.sqrtcovmat],
-    ).mean(
-        axis=0
-    )  # mean across replicas
-    return var_unnorm_boot / len(th_ct)  # normalise by n_data
-
-
-experiments_boostrap_variance = collect(
-    "bootstrap_variance_experiment", ("group_dataset_inputs_by_experiment",)
-)
-
-fits_exps_bootstrap_var = collect("experiments_boostrap_variance", ("fits", "fitcontext"))
-
-
-@table
-@check_use_fitcommondata
-@check_fits_areclosures
-@check_fits_same_filterseed
-@check_fits_underlying_law_match
-def fits_bootstrap_variance_table(
-    fits_exps_bootstrap_var,
-    fits_name_with_covmat_label,
-    fits_experiments,
-    fits,
-    use_fitcommondata,
-):
-    """Produce a table with variance and its error. Variance is defined as
-
-    var = sum_ij E_rep[(E_rep[T_i], T_i) invcov_ij (E_rep[T_j], T_j)] / N_data
-
-    which is the expectation value across replicas of the chi2 between the central
-    theory predictions and replica theory predictions. It is the same as phi^2
-    and gives the variance of the theory predictions in units of the covariance
-    matrix, normalised by the number of data points.
-
-    The error is the standard deviation across bootstrap samples.
-
-    """
-    col = ["variance", "variance std. dev."]
-    dfs = []
-    for fit, experiments, fit_vars in zip(
-        fits_name_with_covmat_label, fits_experiments, fits_exps_bootstrap_var
-    ):
-        records = []
-        for var_res, experiment in zip(fit_vars, experiments):
-            records.append(
-                dict(
-                    experiment=str(experiment),
-                    variance=var_res.mean(),
-                    variance_std=var_res.std(),
-                )
-            )
-        df = pd.DataFrame.from_records(
-            records,
-            columns=("experiment", "variance", "variance_std"),
-            index=("experiment"),
-        )
-        df.columns = pd.MultiIndex.from_product(([str(fit)], col))
-        dfs.append(df)
-    return pd.concat(dfs, axis=1, sort=True)
-
-
 experiments_bootstrap_chi2_central = collect(
     "dataset_inputs_bootstrap_chi2_central", ("group_dataset_inputs_by_experiment",)
 )
 
+
 fits_exps_bootstrap_chi2_central = collect(
     "experiments_bootstrap_chi2_central", ("fits", "fitcontext")
 )
+
+
 fits_level_1_noise = collect("total_chi2_data", ("fits", "fitinputcontext", "fitunderlyinglaw"))
 
 

@@ -336,20 +92,15 @@ def delta_chi2_table(
     dfs = []
     cols = ("ndata", r"$\Delta_{chi^2}$ (normalised by ndata)")
     for label, experiments, exps_chi2, exps_level_1_noise in zip(
-        fits_name_with_covmat_label,
-        fits_experiments,
-        fits_exps_chi2,
-        fits_exps_level_1_noise,
+        fits_name_with_covmat_label, fits_experiments, fits_exps_chi2, fits_exps_level_1_noise
     ):
         records = []
         for experiment, exp_chi2, level_1_noise in zip(experiments, exps_chi2, exps_level_1_noise):
             delta_chi2 = (exp_chi2.central_result - level_1_noise.central_result) / exp_chi2.ndata
             npoints = exp_chi2.ndata
             records.append(dict(experiment=str(experiment), npoints=npoints, delta_chi2=delta_chi2))
         df = pd.DataFrame.from_records(
-            records,
-            columns=("experiment", "npoints", "delta_chi2"),
-            index=("experiment",),
+            records, columns=("experiment", "npoints", "delta_chi2"), index=("experiment",)
         )
         df.columns = pd.MultiIndex.from_product(([label], cols))
         dfs.append(df)

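For reference, the removed bias and variance actions above both reduce to a chi2 computed with the Cholesky factor of the experimental covariance matrix. A minimal standalone sketch of those two estimators, written in plain NumPy with hypothetical argument names (not the validphys `results`/`calcutils` API), could look like this:

import numpy as np


def chi2_from_sqrtcov(sqrt_cov, diffs):
    # chi2 of one or more difference vectors in units of the covariance
    # matrix, given its lower-triangular Cholesky factor.
    vecs = np.linalg.solve(sqrt_cov, diffs)
    return np.sum(vecs * vecs, axis=0)


def closure_bias(th_fit_central, th_underlying_central, sqrt_cov):
    # Unnormalised bias: chi2 between the fitted central prediction and the
    # underlying-law (level zero) prediction; divide by ndata to normalise.
    return chi2_from_sqrtcov(sqrt_cov, th_fit_central - th_underlying_central)


def closure_variance(th_replicas, sqrt_cov):
    # Unnormalised variance: mean over replicas of the chi2 between the
    # replica-mean prediction and each replica; dividing by ndata gives phi^2.
    diffs = th_replicas.mean(axis=1, keepdims=True) - th_replicas
    return chi2_from_sqrtcov(sqrt_cov, diffs).mean()

Whether the estimators are evaluated against the experimental covariance matrix or a regularized PDF covariance matrix enters only through the `sqrt_cov` argument, which is the distinction drawn in the commit message.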
validphys2/src/validphys/compareclosuretemplates/comparecard.yaml (+1 -1)

@@ -69,4 +69,4 @@ pdfscalespecs:
     Q: 1.651
 
 actions_:
-  - bias_data #report(main=true)
+  - report(main=true)
