#!/usr/bin/env python
"""Generate data using PolyChord.

Generates the PolyChord runs used in the paper and stores them in the
'chains' directory.

Requires:

* the diagnostic module (and its dependencies);
* PolyChord >= v1.14.

Results are generated using dyPolyChord's interface for convenience, but
all the results in the paper use standard (rather than dynamic) nested
sampling.

### Random seeding

Random seeding is used for reproducible results. This is only possible when
PolyChord is run *without* MPI, due to the unpredictable order in which
threads provide samples (see the PolyChord documentation for more details).
As generating repeated runs is "embarrassingly parallel", we instead
parallelise over runs with concurrent.futures via nestcheck's
parallel_apply function.

Note also that PolyChord's random number generator can vary between systems
and compilers, so your results may not exactly match those in the paper.
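
The script takes no command-line arguments; run it directly:

    python generate_data.py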
"""
import copy
import os
import nestcheck.parallel_utils
import dyPolyChord.python_likelihoods as likelihoods
import dyPolyChord.python_priors as priors
import dyPolyChord.output_processing
import dyPolyChord.polychord_utils
import dyPolyChord.pypolychord_utils
import dyPolyChord
import diagnostic.results_utils
import diagnostic.data_loading
import diagnostic.settings

try:
    # This initialises MPI, allowing multiple runs from the same Python
    # instance even if PolyChord is installed with MPI (so you don't have
    # to reinstall it without MPI).
    from mpi4py import MPI  # pylint: disable=unused-import
except ImportError:
    pass


def main():
    """Generate PolyChord runs, then process the results into a DataFrame
    and cache it.

    Nested sampling runs are generated for different settings by looping
    over:

    * likelihood_list: different likelihoods;
    * nd_nl_nr_list: list of tuples, each containing (number of dimensions,
      nlive, num_repeats);
    * inds: labels for the repeated runs to generate with each setting.
    """
    # Settings
    # --------
    # If True, many runs are made at the same time via concurrent.futures
    parallel = True
    # Run settings
    inds = list(range(1, 101))
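    # Each index doubles as the run's PolyChord random seed and as its
    # zero-padded file_root suffix (see the settings loop below).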
    # dimensions, nlive, num_repeats settings
    # ---------------------------------------
    nd_nl_nr_list = diagnostic.settings.get_nd_nl_nr_list()
    # Likelihood and prior settings
    # -----------------------------
    likelihood_list = [likelihoods.LogGammaMix(),
                       likelihoods.Gaussian(sigma=1)]
    # PolyChord settings
    settings_dict = {
        'do_clustering': True,
        'posteriors': False,
        'equals': False,
        'base_dir': 'chains',
        'feedback': -1,
        'precision_criterion': 0.001,
        'nlives': {},
        'write_dead': True,
        'write_stats': True,
        'write_paramnames': False,
        'write_prior': False,
        'write_live': False,
        'write_resume': False,
        'read_resume': False,
        'max_ndead': -1,
        'cluster_posteriors': False,
        'boost_posterior': 0.0}
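    # write_dead and write_stats produce the dead-points and .stats files
    # the analysis reads; all other output files are switched off. seed,
    # nlive, num_repeats and file_root are filled in per run below.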
    if 'ed' in os.getcwd().split('/'):
        # Running on laptop - don't use all the processors so I can do
        # other stuff without everything getting slow.
        max_workers = 6
    else:
        max_workers = None  # running on cluster
    print('Running with max_workers={}'.format(max_workers))
    prior_scale = 30
    prior = priors.Uniform(-prior_scale, prior_scale)
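    # I.e. a uniform prior on [-30, 30] in each of the ndim dimensions.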
    # Before running in parallel, make sure base_dir exists; if multiple
    # threads try to make it at the same time, mkdir throws an error.
    if not os.path.exists(settings_dict['base_dir']):
        os.makedirs(settings_dict['base_dir'])
    if not os.path.exists(settings_dict['base_dir'] + '/clusters'):
        os.makedirs(settings_dict['base_dir'] + '/clusters')
    for likelihood in likelihood_list:
        for ndim, nlive, num_repeats in nd_nl_nr_list:
            run_func = dyPolyChord.pypolychord_utils.RunPyPolyChord(
                likelihood, prior, ndim)
            # Make a list of settings dictionaries for the different repeats
            file_root = dyPolyChord.output_processing.settings_root(
                type(likelihood).__name__,
                type(prior).__name__, ndim,
                prior_scale=prior_scale, nrepeats=num_repeats,
                nlive_const=nlive, dynamic_goal=None)
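            # file_root encodes the likelihood, prior, prior_scale, ndim,
            # nlive and num_repeats settings; the run index is appended
            # below, giving roots ending '_001' to '_100'.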
            settings_dict['nlive'] = nlive
            settings_dict['num_repeats'] = num_repeats
            settings_list = []
            for extra_root in inds:
                settings = copy.deepcopy(settings_dict)
                settings['seed'] = extra_root
                settings['file_root'] = file_root
                settings['file_root'] += '_' + str(extra_root).zfill(3)
                settings_list.append(settings)
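            # deepcopy gives every run an independent settings dict, so
            # each repeat keeps its own unique seed and file_root.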
            # Do the nested sampling
            # ----------------------
            # For standard nested sampling just run PolyChord
            desc = '{} ndim={} nlive={} nrep={}'.format(
                type(likelihood).__name__, ndim, nlive, num_repeats)
            nestcheck.parallel_utils.parallel_apply(
                run_func, settings_list,
                max_workers=max_workers, parallel=parallel,
                tqdm_kwargs={'desc': desc, 'leave': True})
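            # parallel_apply maps run_func over settings_list using a
            # concurrent.futures process pool (capped at max_workers),
            # with a tqdm progress bar.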
            # Cache results DataFrame
            # -----------------------
            diagnostic.data_loading.get_results_df(
                [type(likelihood).__name__.replace('Mix', ' mix')],
                [(ndim, nlive, num_repeats)], n_simulate=100,
                nrun=inds[-1], summary=True, save=True, load=True,
                thread_pvalue=False, bs_stat_dist=False,
                include_rmse=True, include_true_values=True, parallel=True)
            if ((ndim, nlive, num_repeats) ==
                    diagnostic.settings.get_default_nd_nl_nr()):
                # Also cache the bootstrap statistic and thread-values
                # DataFrames. Use summary=True: caching is done on the raw
                # values, and this protects against unexpected kwarg errors,
                # as summary pops some kwargs before getting values.
                diagnostic.data_loading.get_results_df(
                    [type(likelihood).__name__.replace('Mix', ' mix')],
                    [(ndim, nlive, num_repeats)], n_simulate=100,
                    nrun=inds[-1], summary=True, save=True, load=True,
                    thread_pvalue=True, bs_stat_dist=True,
                    include_rmse=True, include_true_values=True,
                    parallel=True)


if __name__ == '__main__':
    main()