From badd446e949022a9b6d1e21d9b90f62165849805 Mon Sep 17 00:00:00 2001 From: Michael Osthege <m.osthege@fz-juelich.de> Date: Sun, 21 Jul 2019 21:26:10 +0200 Subject: [PATCH 1/4] test with and without parallelization across cores demonstrates issue #3555 --- pymc3/tests/test_step.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index 32547b1d88..ed9ed0e99e 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -915,7 +915,7 @@ def test_checks_population_size(self): trace = sample(draws=100, chains=4, step=step) pass - def test_parallelized_chains_are_random(self): + def test_nonparallelized_chains_are_random(self): with Model() as model: x = Normal("x", 0, 1) for stepper in TestPopulationSamplers.steppers: @@ -928,6 +928,19 @@ def test_parallelized_chains_are_random(self): ) pass + def test_parallelized_chains_are_random(self): + with Model() as model: + x = Normal("x", 0, 1) + for stepper in TestPopulationSamplers.steppers: + step = stepper() + trace = sample(chains=4, draws=20, tune=0, step=DEMetropolis(), parallelize=True) + samples = np.array(trace.get_values("x", combine=False))[:, 5] + + assert len(set(samples)) == 4, "Parallelized {} " "chains are identical.".format( + stepper + ) + pass + @pytest.mark.xfail(condition=(theano.config.floatX == "float32"), reason="Fails on float32") class TestNutsCheckTrace: From 6324408b48fdf7f65336505d690ea93031403417 Mon Sep 17 00:00:00 2001 From: Michael Osthege <m.osthege@fz-juelich.de> Date: Sun, 21 Jul 2019 22:05:16 +0200 Subject: [PATCH 2/4] replace parallelize kwarg by reliance on cores setting closes #3555 --- pymc3/sampling.py | 6 +++--- pymc3/tests/test_step.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 8fb17df4f5..b3628d857f 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -452,7 +452,7 @@ def sample(draws=500, step=None, init='auto', n_init=200000, start=None, trace=N if has_population_samplers: _log.info('Population sampling ({} chains)'.format(chains)) _print_step_hierarchy(step) - trace = _sample_population(**sample_args) + trace = _sample_population(**sample_args, parallelize=cores > 1) else: _log.info('Sequential sampling ({} chains in 1 job)'.format(chains)) _print_step_hierarchy(step) @@ -689,7 +689,7 @@ def __init__(self, steppers, parallelize): if parallelize: try: # configure a child process for each stepper - _log.info('Attempting to parallelize chains.') + _log.info('Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.') import multiprocessing for c, stepper in enumerate(tqdm(steppers)): slave_end, master_end = multiprocessing.Pipe() @@ -714,7 +714,7 @@ def __init__(self, steppers, parallelize): _log.debug('Error was: ', exec_info=True) else: _log.info('Chains are not parallelized. You can enable this by passing ' - 'pm.sample(parallelize=True).') + 'pm.sample(cores=2).') return super().__init__() def __enter__(self): diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py index ed9ed0e99e..d67c0ef1d2 100644 --- a/pymc3/tests/test_step.py +++ b/pymc3/tests/test_step.py @@ -920,7 +920,7 @@ def test_nonparallelized_chains_are_random(self): x = Normal("x", 0, 1) for stepper in TestPopulationSamplers.steppers: step = stepper() - trace = sample(chains=4, draws=20, tune=0, step=DEMetropolis()) + trace = sample(chains=4, cores=1, draws=20, tune=0, step=DEMetropolis()) samples = np.array(trace.get_values("x", combine=False))[:, 5] assert len(set(samples)) == 4, "Parallelized {} " "chains are identical.".format( @@ -933,7 +933,7 @@ def test_parallelized_chains_are_random(self): x = Normal("x", 0, 1) for stepper in TestPopulationSamplers.steppers: step = stepper() - trace = sample(chains=4, draws=20, tune=0, step=DEMetropolis(), parallelize=True) + trace = sample(chains=4, cores=4, draws=20, tune=0, step=DEMetropolis()) samples = np.array(trace.get_values("x", combine=False))[:, 5] assert len(set(samples)) == 4, "Parallelized {} " "chains are identical.".format( From 7ac416a1ee12caf41a50e958293e270eeb0dd57e Mon Sep 17 00:00:00 2001 From: Michael Osthege <m.osthege@fz-juelich.de> Date: Sun, 21 Jul 2019 22:08:07 +0200 Subject: [PATCH 3/4] add the changes from pull 3559 --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 37c8366838..dfc7eae382 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -11,6 +11,7 @@ ### Maintenance - Moved math operations out of `Rice`, `TruncatedNormal`, `Triangular` and `ZeroInflatedNegativeBinomial` `random` methods. Math operations on values returned by `draw_values` might not broadcast well, and all the `size` aware broadcasting is left to `generate_samples`. Fixes [#3481](https://github.com/pymc-devs/pymc3/issues/3481) and [#3508](https://github.com/pymc-devs/pymc3/issues/3508) +- Parallelization of population steppers (`DEMetropolis`) is now set via the `cores` argument. ([#3559](https://github.com/pymc-devs/pymc3/pull/3559)) ## PyMC3 3.7 (May 29 2019) From 971c8e0008fc87631b4dd0687c10a0618fd9d53a Mon Sep 17 00:00:00 2001 From: michaelosthege <thecakedev@hotmail.com> Date: Sun, 21 Jul 2019 23:01:13 +0200 Subject: [PATCH 4/4] use more general suggestion in the log message Co-Authored-By: Colin <ColCarroll@users.noreply.github.com> --- pymc3/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc3/sampling.py b/pymc3/sampling.py index b3628d857f..1c9b515b13 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -714,7 +714,7 @@ def __init__(self, steppers, parallelize): _log.debug('Error was: ', exec_info=True) else: _log.info('Chains are not parallelized. You can enable this by passing ' - 'pm.sample(cores=2).') + '`pm.sample(cores=n)`, where n > 1.') return super().__init__() def __enter__(self):