From cb75f1341435320ef0012fe8e93de8ff3e6ed2fe Mon Sep 17 00:00:00 2001 From: danhphan Date: Sat, 29 Jan 2022 14:56:34 +1100 Subject: [PATCH 1/5] fix and add test_vector_inputs for OrderedProbit --- pymc/distributions/discrete.py | 4 ++- pymc/tests/test_distributions_random.py | 33 ++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/pymc/distributions/discrete.py b/pymc/distributions/discrete.py index 021f430fc0..143119b92c 100644 --- a/pymc/distributions/discrete.py +++ b/pymc/distributions/discrete.py @@ -1964,7 +1964,9 @@ def dist(cls, eta, cutpoints, sigma=1, *args, **kwargs): _log_p = at.concatenate( [ at.shape_padright(normal_lccdf(0, sigma, probits[..., 0])), - log_diff_normal_cdf(0, sigma, probits[..., :-1], probits[..., 1:]), + log_diff_normal_cdf( + 0, at.shape_padright(sigma), probits[..., :-1], probits[..., 1:] + ), at.shape_padright(normal_lcdf(0, sigma, probits[..., -1])), ], axis=-1, diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 0ec757c37b..410c68d7e0 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -21,6 +21,7 @@ import numpy as np import numpy.random as nr import numpy.testing as npt +import pandas as pd import pytest import scipy.stats as st @@ -368,7 +369,7 @@ def check_pymc_params_match_rv_op(self): assert_almost_equal(expected_value, actual_variable.eval(), decimal=self.decimal) def check_rv_size(self): - # test sizes + # test sizes sizes_to_check = self.sizes_to_check or [None, (), 1, (1,), 5, (4, 5), (2, 4, 2)] sizes_expected = self.sizes_expected or [(), (), (1,), (1,), (5,), (4, 5), (2, 4, 2)] for size, expected in zip(sizes_to_check, sizes_expected): @@ -1698,6 +1699,35 @@ class TestOrderedProbit(BaseTestDistributionRandom): "check_rv_size", ] + def test_vector_inputs(self): + """ + This test checks when providing vector inputs for `eta` and `sigma` parameters using advanced indexing. + """ + df = pd.DataFrame({ + 'X' : ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B'], + 'Y' : [1, 1, 1, 2, 2, 3, 4, 5, 1, 1, 1, 1, 2, 2, 3, 3] + }) + + df.Y = df.Y.astype(int) + grp_idx = pd.Categorical(df.X).codes + K = df.Y.nunique() + + with pm.Model() as opb: + cutpoints = pm.Normal("cutpoints", 0.0, 1.5, shape=K-1, + transform=pm.distributions.transforms.ordered, + initval=np.arange(K-1)) + + mu = pm.Normal("mu", mu=K/2, sd=K, shape=2) + sigma = pm.HalfNormal("sigma", 1, shape=2) + + y_obs = pm.OrderedProbit("y_obs", + cutpoints=cutpoints, + eta=mu[grp_idx], + sigma=sigma[grp_idx], + observed=df.Y-1) + + assert df.Y.shape == y_obs.eval().shape + class TestOrderedMultinomial(BaseTestDistributionRandom): pymc_dist = _OrderedMultinomial @@ -1825,6 +1855,7 @@ def check_errors(self): shape=15, ) + def check_random_variable_prior(self): """ This test checks for shape correctness when using MatrixNormal distribution From 8d1d9d966052d25385085a1081b6b1e0c0f7f48c Mon Sep 17 00:00:00 2001 From: danhphan Date: Sat, 29 Jan 2022 21:36:59 +1100 Subject: [PATCH 2/5] simplify test_vector_inputs --- pymc/tests/test_distributions_random.py | 49 ++++++++++++------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 410c68d7e0..a04362a6ba 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -21,7 +21,6 @@ import numpy as np import numpy.random as nr import numpy.testing as npt -import pandas as pd import pytest import scipy.stats as st @@ -369,7 +368,7 @@ def check_pymc_params_match_rv_op(self): assert_almost_equal(expected_value, actual_variable.eval(), decimal=self.decimal) def check_rv_size(self): - # test sizes + # test sizes sizes_to_check = self.sizes_to_check or [None, (), 1, (1,), 5, (4, 5), (2, 4, 2)] sizes_expected = self.sizes_expected or [(), (), (1,), (1,), (5,), (4, 5), (2, 4, 2)] for size, expected in zip(sizes_to_check, sizes_expected): @@ -1700,33 +1699,32 @@ class TestOrderedProbit(BaseTestDistributionRandom): ] def test_vector_inputs(self): - """ + """ This test checks when providing vector inputs for `eta` and `sigma` parameters using advanced indexing. """ - df = pd.DataFrame({ - 'X' : ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'B', 'B'], - 'Y' : [1, 1, 1, 2, 2, 3, 4, 5, 1, 1, 1, 1, 2, 2, 3, 3] - }) - - df.Y = df.Y.astype(int) - grp_idx = pd.Categorical(df.X).codes - K = df.Y.nunique() - - with pm.Model() as opb: - cutpoints = pm.Normal("cutpoints", 0.0, 1.5, shape=K-1, - transform=pm.distributions.transforms.ordered, - initval=np.arange(K-1)) - - mu = pm.Normal("mu", mu=K/2, sd=K, shape=2) - sigma = pm.HalfNormal("sigma", 1, shape=2) + categorical = pm.OrderedProbit.dist( + eta=0, + cutpoints=np.array([-2.0, 0, 2.0]), + sigma=1.0, + ) + p = categorical.owner.inputs[3].eval() + assert p.shape == (4,) - y_obs = pm.OrderedProbit("y_obs", - cutpoints=cutpoints, - eta=mu[grp_idx], - sigma=sigma[grp_idx], - observed=df.Y-1) + categorical = pm.OrderedProbit.dist( + eta=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + cutpoints=np.array([-2.0, 0, 2.0]), + sigma=1, + ) + p = categorical.owner.inputs[3].eval() + assert p.shape == (5, 4) - assert df.Y.shape == y_obs.eval().shape + categorical = pm.OrderedProbit.dist( + eta=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + cutpoints=np.array([-2.0, 0, 2.0]), + sigma=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + ) + p = categorical.owner.inputs[3].eval() + assert p.shape == (5, 4) class TestOrderedMultinomial(BaseTestDistributionRandom): @@ -1855,7 +1853,6 @@ def check_errors(self): shape=15, ) - def check_random_variable_prior(self): """ This test checks for shape correctness when using MatrixNormal distribution From 32f6c890bb63b85df0832a5af6557f085cee85f8 Mon Sep 17 00:00:00 2001 From: danhphan Date: Fri, 4 Feb 2022 17:25:51 +1100 Subject: [PATCH 3/5] simplify test_shape_inputs for _OrderedProbit --- pymc/tests/test_distributions_random.py | 46 ++++++++++++------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index a04362a6ba..eedf830168 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -1698,33 +1698,33 @@ class TestOrderedProbit(BaseTestDistributionRandom): "check_rv_size", ] - def test_vector_inputs(self): + @pytest.mark.parametrize( + "eta, cutpoints, sigma, expected", + [ + (0, [-2.0, 0, 2.0], 1.0, (4,)), + ([-1], [-2.0, 0, 2.0], [2.0], (1, 4)), + ([1.0, -2.0], [-1.0, 0, 1.0], 1.0, (2, 4)), + ([1.0, -2.0, 3.0], [-2.0, 0, 2.0], [-1.0, -2.0, 5.0], (3, 4)), + ([[1.0, -1.0, 0.0], [-1.0, 3.0, 5.0]], [-2.0, 0, 1.0], [-1.0, -2.0, 5.0], (2, 3, 4)), + ( + [[1.0, -2.0, 3.0], [1.0, 2.0, -4.0]], + [-2.0, 0, 1.0], + [[0.0, 2.0, -4.0], [-1.0, 1.0, 3.0]], + (2, 3, 4), + ), + ], + ) + def test_shape_inputs(self, eta, cutpoints, sigma, expected): """ - This test checks when providing vector inputs for `eta` and `sigma` parameters using advanced indexing. + This test checks when providing different shapes for `eta` and `sigma` parameters. """ - categorical = pm.OrderedProbit.dist( - eta=0, - cutpoints=np.array([-2.0, 0, 2.0]), - sigma=1.0, - ) - p = categorical.owner.inputs[3].eval() - assert p.shape == (4,) - - categorical = pm.OrderedProbit.dist( - eta=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), - cutpoints=np.array([-2.0, 0, 2.0]), - sigma=1, - ) - p = categorical.owner.inputs[3].eval() - assert p.shape == (5, 4) - - categorical = pm.OrderedProbit.dist( - eta=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), - cutpoints=np.array([-2.0, 0, 2.0]), - sigma=np.array([1.0, 2.0, 3.0, 4.0, 5.0]), + categorical = _OrderedProbit.dist( + eta=eta, + cutpoints=cutpoints, + sigma=sigma, ) p = categorical.owner.inputs[3].eval() - assert p.shape == (5, 4) + assert p.shape == expected class TestOrderedMultinomial(BaseTestDistributionRandom): From 466a941b8869712489a94168a323a3711995eec3 Mon Sep 17 00:00:00 2001 From: danhphan Date: Fri, 4 Feb 2022 17:34:08 +1100 Subject: [PATCH 4/5] add test_shape_inputs for _OrderedLogistic --- pymc/tests/test_distributions_random.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index eedf830168..73adc47abb 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -1688,6 +1688,26 @@ class TestOrderedLogistic(BaseTestDistributionRandom): "check_rv_size", ] + @pytest.mark.parametrize( + "eta, cutpoints, expected", + [ + (0, [-2.0, 0, 2.0], (4,)), + ([-1], [-2.0, 0, 2.0], (1, 4)), + ([1.0, -2.0], [-1.0, 0, 1.0], (2, 4)), + ([[1.0, -1.0, 0.0], [-1.0, 3.0, 5.0]], [-2.0, 0, 1.0], (2, 3, 4)), + ], + ) + def test_shape_inputs(self, eta, cutpoints, expected): + """ + This test checks when providing different shapes for `eta` parameters. + """ + categorical = _OrderedLogistic.dist( + eta=eta, + cutpoints=cutpoints, + ) + p = categorical.owner.inputs[3].eval() + assert p.shape == expected + class TestOrderedProbit(BaseTestDistributionRandom): pymc_dist = _OrderedProbit From 9b311bf8b4d8fae93db2a085fb5be1e8c730c570 Mon Sep 17 00:00:00 2001 From: danhphan Date: Sun, 6 Feb 2022 09:05:40 +1100 Subject: [PATCH 5/5] add 2d cutpoints and positive sigma --- pymc/tests/test_distributions_random.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pymc/tests/test_distributions_random.py b/pymc/tests/test_distributions_random.py index 73adc47abb..93c3943aa9 100644 --- a/pymc/tests/test_distributions_random.py +++ b/pymc/tests/test_distributions_random.py @@ -1694,7 +1694,9 @@ class TestOrderedLogistic(BaseTestDistributionRandom): (0, [-2.0, 0, 2.0], (4,)), ([-1], [-2.0, 0, 2.0], (1, 4)), ([1.0, -2.0], [-1.0, 0, 1.0], (2, 4)), - ([[1.0, -1.0, 0.0], [-1.0, 3.0, 5.0]], [-2.0, 0, 1.0], (2, 3, 4)), + (np.zeros((3, 2)), [-2.0, 0, 1.0], (3, 2, 4)), + (np.ones((5, 2)), [[-2.0, 0, 1.0], [-1.0, 0, 1.0]], (5, 2, 4)), + (np.ones((3, 5, 2)), [[-2.0, 0, 1.0], [-1.0, 0, 1.0]], (3, 5, 2, 4)), ], ) def test_shape_inputs(self, eta, cutpoints, expected): @@ -1722,16 +1724,12 @@ class TestOrderedProbit(BaseTestDistributionRandom): "eta, cutpoints, sigma, expected", [ (0, [-2.0, 0, 2.0], 1.0, (4,)), - ([-1], [-2.0, 0, 2.0], [2.0], (1, 4)), + ([-1], [-1.0, 0, 2.0], [2.0], (1, 4)), ([1.0, -2.0], [-1.0, 0, 1.0], 1.0, (2, 4)), - ([1.0, -2.0, 3.0], [-2.0, 0, 2.0], [-1.0, -2.0, 5.0], (3, 4)), - ([[1.0, -1.0, 0.0], [-1.0, 3.0, 5.0]], [-2.0, 0, 1.0], [-1.0, -2.0, 5.0], (2, 3, 4)), - ( - [[1.0, -2.0, 3.0], [1.0, 2.0, -4.0]], - [-2.0, 0, 1.0], - [[0.0, 2.0, -4.0], [-1.0, 1.0, 3.0]], - (2, 3, 4), - ), + ([1.0, -2.0, 3.0], [-1.0, 0, 2.0], np.ones((1, 3)), (1, 3, 4)), + (np.zeros((2, 3)), [-2.0, 0, 1.0], [1.0, 2.0, 5.0], (2, 3, 4)), + (np.ones((2, 3)), [-1.0, 0, 1.0], np.ones((2, 3)), (2, 3, 4)), + (np.zeros((5, 2)), [[-2, 0, 1], [-1, 0, 1]], np.ones((2, 5, 2)), (2, 5, 2, 4)), ], ) def test_shape_inputs(self, eta, cutpoints, sigma, expected):