From 05462f2feb15381bff0427b72a9aa3fc11532cfc Mon Sep 17 00:00:00 2001
From: hottwaj <jonathan.a.clarke@gmail.com>
Date: Wed, 26 Feb 2020 10:25:20 +0000
Subject: [PATCH 01/15] Initial changes to allow pymc3.Data() to support both
 int and float input data (previously all input data was coerced to float).
 WIP for #3813

---
 pymc3/data.py  | 14 ++++++++++++--
 pymc3/model.py |  8 +++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/pymc3/data.py b/pymc3/data.py
index c638478b08..e39809123e 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -478,10 +478,20 @@ class Data:
     For more information, take a look at this example notebook
     https://docs.pymc.io/notebooks/data_container.html
     """
-    def __new__(self, name, value):
+    def __new__(self, name, value, dtype = None):
+        if dtype is None:
+            if hasattr(value, 'dtype'):
+                # if no dtype given, but available as attr of value, use that as dtype
+                dtype = value.dtype
+            elif isinstance(value, int):
+                dtype = int
+            else:
+                # otherwise, assume float
+                dtype = float
+                
         # `pm.model.pandas_to_array` takes care of parameter `value` and
         # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value), name)
+        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype = dtype), name)
 
         # To draw the node for this variable in the graphviz Digraph we need
         # its shape.
diff --git a/pymc3/model.py b/pymc3/model.py
index 3de6e4f380..a4616fd9f3 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1473,7 +1473,7 @@ def init_value(self):
         return self.tag.test_value
 
 
-def pandas_to_array(data):
+def pandas_to_array(data, dtype = float):
     if hasattr(data, 'values'):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
@@ -1492,8 +1492,10 @@ def pandas_to_array(data):
         ret = generator(data)
     else:
         ret = np.asarray(data)
-    return pm.floatX(ret)
-
+    if dtype in [float, np.float32, np.float64]:
+        return pm.floatX(ret)
+    elif dtype in [int, np.int32, np.int64]:
+        return pm.intX(ret)
 
 def as_tensor(data, name, model, distribution):
     dtype = distribution.dtype

From 0041e9f36abb0223d33366ac82f34b559d229c29 Mon Sep 17 00:00:00 2001
From: hottwaj <jonathan.a.clarke@gmail.com>
Date: Wed, 26 Feb 2020 10:34:47 +0000
Subject: [PATCH 02/15] added exception for invalid dtype input to
 pandas_to_array

---
 pymc3/model.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pymc3/model.py b/pymc3/model.py
index a4616fd9f3..62123267da 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1496,6 +1496,8 @@ def pandas_to_array(data, dtype = float):
         return pm.floatX(ret)
     elif dtype in [int, np.int32, np.int64]:
         return pm.intX(ret)
+    else:
+        raise ValueError('Unsupported type for pandas_to_array: %s' % str(dtype))
 
 def as_tensor(data, name, model, distribution):
     dtype = distribution.dtype

From 38536aaf2d993ee21a020b7d64801be90d9645a8 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 11:19:43 +0200
Subject: [PATCH 03/15] Refined implementation

---
 pymc3/data.py | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/pymc3/data.py b/pymc3/data.py
index a80d62ec4d..fa569dd332 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -479,10 +479,10 @@ class Data:
     https://docs.pymc.io/notebooks/data_container.html
     """
 
-    def __new__(self, name, value, dtype = None):
-        if dtype is None:
-            if hasattr(value, 'dtype'):
-                # if no dtype given, but available as attr of value, use that as dtype
+    def __new__(self, name, value, dtype=None):
+        if not dtype:
+            if hasattr(value, "dtype"):
+                # if no dtype given but available as attr of value, use that as dtype
                 dtype = value.dtype
             elif isinstance(value, int):
                 dtype = int
@@ -490,33 +490,24 @@ def __new__(self, name, value, dtype = None):
                 # otherwise, assume float
                 dtype = float
 
-        # `pm.model.pandas_to_array` takes care of parameter `value` and
-        # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype = dtype), name)
-
-        # To draw the node for this variable in the graphviz Digraph we need
-        # its shape.
-        shared_object.dshape = tuple(shared_object.shape.eval())
-
         # Add data container to the named variables of the model.
         try:
             model = pm.Model.get_context()
         except TypeError:
-            raise TypeError("No model on context stack, which is needed to "
-                            "instantiate a data container. Add variable "
-                            "inside a 'with model:' block.")
-
+            raise TypeError(
+                "No model on context stack, which is needed to instantiate a data container. "
+                "Add variable inside a 'with model:' block."
+            )
         name = model.name_for(name)
 
         # `pm.model.pandas_to_array` takes care of parameter `value` and
         # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value), name)
+        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype=dtype), name)
 
         # To draw the node for this variable in the graphviz Digraph we need
         # its shape.
         shared_object.dshape = tuple(shared_object.shape.eval())
 
-
         model.add_random_variable(shared_object)
 
         return shared_object

From 08b3ba46c290b2d0b711786e57310392652f319e Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 15:27:37 +0200
Subject: [PATCH 04/15] Finished dtype conversion handling

---
 pymc3/model.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pymc3/model.py b/pymc3/model.py
index d572dd3829..da21d53811 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1482,7 +1482,7 @@ def init_value(self):
         return self.tag.test_value
 
 
-def pandas_to_array(data, dtype = float):
+def pandas_to_array(data, dtype=float):
     if hasattr(data, 'values'):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
@@ -1501,13 +1501,15 @@ def pandas_to_array(data, dtype = float):
         ret = generator(data)
     else:
         ret = np.asarray(data)
-    if dtype in [float, np.float32, np.float64]:
-        return pm.floatX(ret)
-    elif dtype in [int, np.int32, np.int64]:
+
+    if dtype in [int, np.int8, np.int16, np.int32, np.int64]:
         return pm.intX(ret)
+    elif dtype in [float, np.float16,  np.float32, np.float64]:
+        return pm.floatX(ret)
     else:
         raise ValueError('Unsupported type for pandas_to_array: %s' % str(dtype))
 
+
 def as_tensor(data, name, model, distribution):
     dtype = distribution.dtype
     data = pandas_to_array(data).astype(dtype)

From 1fd0e8b0e2377b98015f76c077989113d358da8c Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 18:08:30 +0200
Subject: [PATCH 05/15] Added SharedVariable option to getattr_value

---
 pymc3/distributions/distribution.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 8aaa171d14..437001880b 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -111,6 +111,9 @@ def getattr_value(self, val):
         if isinstance(val, tt.TensorVariable):
             return val.tag.test_value
 
+        if isinstance(val, tt.sharedvar.TensorSharedVariable):
+            return val.get_value()
+
         if isinstance(val, theano_constant):
             return val.value
 

From 766285c60958bec25b170198bfc0007ceb9b9c21 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 19:33:16 +0200
Subject: [PATCH 06/15] Added dtype handling to set_data function

---
 pymc3/model.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pymc3/model.py b/pymc3/model.py
index da21d53811..45d1629794 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1244,7 +1244,7 @@ def set_data(new_data, model=None):
     ----------
     new_data: dict
         New values for the data containers. The keys of the dictionary are
-        the  variables names in the model and the values are the objects
+        the variables' names in the model and the values are the objects
         with which to update.
     model: Model (optional if in `with` context)
 
@@ -1266,7 +1266,7 @@ def set_data(new_data, model=None):
     .. code:: ipython
 
         >>> with model:
-        ...     pm.set_data({'x': [5,6,9]})
+        ...     pm.set_data({'x': [5., 6., 9.]})
         ...     y_test = pm.sample_posterior_predictive(trace)
         >>> y_test['obs'].mean(axis=0)
         array([4.6088569 , 5.54128318, 8.32953844])
@@ -1275,7 +1275,15 @@ def set_data(new_data, model=None):
 
     for variable_name, new_value in new_data.items():
         if isinstance(model[variable_name], SharedVariable):
-            model[variable_name].set_value(pandas_to_array(new_value))
+            if hasattr(new_value, "dtype"):
+                # if no dtype given but available as attr of value, use that as dtype
+                dtype = new_value.dtype
+            elif isinstance(new_value, int):
+                dtype = int
+            else:
+                # otherwise, assume float
+                dtype = float
+            model[variable_name].set_value(pandas_to_array(new_value, dtype=dtype))
         else:
             message = 'The variable `{}` must be defined as `pymc3.' \
                       'Data` inside the model to allow updating. The ' \

From 63132e74942228a266336db15b351253510a1000 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 19:33:57 +0200
Subject: [PATCH 07/15] Added tests for pm.Data used for index variables

---
 pymc3/tests/test_data_container.py | 36 +++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py
index e49cab457a..46e0531ae2 100644
--- a/pymc3/tests/test_data_container.py
+++ b/pymc3/tests/test_data_container.py
@@ -36,16 +36,16 @@ def test_sample(self):
             x_shared = pm.Data('x_shared', x)
             b = pm.Normal('b', 0., 10.)
             pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y)
-            prior_trace0 = pm.sample_prior_predictive(1000)
 
+            prior_trace0 = pm.sample_prior_predictive(1000)
             trace = pm.sample(1000, init=None, tune=1000, chains=1)
             pp_trace0 = pm.sample_posterior_predictive(trace, 1000)
             pp_trace01 = pm.fast_sample_posterior_predictive(trace, 1000)
 
             x_shared.set_value(x_pred)
+            prior_trace1 = pm.sample_prior_predictive(1000)
             pp_trace1 = pm.sample_posterior_predictive(trace, samples=1000)
             pp_trace11 = pm.fast_sample_posterior_predictive(trace, samples=1000)
-            prior_trace1 = pm.sample_prior_predictive(1000)
 
         assert prior_trace0['b'].shape == (1000,)
         assert prior_trace0['obs'].shape == (1000, 100)
@@ -109,6 +109,36 @@ def test_sample_after_set_data(self):
         np.testing.assert_allclose(new_y, pp_tracef['obs'].mean(axis=0),
                                    atol=1e-1)
 
+    def test_shared_data_as_index(self):
+        """
+        Allow pm.Data to be used for index variables, i.e with integers as well as floats.
+        See https://github.com/pymc-devs/pymc3/issues/3813
+        """
+        with pm.Model() as model:
+            index = pm.Data('index', [2, 0, 1, 0, 2], dtype=int)
+            y = pm.Data('y', [1., 2., 3., 2., 1.])
+            alpha = pm.Normal('alpha', 0, 1.5, shape=3)
+            pm.Normal('obs', alpha[index], np.sqrt(1e-2), observed=y)
+
+            prior_trace = pm.sample_prior_predictive(1000, var_names=["alpha"])
+            trace = pm.sample(1000, init=None, tune=1000, chains=1)
+
+        # Predict on new data
+        new_index = np.array([0, 1, 2])
+        new_y = [5., 6., 9.]
+        with model:
+            pm.set_data(new_data={'index': new_index, 'y': new_y})
+            pp_trace = pm.sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"])
+            pp_tracef = pm.fast_sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"])
+
+        assert prior_trace['alpha'].shape == (1000, 3)
+        assert trace['alpha'].shape == (1000, 3)
+        assert pp_trace['alpha'].shape == (1000, 3)
+        assert pp_trace['obs'].shape == (1000, 3)
+        assert pp_tracef['alpha'].shape == (1000, 3)
+        assert pp_tracef['obs'].shape == (1000, 3)
+
+
     def test_creation_of_data_outside_model_context(self):
         with pytest.raises((IndexError, TypeError)) as error:
             pm.Data('data', [1.1, 2.2, 3.3])
@@ -147,7 +177,7 @@ def test_model_to_graphviz_for_model_with_data_container(self):
 
 def test_data_naming():
     """
-    This is a test for issue #3793 -- `Data` objects in named models are 
+    This is a test for issue #3793 -- `Data` objects in named models are
     not given model-relative names.
     """
     with pm.Model("named_model") as model:

From 1bd864282fd03c212bab3a7037cc96aab3f64207 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 19:55:06 +0200
Subject: [PATCH 08/15] Added tests for using pm.Data as RV input

---
 pymc3/tests/test_data_container.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py
index 46e0531ae2..fa38900751 100644
--- a/pymc3/tests/test_data_container.py
+++ b/pymc3/tests/test_data_container.py
@@ -139,6 +139,31 @@ def test_shared_data_as_index(self):
         assert pp_tracef['obs'].shape == (1000, 3)
 
 
+    def test_shared_data_as_rv_input(self):
+        """
+        Allow pm.Data to be used as input for other RVs.
+        See https://github.com/pymc-devs/pymc3/issues/3842
+        """
+        with pm.Model() as m:
+            x = pm.Data("x", [1.0, 2.0, 3.0])
+            _ = pm.Normal("y", mu=x, shape=3)
+            trace = pm.sample(chains=1)
+
+        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(),
+                                   atol=1e-1)
+        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), trace["y"].mean(0),
+                                  atol=1e-1)
+
+        with m:
+            pm.set_data({"x": np.array([2.0, 4.0, 6.0])})
+            trace = pm.sample(chains=1)
+
+        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), x.get_value(),
+                                   atol=1e-1)
+        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), trace["y"].mean(0),
+                                  atol=1e-1)
+
+
     def test_creation_of_data_outside_model_context(self):
         with pytest.raises((IndexError, TypeError)) as error:
             pm.Data('data', [1.1, 2.2, 3.3])

From 6597f28116393511f70bc6bef0b0e81950ac13b3 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 20:50:52 +0200
Subject: [PATCH 09/15] Ran Black on the data container test file

---
 pymc3/tests/test_data_container.py | 158 ++++++++++++++---------------
 1 file changed, 77 insertions(+), 81 deletions(-)

diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py
index fa38900751..38b640ac5a 100644
--- a/pymc3/tests/test_data_container.py
+++ b/pymc3/tests/test_data_container.py
@@ -20,22 +20,22 @@
 
 class TestData(SeededTest):
     def test_deterministic(self):
-        data_values = np.array([.5, .4, 5, 2])
+        data_values = np.array([0.5, 0.4, 5, 2])
         with pm.Model() as model:
-            X = pm.Data('X', data_values)
-            pm.Normal('y', 0, 1, observed=X)
+            X = pm.Data("X", data_values)
+            pm.Normal("y", 0, 1, observed=X)
             model.logp(model.test_point)
 
     def test_sample(self):
         x = np.random.normal(size=100)
         y = x + np.random.normal(scale=1e-2, size=100)
 
-        x_pred = np.linspace(-3, 3, 200, dtype='float32')
+        x_pred = np.linspace(-3, 3, 200, dtype="float32")
 
         with pm.Model():
-            x_shared = pm.Data('x_shared', x)
-            b = pm.Normal('b', 0., 10.)
-            pm.Normal('obs', b * x_shared, np.sqrt(1e-2), observed=y)
+            x_shared = pm.Data("x_shared", x)
+            b = pm.Normal("b", 0.0, 10.0)
+            pm.Normal("obs", b * x_shared, np.sqrt(1e-2), observed=y)
 
             prior_trace0 = pm.sample_prior_predictive(1000)
             trace = pm.sample(1000, init=None, tune=1000, chains=1)
@@ -47,67 +47,61 @@ def test_sample(self):
             pp_trace1 = pm.sample_posterior_predictive(trace, samples=1000)
             pp_trace11 = pm.fast_sample_posterior_predictive(trace, samples=1000)
 
-        assert prior_trace0['b'].shape == (1000,)
-        assert prior_trace0['obs'].shape == (1000, 100)
-        assert prior_trace1['obs'].shape == (1000, 200)
+        assert prior_trace0["b"].shape == (1000,)
+        assert prior_trace0["obs"].shape == (1000, 100)
+        assert prior_trace1["obs"].shape == (1000, 200)
 
-        assert pp_trace0['obs'].shape == (1000, 100)
-        assert pp_trace01['obs'].shape == (1000, 100)
+        assert pp_trace0["obs"].shape == (1000, 100)
+        assert pp_trace01["obs"].shape == (1000, 100)
 
-        np.testing.assert_allclose(x, pp_trace0['obs'].mean(axis=0), atol=1e-1)
-        np.testing.assert_allclose(x, pp_trace01['obs'].mean(axis=0), atol=1e-1)
+        np.testing.assert_allclose(x, pp_trace0["obs"].mean(axis=0), atol=1e-1)
+        np.testing.assert_allclose(x, pp_trace01["obs"].mean(axis=0), atol=1e-1)
 
-        assert pp_trace1['obs'].shape == (1000, 200)
-        assert pp_trace11['obs'].shape == (1000, 200)
+        assert pp_trace1["obs"].shape == (1000, 200)
+        assert pp_trace11["obs"].shape == (1000, 200)
 
-        np.testing.assert_allclose(x_pred, pp_trace1['obs'].mean(axis=0),
-                                   atol=1e-1)
-        np.testing.assert_allclose(x_pred, pp_trace11['obs'].mean(axis=0),
-                                   atol=1e-1)
+        np.testing.assert_allclose(x_pred, pp_trace1["obs"].mean(axis=0), atol=1e-1)
+        np.testing.assert_allclose(x_pred, pp_trace11["obs"].mean(axis=0), atol=1e-1)
 
     def test_sample_posterior_predictive_after_set_data(self):
         with pm.Model() as model:
-            x = pm.Data('x', [1., 2., 3.])
-            y = pm.Data('y', [1., 2., 3.])
-            beta = pm.Normal('beta', 0, 10.)
-            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
+            x = pm.Data("x", [1.0, 2.0, 3.0])
+            y = pm.Data("y", [1.0, 2.0, 3.0])
+            beta = pm.Normal("beta", 0, 10.0)
+            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             trace = pm.sample(1000, tune=1000, chains=1)
         # Predict on new data.
         with model:
             x_test = [5, 6, 9]
-            pm.set_data(new_data={'x': x_test})
+            pm.set_data(new_data={"x": x_test})
             y_test = pm.sample_posterior_predictive(trace)
             y_test1 = pm.fast_sample_posterior_predictive(trace)
 
-        assert y_test['obs'].shape == (1000, 3)
-        assert y_test1['obs'].shape == (1000, 3)
-        np.testing.assert_allclose(x_test, y_test['obs'].mean(axis=0),
-                                   atol=1e-1)
-        np.testing.assert_allclose(x_test, y_test1['obs'].mean(axis=0),
-                                   atol=1e-1)
+        assert y_test["obs"].shape == (1000, 3)
+        assert y_test1["obs"].shape == (1000, 3)
+        np.testing.assert_allclose(x_test, y_test["obs"].mean(axis=0), atol=1e-1)
+        np.testing.assert_allclose(x_test, y_test1["obs"].mean(axis=0), atol=1e-1)
 
     def test_sample_after_set_data(self):
         with pm.Model() as model:
-            x = pm.Data('x', [1., 2., 3.])
-            y = pm.Data('y', [1., 2., 3.])
-            beta = pm.Normal('beta', 0, 10.)
-            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
+            x = pm.Data("x", [1.0, 2.0, 3.0])
+            y = pm.Data("y", [1.0, 2.0, 3.0])
+            beta = pm.Normal("beta", 0, 10.0)
+            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             pm.sample(1000, init=None, tune=1000, chains=1)
         # Predict on new data.
-        new_x = [5., 6., 9.]
-        new_y = [5., 6., 9.]
+        new_x = [5.0, 6.0, 9.0]
+        new_y = [5.0, 6.0, 9.0]
         with model:
-            pm.set_data(new_data={'x': new_x, 'y': new_y})
+            pm.set_data(new_data={"x": new_x, "y": new_y})
             new_trace = pm.sample(1000, init=None, tune=1000, chains=1)
             pp_trace = pm.sample_posterior_predictive(new_trace, 1000)
             pp_tracef = pm.fast_sample_posterior_predictive(new_trace, 1000)
 
-        assert pp_trace['obs'].shape == (1000, 3)
-        assert pp_tracef['obs'].shape == (1000, 3)
-        np.testing.assert_allclose(new_y, pp_trace['obs'].mean(axis=0),
-                                   atol=1e-1)
-        np.testing.assert_allclose(new_y, pp_tracef['obs'].mean(axis=0),
-                                   atol=1e-1)
+        assert pp_trace["obs"].shape == (1000, 3)
+        assert pp_tracef["obs"].shape == (1000, 3)
+        np.testing.assert_allclose(new_y, pp_trace["obs"].mean(axis=0), atol=1e-1)
+        np.testing.assert_allclose(new_y, pp_tracef["obs"].mean(axis=0), atol=1e-1)
 
     def test_shared_data_as_index(self):
         """
@@ -115,29 +109,32 @@ def test_shared_data_as_index(self):
         See https://github.com/pymc-devs/pymc3/issues/3813
         """
         with pm.Model() as model:
-            index = pm.Data('index', [2, 0, 1, 0, 2], dtype=int)
-            y = pm.Data('y', [1., 2., 3., 2., 1.])
-            alpha = pm.Normal('alpha', 0, 1.5, shape=3)
-            pm.Normal('obs', alpha[index], np.sqrt(1e-2), observed=y)
+            index = pm.Data("index", [2, 0, 1, 0, 2], dtype=int)
+            y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
+            alpha = pm.Normal("alpha", 0, 1.5, shape=3)
+            pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)
 
             prior_trace = pm.sample_prior_predictive(1000, var_names=["alpha"])
             trace = pm.sample(1000, init=None, tune=1000, chains=1)
 
         # Predict on new data
         new_index = np.array([0, 1, 2])
-        new_y = [5., 6., 9.]
+        new_y = [5.0, 6.0, 9.0]
         with model:
-            pm.set_data(new_data={'index': new_index, 'y': new_y})
-            pp_trace = pm.sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"])
-            pp_tracef = pm.fast_sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"])
-
-        assert prior_trace['alpha'].shape == (1000, 3)
-        assert trace['alpha'].shape == (1000, 3)
-        assert pp_trace['alpha'].shape == (1000, 3)
-        assert pp_trace['obs'].shape == (1000, 3)
-        assert pp_tracef['alpha'].shape == (1000, 3)
-        assert pp_tracef['obs'].shape == (1000, 3)
-
+            pm.set_data(new_data={"index": new_index, "y": new_y})
+            pp_trace = pm.sample_posterior_predictive(
+                trace, 1000, var_names=["alpha", "obs"]
+            )
+            pp_tracef = pm.fast_sample_posterior_predictive(
+                trace, 1000, var_names=["alpha", "obs"]
+            )
+
+        assert prior_trace["alpha"].shape == (1000, 3)
+        assert trace["alpha"].shape == (1000, 3)
+        assert pp_trace["alpha"].shape == (1000, 3)
+        assert pp_trace["obs"].shape == (1000, 3)
+        assert pp_tracef["alpha"].shape == (1000, 3)
+        assert pp_tracef["obs"].shape == (1000, 3)
 
     def test_shared_data_as_rv_input(self):
         """
@@ -149,43 +146,42 @@ def test_shared_data_as_rv_input(self):
             _ = pm.Normal("y", mu=x, shape=3)
             trace = pm.sample(chains=1)
 
-        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(),
-                                   atol=1e-1)
-        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), trace["y"].mean(0),
-                                  atol=1e-1)
+        np.testing.assert_allclose(np.array([1.0, 2.0, 3.0]), x.get_value(), atol=1e-1)
+        np.testing.assert_allclose(
+            np.array([1.0, 2.0, 3.0]), trace["y"].mean(0), atol=1e-1
+        )
 
         with m:
             pm.set_data({"x": np.array([2.0, 4.0, 6.0])})
             trace = pm.sample(chains=1)
 
-        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), x.get_value(),
-                                   atol=1e-1)
-        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), trace["y"].mean(0),
-                                  atol=1e-1)
-
+        np.testing.assert_allclose(np.array([2.0, 4.0, 6.0]), x.get_value(), atol=1e-1)
+        np.testing.assert_allclose(
+            np.array([2.0, 4.0, 6.0]), trace["y"].mean(0), atol=1e-1
+        )
 
     def test_creation_of_data_outside_model_context(self):
         with pytest.raises((IndexError, TypeError)) as error:
-            pm.Data('data', [1.1, 2.2, 3.3])
-        error.match('No model on context stack')
+            pm.Data("data", [1.1, 2.2, 3.3])
+        error.match("No model on context stack")
 
     def test_set_data_to_non_data_container_variables(self):
         with pm.Model() as model:
-            x = np.array([1., 2., 3.])
-            y = np.array([1., 2., 3.])
-            beta = pm.Normal('beta', 0, 10.)
-            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
+            x = np.array([1.0, 2.0, 3.0])
+            y = np.array([1.0, 2.0, 3.0])
+            beta = pm.Normal("beta", 0, 10.0)
+            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             pm.sample(1000, init=None, tune=1000, chains=1)
         with pytest.raises(TypeError) as error:
-            pm.set_data({'beta': [1.1, 2.2, 3.3]}, model=model)
-        error.match('defined as `pymc3.Data` inside the model')
+            pm.set_data({"beta": [1.1, 2.2, 3.3]}, model=model)
+        error.match("defined as `pymc3.Data` inside the model")
 
     def test_model_to_graphviz_for_model_with_data_container(self):
         with pm.Model() as model:
-            x = pm.Data('x', [1., 2., 3.])
-            y = pm.Data('y', [1., 2., 3.])
-            beta = pm.Normal('beta', 0, 10.)
-            pm.Normal('obs', beta * x, np.sqrt(1e-2), observed=y)
+            x = pm.Data("x", [1.0, 2.0, 3.0])
+            y = pm.Data("y", [1.0, 2.0, 3.0])
+            beta = pm.Normal("beta", 0, 10.0)
+            pm.Normal("obs", beta * x, np.sqrt(1e-2), observed=y)
             pm.sample(1000, init=None, tune=1000, chains=1)
 
         g = pm.model_to_graphviz(model)

From b3a9dee069e35244f148b7e737cffb4f66c57f50 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Sun, 17 May 2020 22:02:04 +0200
Subject: [PATCH 10/15] Added release note

---
 RELEASE-NOTES.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 6ebf36917c..e42893d16b 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -3,7 +3,7 @@
 ## PyMC3 3.9 (On deck)
 
 ### New features
-- use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693)
+- Use [fastprogress](https://github.com/fastai/fastprogress) instead of tqdm [#3693](https://github.com/pymc-devs/pymc3/pull/3693).
 - `DEMetropolis` can now tune both `lambda` and `scaling` parameters, but by default neither of them are tuned. See [#3743](https://github.com/pymc-devs/pymc3/pull/3743) for more info.
 - `DEMetropolisZ`, an improved variant of `DEMetropolis` brings better parallelization and higher efficiency with fewer chains with a slower initial convergence. This implementation is experimental. See [#3784](https://github.com/pymc-devs/pymc3/pull/3784) for more info.
 - Notebooks that give insight into `DEMetropolis`, `DEMetropolisZ` and the `DifferentialEquation` interface are now located in the [Tutorials/Deep Dive](https://docs.pymc.io/nb_tutorials/index.html) section.
@@ -14,6 +14,8 @@
 - `pm.sample` now has support for adapting dense mass matrix using `QuadPotentialFullAdapt` (see [#3596](https://github.com/pymc-devs/pymc3/pull/3596), [#3705](https://github.com/pymc-devs/pymc3/pull/3705), [#3858](https://github.com/pymc-devs/pymc3/pull/3858), and [#3893](https://github.com/pymc-devs/pymc3/pull/3893)). Use `init="adapt_full"` or `init="jitter+adapt_full"` to use.
 - `Moyal` distribution added (see [#3870](https://github.com/pymc-devs/pymc3/pull/3870)).
 - `pm.LKJCholeskyCov` now automatically computes and returns the unpacked Cholesky decomposition, the correlations and the standard deviations of the covariance matrix (see [#3881](https://github.com/pymc-devs/pymc3/pull/3881)).
+- `pm.Data` container can now be used for index variables, i.e with integer data and not only floats (see [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
+- `pm.Data` container can now be used as input for other random variables (see [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
 
 ### Maintenance
 - Tuning results no longer leak into sequentially sampled `Metropolis` chains (see #3733 and #3796).

From 5e5440c1e0b41630b9cc82c9d6de90f73c8254f6 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Mon, 18 May 2020 10:11:04 +0200
Subject: [PATCH 11/15] Updated release notes

---
 RELEASE-NOTES.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index e42893d16b..490f905dea 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -14,8 +14,8 @@
 - `pm.sample` now has support for adapting dense mass matrix using `QuadPotentialFullAdapt` (see [#3596](https://github.com/pymc-devs/pymc3/pull/3596), [#3705](https://github.com/pymc-devs/pymc3/pull/3705), [#3858](https://github.com/pymc-devs/pymc3/pull/3858), and [#3893](https://github.com/pymc-devs/pymc3/pull/3893)). Use `init="adapt_full"` or `init="jitter+adapt_full"` to use.
 - `Moyal` distribution added (see [#3870](https://github.com/pymc-devs/pymc3/pull/3870)).
 - `pm.LKJCholeskyCov` now automatically computes and returns the unpacked Cholesky decomposition, the correlations and the standard deviations of the covariance matrix (see [#3881](https://github.com/pymc-devs/pymc3/pull/3881)).
-- `pm.Data` container can now be used for index variables, i.e with integer data and not only floats (see [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
-- `pm.Data` container can now be used as input for other random variables (see [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
+- `pm.Data` container can now be used for index variables, i.e. with integer data and not only floats (issue [#3813](https://github.com/pymc-devs/pymc3/issues/3813), fixed by [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
+- `pm.Data` container can now be used as input for other random variables (issue [#3842](https://github.com/pymc-devs/pymc3/issues/3842), fixed by [#3925](https://github.com/pymc-devs/pymc3/pull/3925)).
 
 ### Maintenance
 - Tuning results no longer leak into sequentially sampled `Metropolis` chains (see #3733 and #3796).

From 0d07347b7a9a6edbfbe93dac2471ab6f35fb829a Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Mon, 18 May 2020 12:21:54 +0200
Subject: [PATCH 12/15] Updated code in light of Luciano's comments

---
 pymc3/data.py                      | 14 ++------------
 pymc3/model.py                     | 19 +++----------------
 pymc3/tests/test_data_container.py |  2 +-
 3 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/pymc3/data.py b/pymc3/data.py
index fa569dd332..35f797a576 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -479,17 +479,7 @@ class Data:
     https://docs.pymc.io/notebooks/data_container.html
     """
 
-    def __new__(self, name, value, dtype=None):
-        if not dtype:
-            if hasattr(value, "dtype"):
-                # if no dtype given but available as attr of value, use that as dtype
-                dtype = value.dtype
-            elif isinstance(value, int):
-                dtype = int
-            else:
-                # otherwise, assume float
-                dtype = float
-
+    def __new__(self, name, value):
         # Add data container to the named variables of the model.
         try:
             model = pm.Model.get_context()
@@ -502,7 +492,7 @@ def __new__(self, name, value, dtype=None):
 
         # `pm.model.pandas_to_array` takes care of parameter `value` and
         # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype=dtype), name)
+        shared_object = theano.shared(pm.model.pandas_to_array(value), name)
 
         # To draw the node for this variable in the graphviz Digraph we need
         # its shape.
diff --git a/pymc3/model.py b/pymc3/model.py
index 45d1629794..d891e909fc 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1275,15 +1275,7 @@ def set_data(new_data, model=None):
 
     for variable_name, new_value in new_data.items():
         if isinstance(model[variable_name], SharedVariable):
-            if hasattr(new_value, "dtype"):
-                # if no dtype given but available as attr of value, use that as dtype
-                dtype = new_value.dtype
-            elif isinstance(new_value, int):
-                dtype = int
-            else:
-                # otherwise, assume float
-                dtype = float
-            model[variable_name].set_value(pandas_to_array(new_value, dtype=dtype))
+            model[variable_name].set_value(pandas_to_array(new_value))
         else:
             message = 'The variable `{}` must be defined as `pymc3.' \
                       'Data` inside the model to allow updating. The ' \
@@ -1490,7 +1482,7 @@ def init_value(self):
         return self.tag.test_value
 
 
-def pandas_to_array(data, dtype=float):
+def pandas_to_array(data):
     if hasattr(data, 'values'):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
@@ -1510,12 +1502,7 @@ def pandas_to_array(data, dtype=float):
     else:
         ret = np.asarray(data)
 
-    if dtype in [int, np.int8, np.int16, np.int32, np.int64]:
-        return pm.intX(ret)
-    elif dtype in [float, np.float16,  np.float32, np.float64]:
-        return pm.floatX(ret)
-    else:
-        raise ValueError('Unsupported type for pandas_to_array: %s' % str(dtype))
+    return ret
 
 
 def as_tensor(data, name, model, distribution):
diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py
index 38b640ac5a..9bdee14f8b 100644
--- a/pymc3/tests/test_data_container.py
+++ b/pymc3/tests/test_data_container.py
@@ -109,7 +109,7 @@ def test_shared_data_as_index(self):
         See https://github.com/pymc-devs/pymc3/issues/3813
         """
         with pm.Model() as model:
-            index = pm.Data("index", [2, 0, 1, 0, 2], dtype=int)
+            index = pm.Data("index", [2, 0, 1, 0, 2])
             y = pm.Data("y", [1.0, 2.0, 3.0, 2.0, 1.0])
             alpha = pm.Normal("alpha", 0, 1.5, shape=3)
             pm.Normal("obs", alpha[index], np.sqrt(1e-2), observed=y)

From 41200ef7bb67607827c114ab05b3845cce5c84f0 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Mon, 18 May 2020 15:34:54 +0200
Subject: [PATCH 13/15] Fixed implementation of integer checking

---
 pymc3/data.py  | 17 ++++++++++++++++-
 pymc3/model.py | 24 +++++++++++++++++++++---
 2 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/pymc3/data.py b/pymc3/data.py
index 35f797a576..67e8fb1d89 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -480,6 +480,21 @@ class Data:
     """
 
     def __new__(self, name, value):
+        if isinstance(value, list):
+            print(value, type(value))
+            value = np.array(value)
+            print("Converted list to np: ", value, type(value), value.dtype)
+
+        # Type handling to enable index variables
+        # set int type when appropriate:
+        if "int" in str(value.dtype):
+            dtype = pm.intX(value).dtype
+            print(value, ": ", dtype)
+        # otherwise, assume float
+        else:
+            dtype = theano.config.floatX
+            print(value, ": ", dtype)
+
         # Add data container to the named variables of the model.
         try:
             model = pm.Model.get_context()
@@ -492,7 +507,7 @@ def __new__(self, name, value):
 
         # `pm.model.pandas_to_array` takes care of parameter `value` and
         # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value), name)
+        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype=dtype), name)
 
         # To draw the node for this variable in the graphviz Digraph we need
         # its shape.
diff --git a/pymc3/model.py b/pymc3/model.py
index d891e909fc..65a6191a9f 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1275,7 +1275,20 @@ def set_data(new_data, model=None):
 
     for variable_name, new_value in new_data.items():
         if isinstance(model[variable_name], SharedVariable):
-            model[variable_name].set_value(pandas_to_array(new_value))
+            if isinstance(new_value, list):
+                print(new_value, type(new_value))
+                new_value = np.array(new_value)
+                print("Converted list to np: ", new_value, type(new_value), new_value.dtype)
+            # Type handling to enable index variables
+            # set int type when appropriate:
+            if "int" in str(new_value.dtype):
+                dtype = pm.intX(new_value).dtype
+                print(new_value, ": ", dtype)
+            # otherwise, assume float
+            else:
+                dtype = theano.config.floatX
+                print(new_value, ": ", dtype)
+            model[variable_name].set_value(pandas_to_array(new_value, dtype=dtype))
         else:
             message = 'The variable `{}` must be defined as `pymc3.' \
                       'Data` inside the model to allow updating. The ' \
@@ -1482,7 +1495,7 @@ def init_value(self):
         return self.tag.test_value
 
 
-def pandas_to_array(data):
+def pandas_to_array(data, dtype=theano.config.floatX):
     if hasattr(data, 'values'):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
@@ -1502,7 +1515,12 @@ def pandas_to_array(data):
     else:
         ret = np.asarray(data)
 
-    return ret
+    if "int" in str(dtype):
+        print("in pandas function, int boucle: ", ret, str(dtype))
+        return pm.intX(ret)
+    else:
+        print("in pandas function, float boucle: ", ret, str(dtype))
+        return pm.floatX(ret)
 
 
 def as_tensor(data, name, model, distribution):

From 7c359d81bb0ff29439698730b0ab179b7036fd90 Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Mon, 18 May 2020 15:59:19 +0200
Subject: [PATCH 14/15] Simplified implementation of type checking

---
 pymc3/data.py  | 14 +-------------
 pymc3/model.py | 21 +++++----------------
 2 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/pymc3/data.py b/pymc3/data.py
index 67e8fb1d89..04d8901fae 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -481,19 +481,7 @@ class Data:
 
     def __new__(self, name, value):
         if isinstance(value, list):
-            print(value, type(value))
             value = np.array(value)
-            print("Converted list to np: ", value, type(value), value.dtype)
-
-        # Type handling to enable index variables
-        # set int type when appropriate:
-        if "int" in str(value.dtype):
-            dtype = pm.intX(value).dtype
-            print(value, ": ", dtype)
-        # otherwise, assume float
-        else:
-            dtype = theano.config.floatX
-            print(value, ": ", dtype)
 
         # Add data container to the named variables of the model.
         try:
@@ -507,7 +495,7 @@ def __new__(self, name, value):
 
         # `pm.model.pandas_to_array` takes care of parameter `value` and
         # transforms it to something digestible for pymc3
-        shared_object = theano.shared(pm.model.pandas_to_array(value, dtype=dtype), name)
+        shared_object = theano.shared(pm.model.pandas_to_array(value), name)
 
         # To draw the node for this variable in the graphviz Digraph we need
         # its shape.
diff --git a/pymc3/model.py b/pymc3/model.py
index 65a6191a9f..8d1cb2aacf 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1276,19 +1276,8 @@ def set_data(new_data, model=None):
     for variable_name, new_value in new_data.items():
         if isinstance(model[variable_name], SharedVariable):
             if isinstance(new_value, list):
-                print(new_value, type(new_value))
                 new_value = np.array(new_value)
-                print("Converted list to np: ", new_value, type(new_value), new_value.dtype)
-            # Type handling to enable index variables
-            # set int type when appropriate:
-            if "int" in str(new_value.dtype):
-                dtype = pm.intX(new_value).dtype
-                print(new_value, ": ", dtype)
-            # otherwise, assume float
-            else:
-                dtype = theano.config.floatX
-                print(new_value, ": ", dtype)
-            model[variable_name].set_value(pandas_to_array(new_value, dtype=dtype))
+            model[variable_name].set_value(pandas_to_array(new_value))
         else:
             message = 'The variable `{}` must be defined as `pymc3.' \
                       'Data` inside the model to allow updating. The ' \
@@ -1495,7 +1484,7 @@ def init_value(self):
         return self.tag.test_value
 
 
-def pandas_to_array(data, dtype=theano.config.floatX):
+def pandas_to_array(data):
     if hasattr(data, 'values'):  # pandas
         if data.isnull().any().any():  # missing values
             ret = np.ma.MaskedArray(data.values, data.isnull().values)
@@ -1515,11 +1504,11 @@ def pandas_to_array(data, dtype=theano.config.floatX):
     else:
         ret = np.asarray(data)
 
-    if "int" in str(dtype):
-        print("in pandas function, int boucle: ", ret, str(dtype))
+    # type handling to enable index variables when data is int:
+    if "int" in str(data.dtype):
         return pm.intX(ret)
+    # otherwise, assume float:
     else:
-        print("in pandas function, float boucle: ", ret, str(dtype))
         return pm.floatX(ret)
 
 

From f7bf6dbf4f919d2436782fd074419303985a06ef Mon Sep 17 00:00:00 2001
From: AlexAndorra <andorra.alexandre@gmail.com>
Date: Mon, 18 May 2020 16:16:10 +0200
Subject: [PATCH 15/15] Corrected implementation for other uses of
 pandas_to_array

---
 pymc3/model.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pymc3/model.py b/pymc3/model.py
index 8d1cb2aacf..8d54a45878 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -1505,9 +1505,13 @@ def pandas_to_array(data):
         ret = np.asarray(data)
 
     # type handling to enable index variables when data is int:
-    if "int" in str(data.dtype):
-        return pm.intX(ret)
-    # otherwise, assume float:
+    if hasattr(data, "dtype"):
+        if "int" in str(data.dtype):
+            return pm.intX(ret)
+        # otherwise, assume float:
+        else:
+            return pm.floatX(ret)
+    # needed for uses of this function other than with pm.Data:
     else:
         return pm.floatX(ret)