Skip to content

allow pymc3.Data() to support both int and float input data #3816

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions pymc3/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,10 +478,20 @@ class Data:
For more information, take a look at this example notebook
https://docs.pymc.io/notebooks/data_container.html
"""
def __new__(self, name, value):
def __new__(self, name, value, dtype = None):
if dtype is None:
if hasattr(value, 'dtype'):
# no dtype was passed in: reuse the `dtype` attribute of `value` when it has one
dtype = value.dtype
elif isinstance(value, int):
dtype = int
else:
# otherwise, assume float
dtype = float

# `pm.model.pandas_to_array` takes care of parameter `value` and
# transforms it to something digestible for pymc3
shared_object = theano.shared(pm.model.pandas_to_array(value), name)
shared_object = theano.shared(pm.model.pandas_to_array(value, dtype = dtype), name)

# To draw the node for this variable in the graphviz Digraph we need
# its shape.
Expand Down
10 changes: 7 additions & 3 deletions pymc3/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1473,7 +1473,7 @@ def init_value(self):
return self.tag.test_value


def pandas_to_array(data):
def pandas_to_array(data, dtype = float):
if hasattr(data, 'values'): # pandas
if data.isnull().any().any(): # missing values
ret = np.ma.MaskedArray(data.values, data.isnull().values)
Expand All @@ -1492,8 +1492,12 @@ def pandas_to_array(data):
ret = generator(data)
else:
ret = np.asarray(data)
return pm.floatX(ret)

if dtype in [float, np.float32, np.float64]:
return pm.floatX(ret)
elif dtype in [int, np.int32, np.int64]:
return pm.intX(ret)
else:
raise ValueError('Unsupported type for pandas_to_array: %s' % str(dtype))

def as_tensor(data, name, model, distribution):
dtype = distribution.dtype
Expand Down