forked from pymc-devs/pymc
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathtest_model_helpers.py
217 lines (177 loc) · 9.03 KB
/
test_model_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
import numpy as np
import numpy.ma as ma
import numpy.testing as npt
import pandas as pd
import pymc3 as pm
import scipy.sparse as sps
import theano
import theano.tensor as tt
import theano.sparse as sparse
class TestHelperFunc:
    """Tests for ``pm.model.pandas_to_array`` and ``pm.model.as_tensor``."""

    def test_pandas_to_array_casting(self):
        """
        Ensure that pandas_to_array returns the dense array, masked array,
        graph variable, TensorVariable, or sparse matrix as appropriate
        when it has to cast the variable.
        """
        # Force a cast of the input by picking the float dtype that differs
        # from theano.config.floatX. Strings must be compared with `==`;
        # `is` checks object identity and depends on string interning.
        input_type = 'float32' if theano.config.floatX == 'float64' else 'float64'

        # Create the various inputs to the function
        sparse_input = sps.csr_matrix(np.eye(3), dtype=input_type)
        dense_input = np.arange(9, dtype=input_type).reshape((3, 3))

        input_name = 'input_variable'
        theano_graph_input = tt.as_tensor(dense_input, name=input_name)

        pandas_input = pd.DataFrame(dense_input)

        # All the even numbers are replaced with NaN
        missing_pandas_input = pd.DataFrame(
            np.array([[np.nan, 1, np.nan],
                      [3, np.nan, 5],
                      [np.nan, 7, np.nan]], dtype=input_type)
        )
        # Same pattern as above: every even entry of dense_input is masked
        masked_array_input = ma.array(dense_input,
                                      mask=(np.mod(dense_input, 2) == 0))

        # Create a generator object. Apparently the generator object needs to
        # yield numpy arrays.
        square_generator = (np.array([i**2], dtype=input_type) for i in range(100))

        # Alias the function to be tested
        func = pm.model.pandas_to_array

        #####
        # Perform the various tests
        #####
        # Check function behavior with dense arrays and pandas dataframes
        # without missing values
        for input_value in [dense_input, pandas_input]:
            func_output = func(input_value)
            assert isinstance(func_output, np.ndarray)
            assert func_output.shape == input_value.shape
            npt.assert_allclose(func_output, dense_input)

        # Check function behavior with sparse matrix inputs
        sparse_output = func(sparse_input)
        assert sps.issparse(sparse_output)
        assert sparse_output.shape == sparse_input.shape
        npt.assert_allclose(sparse_output.toarray(),
                            sparse_input.toarray())

        # Check function behavior when using masked array inputs and pandas
        # objects with missing data
        for input_value in [masked_array_input, missing_pandas_input]:
            func_output = func(input_value)
            assert isinstance(func_output, ma.core.MaskedArray)
            assert func_output.shape == input_value.shape
            npt.assert_allclose(func_output, masked_array_input)

        # Check function behavior with Theano graph variable. In the casting
        # case the named variable is expected to be the first input of the
        # op that produced the output, not the output itself.
        theano_output = func(theano_graph_input)
        assert isinstance(theano_output, theano.gof.graph.Variable)
        assert theano_output.owner.inputs[0].name == input_name

        # Check function behavior with generator data
        generator_output = func(square_generator)

        # Make sure the returned object has .set_gen and .set_default methods
        assert hasattr(generator_output, "set_gen")
        assert hasattr(generator_output, "set_default")
        # Make sure the returned object is a Theano TensorVariable
        assert isinstance(generator_output, tt.TensorVariable)

    def test_pandas_to_array_not_casting(self):
        """
        Ensure that pandas_to_array returns the dense array, masked array,
        graph variable, TensorVariable, or sparse matrix as appropriate
        when it does not have to cast the variable.
        """
        # Input type chosen so the function does not need to cast.
        # It could also have been int64, but then the variable holding the
        # missing values (NaN) would have been invalid.
        input_type = theano.config.floatX

        # Create the various inputs to the function
        sparse_input = sps.csr_matrix(np.eye(3), dtype=input_type)
        dense_input = np.arange(9, dtype=input_type).reshape((3, 3))

        input_name = 'input_variable'
        theano_graph_input = tt.as_tensor(dense_input, name=input_name)

        pandas_input = pd.DataFrame(dense_input)

        # All the even numbers are replaced with NaN
        missing_pandas_input = pd.DataFrame(
            np.array([[np.nan, 1, np.nan],
                      [3, np.nan, 5],
                      [np.nan, 7, np.nan]], dtype=input_type)
        )
        # Same pattern as above: every even entry of dense_input is masked
        masked_array_input = ma.array(dense_input,
                                      mask=(np.mod(dense_input, 2) == 0))

        # Create a generator object. Apparently the generator object needs to
        # yield numpy arrays.
        square_generator = (np.array([i**2], dtype=input_type) for i in range(100))

        # Alias the function to be tested
        func = pm.model.pandas_to_array

        #####
        # Perform the various tests
        #####
        # Check function behavior with dense arrays and pandas dataframes
        # without missing values
        for input_value in [dense_input, pandas_input]:
            func_output = func(input_value)
            assert isinstance(func_output, np.ndarray)
            assert func_output.shape == input_value.shape
            npt.assert_allclose(func_output, dense_input)

        # Check function behavior with sparse matrix inputs
        sparse_output = func(sparse_input)
        assert sps.issparse(sparse_output)
        assert sparse_output.shape == sparse_input.shape
        npt.assert_allclose(sparse_output.toarray(),
                            sparse_input.toarray())

        # Check function behavior when using masked array inputs and pandas
        # objects with missing data
        for input_value in [masked_array_input, missing_pandas_input]:
            func_output = func(input_value)
            assert isinstance(func_output, ma.core.MaskedArray)
            assert func_output.shape == input_value.shape
            npt.assert_allclose(func_output, masked_array_input)

        # Check function behavior with Theano graph variable. Without a cast
        # the output itself is expected to carry the original name.
        theano_output = func(theano_graph_input)
        assert isinstance(theano_output, theano.gof.graph.Variable)
        assert theano_output.name == input_name

        # Check function behavior with generator data
        generator_output = func(square_generator)

        # Make sure the returned object has .set_gen and .set_default methods
        assert hasattr(generator_output, "set_gen")
        assert hasattr(generator_output, "set_default")
        # Make sure the returned object is a Theano TensorVariable
        assert isinstance(generator_output, tt.TensorVariable)

    def test_as_tensor(self):
        """
        Check returned values for `data` given known inputs to `as_tensor()`.

        Note that ndarrays should return a TensorConstant and sparse inputs
        should return a Sparse Theano object.
        """
        # Create the various inputs to the function
        input_name = 'testing_inputs'
        sparse_input = sps.csr_matrix(np.eye(3))
        dense_input = np.arange(9).reshape((3, 3))
        # Every even entry of dense_input is masked
        masked_array_input = ma.array(dense_input,
                                      mask=(np.mod(dense_input, 2) == 0))

        # Create a fake model and fake distribution to be used for the test
        fake_model = pm.Model()
        with fake_model:
            fake_distribution = pm.Normal.dist(mu=0, sigma=1)
            # Create the testval attribute simply for the sake of model testing
            fake_distribution.testval = None

        # Alias the function to be tested
        func = pm.model.as_tensor

        # Check function behavior using the various inputs
        dense_output = func(dense_input,
                            input_name,
                            fake_model,
                            fake_distribution)
        sparse_output = func(sparse_input,
                             input_name,
                             fake_model,
                             fake_distribution)
        masked_output = func(masked_array_input,
                             input_name,
                             fake_model,
                             fake_distribution)

        # Ensure that the missing values are appropriately set to None
        for func_output in [dense_output, sparse_output]:
            assert func_output.missing_values is None

        # Ensure that the Theano variable names are correctly set.
        # Note that the output for masked inputs do not have their names set
        # to the passed value.
        for func_output in [dense_output, sparse_output]:
            assert func_output.name == input_name

        # Ensure that the returned objects are all of the correct type
        assert isinstance(dense_output, tt.TensorConstant)
        assert sparse.basic._is_sparse_variable(sparse_output)

        # The concrete type of the masked output is not checked here; just
        # ensure that it reports missing values.
        assert masked_output.missing_values is not None