1- import numpy
2- from scipy .sparse import issparse
1+ import numpy as np
2+ import scipy .sparse as sp
33
44import Orange .data
55from Orange .statistics import distribution , basic_stats
@@ -26,11 +26,11 @@ def __init__(self, variable, value=0):
2626 self .value = value
2727
def transform(self, c):
    """Replace NaN entries of `c` with ``self.value``.

    For a scipy sparse input only the explicitly stored entries
    (``c.data``) are inspected; the ``data`` attribute of that same
    object is reassigned and the object itself is returned. For any
    other input a new array is returned and `c` is left untouched.
    """
    if not sp.issparse(c):
        return np.where(np.isnan(c), self.value, c)

    # Sparse case: patch the stored values and hand back the same matrix.
    stored = c.data
    c.data = np.where(np.isnan(stored), self.value, stored)
    return c
3434
3535
3636class BaseImputeMethod (Reprable ):
@@ -83,7 +83,7 @@ class DropInstances(BaseImputeMethod):
8383
def __call__(self, data, variable):
    """Return a flattened boolean mask marking NaN entries of `variable`.

    The column position is looked up through ``data.domain.index`` and
    the selection ``data[:, position]`` is tested with ``np.isnan``.
    NOTE(review): presumably the caller uses this mask to drop the
    flagged instances -- confirm against the call site.
    """
    position = data.domain.index(variable)
    missing = np.isnan(data[:, position])
    return missing.reshape(-1)
8787
8888
8989class Average (BaseImputeMethod ):
@@ -154,13 +154,13 @@ def __init__(self, variable, model):
154154
155155 def __call__ (self , data ):
156156 if isinstance (data , Orange .data .Instance ):
157- column = numpy .array ([float (data [self .variable ])])
157+ column = np .array ([float (data [self .variable ])])
158158 else :
159- column = numpy .array (data .get_column_view (self .variable )[0 ],
159+ column = np .array (data .get_column_view (self .variable )[0 ],
160160 copy = True )
161161
162- mask = numpy .isnan (column )
163- if not numpy .any (mask ):
162+ mask = np .isnan (column )
163+ if not np .any (mask ):
164164 return column
165165
166166 if isinstance (data , Orange .data .Instance ):
@@ -224,7 +224,9 @@ def domain_with_class_var(domain, class_var):
224224
class IsDefined(Transformation):
    """Transformation that flags which values are not NaN."""

    def transform(self, c):
        """Return a boolean array that is True where `c` is not NaN.

        A scipy sparse input is converted to a dense array with
        ``toarray()`` before the NaN test is applied.
        """
        dense = c.toarray() if sp.issparse(c) else c
        return ~np.isnan(dense)
228230
229231
230232class AsValue (BaseImputeMethod ):
@@ -243,7 +245,7 @@ def __call__(self, data, variable):
243245 base_value = variable .base_value ,
244246 compute_value = Lookup (
245247 variable ,
246- numpy .arange (len (variable .values ), dtype = int ),
248+ np .arange (len (variable .values ), dtype = int ),
247249 unknown = len (variable .values ))
248250 )
249251 return var
@@ -281,29 +283,32 @@ def __init__(self, variable, distribution):
281283 self .distribution = distribution
282284
283285 if variable .is_discrete :
284- counts = numpy .array (distribution )
286+ counts = np .array (distribution )
285287 elif variable .is_continuous :
286- counts = numpy .array (distribution )[1 , :]
288+ counts = np .array (distribution )[1 , :]
287289 else :
288290 raise TypeError ("Only discrete and continuous "
289291 "variables are supported" )
290- csum = numpy .sum (counts )
292+ csum = np .sum (counts )
291293 if csum > 0 :
292294 self .sample_prob = counts / csum
293295 else :
294- self .sample_prob = numpy .ones_like (counts ) / len (counts )
296+ self .sample_prob = np .ones_like (counts ) / len (counts )
295297
296298 def transform (self , c ):
297- c = numpy .array (c , copy = True )
298- nanindices = numpy .flatnonzero (numpy .isnan (c ))
299+ if not sp .issparse (c ):
300+ c = np .array (c , copy = True )
301+ else :
302+ c = c .toarray ().ravel ()
303+ nanindices = np .flatnonzero (np .isnan (c ))
299304
300305 if self .variable .is_discrete :
301- sample = numpy .random .choice (
306+ sample = np .random .choice (
302307 len (self .variable .values ), size = len (nanindices ),
303308 replace = True , p = self .sample_prob )
304309 else :
305- sample = numpy .random .choice (
306- numpy .asarray (self .distribution )[0 , :], size = len (nanindices ),
310+ sample = np .random .choice (
311+ np .asarray (self .distribution )[0 , :], size = len (nanindices ),
307312 replace = True , p = self .sample_prob )
308313
309314 c [nanindices ] = sample
@@ -328,9 +333,9 @@ def __call__(self, data, variable):
328333 raise ValueError ("'{}' has an unknown distribution"
329334 .format (variable ))
330335
331- if variable .is_discrete and numpy .sum (dist ) == 0 :
336+ if variable .is_discrete and np .sum (dist ) == 0 :
332337 dist += 1 / len (dist )
333- elif variable .is_continuous and numpy .sum (dist [1 , :]) == 0 :
338+ elif variable .is_continuous and np .sum (dist [1 , :]) == 0 :
334339 dist [1 , :] += 1 / dist .shape [1 ]
335340 return variable .copy (
336341 compute_value = ReplaceUnknownsRandom (variable , dist ))
0 commit comments