@@ -1981,10 +1981,9 @@ def infer(self, handler: "Table"):
         new_self.read_metadata(handler)
         return new_self
 
-    def convert(
-        self, values: np.ndarray, nan_rep, encoding, errors, start=None, stop=None
-    ):
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         """ set the values from this selection: take = take ownership """
+        assert isinstance(values, np.ndarray), type(values)
 
         # values is a recarray
         if values.dtype.fields is not None:
@@ -1993,21 +1992,23 @@ def convert(
         values = _maybe_convert(values, self.kind, encoding, errors)
 
         kwargs = dict()
+        kwargs["name"] = _ensure_decoded(self.index_name)
+
         if self.freq is not None:
             kwargs["freq"] = _ensure_decoded(self.freq)
-        if self.index_name is not None:
-            kwargs["name"] = _ensure_decoded(self.index_name)
+
         # making an Index instance could throw a number of different errors
         try:
-            self.values = Index(values, **kwargs)
+            new_pd_index = Index(values, **kwargs)
         except ValueError:
             # if the output freq is different that what we recorded,
             # it should be None (see also 'doc example part 2')
             if "freq" in kwargs:
                 kwargs["freq"] = None
-            self.values = Index(values, **kwargs)
+            new_pd_index = Index(values, **kwargs)
 
-        self.values = _set_tz(self.values, self.tz)
+        new_pd_index = _set_tz(new_pd_index, self.tz)
+        self.values = new_pd_index
 
     def take_data(self):
         """ return the values & release the memory """
@@ -2167,35 +2168,19 @@ class GenericIndexCol(IndexCol):
     def is_indexed(self) -> bool:
         return False
 
-    def convert(
-        self,
-        values,
-        nan_rep,
-        encoding,
-        errors,
-        start: Optional[int] = None,
-        stop: Optional[int] = None,
-    ):
-        """ set the values from this selection: take = take ownership
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
+        """
+        Set the values from this selection.
 
         Parameters
         ----------
-
         values : np.ndarray
         nan_rep : str
         encoding : str
         errors : str
-        start : int, optional
-            Table row number: the start of the sub-selection.
-        stop : int, optional
-            Table row number: the end of the sub-selection. Values larger than
-            the underlying table's row count are normalized to that.
         """
-        assert self.table is not None  # for mypy
-
-        _start = start if start is not None else 0
-        _stop = min(stop, self.table.nrows) if stop is not None else self.table.nrows
-        self.values = Int64Index(np.arange(_stop - _start))
+        assert isinstance(values, np.ndarray), type(values)
+        self.values = Int64Index(np.arange(len(values)))
 
     def get_attr(self):
         pass
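A brief illustration (values invented) of why the simplified `GenericIndexCol.convert` no longer needs `start`/`stop`: the `values` array handed to it already reflects any row sub-selection, so the dummy integer index can be sized directly from the data instead of re-deriving it from `table.nrows`.

```python
import numpy as np
from pandas import Int64Index  # available in the pandas version this diff targets

# e.g. the rows read back for a selection of start=100, stop=125
values = np.arange(100, 125)

# equivalent to the old Int64Index(np.arange(_stop - _start))
dummy_index = Int64Index(np.arange(len(values)))
assert len(dummy_index) == 25
```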
@@ -2395,10 +2380,11 @@ def validate_attr(self, append):
                     "items dtype in table!"
                 )
 
-    def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         """set the data from this selection (and convert to the correct dtype
         if we can)
         """
+        assert isinstance(values, np.ndarray), type(values)
 
         # values is a recarray
         if values.dtype.fields is not None:
@@ -2410,69 +2396,74 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
         else:
             self.data = values
 
+        own_data = self.data
+
         # use the meta if needed
         meta = _ensure_decoded(self.meta)
 
+        assert self.dtype is not None
+
         # convert to the correct dtype
-        if self.dtype is not None:
-            dtype = _ensure_decoded(self.dtype)
+        dtype = _ensure_decoded(self.dtype)
 
-            # reverse converts
-            if dtype == "datetime64":
+        # reverse converts
+        if dtype == "datetime64":
 
-                # recreate with tz if indicated
-                self.data = _set_tz(self.data, self.tz, coerce=True)
+            # recreate with tz if indicated
+            own_data = _set_tz(own_data, self.tz, coerce=True)
 
-            elif dtype == "timedelta64":
-                self.data = np.asarray(self.data, dtype="m8[ns]")
-            elif dtype == "date":
-                try:
-                    self.data = np.asarray(
-                        [date.fromordinal(v) for v in self.data], dtype=object
-                    )
-                except ValueError:
-                    self.data = np.asarray(
-                        [date.fromtimestamp(v) for v in self.data], dtype=object
-                    )
-
-            elif meta == "category":
-
-                # we have a categorical
-                categories = self.metadata
-                codes = self.data.ravel()
-
-                # if we have stored a NaN in the categories
-                # then strip it; in theory we could have BOTH
-                # -1s in the codes and nulls :<
-                if categories is None:
-                    # Handle case of NaN-only categorical columns in which case
-                    # the categories are an empty array; when this is stored,
-                    # pytables cannot write a zero-len array, so on readback
-                    # the categories would be None and `read_hdf()` would fail.
-                    categories = Index([], dtype=np.float64)
-                else:
-                    mask = isna(categories)
-                    if mask.any():
-                        categories = categories[~mask]
-                        codes[codes != -1] -= mask.astype(int).cumsum().values
-
-                self.data = Categorical.from_codes(
-                    codes, categories=categories, ordered=self.ordered
+        elif dtype == "timedelta64":
+            own_data = np.asarray(own_data, dtype="m8[ns]")
+        elif dtype == "date":
+            try:
+                own_data = np.asarray(
+                    [date.fromordinal(v) for v in own_data], dtype=object
+                )
+            except ValueError:
+                own_data = np.asarray(
+                    [date.fromtimestamp(v) for v in own_data], dtype=object
                 )
 
+        elif meta == "category":
+
+            # we have a categorical
+            categories = self.metadata
+            codes = own_data.ravel()
+
+            # if we have stored a NaN in the categories
+            # then strip it; in theory we could have BOTH
+            # -1s in the codes and nulls :<
+            if categories is None:
+                # Handle case of NaN-only categorical columns in which case
+                # the categories are an empty array; when this is stored,
+                # pytables cannot write a zero-len array, so on readback
+                # the categories would be None and `read_hdf()` would fail.
+                categories = Index([], dtype=np.float64)
             else:
+                mask = isna(categories)
+                if mask.any():
+                    categories = categories[~mask]
+                    codes[codes != -1] -= mask.astype(int).cumsum().values
 
-                try:
-                    self.data = self.data.astype(dtype, copy=False)
-                except TypeError:
-                    self.data = self.data.astype("O", copy=False)
+            own_data = Categorical.from_codes(
+                codes, categories=categories, ordered=self.ordered
+            )
+
+        else:
+
+            try:
+                own_data = own_data.astype(dtype, copy=False)
+            except TypeError:
+                own_data = own_data.astype("O", copy=False)
 
         # convert nans / decode
         if _ensure_decoded(self.kind) == "string":
-            self.data = _unconvert_string_array(
-                self.data, nan_rep=nan_rep, encoding=encoding, errors=errors
+            own_data = _unconvert_string_array(
+                own_data, nan_rep=nan_rep, encoding=encoding, errors=errors
             )
 
+        self.data = own_data
+
     def get_attr(self):
         """ get the data for this column """
         self.values = getattr(self.attrs, self.kind_attr, None)
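As a reference for the `meta == "category"` branch above, here is a self-contained sketch (illustrative data, not from the patch) of rebuilding a Categorical from the stored integer codes plus the categories kept in the metadata node:

```python
import numpy as np
from pandas import Categorical, Index

categories = Index(["low", "mid", "high"])  # as read from the metadata block
codes = np.array([0, 2, 1, -1, 2])          # stored codes; -1 marks a missing value

cat = Categorical.from_codes(codes, categories=categories, ordered=True)
print(cat)
# [low, high, mid, NaN, high]
# Categories (3, object): [low < mid < high]
```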
@@ -3613,8 +3604,6 @@ def read_axes(
                 nan_rep=self.nan_rep,
                 encoding=self.encoding,
                 errors=self.errors,
-                start=start,
-                stop=stop,
             )
 
         return True
@@ -4873,16 +4862,15 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, errors="strict"):
     return data.reshape(shape)
 
 
-def _maybe_convert(values: np.ndarray, val_kind, encoding, errors):
+def _maybe_convert(values: np.ndarray, val_kind, encoding: str, errors: str):
     val_kind = _ensure_decoded(val_kind)
     if _need_convert(val_kind):
         conv = _get_converter(val_kind, encoding, errors)
-        # conv = np.frompyfunc(conv, 1, 1)
         values = conv(values)
     return values
 
 
-def _get_converter(kind: str, encoding, errors):
+def _get_converter(kind: str, encoding: str, errors: str):
     if kind == "datetime64":
         return lambda x: np.asarray(x, dtype="M8[ns]")
     elif kind == "string":