Skip to content

Commit 0bde512

Browse files
jbrockmendel authored and jreback committed
CLN: clean pytables convert methods (#30125)
1 parent b71cab4 commit 0bde512

File tree

1 file changed

+70
-82
lines changed

1 file changed

+70
-82
lines changed

pandas/io/pytables.py

+70-82
Original file line number | Diff line number | Diff line change
@@ -1981,10 +1981,9 @@ def infer(self, handler: "Table"):
19811981
new_self.read_metadata(handler)
19821982
return new_self
19831983

1984-
def convert(
1985-
self, values: np.ndarray, nan_rep, encoding, errors, start=None, stop=None
1986-
):
1984+
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
19871985
""" set the values from this selection: take = take ownership """
1986+
assert isinstance(values, np.ndarray), type(values)
19881987

19891988
# values is a recarray
19901989
if values.dtype.fields is not None:
@@ -1993,21 +1992,23 @@ def convert(
19931992
values = _maybe_convert(values, self.kind, encoding, errors)
19941993

19951994
kwargs = dict()
1995+
kwargs["name"] = _ensure_decoded(self.index_name)
1996+
19961997
if self.freq is not None:
19971998
kwargs["freq"] = _ensure_decoded(self.freq)
1998-
if self.index_name is not None:
1999-
kwargs["name"] = _ensure_decoded(self.index_name)
1999+
20002000
# making an Index instance could throw a number of different errors
20012001
try:
2002-
self.values = Index(values, **kwargs)
2002+
new_pd_index = Index(values, **kwargs)
20032003
except ValueError:
20042004
# if the output freq is different than what we recorded,
20052005
# it should be None (see also 'doc example part 2')
20062006
if "freq" in kwargs:
20072007
kwargs["freq"] = None
2008-
self.values = Index(values, **kwargs)
2008+
new_pd_index = Index(values, **kwargs)
20092009

2010-
self.values = _set_tz(self.values, self.tz)
2010+
new_pd_index = _set_tz(new_pd_index, self.tz)
2011+
self.values = new_pd_index
20112012

20122013
def take_data(self):
20132014
""" return the values & release the memory """
@@ -2167,35 +2168,19 @@ class GenericIndexCol(IndexCol):
21672168
def is_indexed(self) -> bool:
21682169
return False
21692170

2170-
def convert(
2171-
self,
2172-
values,
2173-
nan_rep,
2174-
encoding,
2175-
errors,
2176-
start: Optional[int] = None,
2177-
stop: Optional[int] = None,
2178-
):
2179-
""" set the values from this selection: take = take ownership
2171+
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2172+
"""
2173+
Set the values from this selection.
21802174
21812175
Parameters
21822176
----------
2183-
21842177
values : np.ndarray
21852178
nan_rep : str
21862179
encoding : str
21872180
errors : str
2188-
start : int, optional
2189-
Table row number: the start of the sub-selection.
2190-
stop : int, optional
2191-
Table row number: the end of the sub-selection. Values larger than
2192-
the underlying table's row count are normalized to that.
21932181
"""
2194-
assert self.table is not None # for mypy
2195-
2196-
_start = start if start is not None else 0
2197-
_stop = min(stop, self.table.nrows) if stop is not None else self.table.nrows
2198-
self.values = Int64Index(np.arange(_stop - _start))
2182+
assert isinstance(values, np.ndarray), type(values)
2183+
self.values = Int64Index(np.arange(len(values)))
21992184

22002185
def get_attr(self):
22012186
pass
@@ -2395,10 +2380,11 @@ def validate_attr(self, append):
23952380
"items dtype in table!"
23962381
)
23972382

2398-
def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
2383+
def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
23992384
"""set the data from this selection (and convert to the correct dtype
24002385
if we can)
24012386
"""
2387+
assert isinstance(values, np.ndarray), type(values)
24022388

24032389
# values is a recarray
24042390
if values.dtype.fields is not None:
@@ -2410,69 +2396,74 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
24102396
else:
24112397
self.data = values
24122398

2399+
own_data = self.data
2400+
24132401
# use the meta if needed
24142402
meta = _ensure_decoded(self.meta)
24152403

2404+
assert self.dtype is not None
2405+
24162406
# convert to the correct dtype
2417-
if self.dtype is not None:
2418-
dtype = _ensure_decoded(self.dtype)
2407+
dtype = _ensure_decoded(self.dtype)
24192408

2420-
# reverse converts
2421-
if dtype == "datetime64":
2409+
# reverse converts
2410+
if dtype == "datetime64":
24222411

2423-
# recreate with tz if indicated
2424-
self.data = _set_tz(self.data, self.tz, coerce=True)
2412+
# recreate with tz if indicated
2413+
own_data = _set_tz(own_data, self.tz, coerce=True)
24252414

2426-
elif dtype == "timedelta64":
2427-
self.data = np.asarray(self.data, dtype="m8[ns]")
2428-
elif dtype == "date":
2429-
try:
2430-
self.data = np.asarray(
2431-
[date.fromordinal(v) for v in self.data], dtype=object
2432-
)
2433-
except ValueError:
2434-
self.data = np.asarray(
2435-
[date.fromtimestamp(v) for v in self.data], dtype=object
2436-
)
2437-
2438-
elif meta == "category":
2439-
2440-
# we have a categorical
2441-
categories = self.metadata
2442-
codes = self.data.ravel()
2443-
2444-
# if we have stored a NaN in the categories
2445-
# then strip it; in theory we could have BOTH
2446-
# -1s in the codes and nulls :<
2447-
if categories is None:
2448-
# Handle case of NaN-only categorical columns in which case
2449-
# the categories are an empty array; when this is stored,
2450-
# pytables cannot write a zero-len array, so on readback
2451-
# the categories would be None and `read_hdf()` would fail.
2452-
categories = Index([], dtype=np.float64)
2453-
else:
2454-
mask = isna(categories)
2455-
if mask.any():
2456-
categories = categories[~mask]
2457-
codes[codes != -1] -= mask.astype(int).cumsum().values
2458-
2459-
self.data = Categorical.from_codes(
2460-
codes, categories=categories, ordered=self.ordered
2415+
elif dtype == "timedelta64":
2416+
own_data = np.asarray(own_data, dtype="m8[ns]")
2417+
elif dtype == "date":
2418+
try:
2419+
own_data = np.asarray(
2420+
[date.fromordinal(v) for v in own_data], dtype=object
2421+
)
2422+
except ValueError:
2423+
own_data = np.asarray(
2424+
[date.fromtimestamp(v) for v in own_data], dtype=object
24612425
)
24622426

2427+
elif meta == "category":
2428+
2429+
# we have a categorical
2430+
categories = self.metadata
2431+
codes = own_data.ravel()
2432+
2433+
# if we have stored a NaN in the categories
2434+
# then strip it; in theory we could have BOTH
2435+
# -1s in the codes and nulls :<
2436+
if categories is None:
2437+
# Handle case of NaN-only categorical columns in which case
2438+
# the categories are an empty array; when this is stored,
2439+
# pytables cannot write a zero-len array, so on readback
2440+
# the categories would be None and `read_hdf()` would fail.
2441+
categories = Index([], dtype=np.float64)
24632442
else:
2443+
mask = isna(categories)
2444+
if mask.any():
2445+
categories = categories[~mask]
2446+
codes[codes != -1] -= mask.astype(int).cumsum().values
24642447

2465-
try:
2466-
self.data = self.data.astype(dtype, copy=False)
2467-
except TypeError:
2468-
self.data = self.data.astype("O", copy=False)
2448+
own_data = Categorical.from_codes(
2449+
codes, categories=categories, ordered=self.ordered
2450+
)
2451+
2452+
else:
2453+
2454+
try:
2455+
own_data = own_data.astype(dtype, copy=False)
2456+
except TypeError:
2457+
own_data = own_data.astype("O", copy=False)
24692458

24702459
# convert nans / decode
24712460
if _ensure_decoded(self.kind) == "string":
2472-
self.data = _unconvert_string_array(
2473-
self.data, nan_rep=nan_rep, encoding=encoding, errors=errors
2461+
own_data = _unconvert_string_array(
2462+
own_data, nan_rep=nan_rep, encoding=encoding, errors=errors
24742463
)
24752464

2465+
self.data = own_data
2466+
24762467
def get_attr(self):
24772468
""" get the data for this column """
24782469
self.values = getattr(self.attrs, self.kind_attr, None)
@@ -3613,8 +3604,6 @@ def read_axes(
36133604
nan_rep=self.nan_rep,
36143605
encoding=self.encoding,
36153606
errors=self.errors,
3616-
start=start,
3617-
stop=stop,
36183607
)
36193608

36203609
return True
@@ -4873,16 +4862,15 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, errors="strict"):
48734862
return data.reshape(shape)
48744863

48754864

4876-
def _maybe_convert(values: np.ndarray, val_kind, encoding, errors):
4865+
def _maybe_convert(values: np.ndarray, val_kind, encoding: str, errors: str):
48774866
val_kind = _ensure_decoded(val_kind)
48784867
if _need_convert(val_kind):
48794868
conv = _get_converter(val_kind, encoding, errors)
4880-
# conv = np.frompyfunc(conv, 1, 1)
48814869
values = conv(values)
48824870
return values
48834871

48844872

4885-
def _get_converter(kind: str, encoding, errors):
4873+
def _get_converter(kind: str, encoding: str, errors: str):
48864874
if kind == "datetime64":
48874875
return lambda x: np.asarray(x, dtype="M8[ns]")
48884876
elif kind == "string":

0 commit comments

Comments
 (0)