CLN: clean pytables convert methods (#30125)

jbrockmendel · jreback · commit 0bde51283c9f · 2019-12-08T12:28:56.000-05:00
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -1981,10 +1981,9 @@ def infer(self, handler: "Table"):
         new_self.read_metadata(handler)
         return new_self
 
-    def convert(
-        self, values: np.ndarray, nan_rep, encoding, errors, start=None, stop=None
-    ):
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         """ set the values from this selection: take = take ownership """
+        assert isinstance(values, np.ndarray), type(values)
 
         # values is a recarray
         if values.dtype.fields is not None:
@@ -1993,21 +1992,23 @@ def convert(
         values = _maybe_convert(values, self.kind, encoding, errors)
 
         kwargs = dict()
+        kwargs["name"] = _ensure_decoded(self.index_name)
+
         if self.freq is not None:
             kwargs["freq"] = _ensure_decoded(self.freq)
-        if self.index_name is not None:
-            kwargs["name"] = _ensure_decoded(self.index_name)
+
         # making an Index instance could throw a number of different errors
         try:
-            self.values = Index(values, **kwargs)
+            new_pd_index = Index(values, **kwargs)
         except ValueError:
             # if the output freq is different that what we recorded,
             # it should be None (see also 'doc example part 2')
             if "freq" in kwargs:
                 kwargs["freq"] = None
-            self.values = Index(values, **kwargs)
+            new_pd_index = Index(values, **kwargs)
 
-        self.values = _set_tz(self.values, self.tz)
+        new_pd_index = _set_tz(new_pd_index, self.tz)
+        self.values = new_pd_index
 
     def take_data(self):
         """ return the values & release the memory """
@@ -2167,35 +2168,19 @@ class GenericIndexCol(IndexCol):
     def is_indexed(self) -> bool:
         return False
 
-    def convert(
-        self,
-        values,
-        nan_rep,
-        encoding,
-        errors,
-        start: Optional[int] = None,
-        stop: Optional[int] = None,
-    ):
-        """ set the values from this selection: take = take ownership
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
+        """
+        Set the values from this selection.
 
         Parameters
         ----------
-
         values : np.ndarray
         nan_rep : str
         encoding : str
         errors : str
-        start : int, optional
-            Table row number: the start of the sub-selection.
-        stop : int, optional
-            Table row number: the end of the sub-selection. Values larger than
-            the underlying table's row count are normalized to that.
         """
-        assert self.table is not None  # for mypy
-
-        _start = start if start is not None else 0
-        _stop = min(stop, self.table.nrows) if stop is not None else self.table.nrows
-        self.values = Int64Index(np.arange(_stop - _start))
+        assert isinstance(values, np.ndarray), type(values)
+        self.values = Int64Index(np.arange(len(values)))
 
     def get_attr(self):
         pass
@@ -2395,10 +2380,11 @@ def validate_attr(self, append):
                     "items dtype in table!"
                 )
 
-    def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
+    def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
         """set the data from this selection (and convert to the correct dtype
         if we can)
         """
+        assert isinstance(values, np.ndarray), type(values)
 
         # values is a recarray
         if values.dtype.fields is not None:
@@ -2410,69 +2396,74 @@ def convert(self, values, nan_rep, encoding, errors, start=None, stop=None):
         else:
             self.data = values
 
+        own_data = self.data
+
         # use the meta if needed
         meta = _ensure_decoded(self.meta)
 
+        assert self.dtype is not None
+
         # convert to the correct dtype
-        if self.dtype is not None:
-            dtype = _ensure_decoded(self.dtype)
+        dtype = _ensure_decoded(self.dtype)
 
-            # reverse converts
-            if dtype == "datetime64":
+        # reverse converts
+        if dtype == "datetime64":
 
-                # recreate with tz if indicated
-                self.data = _set_tz(self.data, self.tz, coerce=True)
+            # recreate with tz if indicated
+            own_data = _set_tz(own_data, self.tz, coerce=True)
 
-            elif dtype == "timedelta64":
-                self.data = np.asarray(self.data, dtype="m8[ns]")
-            elif dtype == "date":
-                try:
-                    self.data = np.asarray(
-                        [date.fromordinal(v) for v in self.data], dtype=object
-                    )
-                except ValueError:
-                    self.data = np.asarray(
-                        [date.fromtimestamp(v) for v in self.data], dtype=object
-                    )
-
-            elif meta == "category":
-
-                # we have a categorical
-                categories = self.metadata
-                codes = self.data.ravel()
-
-                # if we have stored a NaN in the categories
-                # then strip it; in theory we could have BOTH
-                # -1s in the codes and nulls :<
-                if categories is None:
-                    # Handle case of NaN-only categorical columns in which case
-                    # the categories are an empty array; when this is stored,
-                    # pytables cannot write a zero-len array, so on readback
-                    # the categories would be None and `read_hdf()` would fail.
-                    categories = Index([], dtype=np.float64)
-                else:
-                    mask = isna(categories)
-                    if mask.any():
-                        categories = categories[~mask]
-                        codes[codes != -1] -= mask.astype(int).cumsum().values
-
-                self.data = Categorical.from_codes(
-                    codes, categories=categories, ordered=self.ordered
+        elif dtype == "timedelta64":
+            own_data = np.asarray(own_data, dtype="m8[ns]")
+        elif dtype == "date":
+            try:
+                own_data = np.asarray(
+                    [date.fromordinal(v) for v in own_data], dtype=object
+                )
+            except ValueError:
+                own_data = np.asarray(
+                    [date.fromtimestamp(v) for v in own_data], dtype=object
                 )
 
+        elif meta == "category":
+
+            # we have a categorical
+            categories = self.metadata
+            codes = own_data.ravel()
+
+            # if we have stored a NaN in the categories
+            # then strip it; in theory we could have BOTH
+            # -1s in the codes and nulls :<
+            if categories is None:
+                # Handle case of NaN-only categorical columns in which case
+                # the categories are an empty array; when this is stored,
+                # pytables cannot write a zero-len array, so on readback
+                # the categories would be None and `read_hdf()` would fail.
+                categories = Index([], dtype=np.float64)
             else:
+                mask = isna(categories)
+                if mask.any():
+                    categories = categories[~mask]
+                    codes[codes != -1] -= mask.astype(int).cumsum().values
 
-                try:
-                    self.data = self.data.astype(dtype, copy=False)
-                except TypeError:
-                    self.data = self.data.astype("O", copy=False)
+            own_data = Categorical.from_codes(
+                codes, categories=categories, ordered=self.ordered
+            )
+
+        else:
+
+            try:
+                own_data = own_data.astype(dtype, copy=False)
+            except TypeError:
+                own_data = own_data.astype("O", copy=False)
 
         # convert nans / decode
         if _ensure_decoded(self.kind) == "string":
-            self.data = _unconvert_string_array(
-                self.data, nan_rep=nan_rep, encoding=encoding, errors=errors
+            own_data = _unconvert_string_array(
+                own_data, nan_rep=nan_rep, encoding=encoding, errors=errors
             )
 
+        self.data = own_data
+
     def get_attr(self):
         """ get the data for this column """
         self.values = getattr(self.attrs, self.kind_attr, None)
@@ -3613,8 +3604,6 @@ def read_axes(
                 nan_rep=self.nan_rep,
                 encoding=self.encoding,
                 errors=self.errors,
-                start=start,
-                stop=stop,
             )
 
         return True
@@ -4873,16 +4862,15 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, errors="strict"):
     return data.reshape(shape)
 
 
-def _maybe_convert(values: np.ndarray, val_kind, encoding, errors):
+def _maybe_convert(values: np.ndarray, val_kind, encoding: str, errors: str):
     val_kind = _ensure_decoded(val_kind)
     if _need_convert(val_kind):
         conv = _get_converter(val_kind, encoding, errors)
-        # conv = np.frompyfunc(conv, 1, 1)
         values = conv(values)
     return values
 
 
-def _get_converter(kind: str, encoding, errors):
+def _get_converter(kind: str, encoding: str, errors: str):
     if kind == "datetime64":
         return lambda x: np.asarray(x, dtype="M8[ns]")
     elif kind == "string":