From e377b6967f3f9b42557757397b41ab22a103ff33 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 29 Nov 2018 23:51:20 +0100 Subject: [PATCH 01/16] API: Series.str-accessor infers dtype --- doc/source/whatsnew/v0.24.0.rst | 3 + pandas/core/strings.py | 207 +++++++++++++++++++++++++------- pandas/tests/test_strings.py | 56 +++------ 3 files changed, 182 insertions(+), 84 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index f888648a9363e..ac869b522502b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -374,6 +374,8 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) +- The `.str`-accessor will perform more rigorous type checking for inputs. Previously, some types that were never intended to be used + "worked" purely due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `encode`, `decode` and `len` (:issue:`23011`, :issue:`23163`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) @@ -1363,6 +1365,7 @@ Strings - Bug in :meth:`Index.str.partition` was not nan-safe (:issue:`23558`). - Bug in :meth:`Index.str.split` was not nan-safe (:issue:`23677`). - Bug :func:`Series.str.contains` not respecting the ``na`` argument for a ``Categorical`` dtype ``Series`` (:issue:`22158`) +- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) Interval ^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b791f6f91aa3..164ba0f450420 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -9,19 +9,20 @@ import pandas._libs.lib as lib import pandas._libs.ops as libops import pandas.compat as compat -from pandas.compat import zip +from pandas.compat import wraps, zip from pandas.util._decorators import Appender, deprecate_kwarg from pandas.core.dtypes.common import ( ensure_object, is_bool_dtype, is_categorical_dtype, is_integer, - is_list_like, is_object_dtype, is_re, is_scalar, is_string_like) -from pandas.core.dtypes.generic import ABCIndex, ABCSeries + is_list_like, is_re, is_scalar, is_string_like) +from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.algorithms import take_1d from pandas.core.base import NoNewAttributesMixin import pandas.core.common as com + _cpython_optimized_encoders = ( "utf-8", "utf8", "latin-1", "latin1", "iso-8859-1", "mbcs", "ascii" ) @@ -1733,12 +1734,80 @@ def str_encode(arr, encoding, errors="strict"): return _na_map(f, arr) -def _noarg_wrapper(f, docstring=None, **kargs): +def forbid_nonstring_types(forbidden, name=None): + """ + Decorator to forbid specific types for a method of StringMethods. + + For calling `.str.{method}` on a Series or Index, it is necessary to first + initialize the :class:`StringMethods` object, and then call the method. + However, different methods allow different input types, and so this can not + be checked during :meth:`StringMethods.__init__`, but must be done on a + per-method basis. 
This decorator exists to facilitate this process, and + make it explicit which (inferred) types are disallowed by the method. + + :meth:`StringMethods.__init__` allows the *union* of types its different + methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), + namely: ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer']. + + The default string types ['string', 'unicode', 'empty'] are allowed for all + methods. For the additional types ['bytes', 'mixed', 'mixed-integer'], each + method then needs to forbid the types it is not intended for. + + Parameters + ---------- + forbidden : list or None + List of forbidden non-string types, may be one or more of + `['bytes', 'mixed', 'mixed-integer']`. + name : str, default None + Name of the method to use in the error message. By default, this is + None, in which case the name from the method being wrapped will be + copied. However, for working with further wrappers (like _pat_wrapper + and _noarg_wrapper), it is necessary to specify the name. + + Returns + ------- + func : wrapper + The method to which the decorator is applied, with an added check that + enforces the inferred type to not be in the list of forbidden types. + + Raises + ------ + TypeError + If the inferred type of the underlying data is in `forbidden`. + """ + + # deal with None + forbidden = [] if forbidden is None else forbidden + # deal with single string instead of list + forbidden = [forbidden] if isinstance(forbidden, str) else forbidden + + allowed_types = {'string', 'unicode', 'empty', + 'bytes', 'mixed', 'mixed-integer'} - set(forbidden) + + def _forbid_nonstring_types(func): + func_name = func.__name__ if name is None else name + + @wraps(func) + def wrapper(self, *args, **kwargs): + if self._inferred_dtype not in allowed_types: + msg = ('Cannot use .str.{name} with values of inferred dtype ' + '{inf_type!r}.'.format(name=func_name, + inf_type=self._inferred_dtype)) + raise TypeError(msg) + return func(self, *args, **kwargs) + wrapper.__name__ = func_name + return wrapper + return _forbid_nonstring_types + + +def _noarg_wrapper(f, name=None, docstring=None, forbidden_types=['bytes'], + **kargs): + @forbid_nonstring_types(forbidden_types, name=name) def wrapper(self): result = _na_map(f, self._parent, **kargs) return self._wrap_result(result) - wrapper.__name__ = f.__name__ + wrapper.__name__ = f.__name__ if name is None else name if docstring is not None: wrapper.__doc__ = docstring else: @@ -1747,22 +1816,26 @@ def wrapper(self): return wrapper -def _pat_wrapper(f, flags=False, na=False, **kwargs): +def _pat_wrapper(f, flags=False, na=False, name=None, + forbidden_types=['bytes'], **kwargs): + @forbid_nonstring_types(forbidden_types, name=name) def wrapper1(self, pat): result = f(self._parent, pat) return self._wrap_result(result) + @forbid_nonstring_types(forbidden_types, name=name) def wrapper2(self, pat, flags=0, **kwargs): result = f(self._parent, pat, flags=flags, **kwargs) return self._wrap_result(result) + @forbid_nonstring_types(forbidden_types, name=name) def wrapper3(self, pat, na=np.nan): result = f(self._parent, pat, na=na) return self._wrap_result(result) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 - wrapper.__name__ = f.__name__ + wrapper.__name__ = f.__name__ if name is None else name if f.__doc__: wrapper.__doc__ = f.__doc__ @@ -1793,7 +1866,7 @@ class StringMethods(NoNewAttributesMixin): """ def __init__(self, data): - self._validate(data) + self._inferred_dtype = self._validate(data) self._is_categorical 
= is_categorical_dtype(data) # .values.categories works for both Series/Index @@ -1804,38 +1877,32 @@ def __init__(self, data): @staticmethod def _validate(data): - from pandas.core.index import Index - - if (isinstance(data, ABCSeries) and - not ((is_categorical_dtype(data.dtype) and - is_object_dtype(data.values.categories)) or - (is_object_dtype(data.dtype)))): - # it's neither a string series not a categorical series with - # strings inside the categories. - # this really should exclude all series with any non-string values - # (instead of test for object dtype), but that isn't practical for - # performance reasons until we have a str dtype (GH 9343) + if isinstance(data, ABCMultiIndex): + raise AttributeError('Can only use .str accessor with Index, ' + 'not MultiIndex') + + # see _libs/lib.pyx for list of inferred types + allowed_types = ['string', 'unicode', 'empty', + 'bytes', 'mixed', 'mixed-integer'] + + values = getattr(data, 'values', data) # Series / Index + values = getattr(values, 'categories', values) # categorical / normal + + # missing values obfuscate type inference -> skip + inferred_dtype = lib.infer_dtype(values, skipna=True) + + if inferred_dtype not in allowed_types: + # this is a "first line of defence" and just checks that the type + # is in the *union* of the allowed types over all methods below; + # this restriction is then refined on a per-method basis using the + # decorator @forbid_nonstring_types + # + # this really should exclude all series/index with any non-string + # values, but that isn't practical for performance reasons until we + # have a str dtype (GH 9343 / 13877) raise AttributeError("Can only use .str accessor with string " - "values, which use np.object_ dtype in " - "pandas") - elif isinstance(data, Index): - # can't use ABCIndex to exclude non-str - - # see src/inference.pyx which can contain string values - allowed_types = ('string', 'unicode', 'mixed', 'mixed-integer') - if is_categorical_dtype(data.dtype): - inf_type = data.categories.inferred_type - else: - inf_type = data.inferred_type - if inf_type not in allowed_types: - message = ("Can only use .str accessor with string values " - "(i.e. inferred_type is 'string', 'unicode' or " - "'mixed')") - raise AttributeError(message) - if data.nlevels > 1: - message = ("Can only use .str accessor with Index, not " - "MultiIndex") - raise AttributeError(message) + "values!") + return inferred_dtype def __getitem__(self, key): if isinstance(key, slice): @@ -2037,12 +2104,13 @@ def _get_series_list(self, others, ignore_index=False): warnings.warn('list-likes other than Series, Index, or ' 'np.ndarray WITHIN another list-like are ' 'deprecated and will be removed in a future ' - 'version.', FutureWarning, stacklevel=3) + 'version.', FutureWarning, stacklevel=4) return (los, join_warn) elif all(not is_list_like(x) for x in others): return ([Series(others, index=idx)], False) raise TypeError(err_msg) + @forbid_nonstring_types(['bytes', 'mixed', 'mixed-integer']) def cat(self, others=None, sep=None, na_rep=None, join=None): """ Concatenate strings in the Series/Index with given separator. @@ -2223,7 +2291,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): "Index/DataFrame in `others`. To enable alignment " "and silence this warning, pass `join='left'|" "'outer'|'inner'|'right'`. 
The future default will " - "be `join='left'`.", FutureWarning, stacklevel=2) + "be `join='left'`.", FutureWarning, stacklevel=3) # if join is None, _get_series_list already force-aligned indexes join = 'left' if join is None else join @@ -2385,6 +2453,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): @Appender(_shared_docs['str_split'] % { 'side': 'beginning', 'method': 'split'}) + @forbid_nonstring_types(['bytes']) def split(self, pat=None, n=-1, expand=False): result = str_split(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2392,6 +2461,7 @@ def split(self, pat=None, n=-1, expand=False): @Appender(_shared_docs['str_split'] % { 'side': 'end', 'method': 'rsplit'}) + @forbid_nonstring_types(['bytes']) def rsplit(self, pat=None, n=-1, expand=False): result = str_rsplit(self._parent, pat, n=n) return self._wrap_result(result, expand=expand) @@ -2485,6 +2555,7 @@ def rsplit(self, pat=None, n=-1, expand=False): 'also': 'rpartition : Split the string at the last occurrence of `sep`' }) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def partition(self, sep=' ', expand=True): f = lambda x: x.partition(sep) result = _na_map(f, self._parent) @@ -2497,44 +2568,52 @@ def partition(self, sep=' ', expand=True): 'also': 'partition : Split the string at the first occurrence of `sep`' }) @deprecate_kwarg(old_arg_name='pat', new_arg_name='sep') + @forbid_nonstring_types(['bytes']) def rpartition(self, sep=' ', expand=True): f = lambda x: x.rpartition(sep) result = _na_map(f, self._parent) return self._wrap_result(result, expand=expand) @copy(str_get) + @forbid_nonstring_types(['bytes']) def get(self, i): result = str_get(self._parent, i) return self._wrap_result(result) @copy(str_join) + @forbid_nonstring_types(['bytes']) def join(self, sep): result = str_join(self._parent, sep) return self._wrap_result(result) @copy(str_contains) + @forbid_nonstring_types(['bytes']) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): result = str_contains(self._parent, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result, fill_value=na) @copy(str_match) + @forbid_nonstring_types(['bytes']) def match(self, pat, case=True, flags=0, na=np.nan): result = str_match(self._parent, pat, case=case, flags=flags, na=na) return self._wrap_result(result, fill_value=na) @copy(str_replace) + @forbid_nonstring_types(['bytes']) def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True): result = str_replace(self._parent, pat, repl, n=n, case=case, flags=flags, regex=regex) return self._wrap_result(result) @copy(str_repeat) + @forbid_nonstring_types(['bytes']) def repeat(self, repeats): result = str_repeat(self._parent, repeats) return self._wrap_result(result) @copy(str_pad) + @forbid_nonstring_types(['bytes']) def pad(self, width, side='left', fillchar=' '): result = str_pad(self._parent, width, side=side, fillchar=fillchar) return self._wrap_result(result) @@ -2558,17 +2637,21 @@ def pad(self, width, side='left', fillchar=' '): @Appender(_shared_docs['str_pad'] % dict(side='left and right', method='center')) + @forbid_nonstring_types(['bytes']) def center(self, width, fillchar=' '): return self.pad(width, side='both', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='right', method='ljust')) + @forbid_nonstring_types(['bytes']) def ljust(self, width, fillchar=' '): return self.pad(width, side='right', fillchar=fillchar) @Appender(_shared_docs['str_pad'] % dict(side='left', 
method='rjust')) + @forbid_nonstring_types(['bytes']) def rjust(self, width, fillchar=' '): return self.pad(width, side='left', fillchar=fillchar) + @forbid_nonstring_types(['bytes']) def zfill(self, width): """ Pad strings in the Series/Index by prepending '0' characters. @@ -2633,22 +2716,26 @@ def zfill(self, width): return self._wrap_result(result) @copy(str_slice) + @forbid_nonstring_types(['bytes']) def slice(self, start=None, stop=None, step=None): result = str_slice(self._parent, start, stop, step) return self._wrap_result(result) @copy(str_slice_replace) + @forbid_nonstring_types(['bytes']) def slice_replace(self, start=None, stop=None, repl=None): result = str_slice_replace(self._parent, start, stop, repl) return self._wrap_result(result) @copy(str_decode) def decode(self, encoding, errors="strict"): + # need to allow bytes here result = str_decode(self._parent, encoding, errors) return self._wrap_result(result) @copy(str_encode) def encode(self, encoding, errors="strict"): + # allowing bytes here for easily dealing with mixed str/bytes Series result = str_encode(self._parent, encoding, errors) return self._wrap_result(result) @@ -2717,28 +2804,33 @@ def encode(self, encoding, errors="strict"): @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', method='strip')) + @forbid_nonstring_types(['bytes']) def strip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='both') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='left side', method='lstrip')) + @forbid_nonstring_types(['bytes']) def lstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='left') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='right side', method='rstrip')) + @forbid_nonstring_types(['bytes']) def rstrip(self, to_strip=None): result = str_strip(self._parent, to_strip, side='right') return self._wrap_result(result) @copy(str_wrap) + @forbid_nonstring_types(['bytes']) def wrap(self, width, **kwargs): result = str_wrap(self._parent, width, **kwargs) return self._wrap_result(result) @copy(str_get_dummies) + @forbid_nonstring_types(['bytes']) def get_dummies(self, sep='|'): # we need to cast to Series of strings as only that has all # methods available for making the dummies... 
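(Note: the hunks above and below decorate the individual accessor methods with ``@forbid_nonstring_types(['bytes'])``. As a rough sketch of the resulting behaviour -- assuming Python 3, where a column of ``bytes`` objects is inferred as dtype ``'bytes'`` -- bytes data still passes the accessor's constructor, but most methods now raise, while the bytes-oriented ones keep working::

    import pandas as pd

    s = pd.Series([b'a', b'b', b'c'])
    s.str                    # constructor still accepts inferred dtype 'bytes'

    try:
        s.str.upper()        # decorated with @forbid_nonstring_types(['bytes'])
    except TypeError as err:
        print(err)           # Cannot use .str.upper with values of inferred dtype 'bytes'.

    s.str.decode('ascii')    # bytes-oriented method keeps working
    s.str.len()              # 'len' is wrapped with forbidden_types=None
)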
@@ -2748,20 +2840,23 @@ def get_dummies(self, sep='|'): name=name, expand=True) @copy(str_translate) + @forbid_nonstring_types(['bytes']) def translate(self, table, deletechars=None): result = str_translate(self._parent, table, deletechars) return self._wrap_result(result) - count = _pat_wrapper(str_count, flags=True) - startswith = _pat_wrapper(str_startswith, na=True) - endswith = _pat_wrapper(str_endswith, na=True) - findall = _pat_wrapper(str_findall, flags=True) + count = _pat_wrapper(str_count, flags=True, name='count') + startswith = _pat_wrapper(str_startswith, na=True, name='startswith') + endswith = _pat_wrapper(str_endswith, na=True, name='endswith') + findall = _pat_wrapper(str_findall, flags=True, name='findall') @copy(str_extract) + @forbid_nonstring_types(['bytes']) def extract(self, pat, flags=0, expand=True): return str_extract(self, pat, flags=flags, expand=expand) @copy(str_extractall) + @forbid_nonstring_types(['bytes']) def extractall(self, pat, flags=0): return str_extractall(self._orig, pat, flags=flags) @@ -2791,6 +2886,7 @@ def extractall(self, pat, flags=0): @Appender(_shared_docs['find'] % dict(side='lowest', method='find', also='rfind : Return highest indexes in each strings')) + @forbid_nonstring_types(['bytes']) def find(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='left') return self._wrap_result(result) @@ -2798,11 +2894,13 @@ def find(self, sub, start=0, end=None): @Appender(_shared_docs['find'] % dict(side='highest', method='rfind', also='find : Return lowest indexes in each strings')) + @forbid_nonstring_types(['bytes']) def rfind(self, sub, start=0, end=None): result = str_find(self._parent, sub, start=start, end=end, side='right') return self._wrap_result(result) + @forbid_nonstring_types(['bytes']) def normalize(self, form): """Return the Unicode normal form for the strings in the Series/Index. For more information on the forms, see the @@ -2849,6 +2947,7 @@ def normalize(self, form): @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings')) + @forbid_nonstring_types(['bytes']) def index(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='left') @@ -2857,6 +2956,7 @@ def index(self, sub, start=0, end=None): @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex', also='index : Return lowest indexes in each strings')) + @forbid_nonstring_types(['bytes']) def rindex(self, sub, start=0, end=None): result = str_index(self._parent, sub, start=start, end=end, side='right') @@ -2906,7 +3006,8 @@ def rindex(self, sub, start=0, end=None): 5 3.0 dtype: float64 """) - len = _noarg_wrapper(len, docstring=_shared_docs['len'], dtype=int) + len = _noarg_wrapper(len, docstring=_shared_docs['len'], + forbidden_types=None, dtype=int) _shared_docs['casemethods'] = (""" Convert strings in the Series/Index to %(type)s. 
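(Note: the ``name=...`` arguments threaded through ``_noarg_wrapper``/``_pat_wrapper`` in the following hunks fix the ``__name__`` attribute of the generated methods (GH 23551), which previously took the name of the wrapped function or lambda (e.g. ``'<lambda>'`` or ``'str_findall'``). A small illustration of the intended effect::

    import pandas as pd

    s = pd.Series(['abc'])
    s.str.lower.__name__      # 'lower'   (was '<lambda>' before this change)
    s.str.isdigit.__name__    # 'isdigit' (was '<lambda>')
    s.str.findall.__name__    # 'findall' (was 'str_findall' via _pat_wrapper)
)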
@@ -2980,18 +3081,23 @@ def rindex(self, sub, start=0, end=None): method='capitalize') _shared_docs['swapcase'] = dict(type='be swapcased', method='swapcase') lower = _noarg_wrapper(lambda x: x.lower(), + name='lower', docstring=_shared_docs['casemethods'] % _shared_docs['lower']) upper = _noarg_wrapper(lambda x: x.upper(), + name='upper', docstring=_shared_docs['casemethods'] % _shared_docs['upper']) title = _noarg_wrapper(lambda x: x.title(), + name='title', docstring=_shared_docs['casemethods'] % _shared_docs['title']) capitalize = _noarg_wrapper(lambda x: x.capitalize(), + name='capitalize', docstring=_shared_docs['casemethods'] % _shared_docs['capitalize']) swapcase = _noarg_wrapper(lambda x: x.swapcase(), + name='swapcase', docstring=_shared_docs['casemethods'] % _shared_docs['swapcase']) @@ -3145,30 +3251,39 @@ def rindex(self, sub, start=0, end=None): _shared_docs['isnumeric'] = dict(type='numeric', method='isnumeric') _shared_docs['isdecimal'] = dict(type='decimal', method='isdecimal') isalnum = _noarg_wrapper(lambda x: x.isalnum(), + name='isalnum', docstring=_shared_docs['ismethods'] % _shared_docs['isalnum']) isalpha = _noarg_wrapper(lambda x: x.isalpha(), + name='isalpha', docstring=_shared_docs['ismethods'] % _shared_docs['isalpha']) isdigit = _noarg_wrapper(lambda x: x.isdigit(), + name='isdigit', docstring=_shared_docs['ismethods'] % _shared_docs['isdigit']) isspace = _noarg_wrapper(lambda x: x.isspace(), + name='isspace', docstring=_shared_docs['ismethods'] % _shared_docs['isspace']) islower = _noarg_wrapper(lambda x: x.islower(), + name='islower', docstring=_shared_docs['ismethods'] % _shared_docs['islower']) isupper = _noarg_wrapper(lambda x: x.isupper(), + name='isupper', docstring=_shared_docs['ismethods'] % _shared_docs['isupper']) istitle = _noarg_wrapper(lambda x: x.istitle(), + name='istitle', docstring=_shared_docs['ismethods'] % _shared_docs['istitle']) isnumeric = _noarg_wrapper(lambda x: compat.u_safe(x).isnumeric(), + name='isnumeric', docstring=_shared_docs['ismethods'] % _shared_docs['isnumeric']) isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(), + name='isdecimal', docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 117984ce89743..30cdea554e778 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -135,7 +135,10 @@ def any_allowed_skipna_inferred_dtype(request): """ Fixture for all (inferred) dtypes allowed in StringMethods.__init__ - The covered (inferred) types are: + Returns an np.ndarray that will be inferred to have the given dtype (when + skipping missing values). + + The allowed (inferred) types are: * 'string' * 'unicode' (if PY2) * 'empty' @@ -156,9 +159,12 @@ def any_allowed_skipna_inferred_dtype(request): >>> import pandas._libs.lib as lib >>> >>> def test_something(any_allowed_skipna_inferred_dtype): - ... inferred_dtype, values = any_skipna_inferred_dtype + ... inferred_dtype, values = any_allowed_skipna_inferred_dtype ... # will pass ... assert lib.infer_dtype(values, skipna=True) == inferred_dtype + ... + ... # constructor for .str-accessor will also pass + ... 
pd.Series(values).str """ inferred_dtype, values = request.param values = np.array(values, dtype=object) # object dtype to avoid casting @@ -188,20 +194,6 @@ def test_api_per_dtype(self, box, dtype, any_skipna_inferred_dtype): pytest.xfail(reason='Conversion to numpy array fails because ' 'the ._values-attribute is not a numpy array for ' 'PeriodArray/IntervalArray; see GH 23553') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') - if (box == Series - and (dtype == object and inferred_dtype not in [ - 'string', 'unicode', 'empty', - 'bytes', 'mixed', 'mixed-integer']) - or (dtype == 'category' - and inferred_dtype in ['decimal', 'boolean', 'time'])): - pytest.xfail(reason='Not raising correctly; solved by GH 23167') types_passing_constructor = ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'] @@ -229,25 +221,19 @@ def test_api_per_method(self, box, dtype, method_name, args, kwargs = any_string_method # TODO: get rid of these xfails - if (method_name not in ['encode', 'decode', 'len'] - and inferred_dtype == 'bytes'): - pytest.xfail(reason='Not raising for "bytes", see GH 23011;' - 'Also: malformed method names, see GH 23551; ' - 'solved by GH 23167') - if (method_name == 'cat' - and inferred_dtype in ['mixed', 'mixed-integer']): - pytest.xfail(reason='Bad error message; should raise better; ' - 'solved by GH 23167') - if box == Index and inferred_dtype in ['empty', 'bytes']: - pytest.xfail(reason='Raising too restrictively; ' - 'solved by GH 23167') - if (box == Index and dtype == object - and inferred_dtype in ['boolean', 'date', 'time']): - pytest.xfail(reason='Inferring incorrectly because of NaNs; ' - 'solved by GH 23167') if box == Index and dtype == 'category': pytest.xfail(reason='Broken methods on CategoricalIndex; ' 'see GH 23556') + if (method_name in ['partition', 'rpartition'] and box == Index + and inferred_dtype == 'empty'): + pytest.xfail(reason='Method cannot deal with empty Index') + if (method_name == 'split' and box == Index and values.size == 0 + and kwargs.get('expand', None) is not None): + pytest.xfail(reason='Split fails on empty Series when expand=True') + if (method_name == 'get_dummies' and box == Index + and inferred_dtype == 'empty' and (dtype == object + or values.size == 0)): + pytest.xfail(reason='Need to fortify get_dummies corner cases') t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) @@ -630,12 +616,6 @@ def test_str_cat_align_mixed_inputs(self, join): with pytest.raises(ValueError, match=rgx): s.str.cat([t, z], join=join) - def test_str_cat_raises(self): - # non-strings hiding behind object dtype - s = Series([1, 2, 3, 4], dtype='object') - with pytest.raises(TypeError, match="unsupported operand type.*"): - s.str.cat(s) - def test_str_cat_special_cases(self): s = Series(['a', 'b', 'c', 'd']) t = Series(['d', 'a', 'e', 'b'], index=[3, 0, 4, 1]) From 88b7b5336307d9fc92a9dab35e69ca05ba6ae582 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Fri, 30 Nov 2018 00:16:28 +0100 Subject: [PATCH 02/16] Forbid encode on pure bytes as well --- doc/source/whatsnew/v0.24.0.rst | 2 +- pandas/core/strings.py | 2 +- pandas/tests/test_strings.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index ac869b522502b..2c9aecdde2f45 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -375,7 +375,7 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) - The `.str`-accessor will perform more rigorous type checking for inputs. Previously, some types that were never intended to be used - "worked" purely due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `encode`, `decode` and `len` (:issue:`23011`, :issue:`23163`) + "worked" purely due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `decode` and `len` (:issue:`23011`, :issue:`23163`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 164ba0f450420..67b88b11c1bef 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2734,8 +2734,8 @@ def decode(self, encoding, errors="strict"): return self._wrap_result(result) @copy(str_encode) + @forbid_nonstring_types(['bytes']) def encode(self, encoding, errors="strict"): - # allowing bytes here for easily dealing with mixed str/bytes Series result = str_encode(self._parent, encoding, errors) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 30cdea554e778..46c44adfd4f6d 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -238,7 +238,7 @@ def test_api_per_method(self, box, dtype, t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) - bytes_allowed = method_name in ['encode', 'decode', 'len'] + bytes_allowed = method_name in ['decode', 'len'] # as of v0.23.4, all methods except 'cat' are very lenient with the # allowed data types, just returning NaN for entries that error. # This could be changed with an 'errors'-kwarg to the `str`-accessor, From b19a40d4d4bef93e8a6c9839f972b924002e6f1f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 30 Nov 2018 01:05:46 +0100 Subject: [PATCH 03/16] Remove merge artefact --- pandas/tests/test_strings.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 46c44adfd4f6d..d6c6a8652e728 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -135,10 +135,7 @@ def any_allowed_skipna_inferred_dtype(request): """ Fixture for all (inferred) dtypes allowed in StringMethods.__init__ - Returns an np.ndarray that will be inferred to have the given dtype (when - skipping missing values). 
- - The allowed (inferred) types are: + The covered (inferred) types are: * 'string' * 'unicode' (if PY2) * 'empty' From fb7da6b5a63f2b5647bad11dff5463b0b778d9e8 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 30 Nov 2018 09:05:58 +0100 Subject: [PATCH 04/16] fix isort --- pandas/core/strings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 67b88b11c1bef..4d9f1567b371a 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -22,7 +22,6 @@ from pandas.core.base import NoNewAttributesMixin import pandas.core.common as com - _cpython_optimized_encoders = ( "utf-8", "utf8", "latin-1", "latin1", "iso-8859-1", "mbcs", "ascii" ) From f8ffb0d1337564fcdf1062b854a6125a323f3ced Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 2 Dec 2018 23:32:22 +0100 Subject: [PATCH 05/16] merge in API: fix str-accessor on CategoricalIndex --- .circleci/config.yml | 4 +- .travis.yml | 24 +- asv_bench/benchmarks/categoricals.py | 8 + ci/README.txt | 17 - ci/azure/linux.yml | 10 +- ci/azure/macos.yml | 6 +- ci/azure/windows-py27.yml | 2 +- ci/azure/windows.yml | 2 +- ci/deps/travis-36.yaml | 2 +- ci/print_versions.py | 29 -- ci/run_tests.sh | 52 ++ ci/script_multi.sh | 49 -- ci/script_single.sh | 39 -- ci/upload_coverage.sh | 11 - doc/source/comparison_with_r.rst | 88 ++-- doc/source/comparison_with_sql.rst | 20 +- doc/source/comparison_with_stata.rst | 23 +- doc/source/computation.rst | 82 ++-- doc/source/io.rst | 371 +++++++------- doc/source/timeseries.rst | 518 ++++++++++---------- doc/source/whatsnew/v0.24.0.rst | 8 +- environment.yml | 18 +- pandas/_libs/parsers.pyx | 4 + pandas/_libs/tslib.pyx | 141 +++--- pandas/_libs/tslibs/conversion.pyx | 15 +- pandas/core/arrays/categorical.py | 10 + pandas/core/arrays/datetimelike.py | 224 ++++++++- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/frame.py | 22 +- pandas/core/generic.py | 131 +++-- pandas/core/indexes/category.py | 6 +- pandas/core/indexes/datetimelike.py | 222 +-------- pandas/core/indexes/datetimes.py | 6 +- pandas/core/indexes/period.py | 4 +- pandas/core/indexes/timedeltas.py | 5 +- pandas/core/reshape/tile.py | 7 +- pandas/core/strings.py | 9 +- pandas/io/parsers.py | 16 +- pandas/tests/frame/test_convert_to.py | 28 +- pandas/tests/io/parser/test_na_values.py | 18 + pandas/tests/reshape/test_tile.py | 8 + pandas/tests/test_strings.py | 4 +- pandas/tests/util/test_hashing.py | 585 ++++++++++++----------- requirements-dev.txt | 18 +- setup.cfg | 46 +- 46 files changed, 1547 insertions(+), 1371 deletions(-) delete mode 100644 ci/README.txt delete mode 100755 ci/print_versions.py create mode 100755 ci/run_tests.sh delete mode 100755 ci/script_multi.sh delete mode 100755 ci/script_single.sh delete mode 100755 ci/upload_coverage.sh diff --git a/.circleci/config.yml b/.circleci/config.yml index dc4162a0674fd..6b516b21722ac 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -34,5 +34,5 @@ jobs: command: | export PATH="$MINICONDA_DIR/bin:$PATH" source activate pandas-dev - echo "pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas" - pytest --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml --skip-slow --skip-network pandas + echo "pytest -m "not slow and not network" --strict --durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas" + pytest -m "not slow and not network" --strict 
--durations=10 --color=no --junitxml=$CIRCLE_TEST_REPORTS/reports/junit.xml pandas diff --git a/.travis.yml b/.travis.yml index 3217fc5aa1ed6..6bbc44fba864a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,28 +34,28 @@ matrix: include: - dist: trusty env: - - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network" + - JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="not slow and not network" - dist: trusty env: - - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true + - JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/deps/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" PATTERN="slow" addons: apt: packages: - language-pack-zh-hans - dist: trusty env: - - JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" TEST_ARGS="--skip-slow" + - JOB="2.7" ENV_FILE="ci/deps/travis-27.yaml" PATTERN="not slow" addons: apt: packages: - python-gtk2 - dist: trusty env: - - JOB="3.6, lint, coverage" ENV_FILE="ci/deps/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true + - JOB="3.6, lint, coverage" ENV_FILE="ci/deps/travis-36.yaml" PATTERN="not slow and not network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true LINT=true - dist: trusty env: - - JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" + - JOB="3.7, NumPy dev" ENV_FILE="ci/deps/travis-37-numpydev.yaml" PATTERN="not slow and not network" TEST_ARGS="-W error" PANDAS_TESTING_MODE="deprecate" addons: apt: packages: @@ -64,7 +64,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true + - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" # In allow_failures - dist: trusty @@ -73,7 +73,7 @@ matrix: allow_failures: - dist: trusty env: - - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" SLOW=true + - JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" - dist: trusty env: - JOB="3.6, doc" ENV_FILE="ci/deps/travis-36-doc.yaml" DOC=true @@ -107,20 +107,16 @@ script: - echo "script start" - source activate pandas-dev - ci/run_build_docs.sh - - ci/script_single.sh - - ci/script_multi.sh + - ci/run_tests.sh - ci/code_checks.sh -after_success: - - ci/upload_coverage.sh - after_script: - echo "after_script start" - source activate pandas-dev && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd - if [ -e test-data-single.xml ]; then - ci/print_skipped.py test-data-single.xml; + ci/print_skipped.py test-data-single.xml; fi - if [ -e test-data-multiple.xml ]; then - ci/print_skipped.py test-data-multiple.xml; + ci/print_skipped.py test-data-multiple.xml; fi - echo "after_script done" diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 8a0fbc48755b5..7318b40efc8fb 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -46,6 +46,8 @@ def setup(self): self.values_some_nan = list(np.tile(self.categories + [np.nan], N)) self.values_all_nan = [np.nan] * len(self.values) self.values_all_int8 = np.ones(N, 'int8') + self.categorical = pd.Categorical(self.values, self.categories) + self.series = pd.Series(self.categorical) def time_regular(self): pd.Categorical(self.values, self.categories) @@ -68,6 +70,12 @@ def time_all_nan(self): def time_from_codes_all_int8(self): pd.Categorical.from_codes(self.values_all_int8, 
self.categories) + def time_existing_categorical(self): + pd.Categorical(self.categorical) + + def time_existing_series(self): + pd.Categorical(self.series) + class ValueCounts(object): diff --git a/ci/README.txt b/ci/README.txt deleted file mode 100644 index bb71dc25d6093..0000000000000 --- a/ci/README.txt +++ /dev/null @@ -1,17 +0,0 @@ -Travis is a ci service that's well-integrated with GitHub. -The following types of breakage should be detected -by Travis builds: - -1) Failing tests on any supported version of Python. -2) Pandas should install and the tests should run if no optional deps are installed. -That also means tests which rely on optional deps need to raise SkipTest() -if the dep is missing. -3) unicode related fails when running under exotic locales. - -We tried running the vbench suite for a while, but with varying load -on Travis machines, that wasn't useful. - -Travis currently (4/2013) has a 5-job concurrency limit. Exceeding it -basically doubles the total runtime for a commit through travis, and -since dep+pandas installation is already quite long, this should become -a hard limit on concurrent travis runs. diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index 7fa8a9a1783f9..fe64307e9d08f 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -12,18 +12,18 @@ jobs: py27_np_120: ENV_FILE: ci/deps/azure-27-compat.yaml CONDA_PY: "27" - TEST_ARGS: "--skip-slow --skip-network" + PATTERN: "not slow and not network" py37_locale: ENV_FILE: ci/deps/azure-37-locale.yaml CONDA_PY: "37" - TEST_ARGS: "--skip-slow --skip-network" + PATTERN: "not slow and not network" LOCALE_OVERRIDE: "zh_CN.UTF-8" py36_locale_slow: ENV_FILE: ci/deps/azure-36-locale_slow.yaml CONDA_PY: "36" - TEST_ARGS: "--only-slow --skip-network" + PATTERN: "not slow and not network" LOCALE_OVERRIDE: "it_IT.UTF-8" steps: @@ -43,9 +43,7 @@ jobs: - script: | export PATH=$HOME/miniconda3/bin:$PATH source activate pandas-dev - ci/script_single.sh - ci/script_multi.sh - echo "[Test done]" + ci/run_tests.sh displayName: 'Test' - script: | export PATH=$HOME/miniconda3/bin:$PATH diff --git a/ci/azure/macos.yml b/ci/azure/macos.yml index d537f0c70cbec..98409576a5a87 100644 --- a/ci/azure/macos.yml +++ b/ci/azure/macos.yml @@ -12,7 +12,7 @@ jobs: py35_np_120: ENV_FILE: ci/deps/azure-macos-35.yaml CONDA_PY: "35" - TEST_ARGS: "--skip-slow --skip-network" + PATTERN: "not slow and not network" steps: - script: | @@ -31,9 +31,7 @@ jobs: - script: | export PATH=$HOME/miniconda3/bin:$PATH source activate pandas-dev - ci/script_single.sh - ci/script_multi.sh - echo "[Test done]" + ci/run_tests.sh displayName: 'Test' - script: | export PATH=$HOME/miniconda3/bin:$PATH diff --git a/ci/azure/windows-py27.yml b/ci/azure/windows-py27.yml index ac918f3becd2e..0d9aea816c4ad 100644 --- a/ci/azure/windows-py27.yml +++ b/ci/azure/windows-py27.yml @@ -37,7 +37,7 @@ jobs: displayName: 'Build' - script: | call activate pandas-dev - pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict --durations=10 %* + pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %* displayName: 'Test' - task: PublishTestResults@2 inputs: diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index f0ebba509e441..b69c210ca27ba 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -28,7 +28,7 @@ jobs: displayName: 'Build' - script: | call activate pandas-dev - pytest --junitxml=test-data.xml --skip-slow --skip-network pandas -n 2 -r sxX --strict --durations=10 %* + 
pytest -m "not slow and not network" --junitxml=test-data.xml pandas -n 2 -r sxX --strict --durations=10 %* displayName: 'Test' - task: PublishTestResults@2 inputs: diff --git a/ci/deps/travis-36.yaml b/ci/deps/travis-36.yaml index 1781f67041f44..de76f5d6d763f 100644 --- a/ci/deps/travis-36.yaml +++ b/ci/deps/travis-36.yaml @@ -9,7 +9,7 @@ dependencies: - fastparquet - flake8>=3.5 - flake8-comprehensions - - flake8-rst=0.4.2 + - flake8-rst>=0.6.0 - gcsfs - geopandas - html5lib diff --git a/ci/print_versions.py b/ci/print_versions.py deleted file mode 100755 index a2c93748b0388..0000000000000 --- a/ci/print_versions.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - - -def show_versions(as_json=False): - import imp - import os - fn = __file__ - this_dir = os.path.dirname(fn) - pandas_dir = os.path.abspath(os.path.join(this_dir, "..")) - sv_path = os.path.join(pandas_dir, 'pandas', 'util') - mod = imp.load_module( - 'pvmod', *imp.find_module('print_versions', [sv_path])) - return mod.show_versions(as_json) - - -if __name__ == '__main__': - # optparse is 2.6-safe - from optparse import OptionParser - parser = OptionParser() - parser.add_option("-j", "--json", metavar="FILE", nargs=1, - help="Save output as JSON into file, " - "pass in '-' to output to stdout") - - (options, args) = parser.parse_args() - - if options.json == "-": - options.json = True - - show_versions(as_json=options.json) diff --git a/ci/run_tests.sh b/ci/run_tests.sh new file mode 100755 index 0000000000000..77efc60a8cf97 --- /dev/null +++ b/ci/run_tests.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +if [ "$DOC" ]; then + echo "We are not running pytest as this is a doc-build" + exit 0 +fi + +# Workaround for pytest-xdist flaky collection order +# https://github.com/pytest-dev/pytest/issues/920 +# https://github.com/pytest-dev/pytest/issues/1075 +export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') + +if [ -n "$LOCALE_OVERRIDE" ]; then + export LC_ALL="$LOCALE_OVERRIDE" + export LANG="$LOCALE_OVERRIDE" + PANDAS_LOCALE=`python -c 'import pandas; pandas.get_option("display.encoding")'` + if [[ "$LOCALE_OVERIDE" != "$PANDAS_LOCALE" ]]; then + echo "pandas could not detect the locale. System locale: $LOCALE_OVERRIDE, pandas detected: $PANDAS_LOCALE" + # TODO Not really aborting the tests until https://github.com/pandas-dev/pandas/issues/23923 is fixed + # exit 1 + fi +fi +if [[ "not network" == *"$PATTERN"* ]]; then + export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; +fi + + +if [ -n "$PATTERN" ]; then + PATTERN=" and $PATTERN" +fi + +for TYPE in single multiple +do + if [ "$COVERAGE" ]; then + COVERAGE_FNAME="/tmp/coc-$TYPE.xml" + COVERAGE="-s --cov=pandas --cov-report=xml:$COVERAGE_FNAME" + fi + + TYPE_PATTERN=$TYPE + NUM_JOBS=1 + if [[ "$TYPE_PATTERN" == "multiple" ]]; then + TYPE_PATTERN="not single" + NUM_JOBS=2 + fi + + pytest -m "$TYPE_PATTERN$PATTERN" -n $NUM_JOBS -s --strict --durations=10 --junitxml=test-data-$TYPE.xml $TEST_ARGS $COVERAGE pandas + + if [[ "$COVERAGE" && $? 
== 0 ]]; then + echo "uploading coverage for $TYPE tests" + bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME + fi +done diff --git a/ci/script_multi.sh b/ci/script_multi.sh deleted file mode 100755 index fba0c7ba19dd4..0000000000000 --- a/ci/script_multi.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -e - -echo "[script multi]" - -if [ -n "$LOCALE_OVERRIDE" ]; then - export LC_ALL="$LOCALE_OVERRIDE"; - export LANG="$LOCALE_OVERRIDE"; - echo "Setting LC_ALL to $LOCALE_OVERRIDE" - - pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' - python -c "$pycmd" -fi - -# Enforce absent network during testing by faking a proxy -if echo "$TEST_ARGS" | grep -e --skip-network -q; then - export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; -fi - -# Workaround for pytest-xdist flaky collection order -# https://github.com/pytest-dev/pytest/issues/920 -# https://github.com/pytest-dev/pytest/issues/1075 -export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 4294967295))') -echo PYTHONHASHSEED=$PYTHONHASHSEED - -if [ "$DOC" ]; then - echo "We are not running pytest as this is a doc-build" - -elif [ "$COVERAGE" ]; then - echo pytest -s -n 2 -m "not single" --durations=10 --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -s -n 2 -m "not single" --durations=10 --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - -elif [ "$SLOW" ]; then - TEST_ARGS="--only-slow --skip-network" - # The `-m " and slow"` is redundant here, as `--only-slow` is already used (via $TEST_ARGS). But is needed, because with - # `--only-slow` fast tests are skipped, but each of them is printed in the log (which can be avoided with `-q`), - # and also added to `test-data-multiple.xml`, and then printed in the log in the call to `ci/print_skipped.py`. - # Printing them to the log makes the log exceed the maximum size allowed by Travis and makes the build fail. - echo pytest -n 2 -m "not single and slow" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -n 2 -m "not single and slow" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - -else - echo pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas - pytest -n 2 -m "not single" --durations=10 --junitxml=test-data-multiple.xml --strict $TEST_ARGS pandas # TODO: doctest - -fi - -RET="$?" 
- -exit "$RET" diff --git a/ci/script_single.sh b/ci/script_single.sh deleted file mode 100755 index cbbb7a49541c2..0000000000000 --- a/ci/script_single.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -echo "[script_single]" - -if [ -n "$LOCALE_OVERRIDE" ]; then - echo "Setting LC_ALL and LANG to $LOCALE_OVERRIDE" - export LC_ALL="$LOCALE_OVERRIDE"; - export LANG="$LOCALE_OVERRIDE"; - - pycmd='import pandas; print("pandas detected console encoding: %s" % pandas.get_option("display.encoding"))' - python -c "$pycmd" -fi - -if [ "$SLOW" ]; then - TEST_ARGS="--only-slow --skip-network" -fi - -# Enforce absent network during testing by faking a proxy -if echo "$TEST_ARGS" | grep -e --skip-network -q; then - export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; -fi - -if [ "$DOC" ]; then - echo "We are not running pytest as this is a doc-build" - -elif [ "$COVERAGE" ]; then - echo pytest -s -m "single" --durations=10 --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=test-data-single.xml $TEST_ARGS pandas - pytest -s -m "single" --durations=10 --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=test-data-single.xml $TEST_ARGS pandas - echo pytest -s --strict scripts - pytest -s --strict scripts -else - echo pytest -m "single" --durations=10 --junitxml=test-data-single.xml --strict $TEST_ARGS pandas - pytest -m "single" --durations=10 --junitxml=test-data-single.xml --strict $TEST_ARGS pandas - -fi - -RET="$?" - -exit "$RET" diff --git a/ci/upload_coverage.sh b/ci/upload_coverage.sh deleted file mode 100755 index 88aca20590505..0000000000000 --- a/ci/upload_coverage.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -if [ -z "$COVERAGE" ]; then - echo "coverage is not selected for this build" - exit 0 -fi - - -echo "uploading coverage" -bash <(curl -s https://codecov.io/bash) -Z -c -F single -f /tmp/cov-single.xml -bash <(curl -s https://codecov.io/bash) -Z -c -F multiple -f /tmp/cov-multiple.xml diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index eecacde8ad14e..704b0c4d80537 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -6,7 +6,7 @@ import pandas as pd import numpy as np - pd.options.display.max_rows=15 + pd.options.display.max_rows = 15 Comparison with R / R libraries ******************************* @@ -165,16 +165,15 @@ function. .. ipython:: python - df = pd.DataFrame({ - 'v1': [1,3,5,7,8,3,5,np.nan,4,5,7,9], - 'v2': [11,33,55,77,88,33,55,np.nan,44,55,77,99], - 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], - 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, - np.nan] - }) + df = pd.DataFrame( + {'v1': [1, 3, 5, 7, 8, 3, 5, np.nan, 4, 5, 7, 9], + 'v2': [11, 33, 55, 77, 88, 33, 55, np.nan, 44, 55, 77, 99], + 'by1': ["red", "blue", 1, 2, np.nan, "big", 1, 2, "red", 1, np.nan, 12], + 'by2': ["wet", "dry", 99, 95, np.nan, "damp", 95, 99, "red", 99, np.nan, + np.nan]}) - g = df.groupby(['by1','by2']) - g[['v1','v2']].mean() + g = df.groupby(['by1', 'by2']) + g[['v1', 'v2']].mean() For more details and examples see :ref:`the groupby documentation `. @@ -195,7 +194,7 @@ The :meth:`~pandas.DataFrame.isin` method is similar to R ``%in%`` operator: .. 
ipython:: python - s = pd.Series(np.arange(5),dtype=np.float32) + s = pd.Series(np.arange(5), dtype=np.float32) s.isin([2, 4]) The ``match`` function returns a vector of the positions of matches @@ -234,11 +233,11 @@ In ``pandas`` we may use :meth:`~pandas.pivot_table` method to handle this: import random import string - baseball = pd.DataFrame({ - 'team': ["team %d" % (x+1) for x in range(5)]*5, - 'player': random.sample(list(string.ascii_lowercase),25), - 'batting avg': np.random.uniform(.200, .400, 25) - }) + baseball = pd.DataFrame( + {'team': ["team %d" % (x + 1) for x in range(5)] * 5, + 'player': random.sample(list(string.ascii_lowercase), 25), + 'batting avg': np.random.uniform(.200, .400, 25)}) + baseball.pivot_table(values='batting avg', columns='team', aggfunc=np.max) For more details and examples see :ref:`the reshaping documentation @@ -341,15 +340,13 @@ In ``pandas`` the equivalent expression, using the .. ipython:: python - df = pd.DataFrame({ - 'x': np.random.uniform(1., 168., 120), - 'y': np.random.uniform(7., 334., 120), - 'z': np.random.uniform(1.7, 20.7, 120), - 'month': [5,6,7,8]*30, - 'week': np.random.randint(1,4, 120) - }) + df = pd.DataFrame({'x': np.random.uniform(1., 168., 120), + 'y': np.random.uniform(7., 334., 120), + 'z': np.random.uniform(1.7, 20.7, 120), + 'month': [5, 6, 7, 8] * 30, + 'week': np.random.randint(1, 4, 120)}) - grouped = df.groupby(['month','week']) + grouped = df.groupby(['month', 'week']) grouped['x'].agg([np.mean, np.std]) @@ -374,8 +371,8 @@ In Python, since ``a`` is a list, you can simply use list comprehension. .. ipython:: python - a = np.array(list(range(1,24))+[np.NAN]).reshape(2,3,4) - pd.DataFrame([tuple(list(x)+[val]) for x, val in np.ndenumerate(a)]) + a = np.array(list(range(1, 24)) + [np.NAN]).reshape(2, 3, 4) + pd.DataFrame([tuple(list(x) + [val]) for x, val in np.ndenumerate(a)]) |meltlist|_ ~~~~~~~~~~~~ @@ -393,7 +390,7 @@ In Python, this list would be a list of tuples, so .. ipython:: python - a = list(enumerate(list(range(1,5))+[np.NAN])) + a = list(enumerate(list(range(1, 5)) + [np.NAN])) pd.DataFrame(a) For more details and examples see :ref:`the Into to Data Structures @@ -419,12 +416,13 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent: .. ipython:: python - cheese = pd.DataFrame({'first' : ['John', 'Mary'], - 'last' : ['Doe', 'Bo'], - 'height' : [5.5, 6.0], - 'weight' : [130, 150]}) + cheese = pd.DataFrame({'first': ['John', 'Mary'], + 'last': ['Doe', 'Bo'], + 'height': [5.5, 6.0], + 'weight': [130, 150]}) + pd.melt(cheese, id_vars=['first', 'last']) - cheese.set_index(['first', 'last']).stack() # alternative way + cheese.set_index(['first', 'last']).stack() # alternative way For more details and examples see :ref:`the reshaping documentation `. @@ -452,16 +450,15 @@ In Python the best way is to make use of :meth:`~pandas.pivot_table`: .. 
ipython:: python - df = pd.DataFrame({ - 'x': np.random.uniform(1., 168., 12), - 'y': np.random.uniform(7., 334., 12), - 'z': np.random.uniform(1.7, 20.7, 12), - 'month': [5,6,7]*4, - 'week': [1,2]*6 - }) + df = pd.DataFrame({'x': np.random.uniform(1., 168., 12), + 'y': np.random.uniform(7., 334., 12), + 'z': np.random.uniform(1.7, 20.7, 12), + 'month': [5, 6, 7] * 4, + 'week': [1, 2] * 6}) + mdf = pd.melt(df, id_vars=['month', 'week']) - pd.pivot_table(mdf, values='value', index=['variable','week'], - columns=['month'], aggfunc=np.mean) + pd.pivot_table(mdf, values='value', index=['variable', 'week'], + columns=['month'], aggfunc=np.mean) Similarly for ``dcast`` which uses a data.frame called ``df`` in R to aggregate information based on ``Animal`` and ``FeedType``: @@ -491,13 +488,14 @@ using :meth:`~pandas.pivot_table`: 'Amount': [10, 7, 4, 2, 5, 6, 2], }) - df.pivot_table(values='Amount', index='Animal', columns='FeedType', aggfunc='sum') + df.pivot_table(values='Amount', index='Animal', columns='FeedType', + aggfunc='sum') The second approach is to use the :meth:`~pandas.DataFrame.groupby` method: .. ipython:: python - df.groupby(['Animal','FeedType'])['Amount'].sum() + df.groupby(['Animal', 'FeedType'])['Amount'].sum() For more details and examples see :ref:`the reshaping documentation ` or :ref:`the groupby documentation`. @@ -516,8 +514,8 @@ In pandas this is accomplished with ``pd.cut`` and ``astype("category")``: .. ipython:: python - pd.cut(pd.Series([1,2,3,4,5,6]), 3) - pd.Series([1,2,3,2,2,3]).astype("category") + pd.cut(pd.Series([1, 2, 3, 4, 5, 6]), 3) + pd.Series([1, 2, 3, 2, 2, 3]).astype("category") For more details and examples see :ref:`categorical introduction ` and the :ref:`API documentation `. There is also a documentation regarding the diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst index db143cd586441..021f37eb5c66f 100644 --- a/doc/source/comparison_with_sql.rst +++ b/doc/source/comparison_with_sql.rst @@ -23,7 +23,8 @@ structure. .. ipython:: python - url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv' + url = ('https://raw.github.com/pandas-dev' + '/pandas/master/pandas/tests/data/tips.csv') tips = pd.read_csv(url) tips.head() @@ -387,7 +388,7 @@ Top N rows with offset .. ipython:: python - tips.nlargest(10+5, columns='tip').tail(10) + tips.nlargest(10 + 5, columns='tip').tail(10) Top N rows per group ~~~~~~~~~~~~~~~~~~~~ @@ -411,8 +412,7 @@ Top N rows per group .groupby(['day']) .cumcount() + 1) .query('rn < 3') - .sort_values(['day','rn']) - ) + .sort_values(['day', 'rn'])) the same using `rank(method='first')` function @@ -421,8 +421,7 @@ the same using `rank(method='first')` function (tips.assign(rnk=tips.groupby(['day'])['total_bill'] .rank(method='first', ascending=False)) .query('rnk < 3') - .sort_values(['day','rnk']) - ) + .sort_values(['day', 'rnk'])) .. code-block:: sql @@ -445,11 +444,10 @@ Notice that when using ``rank(method='min')`` function .. 
ipython:: python (tips[tips['tip'] < 2] - .assign(rnk_min=tips.groupby(['sex'])['tip'] - .rank(method='min')) - .query('rnk_min < 3') - .sort_values(['sex','rnk_min']) - ) + .assign(rnk_min=tips.groupby(['sex'])['tip'] + .rank(method='min')) + .query('rnk_min < 3') + .sort_values(['sex', 'rnk_min'])) UPDATE diff --git a/doc/source/comparison_with_stata.rst b/doc/source/comparison_with_stata.rst index 6c518983d5904..e039843b22065 100644 --- a/doc/source/comparison_with_stata.rst +++ b/doc/source/comparison_with_stata.rst @@ -102,9 +102,7 @@ and the values are the data. .. ipython:: python - df = pd.DataFrame({ - 'x': [1, 3, 5], - 'y': [2, 4, 6]}) + df = pd.DataFrame({'x': [1, 3, 5], 'y': [2, 4, 6]}) df @@ -128,7 +126,8 @@ the data set if presented with a url. .. ipython:: python - url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv' + url = ('https://raw.github.com/pandas-dev' + '/pandas/master/pandas/tests/data/tips.csv') tips = pd.read_csv(url) tips.head() @@ -278,17 +277,17 @@ see the :ref:`timeseries documentation` for more details. tips['date1_year'] = tips['date1'].dt.year tips['date2_month'] = tips['date2'].dt.month tips['date1_next'] = tips['date1'] + pd.offsets.MonthBegin() - tips['months_between'] = (tips['date2'].dt.to_period('M') - - tips['date1'].dt.to_period('M')) + tips['months_between'] = (tips['date2'].dt.to_period('M') + - tips['date1'].dt.to_period('M')) - tips[['date1','date2','date1_year','date2_month', - 'date1_next','months_between']].head() + tips[['date1', 'date2', 'date1_year', 'date2_month', 'date1_next', + 'months_between']].head() .. ipython:: python :suppress: - tips = tips.drop(['date1','date2','date1_year', - 'date2_month','date1_next','months_between'], axis=1) + tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month', + 'date1_next', 'months_between'], axis=1) Selection of Columns ~~~~~~~~~~~~~~~~~~~~ @@ -472,7 +471,7 @@ The following tables will be used in the merge examples 'value': np.random.randn(4)}) df1 df2 = pd.DataFrame({'key': ['B', 'D', 'D', 'E'], - 'value': np.random.randn(4)}) + 'value': np.random.randn(4)}) df2 In Stata, to perform a merge, one data set must be in memory @@ -661,7 +660,7 @@ In pandas this would be written as: .. ipython:: python - tips.groupby(['sex','smoker']).first() + tips.groupby(['sex', 'smoker']).first() Other Considerations diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 0d2021de8f88e..251dce5141ea5 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -4,14 +4,15 @@ :suppress: import numpy as np + import matplotlib.pyplot as plt + + import pandas as pd + np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) - import pandas as pd - import matplotlib - # matplotlib.style.use('default') - import matplotlib.pyplot as plt + pd.options.display.max_rows = 15 + plt.close('all') - pd.options.display.max_rows=15 .. _computation: @@ -75,7 +76,8 @@ series in the DataFrame, also excluding NA/null values. .. ipython:: python - frame = pd.DataFrame(np.random.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) + frame = pd.DataFrame(np.random.randn(1000, 5), + columns=['a', 'b', 'c', 'd', 'e']) frame.cov() ``DataFrame.cov`` also supports an optional ``min_periods`` keyword that @@ -127,7 +129,8 @@ Wikipedia has articles covering the above correlation coefficients: .. 
ipython:: python - frame = pd.DataFrame(np.random.randn(1000, 5), columns=['a', 'b', 'c', 'd', 'e']) + frame = pd.DataFrame(np.random.randn(1000, 5), + columns=['a', 'b', 'c', 'd', 'e']) frame.iloc[::2] = np.nan # Series with Series @@ -163,9 +166,10 @@ compute the correlation based on histogram intersection: .. ipython:: python # histogram intersection - histogram_intersection = lambda a, b: np.minimum( - np.true_divide(a, a.sum()), np.true_divide(b, b.sum()) - ).sum() + def histogram_intersection(a, b): + return np.minimum(np.true_divide(a, a.sum()), + np.true_divide(b, b.sum())).sum() + frame.corr(method=histogram_intersection) A related method :meth:`~DataFrame.corrwith` is implemented on DataFrame to @@ -192,7 +196,7 @@ assigned the mean of the ranks (by default) for the group: .. ipython:: python s = pd.Series(np.random.np.random.randn(5), index=list('abcde')) - s['d'] = s['b'] # so there's a tie + s['d'] = s['b'] # so there's a tie s.rank() :meth:`~DataFrame.rank` is also a DataFrame method and can rank either the rows @@ -202,7 +206,7 @@ ranking. .. ipython:: python df = pd.DataFrame(np.random.np.random.randn(10, 6)) - df[4] = df[2][:5] # some ties + df[4] = df[2][:5] # some ties df df.rank(1) @@ -243,7 +247,8 @@ objects, :class:`~pandas.core.window.Rolling`, :class:`~pandas.core.window.Expan .. ipython:: python - s = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000)) + s = pd.Series(np.random.randn(1000), + index=pd.date_range('1/1/2000', periods=1000)) s = s.cumsum() s @@ -258,7 +263,7 @@ These object provide tab-completion of the available methods and properties. .. code-block:: ipython - In [14]: r. + In [14]: r. # noqa: E225, E999 r.agg r.apply r.count r.exclusions r.max r.median r.name r.skew r.sum r.aggregate r.corr r.cov r.kurt r.mean r.min r.quantile r.std r.var @@ -336,7 +341,9 @@ compute the mean absolute deviation on a rolling basis: .. ipython:: python - mad = lambda x: np.fabs(x - x.mean()).mean() + def mad(x): + return np.fabs(x - x.mean()).mean() + @savefig rolling_apply_ex.png s.rolling(window=60).apply(mad, raw=True).plot(style='k') @@ -376,7 +383,8 @@ The list of recognized types are the `scipy.signal window functions .. ipython:: python - ser = pd.Series(np.random.randn(10), index=pd.date_range('1/1/2000', periods=10)) + ser = pd.Series(np.random.randn(10), + index=pd.date_range('1/1/2000', periods=10)) ser.rolling(window=5, win_type='triang').mean() @@ -423,7 +431,9 @@ This can be particularly useful for a non-regular time frequency index. .. ipython:: python dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index=pd.date_range('20130101 09:00:00', periods=5, freq='s')) + index=pd.date_range('20130101 09:00:00', + periods=5, + freq='s')) dft This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. @@ -445,12 +455,12 @@ Using a non-regular, but still monotonic index, rolling with an integer window d .. 
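A supplementary sketch (the frames and column names are illustrative, not taken from the patch) of the :meth:`~DataFrame.corrwith` method mentioned above, which correlates like-labeled columns of two DataFrames:

.. code-block:: python

   df1 = pd.DataFrame(np.random.randn(6, 3), columns=['a', 'b', 'c'])
   df2 = pd.DataFrame(np.random.randn(6, 3), columns=['a', 'b', 'c'])

   # pairwise correlation of the like-labeled columns
   df1.corrwith(df2)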
ipython:: python dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index = pd.Index([pd.Timestamp('20130101 09:00:00'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:05'), - pd.Timestamp('20130101 09:00:06')], - name='foo')) + index=pd.Index([pd.Timestamp('20130101 09:00:00'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:05'), + pd.Timestamp('20130101 09:00:06')], + name='foo')) dft dft.rolling(2).sum() @@ -496,11 +506,11 @@ from present information back to past information. This allows the rolling windo .. ipython:: python df = pd.DataFrame({'x': 1}, - index = [pd.Timestamp('20130101 09:00:01'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:04'), - pd.Timestamp('20130101 09:00:06')]) + index=[pd.Timestamp('20130101 09:00:01'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:04'), + pd.Timestamp('20130101 09:00:06')]) df["right"] = df.rolling('2s', closed='right').x.sum() # default df["both"] = df.rolling('2s', closed='both').x.sum() @@ -601,7 +611,8 @@ can even be omitted: .. ipython:: python - covs = df[['B','C','D']].rolling(window=50).cov(df[['A','B','C']], pairwise=True) + covs = (df[['B', 'C', 'D']].rolling(window=50) + .cov(df[['A', 'B', 'C']], pairwise=True)) covs.loc['2002-09-22':] .. ipython:: python @@ -637,7 +648,7 @@ perform multiple computations on the data. These operations are similar to the : dfa = pd.DataFrame(np.random.randn(1000, 3), index=pd.date_range('1/1/2000', periods=1000), columns=['A', 'B', 'C']) - r = dfa.rolling(window=60,min_periods=1) + r = dfa.rolling(window=60, min_periods=1) r We can aggregate by passing a function to the entire DataFrame, or select a @@ -649,7 +660,7 @@ Series (or multiple Series) via standard ``__getitem__``. r['A'].aggregate(np.sum) - r[['A','B']].aggregate(np.sum) + r[['A', 'B']].aggregate(np.sum) As you can see, the result of the aggregation will have the selected columns, or all columns if none are selected. @@ -683,24 +694,21 @@ By passing a dict to ``aggregate`` you can apply a different aggregation to the columns of a ``DataFrame``: .. ipython:: python - :okexcept: - :okwarning: - r.agg({'A' : np.sum, - 'B' : lambda x: np.std(x, ddof=1)}) + r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)}) The function names can also be strings. In order for a string to be valid it must be implemented on the windowed object .. ipython:: python - r.agg({'A' : 'sum', 'B' : 'std'}) + r.agg({'A': 'sum', 'B': 'std'}) Furthermore you can pass a nested dict to indicate different aggregations on different columns. .. ipython:: python - r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] }) + r.agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) .. _stats.moments.expanding: diff --git a/doc/source/io.rst b/doc/source/io.rst index 2b91836d5449d..372a7b8a325e7 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -5,25 +5,23 @@ .. 
ipython:: python :suppress: - import os import csv - from pandas.compat import StringIO, BytesIO - import pandas as pd - ExcelWriter = pd.ExcelWriter + import os + import matplotlib.pyplot as plt import numpy as np - np.random.seed(123456) + import pandas as pd + from pandas.compat import StringIO, BytesIO + + randn = np.random.randn np.set_printoptions(precision=4, suppress=True) - - import matplotlib.pyplot as plt plt.close('all') - - import pandas.util.testing as tm pd.options.display.max_rows = 15 clipdf = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6], 'C': ['p', 'q', 'r']}, index=['x', 'y', 'z']) + =============================== IO Tools (Text, CSV, HDF5, ...) =============================== @@ -146,7 +144,10 @@ usecols : list-like or callable, default ``None`` .. ipython:: python - data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3' + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), usecols=lambda x: x.upper() in ['COL1', 'COL3']) @@ -192,7 +193,10 @@ skiprows : list-like or integer, default ``None`` .. ipython:: python - data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3' + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), skiprows=lambda x: x % 2 != 0) @@ -367,7 +371,10 @@ columns: .. ipython:: python - data = 'a,b,c\n1,2,3\n4,5,6\n7,8,9' + data = ('a,b,c\n' + '1,2,3\n' + '4,5,6\n' + '7,8,9') print(data) df = pd.read_csv(StringIO(data), dtype=object) @@ -388,7 +395,11 @@ of :func:`~pandas.read_csv`: .. ipython:: python - data = "col_1\n1\n2\n'A'\n4.22" + data = ("col_1\n" + "1\n" + "2\n" + "'A'\n" + "4.22") df = pd.read_csv(StringIO(data), converters={'col_1': str}) df df['col_1'].apply(type).value_counts() @@ -427,7 +438,8 @@ worth trying. .. ipython:: python :okwarning: - df = pd.DataFrame({'col_1': list(range(500000)) + ['a', 'b'] + list(range(500000))}) + col_1 = list(range(500000)) + ['a', 'b'] + list(range(500000)) + df = pd.DataFrame({'col_1': col_1}) df.to_csv('foo.csv') mixed_df = pd.read_csv('foo.csv') mixed_df['col_1'].apply(type).value_counts() @@ -455,7 +467,10 @@ Specifying Categorical dtype .. ipython:: python - data = 'col1,col2,col3\na,b,1\na,b,2\nc,d,3' + data = ('col1,col2,col3\n' + 'a,b,1\n' + 'a,b,2\n' + 'c,d,3') pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data)).dtypes @@ -479,7 +494,6 @@ that column's ``dtype``. .. ipython:: python from pandas.api.types import CategoricalDtype - dtype = CategoricalDtype(['d', 'c', 'b', 'a'], ordered=True) pd.read_csv(StringIO(data), dtype={'col1': dtype}).dtypes @@ -525,7 +539,10 @@ used as the column names: .. ipython:: python - data = 'a,b,c\n1,2,3\n4,5,6\n7,8,9' + data = ('a,b,c\n' + '1,2,3\n' + '4,5,6\n' + '7,8,9') print(data) pd.read_csv(StringIO(data)) @@ -544,7 +561,11 @@ If the header is in a row other than the first, pass the row number to .. ipython:: python - data = 'skip this skip it\na,b,c\n1,2,3\n4,5,6\n7,8,9' + data = ('skip this skip it\n' + 'a,b,c\n' + '1,2,3\n' + '4,5,6\n' + '7,8,9') pd.read_csv(StringIO(data), header=1) .. note:: @@ -565,7 +586,9 @@ distinguish between them so as to prevent overwriting data: .. ipython :: python - data = 'a,b,a\n0,1,2\n3,4,5' + data = ('a,b,a\n' + '0,1,2\n' + '3,4,5') pd.read_csv(StringIO(data)) There is no more duplicate data because ``mangle_dupe_cols=True`` by default, @@ -633,7 +656,13 @@ be ignored. By default, completely blank lines will be ignored as well. .. 
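A supplementary, self-contained sketch (not part of the patch) of the per-column form of the ``dtype`` keyword discussed above, reusing the small ``a,b,c`` example:

.. code-block:: python

   data = ('a,b,c\n'
           '1,2,3\n'
           '4,5,6\n'
           '7,8,9')
   df = pd.read_csv(StringIO(data), dtype={'b': object, 'c': np.float64})
   df.dtypes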
ipython:: python - data = '\na,b,c\n \n# commented line\n1,2,3\n\n4,5,6' + data = ('\n' + 'a,b,c\n' + ' \n' + '# commented line\n' + '1,2,3\n' + '\n' + '4,5,6') print(data) pd.read_csv(StringIO(data), comment='#') @@ -641,7 +670,12 @@ If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines: .. ipython:: python - data = 'a,b,c\n\n1,2,3\n\n\n4,5,6' + data = ('a,b,c\n' + '\n' + '1,2,3\n' + '\n' + '\n' + '4,5,6') pd.read_csv(StringIO(data), skip_blank_lines=False) .. warning:: @@ -652,20 +686,32 @@ If ``skip_blank_lines=False``, then ``read_csv`` will not ignore blank lines: .. ipython:: python - data = '#comment\na,b,c\nA,B,C\n1,2,3' + data = ('#comment\n' + 'a,b,c\n' + 'A,B,C\n' + '1,2,3') pd.read_csv(StringIO(data), comment='#', header=1) - data = 'A,B,C\n#comment\na,b,c\n1,2,3' + data = ('A,B,C\n' + '#comment\n' + 'a,b,c\n' + '1,2,3') pd.read_csv(StringIO(data), comment='#', skiprows=2) If both ``header`` and ``skiprows`` are specified, ``header`` will be relative to the end of ``skiprows``. For example: - .. ipython:: python +.. ipython:: python - data = ('# empty\n# second empty line\n# third empty' - 'line\nX,Y,Z\n1,2,3\nA,B,C\n1,2.,4.\n5.,NaN,10.0') - print(data) - pd.read_csv(StringIO(data), comment='#', skiprows=4, header=1) + data = ('# empty\n' + '# second empty line\n' + '# third emptyline\n' + 'X,Y,Z\n' + '1,2,3\n' + 'A,B,C\n' + '1,2.,4.\n' + '5.,NaN,10.0\n') + print(data) + pd.read_csv(StringIO(data), comment='#', skiprows=4, header=1) .. _io.comments: @@ -677,10 +723,10 @@ Sometimes comments or meta data may be included in a file: .. ipython:: python :suppress: - data = ("ID,level,category\n" - "Patient1,123000,x # really unpleasant\n" - "Patient2,23000,y # wouldn't take his medicine\n" - "Patient3,1234018,z # awesome") + data = ("ID,level,category\n" + "Patient1,123000,x # really unpleasant\n" + "Patient2,23000,y # wouldn't take his medicine\n" + "Patient3,1234018,z # awesome") with open('tmp.csv', 'w') as fh: fh.write(data) @@ -718,7 +764,10 @@ result in byte strings being decoded to unicode in the result: .. ipython:: python - data = b'word,length\nTr\xc3\xa4umen,7\nGr\xc3\xbc\xc3\x9fe,5'.decode('utf8').encode('latin-1') + data = (b'word,length\n' + b'Tr\xc3\xa4umen,7\n' + b'Gr\xc3\xbc\xc3\x9fe,5') + data = data.decode('utf8').encode('latin-1') df = pd.read_csv(BytesIO(data), encoding='latin-1') df df['word'][1] @@ -738,12 +787,16 @@ first column will be used as the ``DataFrame``'s row names: .. ipython:: python - data = 'a,b,c\n4,apple,bat,5.7\n8,orange,cow,10' + data = ('a,b,c\n' + '4,apple,bat,5.7\n' + '8,orange,cow,10') pd.read_csv(StringIO(data)) .. ipython:: python - data = 'index,a,b,c\n4,apple,bat,5.7\n8,orange,cow,10' + data = ('index,a,b,c\n' + '4,apple,bat,5.7\n' + '8,orange,cow,10') pd.read_csv(StringIO(data), index_col=0) Ordinarily, you can achieve this behavior using the ``index_col`` option. @@ -754,7 +807,9 @@ index column inference and discard the last column, pass ``index_col=False``: .. ipython:: python - data = 'a,b,c\n4,apple,bat,\n8,orange,cow,' + data = ('a,b,c\n' + '4,apple,bat,\n' + '8,orange,cow,') print(data) pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), index_col=False) @@ -764,7 +819,9 @@ If a subset of data is being parsed using the ``usecols`` option, the .. 
ipython:: python - data = 'a,b,c\n4,apple,bat,\n8,orange,cow,' + data = ('a,b,c\n' + '4,apple,bat,\n' + '8,orange,cow,') print(data) pd.read_csv(StringIO(data), usecols=['b', 'c']) pd.read_csv(StringIO(data), usecols=['b', 'c'], index_col=0) @@ -812,12 +869,12 @@ column names: .. ipython:: python :suppress: - data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" - "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" - "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" - "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" - "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" - "KORD,19990127, 23:00:00, 22:56:00, -0.5900") + data = ("KORD,19990127, 19:00:00, 18:56:00, 0.8100\n" + "KORD,19990127, 20:00:00, 19:56:00, 0.0100\n" + "KORD,19990127, 21:00:00, 20:56:00, -0.5900\n" + "KORD,19990127, 21:00:00, 21:18:00, -0.9900\n" + "KORD,19990127, 22:00:00, 21:56:00, -0.5900\n" + "KORD,19990127, 23:00:00, 22:56:00, -0.5900") with open('tmp.csv', 'w') as fh: fh.write(data) @@ -895,9 +952,8 @@ take full advantage of the flexibility of the date parsing API: .. ipython:: python - import pandas.io.date_converters as conv df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec, - date_parser=conv.parse_date_time) + date_parser=pd.io.date_converters.parse_date_time) df Pandas will try to call the ``date_parser`` function in three different ways. If @@ -990,9 +1046,12 @@ DD/MM/YYYY instead. For convenience, a ``dayfirst`` keyword is provided: .. ipython:: python :suppress: - data = "date,value,cat\n1/6/2000,5,a\n2/6/2000,10,b\n3/6/2000,15,c" + data = ("date,value,cat\n" + "1/6/2000,5,a\n" + "2/6/2000,10,b\n" + "3/6/2000,15,c") with open('tmp.csv', 'w') as fh: - fh.write(data) + fh.write(data) .. ipython:: python @@ -1016,9 +1075,12 @@ writing to a file). For example: val = '0.3066101993807095471566981359501369297504425048828125' data = 'a,b,c\n1,2,{0}'.format(val) - abs(pd.read_csv(StringIO(data), engine='c', float_precision=None)['c'][0] - float(val)) - abs(pd.read_csv(StringIO(data), engine='c', float_precision='high')['c'][0] - float(val)) - abs(pd.read_csv(StringIO(data), engine='c', float_precision='round_trip')['c'][0] - float(val)) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision=None)['c'][0] - float(val)) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision='high')['c'][0] - float(val)) + abs(pd.read_csv(StringIO(data), engine='c', + float_precision='round_trip')['c'][0] - float(val)) .. _io.thousands: @@ -1033,10 +1095,10 @@ correctly: .. ipython:: python :suppress: - data = ("ID|level|category\n" - "Patient1|123,000|x\n" - "Patient2|23,000|y\n" - "Patient3|1,234,018|z") + data = ("ID|level|category\n" + "Patient1|123,000|x\n" + "Patient2|23,000|y\n" + "Patient3|1,234,018|z") with open('tmp.csv', 'w') as fh: fh.write(data) @@ -1132,10 +1194,10 @@ as a ``Series``: .. ipython:: python :suppress: - data = ("level\n" - "Patient1,123000\n" - "Patient2,23000\n" - "Patient3,1234018") + data = ("level\n" + "Patient1,123000\n" + "Patient2,23000\n" + "Patient3,1234018") with open('tmp.csv', 'w') as fh: fh.write(data) @@ -1144,7 +1206,7 @@ as a ``Series``: print(open('tmp.csv').read()) - output = pd.read_csv('tmp.csv', squeeze=True) + output = pd.read_csv('tmp.csv', squeeze=True) output type(output) @@ -1166,7 +1228,9 @@ options as follows: .. 
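A supplementary, self-contained sketch (an assumption about typical usage rather than part of the patch) of the ``thousands`` keyword discussed above, so that ``123,000``-style values parse as integers; the boolean-values example announced above then follows:

.. code-block:: python

   data = ("ID|level|category\n"
           "Patient1|123,000|x\n"
           "Patient2|23,000|y\n"
           "Patient3|1,234,018|z")
   df = pd.read_csv(StringIO(data), sep='|', thousands=',')
   df['level'].dtype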
ipython:: python - data= 'a,b,c\n1,Yes,2\n3,No,4' + data = ('a,b,c\n' + '1,Yes,2\n' + '3,No,4') print(data) pd.read_csv(StringIO(data)) pd.read_csv(StringIO(data), true_values=['Yes'], false_values=['No']) @@ -1183,11 +1247,17 @@ too many fields will raise an error by default: .. ipython:: python :suppress: - data = 'a,b,c\n1,2,3\n4,5,6,7\n8,9,10' + data = ('a,b,c\n' + '1,2,3\n' + '4,5,6,7\n' + '8,9,10') .. code-block:: ipython - In [27]: data = 'a,b,c\n1,2,3\n4,5,6,7\n8,9,10' + In [27]: data = ('a,b,c\n' + '1,2,3\n' + '4,5,6,7\n' + '8,9,10') In [28]: pd.read_csv(StringIO(data)) --------------------------------------------------------------------------- @@ -1437,7 +1507,7 @@ returned object: .. ipython:: python - df = pd.read_csv("data/mindex_ex.csv", index_col=[0,1]) + df = pd.read_csv("data/mindex_ex.csv", index_col=[0, 1]) df df.loc[1978] @@ -1480,7 +1550,6 @@ with ``df.to_csv(..., index=False)``, then any ``names`` on the columns index wi .. ipython:: python :suppress: - import os os.remove('mi.csv') os.remove('mi2.csv') @@ -1966,9 +2035,8 @@ Preserve string indices: .. ipython:: python - si = pd.DataFrame(np.zeros((4, 4)), - columns=list(range(4)), - index=[str(i) for i in range(4)]) + si = pd.DataFrame(np.zeros((4, 4)), columns=list(range(4)), + index=[str(i) for i in range(4)]) si si.index si.columns @@ -2020,11 +2088,11 @@ data: .. ipython:: python - timeit pd.read_json(jsonfloats) + %timeit pd.read_json(jsonfloats) .. ipython:: python - timeit pd.read_json(jsonfloats, numpy=True) + %timeit pd.read_json(jsonfloats, numpy=True) The speedup is less noticeable for smaller datasets: @@ -2034,11 +2102,11 @@ The speedup is less noticeable for smaller datasets: .. ipython:: python - timeit pd.read_json(jsonfloats) + %timeit pd.read_json(jsonfloats) .. ipython:: python - timeit pd.read_json(jsonfloats, numpy=True) + %timeit pd.read_json(jsonfloats, numpy=True) .. warning:: @@ -2059,7 +2127,6 @@ The speedup is less noticeable for smaller datasets: .. ipython:: python :suppress: - import os os.remove('test.json') .. _io.json_normalize: @@ -2081,20 +2148,16 @@ into a flat table. .. ipython:: python data = [{'state': 'Florida', - 'shortname': 'FL', - 'info': { - 'governor': 'Rick Scott' - }, - 'counties': [{'name': 'Dade', 'population': 12345}, + 'shortname': 'FL', + 'info': {'governor': 'Rick Scott'}, + 'counties': [{'name': 'Dade', 'population': 12345}, {'name': 'Broward', 'population': 40000}, {'name': 'Palm Beach', 'population': 60000}]}, - {'state': 'Ohio', - 'shortname': 'OH', - 'info': { - 'governor': 'John Kasich' - }, - 'counties': [{'name': 'Summit', 'population': 1234}, - {'name': 'Cuyahoga', 'population': 1337}]}] + {'state': 'Ohio', + 'shortname': 'OH', + 'info': {'governor': 'John Kasich'}, + 'counties': [{'name': 'Summit', 'population': 1234}, + {'name': 'Cuyahoga', 'population': 1337}]}] json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']]) @@ -2142,11 +2205,10 @@ a JSON string with two fields, ``schema`` and ``data``. .. ipython:: python - df = pd.DataFrame( - {'A': [1, 2, 3], - 'B': ['a', 'b', 'c'], - 'C': pd.date_range('2016-01-01', freq='d', periods=3), - }, index=pd.Index(range(3), name='idx')) + df = pd.DataFrame({'A': [1, 2, 3], + 'B': ['a', 'b', 'c'], + 'C': pd.date_range('2016-01-01', freq='d', periods=3)}, + index=pd.Index(range(3), name='idx')) df df.to_json(orient='table', date_format="iso") @@ -2322,7 +2384,6 @@ as a string: .. 
ipython:: python :suppress: - import os file_path = os.path.abspath(os.path.join('source', '_static', 'banklist.html')) .. ipython:: python @@ -2820,8 +2881,8 @@ For example, to read in a ``MultiIndex`` index without names: .. ipython:: python - df = pd.DataFrame({'a':[1, 2, 3, 4], 'b':[5, 6, 7, 8]}, - index=pd.MultiIndex.from_product([['a', 'b'],['c', 'd']])) + df = pd.DataFrame({'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}, + index=pd.MultiIndex.from_product([['a', 'b'], ['c', 'd']])) df.to_excel('path_to_file.xlsx') df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1]) df @@ -2842,7 +2903,8 @@ should be passed to ``index_col`` and ``header``: .. ipython:: python - df.columns = pd.MultiIndex.from_product([['a'], ['b', 'd']], names=['c1', 'c2']) + df.columns = pd.MultiIndex.from_product([['a'], ['b', 'd']], + names=['c1', 'c2']) df.to_excel('path_to_file.xlsx') df = pd.read_excel('path_to_file.xlsx', index_col=[0, 1], header=[0, 1]) df @@ -2850,7 +2912,6 @@ should be passed to ``index_col`` and ``header``: .. ipython:: python :suppress: - import os os.remove('path_to_file.xlsx') @@ -2997,7 +3058,7 @@ one can pass an :class:`~pandas.io.excel.ExcelWriter`. .. code-block:: python - with ExcelWriter('path_to_file.xlsx') as writer: + with pd.ExcelWriter('path_to_file.xlsx') as writer: df1.to_excel(writer, sheet_name='Sheet1') df2.to_excel(writer, sheet_name='Sheet2') @@ -3029,7 +3090,7 @@ Pandas supports writing Excel files to buffer-like objects such as ``StringIO`` bio = BytesIO() # By setting the 'engine' in the ExcelWriter constructor. - writer = ExcelWriter(bio, engine='xlsxwriter') + writer = pd.ExcelWriter(bio, engine='xlsxwriter') df.to_excel(writer, sheet_name='Sheet1') # Save the workbook @@ -3082,7 +3143,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: df.to_excel('path_to_file.xlsx', sheet_name='Sheet1', engine='xlsxwriter') # By setting the 'engine' in the ExcelWriter constructor. - writer = ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') + writer = pd.ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') # Or via pandas configuration. from pandas import options # noqa: E402 @@ -3172,7 +3233,6 @@ any pickled pandas object (or any other pickled object) from file: .. ipython:: python :suppress: - import os os.remove('foo.pkl') .. warning:: @@ -3249,7 +3309,6 @@ The default is to 'infer': .. ipython:: python :suppress: - import os os.remove("data.pkl.compress") os.remove("data.pkl.xz") os.remove("data.pkl.gz") @@ -3306,7 +3365,7 @@ pandas objects. .. ipython:: python - pd.to_msgpack('foo2.msg', {'dict': [{ 'df': df }, {'string': 'foo'}, + pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'}, {'scalar': 1.}, {'s': s}]}) pd.read_msgpack('foo2.msg') @@ -3365,7 +3424,6 @@ dict: .. ipython:: python - np.random.seed(1234) index = pd.date_range('1/1/2000', periods=8) s = pd.Series(randn(5), index=['a', 'b', 'c', 'd', 'e']) df = pd.DataFrame(randn(8, 3), index=index, @@ -3421,7 +3479,6 @@ Closing a Store and using a context manager: :suppress: store.close() - import os os.remove('store.h5') @@ -3434,8 +3491,8 @@ similar to how ``read_csv`` and ``to_csv`` work. .. ipython:: python - df_tl = pd.DataFrame(dict(A=list(range(5)), B=list(range(5)))) - df_tl.to_hdf('store_tl.h5','table', append=True) + df_tl = pd.DataFrame({'A': list(range(5)), 'B': list(range(5))}) + df_tl.to_hdf('store_tl.h5', 'table', append=True) pd.read_hdf('store_tl.h5', 'table', where=['index>2']) .. 
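A supplementary sketch (the file name is illustrative) of the compression-by-extension behaviour described in the pickle section above: with the default ``compression='infer'``, a ``.gz`` suffix selects gzip on both the write and the read:

.. code-block:: python

   df.to_pickle('data.pkl.gz')      # gzip inferred from the extension
   pd.read_pickle('data.pkl.gz')    # round-trips transparently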
ipython:: python @@ -3447,10 +3504,6 @@ similar to how ``read_csv`` and ``to_csv`` work. HDFStore will by default not drop rows that are all missing. This behavior can be changed by setting ``dropna=True``. -.. ipython:: python - :suppress: - - import os .. ipython:: python @@ -3459,12 +3512,12 @@ HDFStore will by default not drop rows that are all missing. This behavior can b df_with_missing df_with_missing.to_hdf('file.h5', 'df_with_missing', - format='table', mode='w') + format='table', mode='w') pd.read_hdf('file.h5', 'df_with_missing') df_with_missing.to_hdf('file.h5', 'df_with_missing', - format='table', mode='w', dropna=True) + format='table', mode='w', dropna=True) pd.read_hdf('file.h5', 'df_with_missing') @@ -3478,13 +3531,13 @@ This is also true for the major axis of a ``Panel``: .. ipython:: python matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]], - [[np.nan, np.nan, np.nan], [np.nan, 5, 6]], - [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]] + [[np.nan, np.nan, np.nan], [np.nan, 5, 6]], + [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]] - panel_with_major_axis_all_missing=pd.Panel(matrix, - items=['Item1', 'Item2', 'Item3'], - major_axis=[1, 2], - minor_axis=['A', 'B', 'C']) + panel_with_major_axis_all_missing = pd.Panel(matrix, + items=['Item1', 'Item2', 'Item3'], + major_axis=[1, 2], + minor_axis=['A', 'B', 'C']) panel_with_major_axis_all_missing @@ -3585,7 +3638,7 @@ everything in the sub-store and **below**, so be *careful*. store.put('foo/bar/bah', df) store.append('food/orange', df) - store.append('food/apple', df) + store.append('food/apple', df) store # a list of keys are returned @@ -3660,14 +3713,15 @@ defaults to `nan`. df_mixed = pd.DataFrame({'A': randn(8), 'B': randn(8), 'C': np.array(randn(8), dtype='float32'), - 'string':'string', + 'string': 'string', 'int': 1, 'bool': True, 'datetime64': pd.Timestamp('20010102')}, index=list(range(8))) - df_mixed.loc[df_mixed.index[3:5], ['A', 'B', 'string', 'datetime64']] = np.nan + df_mixed.loc[df_mixed.index[3:5], + ['A', 'B', 'string', 'datetime64']] = np.nan - store.append('df_mixed', df_mixed, min_itemsize = {'values': 50}) + store.append('df_mixed', df_mixed, min_itemsize={'values': 50}) df_mixed1 = store.select('df_mixed') df_mixed1 df_mixed1.get_dtype_counts() @@ -3820,7 +3874,8 @@ Works with a Panel as well. store.append('wp', wp) store - store.select('wp', "major_axis>pd.Timestamp('20000102') & minor_axis=['A', 'B']") + store.select('wp', + "major_axis>pd.Timestamp('20000102') & minor_axis=['A', 'B']") The ``columns`` keyword can be supplied to select a list of columns to be returned, this is equivalent to passing a @@ -3863,7 +3918,10 @@ specified in the format: ``()``, where float may be signed (and fra .. ipython:: python from datetime import timedelta - dftd = pd.DataFrame(dict(A = pd.Timestamp('20130101'), B = [ pd.Timestamp('20130101') + timedelta(days=i, seconds=10) for i in range(10) ])) + dftd = pd.DataFrame({'A': pd.Timestamp('20130101'), + 'B': [pd.Timestamp('20130101') + timedelta(days=i, + seconds=10) + for i in range(10)]}) dftd['C'] = dftd['A'] - dftd['B'] dftd store.append('dftd', dftd, data_columns=True) @@ -3940,14 +3998,14 @@ be ``data_columns``. 
df_dc = df.copy() df_dc['string'] = 'foo' - df_dc.loc[df_dc.index[4: 6], 'string'] = np.nan - df_dc.loc[df_dc.index[7: 9], 'string'] = 'bar' + df_dc.loc[df_dc.index[4:6], 'string'] = np.nan + df_dc.loc[df_dc.index[7:9], 'string'] = 'bar' df_dc['string2'] = 'cool' - df_dc.loc[df_dc.index[1: 3], ['B', 'C']] = 1.0 + df_dc.loc[df_dc.index[1:3], ['B', 'C']] = 1.0 df_dc # on-disk operations - store.append('df_dc', df_dc, data_columns = ['B', 'C', 'string', 'string2']) + store.append('df_dc', df_dc, data_columns=['B', 'C', 'string', 'string2']) store.select('df_dc', where='B > 0') # getting creative @@ -3976,7 +4034,7 @@ The default is 50,000 rows returned in a chunk. .. ipython:: python for df in store.select('df', chunksize=3): - print(df) + print(df) .. note:: @@ -4003,12 +4061,12 @@ chunks. store.append('dfeq', dfeq, data_columns=['number']) def chunks(l, n): - return [l[i: i+n] for i in range(0, len(l), n)] + return [l[i:i + n] for i in range(0, len(l), n)] evens = [2, 4, 6, 8, 10] coordinates = store.select_as_coordinates('dfeq', 'number=evens') for c in chunks(coordinates, 2): - print(store.select('dfeq', where=c)) + print(store.select('dfeq', where=c)) Advanced Queries ++++++++++++++++ @@ -4105,13 +4163,13 @@ results. .. ipython:: python df_mt = pd.DataFrame(randn(8, 6), index=pd.date_range('1/1/2000', periods=8), - columns=['A', 'B', 'C', 'D', 'E', 'F']) + columns=['A', 'B', 'C', 'D', 'E', 'F']) df_mt['foo'] = 'bar' df_mt.loc[df_mt.index[1], ('A', 'B')] = np.nan # you can also create the tables individually - store.append_to_multiple({'df1_mt': ['A', 'B'], 'df2_mt': None }, - df_mt, selector='df1_mt') + store.append_to_multiple({'df1_mt': ['A', 'B'], 'df2_mt': None}, + df_mt, selector='df1_mt') store # individual tables were created @@ -4120,7 +4178,7 @@ results. # as a multiple store.select_as_multiple(['df1_mt', 'df2_mt'], where=['A>0', 'B>0'], - selector = 'df1_mt') + selector='df1_mt') Delete from a Table @@ -4159,7 +4217,7 @@ the table using a ``where`` that selects all but the missing data. .. ipython:: python # returns the number of rows deleted - store.remove('wp', 'major_axis > 20000102' ) + store.remove('wp', 'major_axis > 20000102') store.select('wp') .. warning:: @@ -4332,7 +4390,7 @@ stored in a more efficient manner. .. ipython:: python dfcat = pd.DataFrame({'A': pd.Series(list('aabbcdba')).astype('category'), - 'B': np.random.randn(8) }) + 'B': np.random.randn(8)}) dfcat dfcat.dtypes cstore = pd.HDFStore('cats.h5', mode='w') @@ -4346,7 +4404,6 @@ stored in a more efficient manner. :okexcept: cstore.close() - import os os.remove('cats.h5') @@ -4374,7 +4431,7 @@ Passing a ``min_itemsize`` dict will cause all passed columns to be created as * .. ipython:: python - dfs = pd.DataFrame(dict(A='foo', B='bar'), index=list(range(5))) + dfs = pd.DataFrame({'A': 'foo', 'B': 'bar'}, index=list(range(5))) dfs # A and B have a size of 30 @@ -4393,7 +4450,7 @@ You could inadvertently turn an actual ``nan`` value into a missing value. .. ipython:: python - dfss = pd.DataFrame(dict(A=['foo', 'bar', 'nan'])) + dfss = pd.DataFrame({'A': ['foo', 'bar', 'nan']}) dfss store.append('dfss', dfss) @@ -4420,11 +4477,10 @@ It is possible to write an ``HDFStore`` object that can easily be imported into .. 
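A supplementary sketch (not part of the patch; it assumes the ``df_dc`` table appended above with ``data_columns``) showing that several indexed data columns can be combined in a single ``where`` expression:

.. code-block:: python

   # rows where both numeric data columns are positive and the string column matches
   store.select('df_dc', where='B > 0 & C > 0 & string == "foo"')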
ipython:: python - np.random.seed(1) df_for_r = pd.DataFrame({"first": np.random.rand(100), "second": np.random.rand(100), "class": np.random.randint(0, 2, (100, ))}, - index=range(100)) + index=range(100)) df_for_r.head() store_export = pd.HDFStore('export.h5') @@ -4435,7 +4491,6 @@ It is possible to write an ``HDFStore`` object that can easily be imported into :suppress: store_export.close() - import os os.remove('export.h5') In R this file can be read into a ``data.frame`` object using the ``rhdf5`` @@ -4523,7 +4578,6 @@ Performance :suppress: store.close() - import os os.remove('store.h5') @@ -4589,7 +4643,6 @@ Read from a feather file. .. ipython:: python :suppress: - import os os.remove('example.feather') @@ -4673,7 +4726,6 @@ Read only certain columns of a parquet file. .. ipython:: python :suppress: - import os os.remove('example_pa.parquet') os.remove('example_fp.parquet') @@ -4722,7 +4774,8 @@ Parquet supports partitioning of data based on the values of one or more columns .. ipython:: python df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]}) - df.to_parquet(fname='test', engine='pyarrow', partition_cols=['a'], compression=None) + df.to_parquet(fname='test', engine='pyarrow', + partition_cols=['a'], compression=None) The `fname` specifies the parent directory to which data will be saved. The `partition_cols` are the column names by which the dataset will be partitioned. @@ -4835,14 +4888,15 @@ the database using :func:`~pandas.DataFrame.to_sql`. import datetime c = ['id', 'Date', 'Col_1', 'Col_2', 'Col_3'] - d = [(26, datetime.datetime(2010,10,18), 'X', 27.5, True), - (42, datetime.datetime(2010,10,19), 'Y', -12.5, False), - (63, datetime.datetime(2010,10,20), 'Z', 5.73, True)] + d = [(26, datetime.datetime(2010, 10, 18), 'X', 27.5, True), + (42, datetime.datetime(2010, 10, 19), 'Y', -12.5, False), + (63, datetime.datetime(2010, 10, 20), 'Z', 5.73, True)] - data = pd.DataFrame(d, columns=c) + data = pd.DataFrame(d, columns=c) .. ipython:: python + data data.to_sql('data', engine) With some databases, writing large DataFrames can result in errors due to @@ -4999,7 +5053,8 @@ Specifying this will return an iterator through chunks of the query result: .. ipython:: python - for chunk in pd.read_sql_query("SELECT * FROM data_chunks", engine, chunksize=5): + for chunk in pd.read_sql_query("SELECT * FROM data_chunks", + engine, chunksize=5): print(chunk) You can also run a plain query without creating a ``DataFrame`` with @@ -5064,12 +5119,12 @@ If you have an SQLAlchemy description of your database you can express where con metadata = sa.MetaData() data_table = sa.Table('data', metadata, - sa.Column('index', sa.Integer), - sa.Column('Date', sa.DateTime), - sa.Column('Col_1', sa.String), - sa.Column('Col_2', sa.Float), - sa.Column('Col_3', sa.Boolean), - ) + sa.Column('index', sa.Integer), + sa.Column('Date', sa.DateTime), + sa.Column('Col_1', sa.String), + sa.Column('Col_2', sa.Float), + sa.Column('Col_3', sa.Boolean), + ) pd.read_sql(sa.select([data_table]).where(data_table.c.Col_3 == True), engine) @@ -5239,7 +5294,6 @@ values will have ``object`` data type. .. ipython:: python :suppress: - import os os.remove('stata.dta') .. _io.stata-categorical: @@ -5452,9 +5506,6 @@ And here's the code: .. 
code-block:: python - import os - import pandas as pd - import sqlite3 from numpy.random import randn sz = 1000000 diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 4fa1cb8be9234..bca7b6a601dd2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -4,18 +4,12 @@ .. ipython:: python :suppress: - from datetime import datetime, timedelta, time import numpy as np import pandas as pd - from pandas import offsets + np.random.seed(123456) - randn = np.random.randn - randint = np.random.randint np.set_printoptions(precision=4, suppress=True) - pd.options.display.max_rows=15 - import dateutil - import pytz - from dateutil.relativedelta import relativedelta + pd.options.display.max_rows = 15 ******************************** Time Series / Date functionality @@ -32,7 +26,10 @@ Parsing time series information from various sources and formats .. ipython:: python - dti = pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01'), datetime(2018, 1, 1)]) + import datetime + + dti = pd.to_datetime(['1/1/2018', np.datetime64('2018-01-01'), + datetime.datetime(2018, 1, 1)]) dti Generate sequences of fixed-frequency dates and time spans @@ -69,7 +66,7 @@ Performing date and time arithmetic with absolute or relative time increments saturday = friday + pd.Timedelta('1 day') saturday.day_name() # Add 1 business day (Friday --> Monday) - monday = friday + pd.tseries.offsets.BDay() + monday = friday + pd.offsets.BDay() monday.day_name() pandas provides a relatively compact and self-contained set of tools for @@ -110,12 +107,14 @@ However, :class:`Series` and :class:`DataFrame` can directly also support the ti pd.Series(pd.date_range('2000', freq='D', periods=3)) -:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime`` and ``timedelta`` -data when the time data is used as data itself. The ``Period`` and ``DateOffset`` data will be stored as ``object`` data. +:class:`Series` and :class:`DataFrame` have extended data type support and functionality for ``datetime``, ``timedelta`` +and ``Period`` data when passed into those constructors. ``DateOffset`` +data however will be stored as ``object`` data. .. ipython:: python pd.Series(pd.period_range('1/1/2011', freq='M', periods=3)) + pd.Series([pd.DateOffset(1), pd.DateOffset(2)]) pd.Series(pd.date_range('1/1/2011', freq='M', periods=3)) Lastly, pandas represents null date times, time deltas, and time spans as ``NaT`` which @@ -141,7 +140,7 @@ time. .. ipython:: python - pd.Timestamp(datetime(2012, 5, 1)) + pd.Timestamp(datetime.datetime(2012, 5, 1)) pd.Timestamp('2012-05-01') pd.Timestamp(2012, 5, 1) @@ -163,7 +162,9 @@ and :class:`PeriodIndex` respectively. .. ipython:: python - dates = [pd.Timestamp('2012-05-01'), pd.Timestamp('2012-05-02'), pd.Timestamp('2012-05-03')] + dates = [pd.Timestamp('2012-05-01'), + pd.Timestamp('2012-05-02'), + pd.Timestamp('2012-05-03')] ts = pd.Series(np.random.randn(3), dates) type(ts.index) @@ -327,7 +328,7 @@ which can be specified. These are computed from the starting point specified by 1349979305, 1350065705], unit='s') pd.to_datetime([1349720105100, 1349720105200, 1349720105300, - 1349720105400, 1349720105500 ], unit='ms') + 1349720105400, 1349720105500], unit='ms') .. note:: @@ -400,7 +401,9 @@ To generate an index with timestamps, you can use either the ``DatetimeIndex`` o .. 
ipython:: python - dates = [datetime(2012, 5, 1), datetime(2012, 5, 2), datetime(2012, 5, 3)] + dates = [datetime.datetime(2012, 5, 1), + datetime.datetime(2012, 5, 2), + datetime.datetime(2012, 5, 3)] # Note the frequency information index = pd.DatetimeIndex(dates) @@ -418,8 +421,8 @@ to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a .. ipython:: python - start = datetime(2011, 1, 1) - end = datetime(2012, 1, 1) + start = datetime.datetime(2011, 1, 1) + end = datetime.datetime(2012, 1, 1) index = pd.date_range(start, end) index @@ -486,7 +489,7 @@ used if a custom frequency string is passed. weekmask = 'Mon Wed Fri' - holidays = [datetime(2011, 1, 5), datetime(2011, 3, 14)] + holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)] pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays) @@ -564,7 +567,7 @@ Dates and strings that parse to timestamps can be passed as indexing parameters: ts['1/31/2011'] - ts[datetime(2011, 12, 25):] + ts[datetime.datetime(2011, 12, 25):] ts['10/31/2011':'12/31/2011'] @@ -583,9 +586,8 @@ would include matching times on an included date: .. ipython:: python - dft = pd.DataFrame(randn(100000,1), - columns=['A'], - index=pd.date_range('20130101',periods=100000,freq='T')) + dft = pd.DataFrame(np.random.randn(100000, 1), columns=['A'], + index=pd.date_range('20130101', periods=100000, freq='T')) dft dft['2013'] @@ -622,10 +624,9 @@ We are stopping on the included end-point as it is part of the index: dft2 = pd.DataFrame(np.random.randn(20, 1), columns=['A'], - index=pd.MultiIndex.from_product([pd.date_range('20130101', - periods=10, - freq='12H'), - ['a', 'b']])) + index=pd.MultiIndex.from_product( + [pd.date_range('20130101', periods=10, freq='12H'), + ['a', 'b']])) dft2 dft2.loc['2013-01-05'] idx = pd.IndexSlice @@ -681,7 +682,7 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra .. ipython:: python dft_minute = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, - index=series_minute.index) + index=series_minute.index) dft_minute['2011-12-31 23'] @@ -693,18 +694,16 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra .. ipython:: python - dft_minute.loc['2011-12-31 23:59'] + dft_minute.loc['2011-12-31 23:59'] Note also that ``DatetimeIndex`` resolution cannot be less precise than day. .. ipython:: python series_monthly = pd.Series([1, 2, 3], - pd.DatetimeIndex(['2011-12', - '2012-01', - '2012-02'])) + pd.DatetimeIndex(['2011-12', '2012-01', '2012-02'])) series_monthly.index.resolution - series_monthly['2011-12'] # returns Series + series_monthly['2011-12'] # returns Series Exact Indexing @@ -716,13 +715,14 @@ These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and .. ipython:: python - dft[datetime(2013, 1, 1):datetime(2013,2,28)] + dft[datetime.datetime(2013, 1, 1):datetime.datetime(2013, 2, 28)] With no defaults. .. ipython:: python - dft[datetime(2013, 1, 1, 10, 12, 0):datetime(2013, 2, 28, 10, 12, 0)] + dft[datetime.datetime(2013, 1, 1, 10, 12, 0): + datetime.datetime(2013, 2, 28, 10, 12, 0)] Truncating & Fancy Indexing @@ -823,120 +823,119 @@ on :ref:`.dt accessors`. DateOffset Objects ------------------ -In the preceding examples, we created ``DatetimeIndex`` objects at various -frequencies by passing in :ref:`frequency strings ` -like 'M', 'W', and 'BM' to the ``freq`` keyword. 
Under the hood, these frequency -strings are being translated into an instance of :class:`DateOffset`, -which represents a regular frequency increment. Specific offset logic like -"month", "business day", or "one hour" is represented in its various subclasses. - -.. csv-table:: - :header: "Class name", "Description" - :widths: 15, 65 - - DateOffset, "Generic offset class, defaults to 1 calendar day" - BDay, "business day (weekday)" - CDay, "custom business day" - Week, "one week, optionally anchored on a day of the week" - WeekOfMonth, "the x-th day of the y-th week of each month" - LastWeekOfMonth, "the x-th day of the last week of each month" - MonthEnd, "calendar month end" - MonthBegin, "calendar month begin" - BMonthEnd, "business month end" - BMonthBegin, "business month begin" - CBMonthEnd, "custom business month end" - CBMonthBegin, "custom business month begin" - SemiMonthEnd, "15th (or other day_of_month) and calendar month end" - SemiMonthBegin, "15th (or other day_of_month) and calendar month begin" - QuarterEnd, "calendar quarter end" - QuarterBegin, "calendar quarter begin" - BQuarterEnd, "business quarter end" - BQuarterBegin, "business quarter begin" - FY5253Quarter, "retail (aka 52-53 week) quarter" - YearEnd, "calendar year end" - YearBegin, "calendar year begin" - BYearEnd, "business year end" - BYearBegin, "business year begin" - FY5253, "retail (aka 52-53 week) year" - BusinessHour, "business hour" - CustomBusinessHour, "custom business hour" - Hour, "one hour" - Minute, "one minute" - Second, "one second" - Milli, "one millisecond" - Micro, "one microsecond" - Nano, "one nanosecond" - -The basic ``DateOffset`` takes the same arguments as -``dateutil.relativedelta``, which works as follows: - -.. ipython:: python - - d = datetime(2008, 8, 18, 9, 0) - d + relativedelta(months=4, days=5) - -We could have done the same thing with ``DateOffset``: - -.. ipython:: python - - from pandas.tseries.offsets import * - d + DateOffset(months=4, days=5) +In the preceding examples, frequency strings (e.g. ``'D'``) were used to specify +a frequency that defined: -The key features of a ``DateOffset`` object are: +* how the date times in :class:`DatetimeIndex` were spaced when using :meth:`date_range` +* the frequency of a :class:`Period` or :class:`PeriodIndex` -* It can be added / subtracted to/from a datetime object to obtain a - shifted date. -* It can be multiplied by an integer (positive or negative) so that the - increment will be applied multiple times. -* It has :meth:`~pandas.DateOffset.rollforward` and - :meth:`~pandas.DateOffset.rollback` methods for moving a date forward or - backward to the next or previous "offset date". +These frequency strings map to a :class:`DateOffset` object and its subclasses. A :class:`DateOffset` +is similar to a :class:`Timedelta` that represents a duration of time but follows specific calendar duration rules. +For example, a :class:`Timedelta` day will always increment ``datetimes`` by 24 hours, while a :class:`DateOffset` day +will increment ``datetimes`` to the same time the next day whether a day represents 23, 24 or 25 hours due to daylight +savings time. However, all :class:`DateOffset` subclasses that are an hour or smaller +(``Hour``, ``Minute``, ``Second``, ``Milli``, ``Micro``, ``Nano``) behave like +:class:`Timedelta` and respect absolute time. -Subclasses of ``DateOffset`` define the ``apply`` function which dictates -custom date increment logic, such as adding business days: - -.. 
code-block:: python - - class BDay(DateOffset): - """DateOffset increments between business days""" - def apply(self, other): - ... +The basic :class:`DateOffset` acts similar to ``dateutil.relativedelta`` (`relativedelta documentation`_) +that shifts a date time by the corresponding calendar duration specified. The +arithmetic operator (``+``) or the ``apply`` method can be used to perform the shift. .. ipython:: python - d - 5 * BDay() - d + BMonthEnd() - -The ``rollforward`` and ``rollback`` methods do exactly what you would expect: - -.. ipython:: python - - d - offset = BMonthEnd() - offset.rollforward(d) - offset.rollback(d) - -It's definitely worth exploring the ``pandas.tseries.offsets`` module and the -various docstrings for the classes. + # This particular day contains a day light savings time transition + ts = pd.Timestamp('2016-10-30 00:00:00', tz='Europe/Helsinki') + # Respects absolute time + ts + pd.Timedelta(days=1) + # Respects calendar time + ts + pd.DateOffset(days=1) + friday = pd.Timestamp('2018-01-05') + friday.day_name() + # Add 2 business days (Friday --> Tuesday) + two_business_days = 2 * pd.offsets.BDay() + two_business_days.apply(friday) + friday + two_business_days + (friday + two_business_days).day_name() + +Most ``DateOffsets`` have associated frequencies strings, or offset aliases, that can be passed +into ``freq`` keyword arguments. The available date offsets and associated frequency strings can be found below: -These operations (``apply``, ``rollforward`` and ``rollback``) preserve time -(hour, minute, etc) information by default. To reset time, use ``normalize`` -before or after applying the operation (depending on whether you want the -time information included in the operation. +.. csv-table:: + :header: "Date Offset", "Frequency String", "Description" + :widths: 15, 15, 65 + + ``DateOffset``, None, "Generic offset class, defaults to 1 calendar day" + ``BDay`` or ``BusinessDay``, ``'B'``,"business day (weekday)" + ``CDay`` or ``CustomBusinessDay``, ``'C'``, "custom business day" + ``Week``, ``'W'``, "one week, optionally anchored on a day of the week" + ``WeekOfMonth``, ``'WOM'``, "the x-th day of the y-th week of each month" + ``LastWeekOfMonth``, ``'LWOM'``, "the x-th day of the last week of each month" + ``MonthEnd``, ``'M'``, "calendar month end" + ``MonthBegin``, ``'MS'``, "calendar month begin" + ``BMonthEnd`` or ``BusinessMonthEnd``, ``'BM'``, "business month end" + ``BMonthBegin`` or ``BusinessMonthBegin``, ``'BMS'``, "business month begin" + ``CBMonthEnd`` or ``CustomBusinessMonthEnd``, ``'CBM'``, "custom business month end" + ``CBMonthBegin`` or ``CustomBusinessMonthBegin``, ``'CBMS'``, "custom business month begin" + ``SemiMonthEnd``, ``'SM'``, "15th (or other day_of_month) and calendar month end" + ``SemiMonthBegin``, ``'SMS'``, "15th (or other day_of_month) and calendar month begin" + ``QuarterEnd``, ``'Q'``, "calendar quarter end" + ``QuarterBegin``, ``'QS'``, "calendar quarter begin" + ``BQuarterEnd``, ``'BQ``, "business quarter end" + ``BQuarterBegin``, ``'BQS'``, "business quarter begin" + ``FY5253Quarter``, ``'REQ'``, "retail (aka 52-53 week) quarter" + ``YearEnd``, ``'A'``, "calendar year end" + ``YearBegin``, ``'AS'`` or ``'BYS'``,"calendar year begin" + ``BYearEnd``, ``'BA'``, "business year end" + ``BYearBegin``, ``'BAS'``, "business year begin" + ``FY5253``, ``'RE'``, "retail (aka 52-53 week) year" + ``Easter``, None, "Easter holiday" + ``BusinessHour``, ``'BH'``, "business hour" + ``CustomBusinessHour``, ``'CBH'``, "custom 
business hour" + ``Day``, ``'D'``, "one absolute day" + ``Hour``, ``'H'``, "one hour" + ``Minute``, ``'T'`` or ``'min'``,"one minute" + ``Second``, ``'S'``, "one second" + ``Milli``, ``'L'`` or ``'ms'``, "one millisecond" + ``Micro``, ``'U'`` or ``'us'``, "one microsecond" + ``Nano``, ``'N'``, "one nanosecond" + +``DateOffsets`` additionally have :meth:`rollforward` and :meth:`rollback` +methods for moving a date forward or backward respectively to a valid offset +date relative to the offset. For example, business offsets will roll dates +that land on the weekends (Saturday and Sunday) forward to Monday since +business offsets operate on the weekdays. + +.. ipython:: python + + ts = pd.Timestamp('2018-01-06 00:00:00') + ts.day_name() + # BusinessHour's valid offset dates are Monday through Friday + offset = pd.offsets.BusinessHour(start='09:00') + # Bring the date to the closest offset date (Monday) + offset.rollforward(ts) + # Date is brought to the closest offset date first and then the hour is added + ts + offset + +These operations preserve time (hour, minute, etc) information by default. +To reset time to midnight, use :meth:`normalize` before or after applying +the operation (depending on whether you want the time information included +in the operation). .. ipython:: python ts = pd.Timestamp('2014-01-01 09:00') - day = Day() + day = pd.offsets.Day() day.apply(ts) day.apply(ts).normalize() ts = pd.Timestamp('2014-01-01 22:00') - hour = Hour() + hour = pd.offsets.Hour() hour.apply(ts) hour.apply(ts).normalize() hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize() +.. _relativedelta documentation: https://dateutil.readthedocs.io/en/stable/relativedelta.html + .. _timeseries.dayvscalendarday: Day vs. CalendarDay @@ -968,27 +967,28 @@ particular day of the week: .. ipython:: python + d = datetime.datetime(2008, 8, 18, 9, 0) d - d + Week() - d + Week(weekday=4) - (d + Week(weekday=4)).weekday() + d + pd.offsets.Week() + d + pd.offsets.Week(weekday=4) + (d + pd.offsets.Week(weekday=4)).weekday() - d - Week() + d - pd.offsets.Week() The ``normalize`` option will be effective for addition and subtraction. .. ipython:: python - d + Week(normalize=True) - d - Week(normalize=True) + d + pd.offsets.Week(normalize=True) + d - pd.offsets.Week(normalize=True) Another example is parameterizing ``YearEnd`` with the specific ending month: .. ipython:: python - d + YearEnd() - d + YearEnd(month=6) + d + pd.offsets.YearEnd() + d + pd.offsets.YearEnd(month=6) .. _timeseries.offsetseries: @@ -1004,9 +1004,9 @@ apply the offset to each element. rng = pd.date_range('2012-01-01', '2012-01-03') s = pd.Series(rng) rng - rng + DateOffset(months=2) - s + DateOffset(months=2) - s - DateOffset(months=2) + rng + pd.DateOffset(months=2) + s + pd.DateOffset(months=2) + s - pd.DateOffset(months=2) If the offset class maps directly to a ``Timedelta`` (``Day``, ``Hour``, ``Minute``, ``Second``, ``Micro``, ``Milli``, ``Nano``) it can be @@ -1015,10 +1015,10 @@ used exactly like a ``Timedelta`` - see the .. ipython:: python - s - Day(2) + s - pd.offsets.Day(2) td = s - pd.Series(pd.date_range('2011-12-29', '2011-12-31')) td - td + Minute(15) + td + pd.offsets.Minute(15) Note that some offsets (such as ``BQuarterEnd``) do not have a vectorized implementation. They can still be used but may @@ -1027,7 +1027,7 @@ calculate significantly slower and will show a ``PerformanceWarning`` .. ipython:: python :okwarning: - rng + BQuarterEnd() + rng + pd.offsets.BQuarterEnd() .. 
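As a supplementary contrast (not part of the patch), offsets that map directly to a fixed ``Timedelta``, such as ``Day``, are applied to the same ``rng`` in a vectorized way and do not raise the ``PerformanceWarning`` shown above:

.. code-block:: python

   rng + pd.offsets.Day(2)   # vectorized; no PerformanceWarning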
_timeseries.custombusinessdays: @@ -1043,15 +1043,17 @@ As an interesting example, let's look at Egypt where a Friday-Saturday weekend i .. ipython:: python - from pandas.tseries.offsets import CustomBusinessDay weekmask_egypt = 'Sun Mon Tue Wed Thu' # They also observe International Workers' Day so let's # add that for a couple of years - holidays = ['2012-05-01', datetime(2013, 5, 1), np.datetime64('2014-05-01')] - bday_egypt = CustomBusinessDay(holidays=holidays, weekmask=weekmask_egypt) - dt = datetime(2013, 4, 30) + holidays = ['2012-05-01', + datetime.datetime(2013, 5, 1), + np.datetime64('2014-05-01')] + bday_egypt = pd.offsets.CustomBusinessDay(holidays=holidays, + weekmask=weekmask_egypt) + dt = datetime.datetime(2013, 4, 30) dt + 2 * bday_egypt Let's map to the weekday names: @@ -1060,7 +1062,8 @@ Let's map to the weekday names: dts = pd.date_range(dt, periods=5, freq=bday_egypt) - pd.Series(dts.weekday, dts).map(pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split())) + pd.Series(dts.weekday, dts).map( + pd.Series('Mon Tue Wed Thu Fri Sat Sun'.split())) Holiday calendars can be used to provide the list of holidays. See the :ref:`holiday calendar` section for more information. @@ -1069,10 +1072,10 @@ Holiday calendars can be used to provide the list of holidays. See the from pandas.tseries.holiday import USFederalHolidayCalendar - bday_us = CustomBusinessDay(calendar=USFederalHolidayCalendar()) + bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar()) # Friday before MLK Day - dt = datetime(2014, 1, 17) + dt = datetime.datetime(2014, 1, 17) # Tuesday after MLK Day (Monday is skipped because it's a holiday) dt + bday_us @@ -1082,15 +1085,15 @@ in the usual way. .. ipython:: python - from pandas.tseries.offsets import CustomBusinessMonthBegin - bmth_us = CustomBusinessMonthBegin(calendar=USFederalHolidayCalendar()) + bmth_us = pd.offsets.CustomBusinessMonthBegin( + calendar=USFederalHolidayCalendar()) # Skip new years - dt = datetime(2013, 12, 17) + dt = datetime.datetime(2013, 12, 17) dt + bmth_us # Define date index with custom offset - pd.DatetimeIndex(start='20100101',end='20120101',freq=bmth_us) + pd.DatetimeIndex(start='20100101', end='20120101', freq=bmth_us) .. note:: @@ -1111,13 +1114,13 @@ allowing to use specific start and end times. By default, ``BusinessHour`` uses 9:00 - 17:00 as business hours. Adding ``BusinessHour`` will increment ``Timestamp`` by hourly frequency. -If target ``Timestamp`` is out of business hours, move to the next business hour -then increment it. If the result exceeds the business hours end, the remaining +If target ``Timestamp`` is out of business hours, move to the next business hour +then increment it. If the result exceeds the business hours end, the remaining hours are added to the next business day. .. ipython:: python - bh = BusinessHour() + bh = pd.offsets.BusinessHour() bh # 2014-08-01 is Friday @@ -1134,19 +1137,19 @@ hours are added to the next business day. pd.Timestamp('2014-08-01 16:30') + bh # Adding 2 business hours - pd.Timestamp('2014-08-01 10:00') + BusinessHour(2) + pd.Timestamp('2014-08-01 10:00') + pd.offsets.BusinessHour(2) # Subtracting 3 business hours - pd.Timestamp('2014-08-01 10:00') + BusinessHour(-3) + pd.Timestamp('2014-08-01 10:00') + pd.offsets.BusinessHour(-3) -You can also specify ``start`` and ``end`` time by keywords. The argument must -be a ``str`` with an ``hour:minute`` representation or a ``datetime.time`` -instance. 
Specifying seconds, microseconds and nanoseconds as business hour +You can also specify ``start`` and ``end`` time by keywords. The argument must +be a ``str`` with an ``hour:minute`` representation or a ``datetime.time`` +instance. Specifying seconds, microseconds and nanoseconds as business hour results in ``ValueError``. .. ipython:: python - bh = BusinessHour(start='11:00', end=time(20, 0)) + bh = pd.offsets.BusinessHour(start='11:00', end=datetime.time(20, 0)) bh pd.Timestamp('2014-08-01 13:00') + bh @@ -1159,7 +1162,7 @@ Valid business hours are distinguished by whether it started from valid ``Busine .. ipython:: python - bh = BusinessHour(start='17:00', end='09:00') + bh = pd.offsets.BusinessHour(start='17:00', end='09:00') bh pd.Timestamp('2014-08-01 17:00') + bh @@ -1184,22 +1187,22 @@ under the default business hours (9:00 - 17:00), there is no gap (0 minutes) bet .. ipython:: python # This adjusts a Timestamp to business hour edge - BusinessHour().rollback(pd.Timestamp('2014-08-02 15:00')) - BusinessHour().rollforward(pd.Timestamp('2014-08-02 15:00')) + pd.offsets.BusinessHour().rollback(pd.Timestamp('2014-08-02 15:00')) + pd.offsets.BusinessHour().rollforward(pd.Timestamp('2014-08-02 15:00')) # It is the same as BusinessHour().apply(pd.Timestamp('2014-08-01 17:00')). # And it is the same as BusinessHour().apply(pd.Timestamp('2014-08-04 09:00')) - BusinessHour().apply(pd.Timestamp('2014-08-02 15:00')) + pd.offsets.BusinessHour().apply(pd.Timestamp('2014-08-02 15:00')) # BusinessDay results (for reference) - BusinessHour().rollforward(pd.Timestamp('2014-08-02')) + pd.offsets.BusinessHour().rollforward(pd.Timestamp('2014-08-02')) # It is the same as BusinessDay().apply(pd.Timestamp('2014-08-01')) # The result is the same as rollworward because BusinessDay never overlap. - BusinessHour().apply(pd.Timestamp('2014-08-02')) + pd.offsets.BusinessHour().apply(pd.Timestamp('2014-08-02')) -``BusinessHour`` regards Saturday and Sunday as holidays. To use arbitrary -holidays, you can use ``CustomBusinessHour`` offset, as explained in the +``BusinessHour`` regards Saturday and Sunday as holidays. To use arbitrary +holidays, you can use ``CustomBusinessHour`` offset, as explained in the following subsection. .. _timeseries.custombusinesshour: @@ -1216,9 +1219,9 @@ as ``BusinessHour`` except that it skips specified custom holidays. .. ipython:: python from pandas.tseries.holiday import USFederalHolidayCalendar - bhour_us = CustomBusinessHour(calendar=USFederalHolidayCalendar()) + bhour_us = pd.offsets.CustomBusinessHour(calendar=USFederalHolidayCalendar()) # Friday before MLK Day - dt = datetime(2014, 1, 17, 15) + dt = datetime.datetime(2014, 1, 17, 15) dt + bhour_us @@ -1229,7 +1232,8 @@ You can use keyword arguments supported by either ``BusinessHour`` and ``CustomB .. ipython:: python - bhour_mon = CustomBusinessHour(start='10:00', weekmask='Tue Wed Thu Fri') + bhour_mon = pd.offsets.CustomBusinessHour(start='10:00', + weekmask='Tue Wed Thu Fri') # Monday is skipped because it's a holiday, business hour starts from 10:00 dt + bhour_mon * 2 @@ -1285,7 +1289,7 @@ most functions: pd.date_range(start, periods=5, freq='B') - pd.date_range(start, periods=5, freq=BDay()) + pd.date_range(start, periods=5, freq=pd.offsets.BDay()) You can combine together day and intraday offsets: @@ -1352,39 +1356,39 @@ anchor point, and moved ``|n|-1`` additional steps forwards or backwards. .. 
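A supplementary sketch (the dates are illustrative) of the equivalence between an offset alias and the corresponding ``DateOffset`` object when passed to ``date_range``, as described in the aliases section above; the anchored-offset examples announced above follow below:

.. code-block:: python

   # 'W-FRI' and Week(weekday=4) describe the same weekly-on-Friday frequency
   pd.date_range('2018-01-01', periods=5, freq='W-FRI')
   pd.date_range('2018-01-01', periods=5, freq=pd.offsets.Week(weekday=4))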
ipython:: python - pd.Timestamp('2014-01-02') + MonthBegin(n=1) - pd.Timestamp('2014-01-02') + MonthEnd(n=1) + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-02') + pd.offsets.MonthEnd(n=1) - pd.Timestamp('2014-01-02') - MonthBegin(n=1) - pd.Timestamp('2014-01-02') - MonthEnd(n=1) + pd.Timestamp('2014-01-02') - pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-02') - pd.offsets.MonthEnd(n=1) - pd.Timestamp('2014-01-02') + MonthBegin(n=4) - pd.Timestamp('2014-01-02') - MonthBegin(n=4) + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=4) + pd.Timestamp('2014-01-02') - pd.offsets.MonthBegin(n=4) If the given date *is* on an anchor point, it is moved ``|n|`` points forwards or backwards. .. ipython:: python - pd.Timestamp('2014-01-01') + MonthBegin(n=1) - pd.Timestamp('2014-01-31') + MonthEnd(n=1) + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-31') + pd.offsets.MonthEnd(n=1) - pd.Timestamp('2014-01-01') - MonthBegin(n=1) - pd.Timestamp('2014-01-31') - MonthEnd(n=1) + pd.Timestamp('2014-01-01') - pd.offsets.MonthBegin(n=1) + pd.Timestamp('2014-01-31') - pd.offsets.MonthEnd(n=1) - pd.Timestamp('2014-01-01') + MonthBegin(n=4) - pd.Timestamp('2014-01-31') - MonthBegin(n=4) + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=4) + pd.Timestamp('2014-01-31') - pd.offsets.MonthBegin(n=4) For the case when ``n=0``, the date is not moved if on an anchor point, otherwise it is rolled forward to the next anchor point. .. ipython:: python - pd.Timestamp('2014-01-02') + MonthBegin(n=0) - pd.Timestamp('2014-01-02') + MonthEnd(n=0) + pd.Timestamp('2014-01-02') + pd.offsets.MonthBegin(n=0) + pd.Timestamp('2014-01-02') + pd.offsets.MonthEnd(n=0) - pd.Timestamp('2014-01-01') + MonthBegin(n=0) - pd.Timestamp('2014-01-31') + MonthEnd(n=0) + pd.Timestamp('2014-01-01') + pd.offsets.MonthBegin(n=0) + pd.Timestamp('2014-01-31') + pd.offsets.MonthEnd(n=0) .. _timeseries.holiday: @@ -1427,10 +1431,13 @@ An example of how holidays and holiday calendars are defined: USMemorialDay, Holiday('July 4th', month=7, day=4, observance=nearest_workday), Holiday('Columbus Day', month=10, day=1, - offset=DateOffset(weekday=MO(2))), #same as 2*Week(weekday=2) - ] + offset=pd.DateOffset(weekday=MO(2)))] + cal = ExampleCalendar() - cal.holidays(datetime(2012, 1, 1), datetime(2012, 12, 31)) + cal.holidays(datetime.datetime(2012, 1, 1), datetime.datetime(2012, 12, 31)) + +:hint: + **weekday=MO(2)** is same as **2 * Week(weekday=2)** Using this calendar, creating an index or doing offset arithmetic skips weekends and holidays (i.e., Memorial Day/July 4th). For example, the below defines @@ -1440,14 +1447,13 @@ or ``Timestamp`` objects. .. ipython:: python - from pandas.tseries.offsets import CDay pd.DatetimeIndex(start='7/1/2012', end='7/10/2012', - freq=CDay(calendar=cal)).to_pydatetime() - offset = CustomBusinessDay(calendar=cal) - datetime(2012, 5, 25) + offset - datetime(2012, 7, 3) + offset - datetime(2012, 7, 3) + 2 * offset - datetime(2012, 7, 6) + offset + freq=pd.offsets.CDay(calendar=cal)).to_pydatetime() + offset = pd.offsets.CustomBusinessDay(calendar=cal) + datetime.datetime(2012, 5, 25) + offset + datetime.datetime(2012, 7, 3) + offset + datetime.datetime(2012, 7, 3) + 2 * offset + datetime.datetime(2012, 7, 6) + offset Ranges are defined by the ``start_date`` and ``end_date`` class attributes of ``AbstractHolidayCalendar``. The defaults are shown below. @@ -1462,8 +1468,8 @@ datetime/Timestamp/string. .. 
ipython:: python - AbstractHolidayCalendar.start_date = datetime(2012, 1, 1) - AbstractHolidayCalendar.end_date = datetime(2012, 12, 31) + AbstractHolidayCalendar.start_date = datetime.datetime(2012, 1, 1) + AbstractHolidayCalendar.end_date = datetime.datetime(2012, 12, 31) cal.holidays() Every calendar class is accessible by name using the ``get_calendar`` function @@ -1490,7 +1496,7 @@ Shifting / Lagging ~~~~~~~~~~~~~~~~~~ One may want to *shift* or *lag* the values in a time series back and forward in -time. The method for this is :meth:`~Series.shift`, which is available on all of +time. The method for this is :meth:`~Series.shift`, which is available on all of the pandas objects. .. ipython:: python @@ -1500,16 +1506,16 @@ the pandas objects. ts.shift(1) The ``shift`` method accepts an ``freq`` argument which can accept a -``DateOffset`` class or other ``timedelta``-like object or also an +``DateOffset`` class or other ``timedelta``-like object or also an :ref:`offset alias `: .. ipython:: python - ts.shift(5, freq=offsets.BDay()) + ts.shift(5, freq=pd.offsets.BDay()) ts.shift(5, freq='BM') Rather than changing the alignment of the data and the index, ``DataFrame`` and -``Series`` objects also have a :meth:`~Series.tshift` convenience method that +``Series`` objects also have a :meth:`~Series.tshift` convenience method that changes all the dates in the index by a specified number of offsets: .. ipython:: python @@ -1522,35 +1528,35 @@ is not being realigned. Frequency Conversion ~~~~~~~~~~~~~~~~~~~~ -The primary function for changing frequencies is the :meth:`~Series.asfreq` -method. For a ``DatetimeIndex``, this is basically just a thin, but convenient -wrapper around :meth:`~Series.reindex` which generates a ``date_range`` and +The primary function for changing frequencies is the :meth:`~Series.asfreq` +method. For a ``DatetimeIndex``, this is basically just a thin, but convenient +wrapper around :meth:`~Series.reindex` which generates a ``date_range`` and calls ``reindex``. .. ipython:: python - dr = pd.date_range('1/1/2010', periods=3, freq=3 * offsets.BDay()) - ts = pd.Series(randn(3), index=dr) + dr = pd.date_range('1/1/2010', periods=3, freq=3 * pd.offsets.BDay()) + ts = pd.Series(np.random.randn(3), index=dr) ts - ts.asfreq(BDay()) + ts.asfreq(pd.offsets.BDay()) ``asfreq`` provides a further convenience so you can specify an interpolation method for any gaps that may appear after the frequency conversion. .. ipython:: python - ts.asfreq(BDay(), method='pad') + ts.asfreq(pd.offsets.BDay(), method='pad') Filling Forward / Backward ~~~~~~~~~~~~~~~~~~~~~~~~~~ -Related to ``asfreq`` and ``reindex`` is :meth:`~Series.fillna`, which is +Related to ``asfreq`` and ``reindex`` is :meth:`~Series.fillna`, which is documented in the :ref:`missing data section `. Converting to Python Datetimes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``DatetimeIndex`` can be converted to an array of Python native +``DatetimeIndex`` can be converted to an array of Python native :py:class:`datetime.datetime` objects using the ``to_pydatetime`` method. .. _timeseries.resampling: @@ -1563,13 +1569,13 @@ Resampling The interface to ``.resample`` has changed in 0.18.0 to be more groupby-like and hence more flexible. See the :ref:`whatsnew docs ` for a comparison with prior versions. -Pandas has a simple, powerful, and efficient functionality for performing -resampling operations during frequency conversion (e.g., converting secondly -data into 5-minutely data). 
This is extremely common in, but not limited to, +Pandas has a simple, powerful, and efficient functionality for performing +resampling operations during frequency conversion (e.g., converting secondly +data into 5-minutely data). This is extremely common in, but not limited to, financial applications. -:meth:`~Series.resample` is a time-based groupby, followed by a reduction method -on each of its groups. See some :ref:`cookbook examples ` for +:meth:`~Series.resample` is a time-based groupby, followed by a reduction method +on each of its groups. See some :ref:`cookbook examples ` for some advanced strategies. Starting in version 0.18.1, the ``resample()`` function can be used directly from @@ -1577,7 +1583,7 @@ Starting in version 0.18.1, the ``resample()`` function can be used directly fro .. note:: - ``.resample()`` is similar to using a :meth:`~Series.rolling` operation with + ``.resample()`` is similar to using a :meth:`~Series.rolling` operation with a time-based offset, see a discussion :ref:`here `. Basics @@ -1624,7 +1630,7 @@ labels. .. ipython:: python - ts.resample('5Min').mean() # by default label='left' + ts.resample('5Min').mean() # by default label='left' ts.resample('5Min', label='left').mean() @@ -1632,8 +1638,8 @@ labels. .. note:: - The default values for ``label`` and ``closed`` is 'left' for all - frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' + The default values for ``label`` and ``closed`` is 'left' for all + frequency offsets except for 'M', 'A', 'Q', 'BM', 'BA', 'BQ', and 'W' which all have a default of 'right'. .. ipython:: python @@ -1680,9 +1686,9 @@ Sparse Resampling ~~~~~~~~~~~~~~~~~ Sparse timeseries are the ones where you have a lot fewer points relative -to the amount of time you are looking to resample. Naively upsampling a sparse -series can potentially generate lots of intermediate values. When you don't want -to use a method to fill these values, e.g. ``fill_method`` is ``None``, then +to the amount of time you are looking to resample. Naively upsampling a sparse +series can potentially generate lots of intermediate values. When you don't want +to use a method to fill these values, e.g. ``fill_method`` is ``None``, then intermediate values will be filled with ``NaN``. Since ``resample`` is a time-based groupby, the following is a method to efficiently @@ -1737,7 +1743,7 @@ We can select a specific column or columns using standard getitem. r['A'].mean() - r[['A','B']].mean() + r[['A', 'B']].mean() You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: @@ -1758,21 +1764,21 @@ columns of a ``DataFrame``: .. ipython:: python :okexcept: - r.agg({'A' : np.sum, - 'B' : lambda x: np.std(x, ddof=1)}) + r.agg({'A': np.sum, + 'B': lambda x: np.std(x, ddof=1)}) The function names can also be strings. In order for a string to be valid it must be implemented on the resampled object: .. ipython:: python - r.agg({'A' : 'sum', 'B' : 'std'}) + r.agg({'A': 'sum', 'B': 'std'}) Furthermore, you can also specify multiple aggregation functions for each column separately. .. 
ipython:: python - r.agg({'A' : ['sum','std'], 'B' : ['mean','std'] }) + r.agg({'A': ['sum', 'std'], 'B': ['mean', 'std']}) If a ``DataFrame`` does not have a datetimelike index, but instead you want @@ -1784,9 +1790,9 @@ to resample based on datetimelike column in the frame, it can passed to the df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), 'a': np.arange(5)}, index=pd.MultiIndex.from_arrays([ - [1,2,3,4,5], - pd.date_range('2015-01-01', freq='W', periods=5)], - names=['v','d'])) + [1, 2, 3, 4, 5], + pd.date_range('2015-01-01', freq='W', periods=5)], + names=['v', 'd'])) df df.resample('M', on='date').sum() @@ -1845,13 +1851,13 @@ If ``Period`` freq is daily or higher (``D``, ``H``, ``T``, ``S``, ``L``, ``U``, .. ipython:: python p = pd.Period('2014-07-01 09:00', freq='H') - p + Hour(2) - p + timedelta(minutes=120) + p + pd.offsets.Hour(2) + p + datetime.timedelta(minutes=120) p + np.timedelta64(7200, 's') .. code-block:: ipython - In [1]: p + Minute(5) + In [1]: p + pd.offsets.Minute(5) Traceback ... ValueError: Input has different freq from Period(freq=H) @@ -1861,11 +1867,11 @@ If ``Period`` has other frequencies, only the same ``offsets`` can be added. Oth .. ipython:: python p = pd.Period('2014-07', freq='M') - p + MonthEnd(3) + p + pd.offsets.MonthEnd(3) .. code-block:: ipython - In [1]: p + MonthBegin(3) + In [1]: p + pd.offsets.MonthBegin(3) Traceback ... ValueError: Input has different freq from Period(freq=M) @@ -1923,11 +1929,11 @@ objects: idx = pd.period_range('2014-07-01 09:00', periods=5, freq='H') idx - idx + Hour(2) + idx + pd.offsets.Hour(2) idx = pd.period_range('2014-07', periods=5, freq='M') idx - idx + MonthEnd(3) + idx + pd.offsets.MonthEnd(3) ``PeriodIndex`` has its own dtype named ``period``, refer to :ref:`Period Dtypes `. @@ -1977,7 +1983,7 @@ You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodI ps['2011-01'] - ps[datetime(2011, 12, 25):] + ps[datetime.datetime(2011, 12, 25):] ps['10/31/2011':'12/31/2011'] @@ -1987,9 +1993,11 @@ Passing a string representing a lower frequency than ``PeriodIndex`` returns par ps['2011'] - dfp = pd.DataFrame(np.random.randn(600,1), + dfp = pd.DataFrame(np.random.randn(600, 1), columns=['A'], - index=pd.period_range('2013-01-01 9:00', periods=600, freq='T')) + index=pd.period_range('2013-01-01 9:00', + periods=600, + freq='T')) dfp dfp['2013-01-01 10H'] @@ -2178,6 +2186,8 @@ time zones by starting with ``dateutil/``. .. ipython:: python + import dateutil + # pytz rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D', tz='Europe/London') @@ -2199,6 +2209,8 @@ which gives you more control over which time zone is used: .. ipython:: python + import pytz + # pytz tz_pytz = pytz.timezone('Europe/London') rng_pytz = pd.date_range('3/6/2012 00:00', periods=10, freq='D', @@ -2297,7 +2309,8 @@ To remove timezone from tz-aware ``DatetimeIndex``, use ``tz_localize(None)`` or .. ipython:: python - didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern') + didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', + periods=10, tz='US/Eastern') didx didx.tz_localize(None) didx.tz_convert(None) @@ -2350,7 +2363,8 @@ constructor as well as ``tz_localize``. 
rng_hourly.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).tolist() rng_hourly.tz_localize('US/Eastern', ambiguous='NaT').tolist() - didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', periods=10, tz='US/Eastern') + didx = pd.DatetimeIndex(start='2014-08-01 09:00', freq='H', + periods=10, tz='US/Eastern') didx didx.tz_localize(None) didx.tz_convert(None) @@ -2373,7 +2387,7 @@ can be controlled by the ``nonexistent`` argument. The following options are ava .. ipython:: python - dti = pd.date_range(start='2015-03-29 01:30:00', periods=3, freq='H') + dti = pd.date_range(start='2015-03-29 02:30:00', periods=3, freq='H') # 2:30 is a nonexistent time Localization of nonexistent times will raise an error by default. @@ -2401,14 +2415,14 @@ TZ Aware Dtypes .. ipython:: python - s_naive = pd.Series(pd.date_range('20130101',periods=3)) + s_naive = pd.Series(pd.date_range('20130101', periods=3)) s_naive ``Series/DatetimeIndex`` with a timezone **aware** value are represented with a dtype of ``datetime64[ns, tz]``. .. ipython:: python - s_aware = pd.Series(pd.date_range('20130101',periods=3,tz='US/Eastern')) + s_aware = pd.Series(pd.date_range('20130101', periods=3, tz='US/Eastern')) s_aware Both of these ``Series`` can be manipulated via the ``.dt`` accessor, see :ref:`here `. diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 2c9aecdde2f45..f9e3dc0f5c348 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -378,6 +378,7 @@ Backwards incompatible API changes "worked" purely due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `decode` and `len` (:issue:`23011`, :issue:`23163`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) +- :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) .. _whatsnew_0240.api_breaking.deps: @@ -1215,6 +1216,7 @@ Performance Improvements The speed increase is both when indexing by label (using .loc) and position(.iloc) (:issue:`20395`) Slicing a monotonically increasing :class:`CategoricalIndex` itself (i.e. ``ci[1000:2000]``) shows similar speed improvements as above (:issue:`21659`) +- Improved performance of :meth:`CategoricalIndex.equals` when comparing to another :class:`CategoricalIndex` (:issue:`24023`) - Improved performance of :func:`Series.describe` in case of numeric dtpyes (:issue:`21274`) - Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`) - Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`) @@ -1230,7 +1232,7 @@ Performance Improvements - Improved performance of :func:`pd.concat` for `Series` objects (:issue:`23404`) - Improved performance of :meth:`DatetimeIndex.normalize` and :meth:`Timestamp.normalize` for timezone naive or UTC datetimes (:issue:`23634`) - Improved performance of :meth:`DatetimeIndex.tz_localize` and various ``DatetimeIndex`` attributes with dateutil UTC timezone (:issue:`23772`) - +- Improved performance of :class:`Categorical` constructor for `Series` objects (:issue:`23814`) .. 
_whatsnew_0240.docs: @@ -1256,6 +1258,7 @@ Categorical - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`). - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`) - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`) +- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`) Datetimelike ^^^^^^^^^^^^ @@ -1418,6 +1421,7 @@ MultiIndex I/O ^^^ + .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1481,6 +1485,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`read_excel()` in which column names were not being properly converted to string sometimes in Python 2.x (:issue:`23874`) - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`18792`, :issue:`20480`) - Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) +- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`) - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`) Plotting @@ -1532,6 +1537,7 @@ Reshaping - Bug in :func:`pandas.melt` when passing column names that are not present in ``DataFrame`` (:issue:`23575`) - Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) - Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`) +- Bug in :func:`cut` with ``bins`` as an overlapping ``IntervalIndex`` where multiple bins were returned per item instead of raising a ``ValueError`` (:issue:`23980`) .. 
_whatsnew_0240.bug_fixes.sparse: diff --git a/environment.yml b/environment.yml index fc35f1290f1b1..4daaa90247fa8 100644 --- a/environment.yml +++ b/environment.yml @@ -4,22 +4,21 @@ channels: - conda-forge dependencies: # required - - NumPy + - numpy>=1.15 - python=3 - python-dateutil>=2.5.0 - pytz # development - - Cython>=0.28.2 + - cython>=0.28.2 - flake8 - flake8-comprehensions - - flake8-rst=0.4.2 + - flake8-rst>=0.6.0 - gitpython - - hypothesis>=3.58.0 + - hypothesis>=3.82 - isort - moto - - pytest>=3.6 - - setuptools>=24.2.0 + - pytest>=4.0 - sphinx - sphinxcontrib-spelling @@ -28,7 +27,6 @@ dependencies: - blosc - bottleneck>=1.2.0 - fastparquet>=0.1.2 - - gcsfs - html5lib - ipython>=5.6.0 - ipykernel @@ -36,15 +34,13 @@ dependencies: - lxml - matplotlib>=2.0.0 - nbsphinx - - numexpr>=2.6.1 + - numexpr>=2.6.8 - openpyxl - pyarrow>=0.7.0 - - pymysql - pytables>=3.4.2 - pytest-cov - pytest-xdist - - s3fs - - scipy>=0.18.1 + - scipy>=1.1 - seaborn - sqlalchemy - statsmodels diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 1dc71264c94dd..a459057555cf3 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1245,6 +1245,10 @@ cdef class TextReader: result, na_count = _try_bool_flex(self.parser, i, start, end, na_filter, na_hashset, self.true_set, self.false_set) + if user_dtype and na_count is not None: + if na_count > 0: + raise ValueError("Bool column has NA values in " + "column {column}".format(column=i)) return result, na_count elif dtype.kind == 'S': diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 609608a0948c5..a3e6c7e344940 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -520,9 +520,10 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # specify error conditions assert is_raise or is_ignore or is_coerce + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + try: - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') for i in range(n): val = values[i] @@ -571,16 +572,13 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', elif is_datetime64_object(val): seen_datetime = 1 - if get_datetime64_value(val) == NPY_NAT: - iresult[i] = NPY_NAT - else: - try: - iresult[i] = get_datetime64_nanos(val) - except OutOfBoundsDatetime: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise + try: + iresult[i] = get_datetime64_nanos(val) + except OutOfBoundsDatetime: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise elif is_integer_object(val) or is_float_object(val): # these must be ns unit by-definition @@ -706,62 +704,85 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise TypeError("{typ} is not convertible to datetime" .format(typ=type(val))) - if seen_datetime and seen_integer: - # we have mixed datetimes & integers - - if is_coerce: - # coerce all of the integers/floats to NaT, preserve - # the datetimes and other convertibles - for i in range(n): - val = values[i] - if is_integer_object(val) or is_float_object(val): - result[i] = NPY_NAT - elif is_raise: - raise ValueError( - "mixed datetimes and integers in passed array") - else: - raise TypeError - - if seen_datetime_offset and not utc_convert: - # GH 17697 - # 1) If all the offsets are equal, return one offset for - # the parsed dates to (maybe) pass to DatetimeIndex - # 2) If the offsets are different, then force the parsing down the - # object path where an array of datetimes - # (with individual dateutil.tzoffsets) are returned - is_same_offsets = 
len(out_tzoffset_vals) == 1 - if not is_same_offsets: - return array_to_datetime_object(values, is_raise, - dayfirst, yearfirst) - else: - tz_offset = out_tzoffset_vals.pop() - tz_out = pytz.FixedOffset(tz_offset / 60.) - return result, tz_out except OutOfBoundsDatetime: if is_raise: raise - oresult = np.empty(n, dtype=object) - for i in range(n): - val = values[i] + return ignore_errors_out_of_bounds_fallback(values), tz_out - # set as nan except if its a NaT - if checknull_with_nat(val): - if isinstance(val, float): - oresult[i] = np.nan - else: - oresult[i] = NaT - elif is_datetime64_object(val): - if get_datetime64_value(val) == NPY_NAT: - oresult[i] = NaT - else: - oresult[i] = val.item() - else: - oresult[i] = val - return oresult, tz_out except TypeError: return array_to_datetime_object(values, is_raise, dayfirst, yearfirst) + if seen_datetime and seen_integer: + # we have mixed datetimes & integers + + if is_coerce: + # coerce all of the integers/floats to NaT, preserve + # the datetimes and other convertibles + for i in range(n): + val = values[i] + if is_integer_object(val) or is_float_object(val): + result[i] = NPY_NAT + elif is_raise: + raise ValueError("mixed datetimes and integers in passed array") + else: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + + if seen_datetime_offset and not utc_convert: + # GH#17697 + # 1) If all the offsets are equal, return one offset for + # the parsed dates to (maybe) pass to DatetimeIndex + # 2) If the offsets are different, then force the parsing down the + # object path where an array of datetimes + # (with individual dateutil.tzoffsets) are returned + is_same_offsets = len(out_tzoffset_vals) == 1 + if not is_same_offsets: + return array_to_datetime_object(values, is_raise, + dayfirst, yearfirst) + else: + tz_offset = out_tzoffset_vals.pop() + tz_out = pytz.FixedOffset(tz_offset / 60.) + return result, tz_out + + +cdef inline ignore_errors_out_of_bounds_fallback(ndarray[object] values): + """ + Fallback for array_to_datetime if an OutOfBoundsDatetime is raised + and errors == "ignore" + + Parameters + ---------- + values : ndarray[object] + + Returns + ------- + ndarray[object] + """ + cdef: + Py_ssize_t i, n = len(values) + object val + + oresult = np.empty(n, dtype=object) + + for i in range(n): + val = values[i] + + # set as nan except if its a NaT + if checknull_with_nat(val): + if isinstance(val, float): + oresult[i] = np.nan + else: + oresult[i] = NaT + elif is_datetime64_object(val): + if get_datetime64_value(val) == NPY_NAT: + oresult[i] = NaT + else: + oresult[i] = val.item() + else: + oresult[i] = val + return oresult + @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 67c2793e4bcef..4a34065fe471f 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -62,8 +62,11 @@ cdef inline int64_t get_datetime64_nanos(object val) except? 
-1: NPY_DATETIMEUNIT unit npy_datetime ival - unit = get_datetime64_unit(val) ival = get_datetime64_value(val) + if ival == NPY_NAT: + return NPY_NAT + + unit = get_datetime64_unit(val) if unit != NPY_FR_ns: pandas_datetime_to_datetimestruct(ival, unit, &dts) @@ -283,10 +286,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, if ts is None or ts is NaT: obj.value = NPY_NAT elif is_datetime64_object(ts): - if ts.view('i8') == NPY_NAT: - obj.value = NPY_NAT - else: - obj.value = get_datetime64_nanos(ts) + obj.value = get_datetime64_nanos(ts) + if obj.value != NPY_NAT: dt64_to_dtstruct(obj.value, &obj.dts) elif is_integer_object(ts): if ts == NPY_NAT: @@ -887,8 +888,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins int64_t HOURS_NS = HOUR_SECONDS * 1000000000 - ndarray[int64_t] trans, result, result_a, result_b, dst_hours - ndarray[int64_t] trans_idx, grp, delta, a_idx, b_idx, one_diff + ndarray[int64_t] trans, result, result_a, result_b, dst_hours, delta + ndarray trans_idx, grp, a_idx, b_idx, one_diff npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift = False, fill_nonexist = False diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 5db851d4bf021..42696e4796fe0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -347,6 +347,16 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, # the "ordered" and "categories" arguments dtype = values.dtype._from_categorical_dtype(values.dtype, categories, ordered) + + # GH23814, for perf, if values._values already an instance of + # Categorical, set values to codes, and run fastpath + if (isinstance(values, (ABCSeries, ABCIndexClass)) and + isinstance(values._values, type(self))): + values = values._values.codes.copy() + if categories is None: + categories = dtype.categories + fastpath = True + else: # If dtype=None and values is not categorical, create a new dtype dtype = CategoricalDtype(categories, ordered) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 83ee335aa5465..a6f254c79fb51 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -10,11 +10,12 @@ from pandas._libs.tslibs.period import ( DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period) from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds -from pandas._libs.tslibs.timestamps import maybe_integer_op_deprecated +from pandas._libs.tslibs.timestamps import ( + RoundTo, maybe_integer_op_deprecated, round_nsint64) import pandas.compat as compat from pandas.errors import ( AbstractMethodError, NullFrequencyError, PerformanceWarning) -from pandas.util._decorators import deprecate_kwarg +from pandas.util._decorators import Appender, deprecate_kwarg from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype, @@ -80,6 +81,189 @@ def _get_attributes_dict(self): return {k: getattr(self, k, None) for k in self._attributes} +class DatelikeOps(object): + """ + Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. + """ + + def strftime(self, date_format): + from pandas import Index + return Index(self.format(date_format=date_format), + dtype=compat.text_type) + strftime.__doc__ = """ + Convert to Index using specified date_format. 
+ + Return an Index of formatted strings specified by date_format, which + supports the same string format as the python standard library. Details + of the string format can be found in `python string format doc <{0}>`__ + + Parameters + ---------- + date_format : str + Date format string (e.g. "%Y-%m-%d"). + + Returns + ------- + Index + Index of formatted strings + + See Also + -------- + to_datetime : Convert the given argument to datetime. + DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. + DatetimeIndex.round : Round the DatetimeIndex to the specified freq. + DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + + Examples + -------- + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), + ... periods=3, freq='s') + >>> rng.strftime('%B %d, %Y, %r') + Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', + 'March 10, 2018, 09:00:02 AM'], + dtype='object') + """.format("https://docs.python.org/3/library/datetime.html" + "#strftime-and-strptime-behavior") + + +class TimelikeOps(object): + """ + Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. + """ + + _round_doc = ( + """ + Perform {op} operation on the data to the specified `freq`. + + Parameters + ---------- + freq : str or Offset + The frequency level to {op} the index to. Must be a fixed + frequency like 'S' (second) not 'ME' (month end). See + :ref:`frequency aliases ` for + a list of possible `freq` values. + ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' + Only relevant for DatetimeIndex: + + - 'infer' will attempt to infer fall dst-transition hours based on + order + - bool-ndarray where True signifies a DST time, False designates + a non-DST time (note that this flag is only applicable for + ambiguous times) + - 'NaT' will return NaT where there are ambiguous times + - 'raise' will raise an AmbiguousTimeError if there are ambiguous + times + + .. versionadded:: 0.24.0 + nonexistent : 'shift', 'NaT', default 'raise' + A nonexistent time does not exist in a particular timezone + where clocks moved forward due to DST. + + - 'shift' will shift the nonexistent time forward to the closest + existing time + - 'NaT' will return NaT where there are nonexistent times + - 'raise' will raise an NonExistentTimeError if there are + nonexistent times + + .. versionadded:: 0.24.0 + + Returns + ------- + DatetimeIndex, TimedeltaIndex, or Series + Index of the same type for a DatetimeIndex or TimedeltaIndex, + or a Series with the same index for a Series. + + Raises + ------ + ValueError if the `freq` cannot be converted. 
+ + Examples + -------- + **DatetimeIndex** + + >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') + >>> rng + DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', + '2018-01-01 12:01:00'], + dtype='datetime64[ns]', freq='T') + """) + + _round_example = ( + """>>> rng.round('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.round("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + """) + + _floor_example = ( + """>>> rng.floor('H') + DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', + '2018-01-01 12:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.floor("H") + 0 2018-01-01 11:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 12:00:00 + dtype: datetime64[ns] + """ + ) + + _ceil_example = ( + """>>> rng.ceil('H') + DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', + '2018-01-01 13:00:00'], + dtype='datetime64[ns]', freq=None) + + **Series** + + >>> pd.Series(rng).dt.ceil("H") + 0 2018-01-01 12:00:00 + 1 2018-01-01 12:00:00 + 2 2018-01-01 13:00:00 + dtype: datetime64[ns] + """ + ) + + def _round(self, freq, mode, ambiguous, nonexistent): + # round the local times + values = _ensure_datetimelike_to_i8(self) + result = round_nsint64(values, mode, freq) + result = self._maybe_mask_results(result, fill_value=NaT) + + attribs = self._get_attributes_dict() + attribs['freq'] = None + if 'tz' in attribs: + attribs['tz'] = None + return self._ensure_localized( + self._shallow_copy(result, **attribs), ambiguous, nonexistent + ) + + @Appender((_round_doc + _round_example).format(op="round")) + def round(self, freq, ambiguous='raise', nonexistent='raise'): + return self._round( + freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent + ) + + @Appender((_round_doc + _floor_example).format(op="floor")) + def floor(self, freq, ambiguous='raise', nonexistent='raise'): + return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) + + @Appender((_round_doc + _ceil_example).format(op="ceil")) + def ceil(self, freq, ambiguous='raise', nonexistent='raise'): + return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) + + class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin): """ Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray @@ -1023,3 +1207,39 @@ def validate_dtype_freq(dtype, freq): raise IncompatibleFrequency('specified freq and dtype ' 'are different') return freq + + +def _ensure_datetimelike_to_i8(other, to_utc=False): + """ + Helper for coercing an input scalar or array to i8. + + Parameters + ---------- + other : 1d array + to_utc : bool, default False + If True, convert the values to UTC before extracting the i8 values + If False, extract the i8 values directly. 
+ + Returns + ------- + i8 1d array + """ + from pandas import Index + from pandas.core.arrays import PeriodArray + + if lib.is_scalar(other) and isna(other): + return iNaT + elif isinstance(other, (PeriodArray, ABCIndexClass)): + # convert tz if needed + if getattr(other, 'tz', None) is not None: + if to_utc: + other = other.tz_convert('UTC') + else: + other = other.tz_localize(None) + else: + try: + return np.array(other, copy=False).view('i8') + except TypeError: + # period array cannot be coerced to int + other = Index(other) + return other.asi8 diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4d3caaacca1c1..050442c530314 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -156,7 +156,9 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): +class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin, + dtl.TimelikeOps, + dtl.DatelikeOps): """ Assumes that subclass __new__/__init__ defines: tz diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 856a01e41ce13..6a7ce7033efa0 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -129,7 +129,7 @@ def method(self, other): return method -class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin): +class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps): _typ = "timedeltaarray" __array_priority__ = 1000 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 06519da9a26d5..f50be694b47c6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1298,10 +1298,10 @@ def to_dict(self, orient='dict', into=dict): >>> df.to_dict('split') {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], - 'data': [[1.0, 0.5], [2.0, 0.75]]} + 'data': [[1, 0.5], [2, 0.75]]} >>> df.to_dict('records') - [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] + [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] >>> df.to_dict('index') {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} @@ -1317,8 +1317,8 @@ def to_dict(self, orient='dict', into=dict): >>> dd = defaultdict(list) >>> df.to_dict('records', into=dd) - [defaultdict(, {'col1': 1.0, 'col2': 0.5}), - defaultdict(, {'col1': 2.0, 'col2': 0.75})] + [defaultdict(, {'col1': 1, 'col2': 0.5}), + defaultdict(, {'col1': 2, 'col2': 0.75})] """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " @@ -1334,16 +1334,18 @@ def to_dict(self, orient='dict', into=dict): elif orient.lower().startswith('sp'): return into_c((('index', self.index.tolist()), ('columns', self.columns.tolist()), - ('data', lib.map_infer(self.values.ravel(), - com.maybe_box_datetimelike) - .reshape(self.values.shape).tolist()))) + ('data', [ + list(map(com.maybe_box_datetimelike, t)) + for t in self.itertuples(index=False)] + ))) elif orient.lower().startswith('s'): return into_c((k, com.maybe_box_datetimelike(v)) for k, v in compat.iteritems(self)) elif orient.lower().startswith('r'): - return [into_c((k, com.maybe_box_datetimelike(v)) - for k, v in zip(self.columns, np.atleast_1d(row))) - for row in self.values] + return [ + into_c((k, com.maybe_box_datetimelike(v)) + for k, v in compat.iteritems(row._asdict())) + for row in self.itertuples(index=False)] elif orient.lower().startswith('i'): if not self.index.is_unique: raise ValueError( diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 08c07da39128f..c58c84b422209 100644 --- 
a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3420,71 +3420,102 @@ class max_speed def xs(self, key, axis=0, level=None, drop_level=True): """ - Returns a cross-section (row(s) or column(s)) from the - Series/DataFrame. Defaults to cross-section on the rows (axis=0). + Return cross-section from the Series/DataFrame. + + This method takes a `key` argument to select data at a particular + level of a MultiIndex. Parameters ---------- - key : object - Some label contained in the index, or partially in a MultiIndex - axis : int, default 0 - Axis to retrieve cross-section on + key : label or tuple of label + Label contained in the index, or partially in a MultiIndex. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Axis to retrieve cross-section on. level : object, defaults to first n levels (n=1 or len(key)) In case of a key partially contained in a MultiIndex, indicate which levels are used. Levels can be referred by label or position. - drop_level : boolean, default True + drop_level : bool, default True If False, returns object with same levels as self. + Returns + ------- + Series or DataFrame + Cross-section from the original Series or DataFrame + corresponding to the selected index levels. + + See Also + -------- + DataFrame.loc : Access a group of rows and columns + by label(s) or a boolean array. + DataFrame.iloc : Purely integer-location based indexing + for selection by position. + + Notes + ----- + `xs` can not be used to set values. + + MultiIndex Slicers is a generic way to get/set values on + any level or levels. + It is a superset of `xs` functionality, see + :ref:`MultiIndex Slicers `. + Examples -------- + >>> d = {'num_legs': [4, 4, 2, 2], + ... 'num_wings': [0, 0, 2, 2], + ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], + ... 'animal': ['cat', 'dog', 'bat', 'penguin'], + ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} + >>> df = pd.DataFrame(data=d) + >>> df = df.set_index(['class', 'animal', 'locomotion']) >>> df - A B C - a 4 5 2 - b 4 0 9 - c 9 7 3 - >>> df.xs('a') - A 4 - B 5 - C 2 - Name: a - >>> df.xs('C', axis=1) - a 2 - b 9 - c 3 - Name: C + num_legs num_wings + class animal locomotion + mammal cat walks 4 0 + dog walks 4 0 + bat flies 2 2 + bird penguin walks 2 2 - >>> df - A B C D - first second third - bar one 1 4 1 8 9 - two 1 7 5 5 0 - baz one 1 6 6 8 0 - three 2 5 3 5 3 - >>> df.xs(('baz', 'three')) - A B C D - third - 2 5 3 5 3 - >>> df.xs('one', level=1) - A B C D - first third - bar 1 4 1 8 9 - baz 1 6 6 8 0 - >>> df.xs(('baz', 2), level=[0, 'third']) - A B C D - second - three 5 3 5 3 + Get values at specified index - Returns - ------- - xs : Series or DataFrame + >>> df.xs('mammal') + num_legs num_wings + animal locomotion + cat walks 4 0 + dog walks 4 0 + bat flies 2 2 - Notes - ----- - xs is only for getting, not setting values. + Get values at several indexes + + >>> df.xs(('mammal', 'dog')) + num_legs num_wings + locomotion + walks 4 0 + + Get values at specified index and level + + >>> df.xs('cat', level=1) + num_legs num_wings + class locomotion + mammal walks 4 0 + + Get values at several indexes and levels + + >>> df.xs(('bird', 'walks'), + ... level=[0, 'locomotion']) + num_legs num_wings + animal + penguin 2 2 + + Get values at specified column and axis - MultiIndex Slicers is a generic way to get/set values on any level or - levels. 
It is a superset of xs functionality, see - :ref:`MultiIndex Slicers ` + >>> df.xs('num_wings', axis=1) + class animal locomotion + mammal cat walks 0 + dog walks 0 + bat flies 2 + bird penguin walks 2 + Name: num_wings, dtype: int64 """ axis = self._get_axis_number(axis) labels = self._get_axis(axis) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 6b84e8deea493..91c7648d5cf2e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -13,7 +13,7 @@ is_scalar) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.generic import ABCCategorical, ABCSeries -from pandas.core.dtypes.missing import array_equivalent, isna +from pandas.core.dtypes.missing import isna from pandas.core import accessor from pandas.core.algorithms import take_1d @@ -283,7 +283,9 @@ def equals(self, other): try: other = self._is_dtype_compat(other) - return array_equivalent(self._data, other) + if isinstance(other, type(self)): + other = other._data + return self._data.equals(other) except (TypeError, ValueError): pass diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5e25efe77d8b9..0dedd8fe1cf4b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -7,8 +7,6 @@ import numpy as np from pandas._libs import NaT, iNaT, lib -from pandas._libs.tslibs.timestamps import RoundTo, round_nsint64 -import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly @@ -19,11 +17,10 @@ is_integer, is_integer_dtype, is_list_like, is_object_dtype, is_period_dtype, is_scalar, is_string_dtype) from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries -from pandas.core.dtypes.missing import isna from pandas.core import algorithms, ops -from pandas.core.arrays import PeriodArray -from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin +from pandas.core.arrays.datetimelike import ( + DatetimeLikeArrayMixin, _ensure_datetimelike_to_i8) import pandas.core.indexes.base as ibase from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.tools.timedeltas import to_timedelta @@ -33,188 +30,6 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) -class DatelikeOps(object): - """ - Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex. - """ - - def strftime(self, date_format): - return Index(self.format(date_format=date_format), - dtype=compat.text_type) - strftime.__doc__ = """ - Convert to Index using specified date_format. - - Return an Index of formatted strings specified by date_format, which - supports the same string format as the python standard library. Details - of the string format can be found in `python string format doc <{0}>`__ - - Parameters - ---------- - date_format : str - Date format string (e.g. "%Y-%m-%d"). - - Returns - ------- - Index - Index of formatted strings - - See Also - -------- - to_datetime : Convert the given argument to datetime. - DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. - DatetimeIndex.round : Round the DatetimeIndex to the specified freq. - DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. - - Examples - -------- - >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), - ... 
periods=3, freq='s') - >>> rng.strftime('%B %d, %Y, %r') - Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', - 'March 10, 2018, 09:00:02 AM'], - dtype='object') - """.format("https://docs.python.org/3/library/datetime.html" - "#strftime-and-strptime-behavior") - - -class TimelikeOps(object): - """ - Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex. - """ - - _round_doc = ( - """ - Perform {op} operation on the data to the specified `freq`. - - Parameters - ---------- - freq : str or Offset - The frequency level to {op} the index to. Must be a fixed - frequency like 'S' (second) not 'ME' (month end). See - :ref:`frequency aliases ` for - a list of possible `freq` values. - ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' - Only relevant for DatetimeIndex: - - - 'infer' will attempt to infer fall dst-transition hours based on - order - - bool-ndarray where True signifies a DST time, False designates - a non-DST time (note that this flag is only applicable for - ambiguous times) - - 'NaT' will return NaT where there are ambiguous times - - 'raise' will raise an AmbiguousTimeError if there are ambiguous - times - - .. versionadded:: 0.24.0 - nonexistent : 'shift', 'NaT', default 'raise' - A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. - - - 'shift' will shift the nonexistent time forward to the closest - existing time - - 'NaT' will return NaT where there are nonexistent times - - 'raise' will raise an NonExistentTimeError if there are - nonexistent times - - .. versionadded:: 0.24.0 - - Returns - ------- - DatetimeIndex, TimedeltaIndex, or Series - Index of the same type for a DatetimeIndex or TimedeltaIndex, - or a Series with the same index for a Series. - - Raises - ------ - ValueError if the `freq` cannot be converted. 
- - Examples - -------- - **DatetimeIndex** - - >>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min') - >>> rng - DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00', - '2018-01-01 12:01:00'], - dtype='datetime64[ns]', freq='T') - """) - - _round_example = ( - """>>> rng.round('H') - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.round("H") - 0 2018-01-01 12:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 12:00:00 - dtype: datetime64[ns] - """) - - _floor_example = ( - """>>> rng.floor('H') - DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00', - '2018-01-01 12:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.floor("H") - 0 2018-01-01 11:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 12:00:00 - dtype: datetime64[ns] - """ - ) - - _ceil_example = ( - """>>> rng.ceil('H') - DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00', - '2018-01-01 13:00:00'], - dtype='datetime64[ns]', freq=None) - - **Series** - - >>> pd.Series(rng).dt.ceil("H") - 0 2018-01-01 12:00:00 - 1 2018-01-01 12:00:00 - 2 2018-01-01 13:00:00 - dtype: datetime64[ns] - """ - ) - - def _round(self, freq, mode, ambiguous, nonexistent): - # round the local times - values = _ensure_datetimelike_to_i8(self) - result = round_nsint64(values, mode, freq) - result = self._maybe_mask_results(result, fill_value=NaT) - - attribs = self._get_attributes_dict() - attribs['freq'] = None - if 'tz' in attribs: - attribs['tz'] = None - return self._ensure_localized( - self._shallow_copy(result, **attribs), ambiguous, nonexistent - ) - - @Appender((_round_doc + _round_example).format(op="round")) - def round(self, freq, ambiguous='raise', nonexistent='raise'): - return self._round( - freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent - ) - - @Appender((_round_doc + _floor_example).format(op="floor")) - def floor(self, freq, ambiguous='raise', nonexistent='raise'): - return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent) - - @Appender((_round_doc + _ceil_example).format(op="ceil")) - def ceil(self, freq, ambiguous='raise', nonexistent='raise'): - return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent) - - class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin): """ common ops mixin to support a unified interface datetimelike Index """ @@ -754,39 +569,6 @@ def _time_shift(self, periods, freq=None): return result -def _ensure_datetimelike_to_i8(other, to_utc=False): - """ - Helper for coercing an input scalar or array to i8. - - Parameters - ---------- - other : 1d array - to_utc : bool, default False - If True, convert the values to UTC before extracting the i8 values - If False, extract the i8 values directly. 
- - Returns - ------- - i8 1d array - """ - if is_scalar(other) and isna(other): - return iNaT - elif isinstance(other, (PeriodArray, ABCIndexClass)): - # convert tz if needed - if getattr(other, 'tz', None) is not None: - if to_utc: - other = other.tz_convert('UTC') - else: - other = other.tz_localize(None) - else: - try: - return np.array(other, copy=False).view('i8') - except TypeError: - # period array cannot be coerced to int - other = Index(other) - return other.asi8 - - def wrap_arithmetic_op(self, other, result): if result is NotImplemented: return NotImplemented diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 01901d022da32..8f36096d128c2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -31,8 +31,7 @@ import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( - DatelikeOps, DatetimeIndexOpsMixin, TimelikeOps, wrap_array_method, - wrap_field_accessor) + DatetimeIndexOpsMixin, wrap_array_method, wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools @@ -62,8 +61,7 @@ def _new_DatetimeIndex(cls, d): return result -class DatetimeIndex(DatetimeArray, DatelikeOps, TimelikeOps, - DatetimeIndexOpsMixin, Int64Index): +class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of datetime64 data, represented internally as int64, and which can be boxed to Timestamp objects that are subclasses of datetime and diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 56df454bddf1c..3cdefb02ef8b3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -26,7 +26,7 @@ import pandas.core.indexes.base as ibase from pandas.core.indexes.base import _index_shared_docs, ensure_index from pandas.core.indexes.datetimelike import ( - DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op) + DatetimeIndexOpsMixin, wrap_arithmetic_op) from pandas.core.indexes.datetimes import DatetimeIndex, Index, Int64Index from pandas.core.missing import isna from pandas.core.ops import get_op_result_name @@ -83,7 +83,7 @@ def _delegate_method(self, name, *args, **kwargs): if x not in {"asfreq", "to_timestamp"}], typ="method", overwrite=True) -class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, +class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): """ Immutable ndarray holding ordinal values indicating regular periods in diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ed4e43df8f41a..e33d61d29d302 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -22,7 +22,7 @@ import pandas.core.common as com from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin, TimelikeOps, wrap_arithmetic_op, wrap_array_method, + DatetimeIndexOpsMixin, wrap_arithmetic_op, wrap_array_method, wrap_field_accessor) from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name @@ -31,8 +31,7 @@ from pandas.tseries.frequencies import to_offset -class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, - TimelikeOps, Int64Index): +class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray of timedelta64 data, represented internally as int64, and which can be 
boxed to timedelta objects diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 8ad2a48e8767c..5d5f6cf8102be 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -43,7 +43,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, and maximum values of `x`. * sequence of scalars : Defines the bin edges allowing for non-uniform width. No extension of the range of `x` is done. - * IntervalIndex : Defines the exact bins to be used. + * IntervalIndex : Defines the exact bins to be used. Note that + IntervalIndex for `bins` must be non-overlapping. right : bool, default True Indicates whether `bins` includes the rightmost edge or not. If @@ -217,7 +218,9 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, bins[-1] += adj elif isinstance(bins, IntervalIndex): - pass + if bins.is_overlapping: + raise ValueError('Overlapping IntervalIndex is not accepted.') + else: bins = np.asarray(bins) bins = _convert_bin_to_numeric_type(bins, dtype) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 4d9f1567b371a..447eac8a57013 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import ( ensure_object, is_bool_dtype, is_categorical_dtype, is_integer, is_list_like, is_re, is_scalar, is_string_like) -from pandas.core.dtypes.generic import ABCIndex, ABCMultiIndex, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries from pandas.core.dtypes.missing import isna from pandas.core.algorithms import take_1d @@ -931,7 +931,7 @@ def str_extractall(arr, pat, flags=0): if regex.groups == 0: raise ValueError("pattern contains no capture groups") - if isinstance(arr, ABCIndex): + if isinstance(arr, ABCIndexClass): arr = arr.to_series().reset_index(drop=True) names = dict(zip(regex.groupindex.values(), regex.groupindex.keys())) @@ -1920,7 +1920,7 @@ def __iter__(self): def _wrap_result(self, result, use_codes=True, name=None, expand=None, fill_value=np.nan): - from pandas.core.index import Index, MultiIndex + from pandas import Index, Series, MultiIndex # for category, we do the stuff on the categories, so blow it up # to the full series again @@ -1928,7 +1928,8 @@ def _wrap_result(self, result, use_codes=True, # so make it possible to skip this step as the method already did this # before the transformation... 
if use_codes and self._is_categorical: - result = take_1d(result, self._orig.cat.codes, + # if self._orig is a CategoricalIndex, there is no .cat-accessor + result = take_1d(result, Series(self._orig, copy=False).cat.codes, fill_value=fill_value) if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 7ade35f93a858..aadca1fcb3bef 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -27,9 +27,9 @@ from pandas.core.dtypes.cast import astype_nansafe from pandas.core.dtypes.common import ( - ensure_object, is_categorical_dtype, is_dtype_equal, is_float, is_integer, - is_integer_dtype, is_list_like, is_object_dtype, is_scalar, - is_string_dtype) + ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal, + is_float, is_integer, is_integer_dtype, is_list_like, is_object_dtype, + is_scalar, is_string_dtype) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna @@ -1669,6 +1669,16 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, # type specified in dtype param if cast_type and not is_dtype_equal(cvals, cast_type): + try: + if (is_bool_dtype(cast_type) and + not is_categorical_dtype(cast_type) + and na_count > 0): + raise ValueError("Bool column has NA values in " + "column {column}" + .format(column=c)) + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 61fe9d12c173c..f1eb6a33eddeb 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -150,7 +150,7 @@ def test_to_records_index_name(self): def test_to_records_with_unicode_index(self): # GH13172 # unicode_literals conflict with to_records - result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a')\ + result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a') \ .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) @@ -281,17 +281,23 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) - def test_to_dict_box_scalars(self): - # 14216 + # orient - orient argument to to_dict function + # item_getter - function for extracting value from + # the resulting dict using column name and index + @pytest.mark.parametrize('orient,item_getter', [ + ('dict', lambda d, col, idx: d[col][idx]), + ('records', lambda d, col, idx: d[idx][col]), + ('list', lambda d, col, idx: d[col][idx]), + ('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]), + ('index', lambda d, col, idx: d[idx][col]) + ]) + def test_to_dict_box_scalars(self, orient, item_getter): + # 14216, 23753 # make sure that we are boxing properly - d = {'a': [1], 'b': ['b']} - - result = DataFrame(d).to_dict() - assert isinstance(list(result['a'])[0], (int, long)) - assert isinstance(list(result['b'])[0], (int, long)) - - result = DataFrame(d).to_dict(orient='records') - assert isinstance(result[0]['a'], (int, long)) + df = DataFrame({'a': [1, 2], 'b': [.1, .2]}) + result = df.to_dict(orient=orient) + assert isinstance(item_getter(result, 'a', 0), (int, long)) + assert isinstance(item_getter(result, 'b', 0), float) def test_frame_to_dict_tz(self): # GH18372 When converting to dict with orient='records' columns of diff --git 
a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py index 921984bc44e50..1b6d2ee8a062e 100644 --- a/pandas/tests/io/parser/test_na_values.py +++ b/pandas/tests/io/parser/test_na_values.py @@ -421,3 +421,21 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter): result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("data, na_values", [ + ("false,1\n,1\ntrue", None), + ("false,1\nnull,1\ntrue", None), + ("false,1\nnan,1\ntrue", None), + ("false,1\nfoo,1\ntrue", 'foo'), + ("false,1\nfoo,1\ntrue", ['foo']), + ("false,1\nfoo,1\ntrue", {'a': 'foo'}), +]) +def test_cast_NA_to_bool_raises_error(all_parsers, data, na_values): + parser = all_parsers + msg = ("(Bool column has NA values in column [0a])|" + "(cannot safely convert passed user dtype of " + "bool for object dtyped data in column 0)") + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), header=None, names=['a', 'b'], + dtype={'a': 'bool'}, na_values=na_values) diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index f04e9a55a6c8d..19f1a9a8b65c7 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -91,6 +91,14 @@ def test_bins_from_intervalindex(self): tm.assert_numpy_array_equal(result.codes, np.array([1, 1, 2], dtype='int8')) + def test_bins_not_overlapping_from_intervalindex(self): + # see gh-23980 + msg = "Overlapping IntervalIndex is not accepted" + ii = IntervalIndex.from_tuples([(0, 10), (2, 12), (4, 14)]) + + with pytest.raises(ValueError, match=msg): + cut([5, 6], bins=ii) + def test_bins_not_monotonic(self): data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1] pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10]) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d6c6a8652e728..7a88a96e9c609 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -218,9 +218,6 @@ def test_api_per_method(self, box, dtype, method_name, args, kwargs = any_string_method # TODO: get rid of these xfails - if box == Index and dtype == 'category': - pytest.xfail(reason='Broken methods on CategoricalIndex; ' - 'see GH 23556') if (method_name in ['partition', 'rpartition'] and box == Index and inferred_dtype == 'empty'): pytest.xfail(reason='Method cannot deal with empty Index') @@ -247,6 +244,7 @@ def test_api_per_method(self, box, dtype, + ['mixed', 'mixed-integer'] * mixed_allowed) if inferred_dtype in allowed_types: + # xref GH 23555, GH 23556 method(*args, **kwargs) # works! 
else: # GH 23011, GH 23163 diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 9f5b4f7b90d9f..84bc1863aadd9 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -10,272 +10,319 @@ import pandas.util.testing as tm -class TestHashing(object): - - @pytest.fixture(params=[ - Series([1, 2, 3] * 3, dtype='int32'), - Series([None, 2.5, 3.5] * 3, dtype='float32'), - Series(['a', 'b', 'c'] * 3, dtype='category'), - Series(['d', 'e', 'f'] * 3), - Series([True, False, True] * 3), - Series(pd.date_range('20130101', periods=9)), - Series(pd.date_range('20130101', periods=9, tz='US/Eastern')), - Series(pd.timedelta_range('2000', periods=9))]) - def series(self, request): - return request.param - - def test_consistency(self): - # check that our hash doesn't change because of a mistake - # in the actual code; this is the ground truth - result = hash_pandas_object(Index(['foo', 'bar', 'baz'])) - expected = Series(np.array([3600424527151052760, 1374399572096150070, - 477881037637427054], dtype='uint64'), - index=['foo', 'bar', 'baz']) - tm.assert_series_equal(result, expected) - - def test_hash_array(self, series): - a = series.values - tm.assert_numpy_array_equal(hash_array(a), hash_array(a)) - - def test_hash_array_mixed(self): - result1 = hash_array(np.array([3, 4, 'All'])) - result2 = hash_array(np.array(['3', '4', 'All'])) - result3 = hash_array(np.array([3, 4, 'All'], dtype=object)) - tm.assert_numpy_array_equal(result1, result2) - tm.assert_numpy_array_equal(result1, result3) - - @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) - def test_hash_array_errors(self, val): - msg = 'must pass a ndarray-like' - with pytest.raises(TypeError, match=msg): - hash_array(val) - - def check_equal(self, obj, **kwargs): - a = hash_pandas_object(obj, **kwargs) - b = hash_pandas_object(obj, **kwargs) - tm.assert_series_equal(a, b) - - kwargs.pop('index', None) - a = hash_pandas_object(obj, **kwargs) - b = hash_pandas_object(obj, **kwargs) - tm.assert_series_equal(a, b) - - def check_not_equal_with_index(self, obj): - - # check that we are not hashing the same if - # we include the index - if not isinstance(obj, Index): - a = hash_pandas_object(obj, index=True) - b = hash_pandas_object(obj, index=False) - if len(obj): - assert not (a == b).all() - - def test_hash_tuples(self): - tups = [(1, 'one'), (1, 'two'), (2, 'one')] - result = hash_tuples(tups) - expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values - tm.assert_numpy_array_equal(result, expected) - - result = hash_tuples(tups[0]) - assert result == expected[0] - - @pytest.mark.parametrize('tup', [ - (1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'), - ('A', pd.Timestamp("2012-01-01"))]) - def test_hash_tuple(self, tup): - # test equivalence between hash_tuples and hash_tuple - result = hash_tuple(tup) - expected = hash_tuples([tup])[0] - assert result == expected - - @pytest.mark.parametrize('val', [ - 1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), - pd.Timestamp("2012-01-01", tz='Europe/Brussels'), - datetime.datetime(2012, 1, 1), - pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(), - pd.Timedelta('1 days'), datetime.timedelta(1), - pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1), - np.nan, pd.NaT, None]) - def test_hash_scalar(self, val): - result = _hash_scalar(val) - expected = hash_array(np.array([val], dtype=object), categorize=True) - assert result[0] == expected[0] - - @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) - 
def test_hash_tuples_err(self, val): - msg = 'must be convertible to a list-of-tuples' - with pytest.raises(TypeError, match=msg): - hash_tuples(val) - - def test_multiindex_unique(self): - mi = MultiIndex.from_tuples([(118, 472), (236, 118), - (51, 204), (102, 51)]) - assert mi.is_unique is True - result = hash_pandas_object(mi) - assert result.is_unique is True - - def test_multiindex_objects(self): - mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], - labels=[[0, 1, 0, 2], [2, 0, 0, 1]], - names=['col1', 'col2']) - recons = mi._sort_levels_monotonic() - - # these are equal - assert mi.equals(recons) - assert Index(mi.values).equals(Index(recons.values)) - - # _hashed_values and hash_pandas_object(..., index=False) - # equivalency - expected = hash_pandas_object( - mi, index=False).values - result = mi._hashed_values - tm.assert_numpy_array_equal(result, expected) - - expected = hash_pandas_object( - recons, index=False).values - result = recons._hashed_values - tm.assert_numpy_array_equal(result, expected) - - expected = mi._hashed_values - result = recons._hashed_values - - # values should match, but in different order - tm.assert_numpy_array_equal(np.sort(result), - np.sort(expected)) - - @pytest.mark.parametrize('obj', [ - Series([1, 2, 3]), - Series([1.0, 1.5, 3.2]), - Series([1.0, 1.5, np.nan]), - Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), - Series(['a', 'b', 'c']), - Series(['a', np.nan, 'c']), - Series(['a', None, 'c']), - Series([True, False, True]), - Series(), - Index([1, 2, 3]), - Index([True, False, True]), - DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}), - DataFrame(), - tm.makeMissingDataframe(), - tm.makeMixedDataFrame(), - tm.makeTimeDataFrame(), - tm.makeTimeSeries(), - tm.makeTimedeltaIndex(), - tm.makePeriodIndex(), - Series(tm.makePeriodIndex()), - Series(pd.date_range('20130101', periods=3, tz='US/Eastern')), - MultiIndex.from_product([range(5), ['foo', 'bar', 'baz'], - pd.date_range('20130101', periods=2)]), - MultiIndex.from_product([pd.CategoricalIndex(list('aabc')), range(3)]) - ]) - def test_hash_pandas_object(self, obj): - self.check_equal(obj) - self.check_not_equal_with_index(obj) - - def test_hash_pandas_object2(self, series): - self.check_equal(series) - self.check_not_equal_with_index(series) - - @pytest.mark.parametrize('obj', [ - Series([], dtype='float64'), Series([], dtype='object'), Index([])]) - def test_hash_pandas_empty_object(self, obj): - # these are by-definition the same with - # or w/o the index as the data is empty - self.check_equal(obj) - - @pytest.mark.parametrize('s1', [ - Series(['a', 'b', 'c', 'd']), - Series([1000, 2000, 3000, 4000]), - Series(pd.date_range(0, periods=4))]) - @pytest.mark.parametrize('categorize', [True, False]) - def test_categorical_consistency(self, s1, categorize): - # GH15143 - # Check that categoricals hash consistent with their values, not codes - # This should work for categoricals of any dtype - s2 = s1.astype('category').cat.set_categories(s1) - s3 = s2.cat.set_categories(list(reversed(s1))) - - # These should all hash identically - h1 = hash_pandas_object(s1, categorize=categorize) - h2 = hash_pandas_object(s2, categorize=categorize) - h3 = hash_pandas_object(s3, categorize=categorize) - tm.assert_series_equal(h1, h2) - tm.assert_series_equal(h1, h3) - - def test_categorical_with_nan_consistency(self): - c = pd.Categorical.from_codes( - [-1, 0, 1, 2, 3, 4], - categories=pd.date_range('2012-01-01', periods=5, name='B')) - expected = hash_array(c, categorize=False) - c = pd.Categorical.from_codes( 
- [-1, 0], - categories=[pd.Timestamp('2012-01-01')]) - result = hash_array(c, categorize=False) - assert result[0] in expected - assert result[1] in expected - - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") - def test_pandas_errors(self): - with pytest.raises(TypeError): - hash_pandas_object(pd.Timestamp('20130101')) - - obj = tm.makePanel() - - with pytest.raises(TypeError): - hash_pandas_object(obj) - - def test_hash_keys(self): - # using different hash keys, should have different hashes - # for the same data - - # this only matters for object dtypes - obj = Series(list('abc')) - a = hash_pandas_object(obj, hash_key='9876543210123456') - b = hash_pandas_object(obj, hash_key='9876543210123465') - assert (a != b).all() - - def test_invalid_key(self): - # this only matters for object dtypes - msg = 'key should be a 16-byte string encoded' - with pytest.raises(ValueError, match=msg): - hash_pandas_object(Series(list('abc')), hash_key='foo') - - def test_alread_encoded(self): - # if already encoded then ok - - obj = Series(list('abc')).str.encode('utf8') - self.check_equal(obj) - - def test_alternate_encoding(self): - - obj = Series(list('abc')) - self.check_equal(obj, encoding='ascii') - - @pytest.mark.parametrize('l_exp', range(8)) - @pytest.mark.parametrize('l_add', [0, 1]) - def test_same_len_hash_collisions(self, l_exp, l_add): - length = 2**(l_exp + 8) + l_add - s = tm.rands_array(length, 2) - result = hash_array(s, 'utf8') - assert not result[0] == result[1] - - def test_hash_collisions(self): - - # hash collisions are bad - # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726 - L = ['Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9', # noqa - 'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe'] # noqa - - # these should be different! 
- result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8') - expected1 = np.array([14963968704024874985], dtype=np.uint64) - tm.assert_numpy_array_equal(result1, expected1) - - result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8') - expected2 = np.array([16428432627716348016], dtype=np.uint64) - tm.assert_numpy_array_equal(result2, expected2) - - result = hash_array(np.asarray(L, dtype=object), 'utf8') - tm.assert_numpy_array_equal( - result, np.concatenate([expected1, expected2], axis=0)) +@pytest.fixture(params=[ + Series([1, 2, 3] * 3, dtype="int32"), + Series([None, 2.5, 3.5] * 3, dtype="float32"), + Series(["a", "b", "c"] * 3, dtype="category"), + Series(["d", "e", "f"] * 3), + Series([True, False, True] * 3), + Series(pd.date_range("20130101", periods=9)), + Series(pd.date_range("20130101", periods=9, tz="US/Eastern")), + Series(pd.timedelta_range("2000", periods=9))]) +def series(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def index(request): + return request.param + + +def _check_equal(obj, **kwargs): + """ + Check that hashing an objects produces the same value each time. + + Parameters + ---------- + obj : object + The object to hash. + kwargs : kwargs + Keyword arguments to pass to the hashing function. + """ + a = hash_pandas_object(obj, **kwargs) + b = hash_pandas_object(obj, **kwargs) + tm.assert_series_equal(a, b) + + +def _check_not_equal_with_index(obj): + """ + Check the hash of an object with and without its index is not the same. + + Parameters + ---------- + obj : object + The object to hash. + """ + if not isinstance(obj, Index): + a = hash_pandas_object(obj, index=True) + b = hash_pandas_object(obj, index=False) + + if len(obj): + assert not (a == b).all() + + +def test_consistency(): + # Check that our hash doesn't change because of a mistake + # in the actual code; this is the ground truth. + result = hash_pandas_object(Index(["foo", "bar", "baz"])) + expected = Series(np.array([3600424527151052760, 1374399572096150070, + 477881037637427054], dtype="uint64"), + index=["foo", "bar", "baz"]) + tm.assert_series_equal(result, expected) + + +def test_hash_array(series): + arr = series.values + tm.assert_numpy_array_equal(hash_array(arr), hash_array(arr)) + + +@pytest.mark.parametrize("arr2", [ + np.array([3, 4, "All"]), + np.array([3, 4, "All"], dtype=object), +]) +def test_hash_array_mixed(arr2): + result1 = hash_array(np.array(["3", "4", "All"])) + result2 = hash_array(arr2) + + tm.assert_numpy_array_equal(result1, result2) + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_array_errors(val): + msg = "must pass a ndarray-like" + with pytest.raises(TypeError, match=msg): + hash_array(val) + + +def test_hash_tuples(): + tuples = [(1, "one"), (1, "two"), (2, "one")] + result = hash_tuples(tuples) + + expected = hash_pandas_object(MultiIndex.from_tuples(tuples)).values + tm.assert_numpy_array_equal(result, expected) + + result = hash_tuples(tuples[0]) + assert result == expected[0] + + +@pytest.mark.parametrize("tup", [ + (1, "one"), (1, np.nan), (1.0, pd.NaT, "A"), + ("A", pd.Timestamp("2012-01-01"))]) +def test_hash_tuple(tup): + # Test equivalence between + # hash_tuples and hash_tuple. 
+ result = hash_tuple(tup) + expected = hash_tuples([tup])[0] + + assert result == expected + + +@pytest.mark.parametrize("val", [ + 1, 1.4, "A", b"A", u"A", pd.Timestamp("2012-01-01"), + pd.Timestamp("2012-01-01", tz="Europe/Brussels"), + datetime.datetime(2012, 1, 1), + pd.Timestamp("2012-01-01", tz="EST").to_pydatetime(), + pd.Timedelta("1 days"), datetime.timedelta(1), + pd.Period("2012-01-01", freq="D"), pd.Interval(0, 1), + np.nan, pd.NaT, None]) +def test_hash_scalar(val): + result = _hash_scalar(val) + expected = hash_array(np.array([val], dtype=object), categorize=True) + + assert result[0] == expected[0] + + +@pytest.mark.parametrize("val", [5, "foo", pd.Timestamp("20130101")]) +def test_hash_tuples_err(val): + msg = "must be convertible to a list-of-tuples" + with pytest.raises(TypeError, match=msg): + hash_tuples(val) + + +def test_multiindex_unique(): + mi = MultiIndex.from_tuples([(118, 472), (236, 118), + (51, 204), (102, 51)]) + assert mi.is_unique is True + + result = hash_pandas_object(mi) + assert result.is_unique is True + + +def test_multiindex_objects(): + mi = MultiIndex(levels=[["b", "d", "a"], [1, 2, 3]], + labels=[[0, 1, 0, 2], [2, 0, 0, 1]], + names=["col1", "col2"]) + recons = mi._sort_levels_monotonic() + + # These are equal. + assert mi.equals(recons) + assert Index(mi.values).equals(Index(recons.values)) + + # _hashed_values and hash_pandas_object(..., index=False) equivalency. + expected = hash_pandas_object(mi, index=False).values + result = mi._hashed_values + + tm.assert_numpy_array_equal(result, expected) + + expected = hash_pandas_object(recons, index=False).values + result = recons._hashed_values + + tm.assert_numpy_array_equal(result, expected) + + expected = mi._hashed_values + result = recons._hashed_values + + # Values should match, but in different order. + tm.assert_numpy_array_equal(np.sort(result), np.sort(expected)) + + +@pytest.mark.parametrize("obj", [ + Series([1, 2, 3]), + Series([1.0, 1.5, 3.2]), + Series([1.0, 1.5, np.nan]), + Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), + Series(["a", "b", "c"]), + Series(["a", np.nan, "c"]), + Series(["a", None, "c"]), + Series([True, False, True]), + Series(), + Index([1, 2, 3]), + Index([True, False, True]), + DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), + DataFrame(), + tm.makeMissingDataframe(), + tm.makeMixedDataFrame(), + tm.makeTimeDataFrame(), + tm.makeTimeSeries(), + tm.makeTimedeltaIndex(), + tm.makePeriodIndex(), + Series(tm.makePeriodIndex()), + Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), + MultiIndex.from_product([range(5), ["foo", "bar", "baz"], + pd.date_range("20130101", periods=2)]), + MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]) +]) +def test_hash_pandas_object(obj, index): + _check_equal(obj, index=index) + _check_not_equal_with_index(obj) + + +def test_hash_pandas_object2(series, index): + _check_equal(series, index=index) + _check_not_equal_with_index(series) + + +@pytest.mark.parametrize("obj", [ + Series([], dtype="float64"), Series([], dtype="object"), Index([])]) +def test_hash_pandas_empty_object(obj, index): + # These are by-definition the same with + # or without the index as the data is empty. 
+ _check_equal(obj, index=index) + + +@pytest.mark.parametrize("s1", [ + Series(["a", "b", "c", "d"]), + Series([1000, 2000, 3000, 4000]), + Series(pd.date_range(0, periods=4))]) +@pytest.mark.parametrize("categorize", [True, False]) +def test_categorical_consistency(s1, categorize): + # see gh-15143 + # + # Check that categoricals hash consistent with their values, + # not codes. This should work for categoricals of any dtype. + s2 = s1.astype("category").cat.set_categories(s1) + s3 = s2.cat.set_categories(list(reversed(s1))) + + # These should all hash identically. + h1 = hash_pandas_object(s1, categorize=categorize) + h2 = hash_pandas_object(s2, categorize=categorize) + h3 = hash_pandas_object(s3, categorize=categorize) + + tm.assert_series_equal(h1, h2) + tm.assert_series_equal(h1, h3) + + +def test_categorical_with_nan_consistency(): + c = pd.Categorical.from_codes( + [-1, 0, 1, 2, 3, 4], + categories=pd.date_range("2012-01-01", periods=5, name="B")) + expected = hash_array(c, categorize=False) + + c = pd.Categorical.from_codes( + [-1, 0], + categories=[pd.Timestamp("2012-01-01")]) + result = hash_array(c, categorize=False) + + assert result[0] in expected + assert result[1] in expected + + +@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") +@pytest.mark.parametrize("obj", [pd.Timestamp("20130101"), tm.makePanel()]) +def test_pandas_errors(obj): + msg = "Unexpected type for hashing" + with pytest.raises(TypeError, match=msg): + hash_pandas_object(obj) + + +def test_hash_keys(): + # Using different hash keys, should have + # different hashes for the same data. + # + # This only matters for object dtypes. + obj = Series(list("abc")) + + a = hash_pandas_object(obj, hash_key="9876543210123456") + b = hash_pandas_object(obj, hash_key="9876543210123465") + + assert (a != b).all() + + +def test_invalid_key(): + # This only matters for object dtypes. + msg = "key should be a 16-byte string encoded" + + with pytest.raises(ValueError, match=msg): + hash_pandas_object(Series(list("abc")), hash_key="foo") + + +def test_already_encoded(index): + # If already encoded, then ok. + obj = Series(list("abc")).str.encode("utf8") + _check_equal(obj, index=index) + + +def test_alternate_encoding(index): + obj = Series(list("abc")) + _check_equal(obj, index=index, encoding="ascii") + + +@pytest.mark.parametrize("l_exp", range(8)) +@pytest.mark.parametrize("l_add", [0, 1]) +def test_same_len_hash_collisions(l_exp, l_add): + length = 2**(l_exp + 8) + l_add + s = tm.rands_array(length, 2) + + result = hash_array(s, "utf8") + assert not result[0] == result[1] + + +def test_hash_collisions(): + # Hash collisions are bad. 
+ # + # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726 + hashes = ["Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9", # noqa + "Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe"] # noqa + + # These should be different. + result1 = hash_array(np.asarray(hashes[0:1], dtype=object), "utf8") + expected1 = np.array([14963968704024874985], dtype=np.uint64) + tm.assert_numpy_array_equal(result1, expected1) + + result2 = hash_array(np.asarray(hashes[1:2], dtype=object), "utf8") + expected2 = np.array([16428432627716348016], dtype=np.uint64) + tm.assert_numpy_array_equal(result2, expected2) + + result = hash_array(np.asarray(hashes, dtype=object), "utf8") + tm.assert_numpy_array_equal(result, np.concatenate([expected1, + expected2], axis=0)) diff --git a/requirements-dev.txt b/requirements-dev.txt index d01a21ac5fed5..5e2da69df5f26 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,23 +1,21 @@ -NumPy +numpy>=1.15 python-dateutil>=2.5.0 pytz -Cython>=0.28.2 +cython>=0.28.2 flake8 flake8-comprehensions -flake8-rst==0.4.2 +flake8-rst>=0.6.0 gitpython -hypothesis>=3.58.0 +hypothesis>=3.82 isort moto -pytest>=3.6 -setuptools>=24.2.0 +pytest>=4.0 sphinx sphinxcontrib-spelling beautifulsoup4>=4.2.1 blosc bottleneck>=1.2.0 fastparquet>=0.1.2 -gcsfs html5lib ipython>=5.6.0 ipykernel @@ -25,15 +23,13 @@ jinja2 lxml matplotlib>=2.0.0 nbsphinx -numexpr>=2.6.1 +numexpr>=2.6.8 openpyxl pyarrow>=0.7.0 -pymysql tables>=3.4.2 pytest-cov pytest-xdist -s3fs -scipy>=0.18.1 +scipy>=1.1 seaborn sqlalchemy statsmodels diff --git a/setup.cfg b/setup.cfg index 8fba814188af5..cc7393e5a09b9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,25 +31,65 @@ exclude = env # exclude asv benchmark environments from linting [flake8-rst] -ignore = - F821, # undefined name - W391, # blank line at end of file [Seems to be a bug (v0.4.1)] +ignore = E402, # module level import not at top of file + W503, # line break before binary operator exclude = doc/source/whatsnew/v0.7.0.rst + doc/source/whatsnew/v0.7.3.rst + doc/source/whatsnew/v0.8.0.rst + doc/source/whatsnew/v0.9.0.rst + doc/source/whatsnew/v0.9.1.rst + doc/source/whatsnew/v0.10.0.rst doc/source/whatsnew/v0.10.1.rst + doc/source/whatsnew/v0.11.0.rst doc/source/whatsnew/v0.12.0.rst doc/source/whatsnew/v0.13.0.rst doc/source/whatsnew/v0.13.1.rst doc/source/whatsnew/v0.14.0.rst + doc/source/whatsnew/v0.14.1.rst doc/source/whatsnew/v0.15.0.rst + doc/source/whatsnew/v0.15.1.rst + doc/source/whatsnew/v0.15.2.rst doc/source/whatsnew/v0.16.0.rst + doc/source/whatsnew/v0.16.1.rst doc/source/whatsnew/v0.16.2.rst doc/source/whatsnew/v0.17.0.rst 
+ doc/source/whatsnew/v0.17.1.rst doc/source/whatsnew/v0.18.0.rst doc/source/whatsnew/v0.18.1.rst + doc/source/whatsnew/v0.19.0.rst doc/source/whatsnew/v0.20.0.rst doc/source/whatsnew/v0.21.0.rst + doc/source/whatsnew/v0.22.0.rst doc/source/whatsnew/v0.23.0.rst + doc/source/whatsnew/v0.23.1.rst + doc/source/whatsnew/v0.23.2.rst + doc/source/whatsnew/v0.24.0.rst + doc/source/10min.rst + doc/source/advanced.rst + doc/source/basics.rst + doc/source/categorical.rst + doc/source/comparison_with_r.rst + doc/source/comparison_with_sql.rst + doc/source/comparison_with_stata.rst + doc/source/computation.rst + doc/source/contributing.rst + doc/source/contributing_docstring.rst + doc/source/dsintro.rst + doc/source/enhancingperf.rst + doc/source/extending.rst + doc/source/groupby.rst + doc/source/indexing.rst + doc/source/io.rst + doc/source/merging.rst + doc/source/missing_data.rst + doc/source/options.rst + doc/source/release.rst + doc/source/reshaping.rst + doc/source/timedeltas.rst + doc/source/timeseries.rst + doc/source/visualization.rst + [yapf] based_on_style = pep8 From 332d14b06ca33d8d003ea508c92e9649501ec585 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 3 Dec 2018 08:36:41 +0100 Subject: [PATCH 06/16] Review (jreback) --- doc/source/whatsnew/v0.24.0.rst | 4 ++-- pandas/core/strings.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 4ccdc985a751a..f5c6a4a5bbf6c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -374,8 +374,8 @@ Backwards incompatible API changes - A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`) - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`) -- The `.str`-accessor will perform more rigorous type checking for inputs. Previously, some types that were never intended to be used - "worked" purely due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `decode` and `len` (:issue:`23011`, :issue:`23163`) +- The `.str`-accessor will perform more rigorous type checking for inputs. Previously, some types that were never intended to be used, + "worked" due to limitations of dtype checking -- e.g. ``bytes``, which is now disabled except for `decode` and `len` (:issue:`23011`, :issue:`23163`) - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`) - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`) - :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0e86460c64ddd..138b867c5b5a8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1777,8 +1777,6 @@ def forbid_nonstring_types(forbidden, name=None): # deal with None forbidden = [] if forbidden is None else forbidden - # deal with single string instead of list - forbidden = [forbidden] if isinstance(forbidden, str) else forbidden allowed_types = {'string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'} - set(forbidden) From e34097fd04565137a8e8d4d837cac936dd37f2b8 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Mon, 4 Mar 2019 08:33:25 +0100 Subject: [PATCH 07/16] Add method name for casefold --- pandas/core/strings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 9d4df9b1944b8..b9565f1f430e8 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -3118,6 +3118,7 @@ def rindex(self, sub, start=0, end=None): docstring=_shared_docs['casemethods'] % _shared_docs['swapcase']) casefold = _noarg_wrapper(lambda x: x.casefold(), + name='casefold', docstring=_shared_docs['casemethods'] % _shared_docs['casefold']) From 25a046c2050dae4353baf4e8d515d308ffc29a33 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 4 Mar 2019 19:31:26 +0100 Subject: [PATCH 08/16] Adapt error msg --- pandas/tests/test_strings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index e580fdd4b0580..bb3cf063c569b 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -3416,7 +3416,8 @@ def test_method_on_bytes(self): lhs = Series(np.array(list('abc'), 'S1').astype(object)) rhs = Series(np.array(list('def'), 'S1').astype(object)) if compat.PY3: - with pytest.raises(TypeError, match="can't concat str to bytes"): + with pytest.raises(TypeError, + match="Cannot use .str.cat with values of.*"): lhs.str.cat(rhs) else: result = lhs.str.cat(rhs) From c9c74966e90ce6745c3c9481163ed2404988629c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 8 Mar 2019 15:22:54 +0100 Subject: [PATCH 09/16] Review (jreback) --- doc/source/user_guide/text.rst | 11 +++++++++++ doc/source/whatsnew/v0.25.0.rst | 9 +++++++++ pandas/core/strings.py | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index 6f21a7d9beb36..cf5b6b85b5ac2 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -70,6 +70,17 @@ and replacing any remaining white spaces with underscores: ``.str`` methods which operate on elements of type ``list`` are not available on such a ``Series``. +.. _text.warn_types: + +.. warning:: + + Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with + v.0.25.0, the type of the Series is inferred (like it has already been the case for ``Index.str``), + and the allowed types (i.e. strings) are enforced more rigorously. + + Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few + exceptions, other uses are not supported, and may be disabled at a later point. + Splitting and Replacing Strings ------------------------------- diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 136ee274ac5e7..858338595bb65 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -172,6 +172,15 @@ Conversion Strings ^^^^^^^ +**The ``.str``-accessor performs stricter type checks** + +Due to the lack of a native string dtype in numpy, :attr:`Series.str` only checked whether the data was of ``object`` dtype. +From now on, the inferred dtype of the Series is checked to be correct (particularly, not ``'bytes'``), as :attr:`Index.str` does already. + +For more details, see this :ref:`warning`. 
+ +**Other bugs** + - Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) - - diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b9565f1f430e8..0c18bf6745e09 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1764,7 +1764,7 @@ def forbid_nonstring_types(forbidden, name=None): Parameters ---------- - forbidden : list or None + forbidden : list-of-str or None List of forbidden non-string types, may be one or more of `['bytes', 'mixed', 'mixed-integer']`. name : str, default None From bf4d7cf7949052d76070b6f6db0f8c2673ebc637 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 27 Mar 2019 08:31:42 +0100 Subject: [PATCH 10/16] Review (jreback) --- doc/source/whatsnew/v0.25.0.rst | 45 ++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 05463c35c2e57..6acc04cde6267 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -153,6 +153,42 @@ returned if all the columns were dummy encoded, and a :class:`DataFrame` otherwi Providing any ``SparseSeries`` or ``SparseDataFrame`` to :func:`concat` will cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. +The ``.str``-accessor performs stricter type checks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Due to the lack of a native string dtype in numpy, :attr:`Series.str` only checked whether the data was of ``object`` dtype. +From now on, the inferred dtype of the Series is checked to be correct (particularly, not ``'bytes'``), as :attr:`Index.str` does already. + +*Previous Behaviour*: + +.. code-block:: python + + In [1]: s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + + In [2]: s + Out[2]: + 0 b'a' + 1 b'ba' + 2 b'cba' + dtype: object + + In [3]: s.str.startswith(b'a') + Out[3]: + 0 True + 1 False + 2 False + dtype: bool + +*New Behaviour*: + +.. ipython:: python + :okexcept: + + s = pd.Series(np.array(['a', 'ba', 'cba'], 'S'), dtype=object) + s + s.str.startswith(b'a') + +For more details, see this :ref:`warning`. .. _whatsnew_0250.api_breaking.deps: @@ -282,15 +318,6 @@ Conversion Strings ^^^^^^^ -**The ``.str``-accessor performs stricter type checks** - -Due to the lack of a native string dtype in numpy, :attr:`Series.str` only checked whether the data was of ``object`` dtype. -From now on, the inferred dtype of the Series is checked to be correct (particularly, not ``'bytes'``), as :attr:`Index.str` does already. - -For more details, see this :ref:`warning`. - -**Other bugs** - - Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`) - - From 0c7e23302c4faeb2109f4d5cdeabe30fd349e468 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 15:46:52 +0200 Subject: [PATCH 11/16] fix merge artefact --- doc/source/whatsnew/v0.25.0.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a6945d062c82a..543514f131e73 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -302,8 +302,6 @@ This change is backward compatible for direct usage of Pandas, but if you subcla Pandas objects *and* give your subclasses specific ``__str__``/``__repr__`` methods, you may have to adjust your ``__str__``/``__repr__`` methods (:issue:`26495`). ->>>>>>> upstream/master - .. 
_whatsnew_0250.api_breaking.deps: Increased minimum versions for dependencies From 1168ca2fb2fdabeefe8d228fb4ec513078f96db6 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 16:06:29 +0200 Subject: [PATCH 12/16] Review (jreback) --- doc/source/user_guide/text.rst | 3 +-- pandas/core/strings.py | 30 ++++++++++++++++++++---------- pandas/tests/test_strings.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst index a1803cd04945d..87c75e8bcd91f 100644 --- a/doc/source/user_guide/text.rst +++ b/doc/source/user_guide/text.rst @@ -75,8 +75,7 @@ and replacing any remaining whitespaces with underscores: .. warning:: Before v.0.25.0, the ``.str``-accessor did only the most rudimentary type checks. Starting with - v.0.25.0, the type of the Series is inferred (like it has already been the case for ``Index.str``), - and the allowed types (i.e. strings) are enforced more rigorously. + v.0.25.0, the type of the Series is inferred and the allowed types (i.e. strings) are enforced more rigorously. Generally speaking, the ``.str`` accessor is intended to work only on strings. With very few exceptions, other uses are not supported, and may be disabled at a later point. diff --git a/pandas/core/strings.py b/pandas/core/strings.py index f695095f672d3..3831195518c65 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1862,6 +1862,26 @@ def __init__(self, data): @staticmethod def _validate(data): + """ + Auxiliary function for string methods, infers and checks dtype of data. + + This is a "first line of defence" and just checks that the dtype is in + the *union* of the allowed types over all string methods below; this + restriction is then refined on a per-method basis using the decorator + @forbid_nonstring_types. 
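For illustration only (a sketch, not part of the diff): the per-method refinement described above is applied by decorating the string-only accessor methods with ``forbid_nonstring_types``; the ``upper`` body below is a simplified, hypothetical stand-in for the wrapped methods in the patch::

    @forbid_nonstring_types(['bytes'])
    def upper(self):
        # bytes-typed data now raises TypeError here, even though it passes
        # the first-line check in _validate
        result = _na_map(lambda x: x.upper(), self._parent)
        return self._wrap_result(result)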
+ + This really should exclude all series/index with any non-string values, + but that isn't practical for performance reasons until we have a str + dtype (GH 9343 / 13877) + + Parameters + ---------- + data : The content of the Series + + Returns + ------- + dtype : inferred dtype of data + """ if isinstance(data, ABCMultiIndex): raise AttributeError('Can only use .str accessor with Index, ' 'not MultiIndex') @@ -1877,14 +1897,6 @@ def _validate(data): inferred_dtype = lib.infer_dtype(values, skipna=True) if inferred_dtype not in allowed_types: - # this is a "first line of defence" and just checks that the type - # is in the *union* of the allowed types over all methods below; - # this restriction is then refined on a per-method basis using the - # decorator @forbid_nonstring_types - # - # this really should exclude all series/index with any non-string - # values, but that isn't practical for performance reasons until we - # have a str dtype (GH 9343 / 13877) raise AttributeError("Can only use .str accessor with string " "values!") return inferred_dtype @@ -2574,7 +2586,6 @@ def rpartition(self, sep=' ', expand=True): return self._wrap_result(result, expand=expand) @copy(str_get) - @forbid_nonstring_types(['bytes']) def get(self, i): result = str_get(self._parent, i) return self._wrap_result(result) @@ -2715,7 +2726,6 @@ def zfill(self, width): return self._wrap_result(result) @copy(str_slice) - @forbid_nonstring_types(['bytes']) def slice(self, start=None, stop=None, step=None): result = str_slice(self._parent, start, stop, step) return self._wrap_result(result) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d89cdb6cb01ee..1ba0ef3918fb7 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -223,7 +223,7 @@ def test_api_per_method(self, box, dtype, t = box(values, dtype=dtype) # explicit dtype to avoid casting method = getattr(t.str, method_name) - bytes_allowed = method_name in ['decode', 'len'] + bytes_allowed = method_name in ['decode', 'get', 'len', 'slice'] # as of v0.23.4, all methods except 'cat' are very lenient with the # allowed data types, just returning NaN for entries that error. # This could be changed with an 'errors'-kwarg to the `str`-accessor, From ab980ecce12ee77b943a17e1842243d3fb03aa0c Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 17:08:24 +0200 Subject: [PATCH 13/16] commit whatsnew changes --- doc/source/whatsnew/v0.25.0.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 543514f131e73..2dc3a0655a7c8 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -198,8 +198,10 @@ cause a ``SparseSeries`` or ``SparseDataFrame`` to be returned, as before. The ``.str``-accessor performs stricter type checks ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Due to the lack of a native string dtype in numpy, :attr:`Series.str` only checked whether the data was of ``object`` dtype. -From now on, the inferred dtype of the Series is checked to be correct (particularly, not ``'bytes'``), as :attr:`Index.str` does already. +Due to the lack of more fine-grained dtypes, :attr:`Series.str` so far only checked whether the data was +of ``object`` dtype. 
:attr:`Series.str` will now infer the dtype data *within* the Series; in particular, +``'bytes'``-only data will raise an exception (except for :meth:`Series.str.decode`, :meth:`Series.str.get`, +:meth:`Series.str.len`, :meth:`Series.str.slice`), see :issue:`23163`, :issue:`23011`, :issue:`23551`. *Previous Behaviour*: From a9968896f24e98f62a88fc2afaf41bc9c4d21bab Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 17:10:07 +0200 Subject: [PATCH 14/16] retrigger azure From 4adef353d52c788046ad24c944a7b9d50a6c4344 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 17:17:07 +0200 Subject: [PATCH 15/16] remove mentions of 'unicode' --- pandas/core/strings.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 3831195518c65..f0d382f78aa3d 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1734,11 +1734,11 @@ def forbid_nonstring_types(forbidden, name=None): :meth:`StringMethods.__init__` allows the *union* of types its different methods allow (after skipping NaNs; see :meth:`StringMethods._validate`), - namely: ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer']. + namely: ['string', 'empty', 'bytes', 'mixed', 'mixed-integer']. - The default string types ['string', 'unicode', 'empty'] are allowed for all - methods. For the additional types ['bytes', 'mixed', 'mixed-integer'], each - method then needs to forbid the types it is not intended for. + The default string types ['string', 'empty'] are allowed for all methods. + For the additional types ['bytes', 'mixed', 'mixed-integer'], each method + then needs to forbid the types it is not intended for. Parameters ---------- @@ -1766,8 +1766,8 @@ def forbid_nonstring_types(forbidden, name=None): # deal with None forbidden = [] if forbidden is None else forbidden - allowed_types = {'string', 'unicode', 'empty', - 'bytes', 'mixed', 'mixed-integer'} - set(forbidden) + allowed_types = {'string', 'empty', 'bytes', + 'mixed', 'mixed-integer'} - set(forbidden) def _forbid_nonstring_types(func): func_name = func.__name__ if name is None else name @@ -1887,8 +1887,7 @@ def _validate(data): 'not MultiIndex') # see _libs/lib.pyx for list of inferred types - allowed_types = ['string', 'unicode', 'empty', - 'bytes', 'mixed', 'mixed-integer'] + allowed_types = ['string', 'empty', 'bytes', 'mixed', 'mixed-integer'] values = getattr(data, 'values', data) # Series / Index values = getattr(values, 'categories', values) # categorical / normal From f62e344f49dfe5df95a4d7caf26585e387de5386 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 30 May 2019 17:17:30 +0200 Subject: [PATCH 16/16] improve docstring for ._validate --- pandas/core/strings.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index f0d382f78aa3d..bd756491abd2f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1863,12 +1863,13 @@ def __init__(self, data): @staticmethod def _validate(data): """ - Auxiliary function for string methods, infers and checks dtype of data. + Auxiliary function for StringMethods, infers and checks dtype of data. 
- This is a "first line of defence" and just checks that the dtype is in - the *union* of the allowed types over all string methods below; this + This is a "first line of defence" at the creation of the StringMethods- + object (see _make_accessor), and just checks that the dtype is in the + *union* of the allowed types over all string methods below; this restriction is then refined on a per-method basis using the decorator - @forbid_nonstring_types. + @forbid_nonstring_types (more info in the corresponding docstring). This really should exclude all series/index with any non-string values, but that isn't practical for performance reasons until we have a str