pandas-dev · jreback · Mar 22, 2018 · Feb 19, 2018 · Mar 2, 2018 · Mar 2, 2018
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 from pandas.errors import AbstractMethodError
+from pandas.compat.numpy import function as nv
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -236,6 +237,52 @@ def isna(self):
         """
         raise AbstractMethodError(self)
 
+    def _values_for_argsort(self):
+        # type: () -> ndarray
+        """Get the ndarray to be passed to np.argsort.
+
+        This is called from within 'ExtensionArray.argsort'.
+
+        Returns
+        -------
+        values : ndarray
+        """
+        return np.array(self)
+
+    def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
+        """Returns the indices that would sort this array.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            Whether the indices should result in an ascending
+            or descending sort.
+        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+            Sorting algorithm.
+        args, kwargs:
+            passed through to :func:`numpy.argsort`.
+
+        Returns
+        -------
+        index_array : ndarray
+            Array of indices that sort ``self``.
+
+        See Also
+        --------
+        numpy.argsort
+        """
+        # Implementor note: You have two places to override the behavior of
+        # argsort.
+        # 1. _values_for_argsort : construct the values passed to np.argsort
+        # 2. argsort : total control over sorting.
+
+        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
+        values = self._values_for_argsort()
+        result = np.argsort(values, kind=kind, **kwargs)
+        if not ascending:
+            result = result[::-1]
+        return result
+
     # ------------------------------------------------------------------------
     # Indexing methods
     # ------------------------------------------------------------------------

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1390,17 +1390,24 @@ def check_for_ordered(self, op):
                             "you can use .as_ordered() to change the "
                             "Categorical to an ordered one\n".format(op=op))
 
-    def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
-        """
-        Returns the indices that would sort the Categorical instance if
-        'sort_values' was called. This function is implemented to provide
-        compatibility with numpy ndarray objects.
+    def _values_for_argsort(self):
+        return self._codes.copy()
 
-        While an ordering is applied to the category values, arg-sorting
-        in this context refers more to organizing and grouping together
-        based on matching category values. Thus, this function can be
-        called on an unordered Categorical instance unlike the functions
-        'Categorical.min' and 'Categorical.max'.
+    def argsort(self, *args, **kwargs):
+        # TODO(PY2): use correct signature
+        # We have to do *args, **kwargs to avoid a py2-only signature
+        # issue since np.argsort differs from argsort.
+        """Return the indices that would sort the Categorical.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            Whether the indices should result in an ascending
+            or descending sort.
+        kind : {'quicksort', 'mergesort', 'heapsort'}, optional
+            Sorting algorithm.
+        args, kwargs:
+            passed through to :func:`numpy.argsort`.
 
         Returns
         -------
@@ -1409,12 +1416,28 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs):
         See also
         --------
         numpy.ndarray.argsort
+
+        Notes
+        -----
+        While an ordering is applied to the category values, arg-sorting
+        in this context refers more to organizing and grouping together
+        based on matching category values. Thus, this function can be
+        called on an unordered Categorical instance unlike the functions
+        'Categorical.min' and 'Categorical.max'.
+
+        Examples
+        --------
+        >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort()
+        array([2, 0, 1, 3])
+
+        >>> cat = pd.Categorical(['b', 'b', 'a', 'c'],
+        ...                      categories=['c', 'b', 'a'],
+        ...                      ordered=True)
+        >>> cat.argsort()
+        array([3, 0, 1, 2])
         """
-        ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
-        result = np.argsort(self._codes.copy(), kind=kind, **kwargs)
-        if not ascending:
-            result = result[::-1]
-        return result
+        # Keep the implementation here just for the docstring.
+        return super(Categorical, self).argsort(*args, **kwargs)
 
     def sort_values(self, inplace=False, ascending=True, na_position='last'):
         """ Sorts the Categorical by category value returning a new

diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
@@ -31,3 +31,43 @@ def test_count(self, data_missing):
     def test_apply_simple_series(self, data):
         result = pd.Series(data).apply(id)
         assert isinstance(result, pd.Series)
+
+    def test_argsort(self, data_for_sorting):
+        result = pd.Series(data_for_sorting).argsort()
+        expected = pd.Series(np.array([2, 0, 1], dtype=np.int64))
+        self.assert_series_equal(result, expected)
+
+    def test_argsort_missing(self, data_missing_for_sorting):
+        result = pd.Series(data_missing_for_sorting).argsort()
+        expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values(self, data_for_sorting, ascending):
+        ser = pd.Series(data_for_sorting)
+        result = ser.sort_values(ascending=ascending)
+        expected = ser.iloc[[2, 0, 1]]
+        if not ascending:
+            expected = expected[::-1]
+
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values_missing(self, data_missing_for_sorting, ascending):
+        ser = pd.Series(data_missing_for_sorting)
+        result = ser.sort_values(ascending=ascending)
+        if ascending:
+            expected = ser.iloc[[2, 0, 1]]
+        else:
+            expected = ser.iloc[[0, 2, 1]]
+        self.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize('ascending', [True, False])
+    def test_sort_values_frame(self, data_for_sorting, ascending):
+        df = pd.DataFrame({"A": [1, 2, 1],
+                           "B": data_for_sorting})
+        result = df.sort_values(['A', 'B'])
+        expected = pd.DataFrame({"A": [1, 1, 2],
+                                 'B': data_for_sorting.take([2, 0, 1])},
+                                index=[2, 0, 1])
+        self.assert_frame_equal(result, expected)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -29,6 +29,18 @@ def data_missing():
     return Categorical([np.nan, 'A'])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'],
+                       ordered=True)
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return Categorical(['A', None, 'B'], categories=['B', 'A'],
+                       ordered=True)
+
+
 @pytest.fixture
 def na_value():
     return np.nan

diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py
@@ -30,6 +30,26 @@ def all_data(request, data, data_missing):
         return data_missing
 
 
+@pytest.fixture
+def data_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, C, A] with
+    A < B < C
+    """
+    raise NotImplementedError
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    """Length-3 array with a known sort order.
+
+    This should be three items [B, NA, A] with
+    A < B and NA missing.
+    """
+    raise NotImplementedError
+
+
 @pytest.fixture
 def na_cmp():
     """Binary operator for comparing NA values.

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -71,6 +71,7 @@ def isna(self):
         return np.array([x.is_nan() for x in self.values])
 
     def take(self, indexer, allow_fill=True, fill_value=None):
+        indexer = np.asarray(indexer)
         mask = indexer == -1
 
         indexer = _ensure_platform_int(indexer)

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -25,6 +25,20 @@ def data_missing():
     return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return DecimalArray([decimal.Decimal('1'),
+                         decimal.Decimal('2'),
+                         decimal.Decimal('0')])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return DecimalArray([decimal.Decimal('1'),
+                         decimal.Decimal('NaN'),
+                         decimal.Decimal('0')])
+
+
 @pytest.fixture
 def na_cmp():
     return lambda x, y: x.is_nan() and y.is_nan()
@@ -35,19 +49,32 @@ def na_value():
     return decimal.Decimal("NaN")
 
 
-class TestDtype(base.BaseDtypeTests):
+class BaseDecimal(object):
+    @staticmethod
+    def assert_series_equal(left, right, *args, **kwargs):
+
+        left_na = left.isna()
+        right_na = right.isna()
+
+        tm.assert_series_equal(left_na, right_na)
+        return tm.assert_series_equal(left[~left_na],
+                                      right[~right_na],
+                                      *args, **kwargs)
+
+
+class TestDtype(BaseDecimal, base.BaseDtypeTests):
     pass
 
 
-class TestInterface(base.BaseInterfaceTests):
+class TestInterface(BaseDecimal, base.BaseInterfaceTests):
     pass
 
 
-class TestConstructors(base.BaseConstructorsTests):
+class TestConstructors(BaseDecimal, base.BaseConstructorsTests):
     pass
 
 
-class TestReshaping(base.BaseReshapingTests):
+class TestReshaping(BaseDecimal, base.BaseReshapingTests):
 
     def test_align(self, data, na_value):
         # Have to override since assert_series_equal doesn't
@@ -88,15 +115,15 @@ def test_align_frame(self, data, na_value):
         assert e2.loc[0, 'A'].is_nan()
 
 
-class TestGetitem(base.BaseGetitemTests):
+class TestGetitem(BaseDecimal, base.BaseGetitemTests):
     pass
 
 
-class TestMissing(base.BaseMissingTests):
+class TestMissing(BaseDecimal, base.BaseMissingTests):
     pass
 
 
-class TestMethods(base.BaseMethodsTests):
+class TestMethods(BaseDecimal, base.BaseMethodsTests):
     @pytest.mark.parametrize('dropna', [True, False])
     @pytest.mark.xfail(reason="value_counts not implemented yet.")
     def test_value_counts(self, all_data, dropna):
@@ -112,7 +139,7 @@ def test_value_counts(self, all_data, dropna):
         tm.assert_series_equal(result, expected)
 
 
-class TestCasting(base.BaseCastingTests):
+class TestCasting(BaseDecimal, base.BaseCastingTests):
     pass
 
 

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -29,6 +29,16 @@ def data_missing():
     return JSONArray([{}, {'a': 10}])
 
 
+@pytest.fixture
+def data_for_sorting():
+    return JSONArray([{'b': 1}, {'c': 4}, {'a': 2, 'c': 3}])
+
+
+@pytest.fixture
+def data_missing_for_sorting():
+    return JSONArray([{'b': 1}, {}, {'c': 4}])
+
+
 @pytest.fixture
 def na_value():
     return {}
@@ -68,6 +78,26 @@ class TestMethods(base.BaseMethodsTests):
     def test_value_counts(self, all_data, dropna):
         pass
 
+    @pytest.mark.skip(reason="Dictionaries are not orderable.")
+    def test_argsort(self):
+        pass
+
+    @pytest.mark.skip(reason="Dictionaries are not orderable.")
+    def test_argsort_missing(self):
+        pass
+
+    @pytest.mark.skip(reason="Dictionaries are not orderable.")
+    def test_sort_values(self):
+        pass
+
+    @pytest.mark.skip(reason="Dictionaries are not orderable.")
+    def test_sort_values_missing(self):
+        pass
+
+    @pytest.mark.skip(reason="Dictionaries are not orderable.")
+    def test_sort_values_frame(self):
+        pass
+
 
 class TestCasting(base.BaseCastingTests):
     pass