From df70d2db4becdcf65dfb51cc09ddd562724ff3dc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 18 Feb 2021 15:25:26 +0100
Subject: [PATCH 1/9] [ArrayManager] GroupBy cython aggregations (no fallback)

---
 .github/workflows/ci.yml                      |   1 +
 pandas/core/groupby/generic.py                | 132 ++++++++++++++----
 pandas/core/internals/array_manager.py        |   2 +-
 .../tests/groupby/aggregate/test_aggregate.py |   7 +
 pandas/tests/groupby/aggregate/test_cython.py |  12 +-
 pandas/tests/groupby/aggregate/test_other.py  |   3 +
 6 files changed, 123 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index aec19e27a33e2..41ec1c9f36be4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -153,6 +153,7 @@ jobs:
       run: |
         source activate pandas-dev
         pytest pandas/tests/frame/methods --array-manager
+        pytest pandas/tests/groupby/aggregate/ --array-manager
 
         # indexing iset related (temporary since other tests don't pass yet)
         pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 9369dc61ca5f6..1e8837db22078 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -23,6 +23,7 @@
     Mapping,
     Optional,
     Sequence,
+    Tuple,
     Type,
     TypeVar,
     Union,
@@ -33,7 +34,7 @@
 import numpy as np
 
 from pandas._libs import lib, reduction as libreduction
-from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion
+from pandas._typing import ArrayLike, FrameOrSeries, FrameOrSeriesUnion, Manager
 from pandas.util._decorators import Appender, Substitution, doc
 
 from pandas.core.dtypes.cast import (
@@ -79,7 +80,7 @@
 )
 from pandas.core.indexes.api import Index, MultiIndex, all_indexes_same
 import pandas.core.indexes.base as ibase
-from pandas.core.internals import BlockManager
+from pandas.core.internals import ArrayManager, BlockManager
 from pandas.core.series import Series
 from pandas.core.util.numba_ import maybe_use_numba
 
@@ -157,6 +158,31 @@ def pinner(cls):
     return pinner
 
 
+def _cast_agg_result(
+    result, values: ArrayLike, how: str, reshape1d: bool = True
+) -> ArrayLike:
+    # see if we can cast the values to the desired dtype
+    # this may not be the original dtype
+    assert not isinstance(result, DataFrame)
+
+    dtype = maybe_cast_result_dtype(values.dtype, how)
+    result = maybe_downcast_numeric(result, dtype)
+
+    if isinstance(values, Categorical) and isinstance(result, np.ndarray):
+        # If the Categorical op didn't raise, it is dtype-preserving
+        result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
+        # Note this will have result.dtype == dtype from above
+
+    elif reshape1d and isinstance(result, np.ndarray) and result.ndim == 1:
+        # We went through a SeriesGroupByPath and need to reshape
+        # GH#32223 includes case with IntegerArray values
+        result = result.reshape(1, -1)
+        # test_groupby_duplicate_columns gets here with
+        #  result.dtype == int64, values.dtype=object, how="min"
+
+    return result
+
+
 @pin_allowlisted_properties(Series, base.series_apply_allowlist)
 class SeriesGroupBy(GroupBy[Series]):
     _apply_allowlist = base.series_apply_allowlist
@@ -1041,10 +1067,18 @@ def _iterate_slices(self) -> Iterable[Series]:
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ) -> DataFrame:
-        agg_mgr = self._cython_agg_blocks(
-            how, alt=alt, numeric_only=numeric_only, min_count=min_count
-        )
-        return self._wrap_agged_blocks(agg_mgr.blocks, items=agg_mgr.items)
+        if isinstance(self.obj._mgr, BlockManager):
+            agg_mgr = self._cython_agg_blocks(
+                how, alt=alt, numeric_only=numeric_only, min_count=min_count
+            )
+            return self._wrap_agged_blocks(agg_mgr.blocks, items=agg_mgr.items)
+        elif isinstance(self.obj._mgr, ArrayManager):
+            agg_arrays, columns = self._cython_agg_arrays(
+                how, alt=alt, numeric_only=numeric_only, min_count=min_count
+            )
+            return self._wrap_agged_arrays(agg_arrays, columns)
+        else:
+            raise TypeError("Unknown manager type")
 
     def _cython_agg_blocks(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
@@ -1055,28 +1089,6 @@ def _cython_agg_blocks(
         if numeric_only:
             data = data.get_numeric_data(copy=False)
 
-        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
-            # see if we can cast the values to the desired dtype
-            # this may not be the original dtype
-            assert not isinstance(result, DataFrame)
-
-            dtype = maybe_cast_result_dtype(values.dtype, how)
-            result = maybe_downcast_numeric(result, dtype)
-
-            if isinstance(values, Categorical) and isinstance(result, np.ndarray):
-                # If the Categorical op didn't raise, it is dtype-preserving
-                result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
-                # Note this will have result.dtype == dtype from above
-
-            elif isinstance(result, np.ndarray) and result.ndim == 1:
-                # We went through a SeriesGroupByPath and need to reshape
-                # GH#32223 includes case with IntegerArray values
-                result = result.reshape(1, -1)
-                # test_groupby_duplicate_columns gets here with
-                #  result.dtype == int64, values.dtype=object, how="min"
-
-            return result
-
         def py_fallback(bvalues: ArrayLike) -> ArrayLike:
             # if self.grouper.aggregate fails, we fall back to a pure-python
             #  solution
@@ -1139,7 +1151,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
 
                 result = py_fallback(bvalues)
 
-            return cast_agg_result(result, bvalues, how)
+            return _cast_agg_result(result, bvalues, how)
 
         # TypeError -> we may have an exception in trying to aggregate
         #  continue and exclude the block
@@ -1151,6 +1163,66 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
 
         return new_mgr
 
+    def _cython_agg_arrays(
+        self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
+    ) -> Tuple[List[ArrayLike], Index]:
+
+        data: ArrayManager = self._get_data_to_aggregate()
+
+        if numeric_only:
+            data = data.get_numeric_data(copy=False)
+
+        def py_fallback(bvalues: ArrayLike) -> ArrayLike:
+            raise NotImplementedError
+
+        def array_func(values: ArrayLike) -> ArrayLike:
+
+            try:
+                result = self.grouper._cython_operation(
+                    "aggregate",
+                    values,
+                    how,
+                    axis=1,
+                    min_count=min_count,
+                )
+            except NotImplementedError:
+                # generally if we have numeric_only=False
+                # and non-applicable functions
+                # try to python agg
+
+                if alt is None:
+                    # we cannot perform the operation
+                    # in an alternate way, exclude the block
+                    assert how == "ohlc"
+                    raise
+
+                result = py_fallback(values)
+
+            return _cast_agg_result(result, values, how, reshape1d=False)
+
+        result_arrays = [array_func(arr) for arr in data.arrays]
+
+        if not len(result_arrays):
+            raise DataError("No numeric types to aggregate")
+
+        return result_arrays, data.axes[0]
+
+    def _wrap_agged_arrays(self, arrays: List[ArrayLike], columns: Index) -> DataFrame:
+        if not self.as_index:
+            index = np.arange(arrays[0].shape[0])
+            mgr = ArrayManager(arrays, axes=[index, columns])
+            result = self.obj._constructor(mgr)
+            self._insert_inaxis_grouper_inplace(result)
+        else:
+            index = self.grouper.result_index
+            mgr = ArrayManager(arrays, axes=[index, columns])
+            result = self.obj._constructor(mgr)
+
+        if self.axis == 1:
+            result = result.T
+
+        return self._reindex_output(result)._convert(datetime=True)
+
     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
             raise AssertionError("Number of keys must be 1")
@@ -1633,7 +1705,7 @@ def _wrap_frame_output(self, result, obj: DataFrame) -> DataFrame:
         else:
             return self.obj._constructor(result, index=obj.index, columns=result_index)
 
-    def _get_data_to_aggregate(self) -> BlockManager:
+    def _get_data_to_aggregate(self) -> Manager:
         obj = self._obj_with_exclusions
         if self.axis == 1:
             return obj.T._mgr
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 083f32488acd4..1c2c7010e3b87 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -302,7 +302,7 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
             if hasattr(arr, "tz") and arr.tz is None:  # type: ignore[union-attr]
                 # DatetimeArray needs to be converted to ndarray for DatetimeBlock
                 arr = arr._data  # type: ignore[union-attr]
-            elif arr.dtype.kind == "m":
+            elif arr.dtype.kind == "m" and not isinstance(arr, np.ndarray):
                 # TimedeltaArray needs to be converted to ndarray for TimedeltaBlock
                 arr = arr._data  # type: ignore[union-attr]
             if isinstance(arr, np.ndarray):
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 48527de6b2047..40c130456676d 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -9,6 +9,7 @@
 import pytest
 
 from pandas.errors import PerformanceWarning
+import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import is_integer_dtype
 
@@ -38,6 +39,7 @@ def test_agg_regression1(tsframe):
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) quantile/describe
 def test_agg_must_agg(df):
     grouped = df.groupby("A")["C"]
 
@@ -127,6 +129,7 @@ def test_groupby_aggregation_multi_level_column():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) non-cython agg
 def test_agg_apply_corner(ts, tsframe):
     # nothing to group, all NA
     grouped = ts.groupby(ts * np.nan)
@@ -203,6 +206,7 @@ def test_aggregate_str_func(tsframe, groupbyfunc):
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) non-cython agg
 def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func):
     gb = df.groupby(level=0)
     if reduction_func in ("idxmax", "idxmin"):
@@ -482,6 +486,7 @@ def test_agg_index_has_complex_internals(index):
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) agg py_fallback
 def test_agg_split_block():
     # https://github.com/pandas-dev/pandas/issues/31522
     df = DataFrame(
@@ -499,6 +504,7 @@ def test_agg_split_block():
     tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) agg py_fallback
 def test_agg_split_object_part_datetime():
     # https://github.com/pandas-dev/pandas/pull/31616
     df = DataFrame(
@@ -1189,6 +1195,7 @@ def test_aggregate_datetime_objects():
     tm.assert_series_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) agg py_fallback
 def test_aggregate_numeric_object_dtype():
     # https://github.com/pandas-dev/pandas/issues/39329
     # simplified case: multiple object columns where one is all-NaN
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 8799f6faa775c..31b01072e9844 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -33,7 +33,10 @@
         "max",
     ],
 )
-def test_cythonized_aggers(op_name):
+def test_cythonized_aggers(op_name, using_array_manager):
+    if using_array_manager and op_name in {"count", "sem"}:
+        # TODO(ArrayManager) groupby count/sem
+        pytest.skip("ArrayManager groupby count/sem not yet implemented")
     data = {
         "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
         "B": ["A", "B"] * 6,
@@ -265,7 +268,7 @@ def test_cython_with_timestamp_and_nat(op, data):
         "cummax",
     ],
 )
-def test_read_only_buffer_source_agg(agg):
+def test_read_only_buffer_source_agg(agg, using_array_manager):
     # https://github.com/pandas-dev/pandas/issues/36014
     df = DataFrame(
         {
@@ -273,7 +276,10 @@ def test_read_only_buffer_source_agg(agg):
             "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
         }
     )
-    df._mgr.blocks[0].values.flags.writeable = False
+    if using_array_manager:
+        df._mgr.arrays[0].flags.writeable = False
+    else:
+        df._mgr.blocks[0].values.flags.writeable = False
 
     result = df.groupby(["species"]).agg({"sepal_length": agg})
     expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 04f17865b088a..b56051ec6c9b3 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -412,6 +414,7 @@ def __call__(self, x):
         tm.assert_frame_equal(result, expected)
 
 
+@td.skip_array_manager_not_yet_implemented  # TODO(ArrayManager) columns with ndarrays
 def test_agg_over_numpy_arrays():
     # GH 3788
     df = DataFrame(

From 692175e8f55e2b587d25a0355e4b01a706a46235 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 18 Feb 2021 15:30:08 +0100
Subject: [PATCH 2/9] style

---
 pandas/core/groupby/generic.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index dad2b484c0098..373e032a0ceeb 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1207,17 +1207,14 @@ def _cython_agg_arrays(
             data = data.get_numeric_data(copy=False)
 
         def py_fallback(bvalues: ArrayLike) -> ArrayLike:
+            # TODO(ArrayManager)
             raise NotImplementedError
 
         def array_func(values: ArrayLike) -> ArrayLike:
 
             try:
                 result = self.grouper._cython_operation(
-                    "aggregate",
-                    values,
-                    how,
-                    axis=1,
-                    min_count=min_count,
+                    "aggregate", values, how, axis=1, min_count=min_count
                 )
             except NotImplementedError:
                 # generally if we have numeric_only=False

From a7bf71ee79321f484b2f2fad6fa901fb9e7290f2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 20:08:42 +0100
Subject: [PATCH 3/9] common _cython_agg_manager

---
 pandas/core/groupby/generic.py         | 150 ++++++++-----------------
 pandas/core/internals/array_manager.py |  26 +++++
 2 files changed, 71 insertions(+), 105 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 5e7412793e263..909dbec88e63c 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -24,7 +24,6 @@
     List,
     Mapping,
     Optional,
-    Tuple,
     Type,
     TypeVar,
     Union,
@@ -186,31 +185,6 @@ def pinner(cls):
     return pinner
 
 
-def _cast_agg_result(
-    result, values: ArrayLike, how: str, reshape1d: bool = True
-) -> ArrayLike:
-    # see if we can cast the values to the desired dtype
-    # this may not be the original dtype
-    assert not isinstance(result, DataFrame)
-
-    dtype = maybe_cast_result_dtype(values.dtype, how)
-    result = maybe_downcast_numeric(result, dtype)
-
-    if isinstance(values, Categorical) and isinstance(result, np.ndarray):
-        # If the Categorical op didn't raise, it is dtype-preserving
-        result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
-        # Note this will have result.dtype == dtype from above
-
-    elif reshape1d and isinstance(result, np.ndarray) and result.ndim == 1:
-        # We went through a SeriesGroupByPath and need to reshape
-        # GH#32223 includes case with IntegerArray values
-        result = result.reshape(1, -1)
-        # test_groupby_duplicate_columns gets here with
-        #  result.dtype == int64, values.dtype=object, how="min"
-
-    return result
-
-
 @pin_allowlisted_properties(Series, base.series_apply_allowlist)
 class SeriesGroupBy(GroupBy[Series]):
     _apply_allowlist = base.series_apply_allowlist
@@ -1104,28 +1078,48 @@ def _iterate_slices(self) -> Iterable[Series]:
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ) -> DataFrame:
-        if isinstance(self.obj._mgr, BlockManager):
-            agg_mgr = self._cython_agg_blocks(
-                how, alt=alt, numeric_only=numeric_only, min_count=min_count
-            )
-            return self._wrap_agged_manager(agg_mgr)
-        elif isinstance(self.obj._mgr, ArrayManager):
-            agg_arrays, columns = self._cython_agg_arrays(
-                how, alt=alt, numeric_only=numeric_only, min_count=min_count
-            )
-            return self._wrap_agged_arrays(agg_arrays, columns)
-        else:
-            raise TypeError("Unknown manager type")
+        agg_mgr = self._cython_agg_manager(
+            how, alt=alt, numeric_only=numeric_only, min_count=min_count
+        )
+        return self._wrap_agged_manager(agg_mgr)
 
-    def _cython_agg_blocks(
+    def _cython_agg_manager(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ) -> BlockManager:
 
-        data: BlockManager = self._get_data_to_aggregate()
+        data: Manager = self._get_data_to_aggregate()
 
         if numeric_only:
             data = data.get_numeric_data(copy=False)
 
+        using_array_manager = isinstance(data, ArrayManager)
+
+        def cast_agg_result(result, values: ArrayLike, how: str) -> ArrayLike:
+            # see if we can cast the values to the desired dtype
+            # this may not be the original dtype
+            assert not isinstance(result, DataFrame)
+
+            dtype = maybe_cast_result_dtype(values.dtype, how)
+            result = maybe_downcast_numeric(result, dtype)
+
+            if isinstance(values, Categorical) and isinstance(result, np.ndarray):
+                # If the Categorical op didn't raise, it is dtype-preserving
+                result = type(values)._from_sequence(result.ravel(), dtype=values.dtype)
+                # Note this will have result.dtype == dtype from above
+
+            elif (
+                not using_array_manager
+                and isinstance(result, np.ndarray)
+                and result.ndim == 1
+            ):
+                # We went through a SeriesGroupByPath and need to reshape
+                # GH#32223 includes case with IntegerArray values
+                result = result.reshape(1, -1)
+                # test_groupby_duplicate_columns gets here with
+                #  result.dtype == int64, values.dtype=object, how="min"
+
+            return result
+
         def py_fallback(bvalues: ArrayLike) -> ArrayLike:
             # if self.grouper.aggregate fails, we fall back to a pure-python
             #  solution
@@ -1169,11 +1163,11 @@ def py_fallback(bvalues: ArrayLike) -> ArrayLike:
                 result = mgr.blocks[0].values
                 return result
 
-        def blk_func(bvalues: ArrayLike) -> ArrayLike:
+        def array_func(values: ArrayLike) -> ArrayLike:
 
             try:
                 result = self.grouper._cython_operation(
-                    "aggregate", bvalues, how, axis=1, min_count=min_count
+                    "aggregate", values, how, axis=1, min_count=min_count
                 )
             except NotImplementedError:
                 # generally if we have numeric_only=False
@@ -1186,77 +1180,20 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
                     assert how == "ohlc"
                     raise
 
-                result = py_fallback(bvalues)
+                result = py_fallback(values)
 
-            return _cast_agg_result(result, bvalues, how)
+            return cast_agg_result(result, values, how)
 
         # TypeError -> we may have an exception in trying to aggregate
         #  continue and exclude the block
         # NotImplementedError -> "ohlc" with wrong dtype
-        new_mgr = data.grouped_reduce(blk_func, ignore_failures=True)
+        new_mgr = data.grouped_reduce(array_func, ignore_failures=True)
 
         if not len(new_mgr):
             raise DataError("No numeric types to aggregate")
 
         return new_mgr
 
-    def _cython_agg_arrays(
-        self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
-    ) -> Tuple[List[ArrayLike], Index]:
-
-        data: ArrayManager = self._get_data_to_aggregate()
-
-        if numeric_only:
-            data = data.get_numeric_data(copy=False)
-
-        def py_fallback(bvalues: ArrayLike) -> ArrayLike:
-            # TODO(ArrayManager)
-            raise NotImplementedError
-
-        def array_func(values: ArrayLike) -> ArrayLike:
-
-            try:
-                result = self.grouper._cython_operation(
-                    "aggregate", values, how, axis=1, min_count=min_count
-                )
-            except NotImplementedError:
-                # generally if we have numeric_only=False
-                # and non-applicable functions
-                # try to python agg
-
-                if alt is None:
-                    # we cannot perform the operation
-                    # in an alternate way, exclude the block
-                    assert how == "ohlc"
-                    raise
-
-                result = py_fallback(values)
-
-            return _cast_agg_result(result, values, how, reshape1d=False)
-
-        result_arrays = [array_func(arr) for arr in data.arrays]
-
-        if not len(result_arrays):
-            raise DataError("No numeric types to aggregate")
-
-        return result_arrays, data.axes[0]
-
-    def _wrap_agged_arrays(self, arrays: List[ArrayLike], columns: Index) -> DataFrame:
-        if not self.as_index:
-            index = np.arange(arrays[0].shape[0])
-            mgr = ArrayManager(arrays, axes=[index, columns])
-            result = self.obj._constructor(mgr)
-            self._insert_inaxis_grouper_inplace(result)
-        else:
-            index = self.grouper.result_index
-            mgr = ArrayManager(arrays, axes=[index, columns])
-            result = self.obj._constructor(mgr)
-
-        if self.axis == 1:
-            result = result.T
-
-        return self._reindex_output(result)._convert(datetime=True)
-
     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
             raise AssertionError("Number of keys must be 1")
@@ -1828,17 +1765,20 @@ def _wrap_transformed_output(
 
         return result
 
-    def _wrap_agged_manager(self, mgr: BlockManager) -> DataFrame:
+    def _wrap_agged_manager(self, mgr: Manager) -> DataFrame:
+        # TODO clean this up
+        axis = 1 if isinstance(mgr, BlockManager) else 0
+        axes = mgr.axes if isinstance(mgr, BlockManager) else mgr._axes
         if not self.as_index:
             index = np.arange(mgr.shape[1])
-            mgr.axes[1] = ibase.Index(index)
+            axes[axis] = ibase.Index(index)
             result = self.obj._constructor(mgr)
 
             self._insert_inaxis_grouper_inplace(result)
             result = result._consolidate()
         else:
             index = self.grouper.result_index
-            mgr.axes[1] = index
+            axes[axis] = index
             result = self.obj._constructor(mgr)
 
         if self.axis == 1:
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index b2e01c55f7aad..bf6ded2f26bdf 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -215,6 +215,32 @@ def reduce(
         indexer = np.arange(self.shape[0])
         return new_mgr, indexer
 
+    def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
+        """
+        Apply grouped reduction function columnwise, returning a new ArrayManager.
+
+        Parameters
+        ----------
+        func : grouped reduction function
+        ignore_failures : bool, default False
+            Whether to drop columns where func raises TypeError.
+
+        Returns
+        -------
+        ArrayManager
+        """
+        result_arrays: List[ArrayLike] = []
+
+        # TODO ignore_failures
+        result_arrays = [func(arr) for arr in self.arrays]
+
+        if len(result_arrays) == 0:
+            index = Index([None])  # placeholder
+        else:
+            index = Index(range(result_arrays[0].shape[0]))
+
+        return type(self)(result_arrays, [index, self.items])
+
     def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager:
         """
         Apply array_op blockwise with another (aligned) BlockManager.

From 8c1b8a21aab236379a6d5b12306f51d3bdd3f360 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 20:12:09 +0100
Subject: [PATCH 4/9] clean-up test

---
 pandas/tests/groupby/aggregate/test_cython.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index d8eee81274f92..fc92abdff6eab 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -276,7 +276,7 @@ def test_cython_with_timestamp_and_nat(op, data):
         "cummax",
     ],
 )
-def test_read_only_buffer_source_agg(agg, using_array_manager):
+def test_read_only_buffer_source_agg(agg):
     # https://github.com/pandas-dev/pandas/issues/36014
     df = DataFrame(
         {
@@ -284,10 +284,7 @@ def test_read_only_buffer_source_agg(agg, using_array_manager):
             "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
         }
     )
-    if using_array_manager:
-        df._mgr.arrays[0].flags.writeable = False
-    else:
-        df._mgr.blocks[0].values.flags.writeable = False
+    df._mgr.arrays[0].flags.writeable = False
 
     result = df.groupby(["species"]).agg({"sepal_length": agg})
     expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})

From 06b6f3f389b3a538cf59f9fda62d63407f60b327 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 20:15:50 +0100
Subject: [PATCH 5/9] clean-up setting of index axis

---
 pandas/core/groupby/generic.py         |  7 ++-----
 pandas/core/internals/array_manager.py | 20 +++++++++++---------
 pandas/core/internals/managers.py      | 19 +++++++++++--------
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 909dbec88e63c..e26a421de63a4 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1766,19 +1766,16 @@ def _wrap_transformed_output(
         return result
 
     def _wrap_agged_manager(self, mgr: Manager) -> DataFrame:
-        # TODO clean this up
-        axis = 1 if isinstance(mgr, BlockManager) else 0
-        axes = mgr.axes if isinstance(mgr, BlockManager) else mgr._axes
         if not self.as_index:
             index = np.arange(mgr.shape[1])
-            axes[axis] = ibase.Index(index)
+            mgr.set_axis(1, ibase.Index(index), verify_integrity=False)
             result = self.obj._constructor(mgr)
 
             self._insert_inaxis_grouper_inplace(result)
             result = result._consolidate()
         else:
             index = self.grouper.result_index
-            axes[axis] = index
+            mgr.set_axis(1, index, verify_integrity=False)
             result = self.obj._constructor(mgr)
 
         if self.axis == 1:
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index bf6ded2f26bdf..57a1ce26c14cd 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -141,18 +141,20 @@ def _normalize_axis(axis):
         axis = 1 if axis == 0 else 0
         return axis
 
-    # TODO can be shared
-    def set_axis(self, axis: int, new_labels: Index) -> None:
+    def set_axis(
+        self, axis: int, new_labels: Index, verify_integrity: bool = True
+    ) -> None:
         # Caller is responsible for ensuring we have an Index object.
         axis = self._normalize_axis(axis)
-        old_len = len(self._axes[axis])
-        new_len = len(new_labels)
+        if verify_integrity:
+            old_len = len(self._axes[axis])
+            new_len = len(new_labels)
 
-        if new_len != old_len:
-            raise ValueError(
-                f"Length mismatch: Expected axis has {old_len} elements, new "
-                f"values have {new_len} elements"
-            )
+            if new_len != old_len:
+                raise ValueError(
+                    f"Length mismatch: Expected axis has {old_len} elements, new "
+                    f"values have {new_len} elements"
+                )
 
         self._axes[axis] = new_labels
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 81fccc335b7c8..89faf7c89e402 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -235,16 +235,19 @@ def shape(self) -> Shape:
     def ndim(self) -> int:
         return len(self.axes)
 
-    def set_axis(self, axis: int, new_labels: Index) -> None:
+    def set_axis(
+        self, axis: int, new_labels: Index, verify_integrity: bool = True
+    ) -> None:
         # Caller is responsible for ensuring we have an Index object.
-        old_len = len(self.axes[axis])
-        new_len = len(new_labels)
+        if verify_integrity:
+            old_len = len(self.axes[axis])
+            new_len = len(new_labels)
 
-        if new_len != old_len:
-            raise ValueError(
-                f"Length mismatch: Expected axis has {old_len} elements, new "
-                f"values have {new_len} elements"
-            )
+            if new_len != old_len:
+                raise ValueError(
+                    f"Length mismatch: Expected axis has {old_len} elements, new "
+                    f"values have {new_len} elements"
+                )
 
         self.axes[axis] = new_labels
 

From 244152ba239fa1f11306cbccf2efec03a9aeccc9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 21:39:22 +0100
Subject: [PATCH 6/9] fix BM.arrays for use in tests

---
 pandas/core/internals/managers.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index 89faf7c89e402..09b8ebe776b55 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -286,7 +286,7 @@ def get_dtypes(self):
         return algos.take_nd(dtypes, self.blknos, allow_fill=False)
 
     @property
-    def arrays(self):
+    def arrays(self) -> List[ArrayLike]:
         """
         Quick access to the backing arrays of the Blocks.
 
@@ -294,8 +294,7 @@ def arrays(self):
         Not to be used in actual code, and return value is not the same as the
         ArrayManager method (list of 1D arrays vs iterator of 2D ndarrays / 1D EAs).
         """
-        for blk in self.blocks:
-            yield blk.values
+        return [blk.values for blk in self.blocks]
 
     def __getstate__(self):
         block_values = [b.values for b in self.blocks]

From 32bf7d136fa59c9a42867cdba2ae909f00ea77a3 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Feb 2021 22:56:12 +0100
Subject: [PATCH 7/9] typing

---
 pandas/core/groupby/generic.py         | 2 +-
 pandas/core/internals/array_manager.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e26a421de63a4..45914519c0a94 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1085,7 +1085,7 @@ def _cython_agg_general(
 
     def _cython_agg_manager(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
-    ) -> BlockManager:
+    ) -> Manager:
 
         data: Manager = self._get_data_to_aggregate()
 
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 57a1ce26c14cd..e1770c67f8107 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -231,8 +231,6 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
         -------
         ArrayManager
         """
-        result_arrays: List[ArrayLike] = []
-
         # TODO ignore_failures
         result_arrays = [func(arr) for arr in self.arrays]
 

From b44804ee0916d379443fffd6ce60cb958ac6262e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 25 Feb 2021 08:24:48 +0100
Subject: [PATCH 8/9] use add_marker

---
 pandas/tests/groupby/aggregate/test_cython.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index fc92abdff6eab..558e01d08660c 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -41,10 +41,12 @@
         "max",
     ],
 )
-def test_cythonized_aggers(op_name, using_array_manager):
+def test_cythonized_aggers(op_name, using_array_manager, request):
     if using_array_manager and op_name in {"count", "sem"}:
         # TODO(ArrayManager) groupby count/sem
-        pytest.skip("ArrayManager groupby count/sem not yet implemented")
+        request.node.add_marker(
+            pytest.mark.xfail(reason="ArrayManager groupby count/sem not implemented")
+        )
     data = {
         "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
         "B": ["A", "B"] * 6,

From 50fb97fc45de993d9ff1d4784276ff409e2300a9 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 25 Feb 2021 08:26:56 +0100
Subject: [PATCH 9/9] remove xfail marker - count is actually implemented now

---
 pandas/tests/groupby/aggregate/test_cython.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index 558e01d08660c..4a8aabe41b754 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -41,12 +41,7 @@
         "max",
     ],
 )
-def test_cythonized_aggers(op_name, using_array_manager, request):
-    if using_array_manager and op_name in {"count", "sem"}:
-        # TODO(ArrayManager) groupby count/sem
-        request.node.add_marker(
-            pytest.mark.xfail(reason="ArrayManager groupby count/sem not implemented")
-        )
+def test_cythonized_aggers(op_name):
     data = {
         "A": [0, 0, 0, 0, 1, 1, 1, 1, 1, 1.0, np.nan, np.nan],
         "B": ["A", "B"] * 6,