From 773c5b7705eaf11ba1ca3adff3c42a01e68acfda Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Aug 2017 11:34:40 -0700 Subject: [PATCH 1/8] Define pivot_table in DataFrame instead of pinning it on in reshape.pivot --- pandas/core/frame.py | 84 ++++++++++++++++++++++++++++++++++++ pandas/core/reshape/pivot.py | 79 +-------------------------------- 2 files changed, 85 insertions(+), 78 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 027a427555253..82f6364081bb3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4146,6 +4146,90 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + def pivot_table(self, values=None, index=None, columns=None, + aggfunc='mean', fill_value=None, margins=False, + dropna=True, margins_name='All'): + """ + Create a spreadsheet-style pivot table as a DataFrame. The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame + + Parameters + ---------- + data : DataFrame + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. 
+ aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... 
# doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + from pandas.core.reshape.reshape import pivot_table + return pivot_table(self, values=values, index=index, columns=columns, + aggfunc=aggfunc, fill_value=fill_value, + margins=margins, dropna=dropna, + margins_name=margins_name) + def stack(self, level=-1, dropna=True): """ Pivot a level of the (possibly hierarchical) column labels, returning a diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index d4ea49c130add..03c89e8d06118 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -16,81 +16,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in the - pivot table will be stored in MultiIndex objects (hierarchical indexes) on - the index and columns of the result DataFrame - - Parameters - ---------- - data : DataFrame - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table index. If an array is passed, it - is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The list - can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, it - is being used as the same manner as column values. 
- aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names (inferred - from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... 
# doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ index = _convert_by(index) columns = _convert_by(columns) @@ -202,9 +127,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = table.dropna(how='all', axis=1) return table - - -DataFrame.pivot_table = pivot_table +pivot_table.__doc__ = DataFrame.pivot_table.__doc__ def _add_margins(table, data, values, rows, cols, aggfunc, From 23b1cb131c96a8ed0c6adb4f5093a9abe77e0523 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 4 Aug 2017 16:19:12 -0700 Subject: [PATCH 2/8] Fix NameError caused by referencing DataFrame incorrectly; avoid import of DataFrame altogether using ABC classes where possible --- pandas/core/reshape/pivot.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 03c89e8d06118..c1341a35b1070 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -2,8 +2,10 @@ from pandas.core.dtypes.common import is_list_like, is_scalar +from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries + from pandas.core.reshape.concat import concat -from pandas import Series, DataFrame, MultiIndex, Index +from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product from pandas.core.index import _get_combined_index @@ -16,6 +18,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): + """ See DataFrame.pivot_table.__doc__ """ index = _convert_by(index) columns = _convert_by(columns) @@ -87,6 +90,7 @@ def pivot_table(data, values=None, index=None, columns=None, 
aggfunc='mean', table = agged.unstack(to_unstack) if not dropna: + from pandas import MultiIndex try: m = MultiIndex.from_arrays(cartesian_product(table.index.levels), names=table.index.names) @@ -101,7 +105,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', except AttributeError: pass # it's a single level or a series - if isinstance(table, DataFrame): + if isinstance(table, ABCDataFrame): table = table.sort_index(axis=1) if fill_value is not None: @@ -123,11 +127,10 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', table = table.T # GH 15193 Makse sure empty columns are removed if dropna=True - if isinstance(table, DataFrame) and dropna: + if isinstance(table, ABCDataFrame) and dropna: table = table.dropna(how='all', axis=1) return table -pivot_table.__doc__ = DataFrame.pivot_table.__doc__ def _add_margins(table, data, values, rows, cols, aggfunc, @@ -153,7 +156,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: key = margins_name - if not values and isinstance(table, Series): + if not values and isinstance(table, ABCSeries): # If there are no values and the table is a series, then there is only # one column in the data. Compute grand margin and return it. 
return table.append(Series({key: grand_margin[margins_name]})) @@ -180,6 +183,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, else: row_margin[k] = grand_margin[k[0]] + from pandas import DataFrame margin_dummy = DataFrame(row_margin, columns=[key]).T row_names = result.index.names @@ -325,7 +329,7 @@ def _convert_by(by): if by is None: by = [] elif (is_scalar(by) or - isinstance(by, (np.ndarray, Index, Series, Grouper)) or + isinstance(by, (np.ndarray, ABCIndex, ABCSeries, Grouper)) or hasattr(by, '__call__')): by = [by] else: @@ -446,6 +450,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, if values is not None and aggfunc is None: raise ValueError("values cannot be used without an aggfunc.") + from pandas import DataFrame df = DataFrame(data, index=common_idx) if values is None: df['__dummy__'] = 0 @@ -543,7 +548,7 @@ def _get_names(arrs, names, prefix='row'): if names is None: names = [] for i, arr in enumerate(arrs): - if isinstance(arr, Series) and arr.name is not None: + if isinstance(arr, ABCSeries) and arr.name is not None: names.append(arr.name) else: names.append('%s_%d' % (prefix, i)) From 082715e5bf8cf2ebaef5d2551316405bfd65bc9b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 5 Aug 2017 21:28:07 -0700 Subject: [PATCH 3/8] Fix import from wrong module --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 82f6364081bb3..df88408b9d0c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4224,7 +4224,7 @@ def pivot_table(self, values=None, index=None, columns=None, DataFrame.pivot : pivot without aggregation that can handle non-numeric data """ - from pandas.core.reshape.reshape import pivot_table + from pandas.core.reshape.pivot import pivot_table return pivot_table(self, values=values, index=index, columns=columns, aggfunc=aggfunc, fill_value=fill_value, margins=margins, dropna=dropna, From 
3d6f3de343b74aaaa262333fc032d62a47327a3d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 6 Aug 2017 12:43:29 -0700 Subject: [PATCH 4/8] Fix broken test by checking isinstance(by, Index) instead of ABCIndex ABCIndex appears to not recognize Int64Index, needs an Issue --- pandas/core/reshape/pivot.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index c1341a35b1070..14b1d4134d541 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -2,13 +2,13 @@ from pandas.core.dtypes.common import is_list_like, is_scalar -from pandas.core.dtypes.generic import ABCDataFrame, ABCIndex, ABCSeries +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.reshape.concat import concat from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product -from pandas.core.index import _get_combined_index +from pandas.core.index import Index, _get_combined_index from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com @@ -126,7 +126,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if len(index) == 0 and len(columns) > 0: table = table.T - # GH 15193 Makse sure empty columns are removed if dropna=True + # GH 15193 Make sure empty columns are removed if dropna=True if isinstance(table, ABCDataFrame) and dropna: table = table.dropna(how='all', axis=1) @@ -329,7 +329,7 @@ def _convert_by(by): if by is None: by = [] elif (is_scalar(by) or - isinstance(by, (np.ndarray, ABCIndex, ABCSeries, Grouper)) or + isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) or hasattr(by, '__call__')): by = [by] else: From fce2a3824b9919dffee4da090ccf8869515495c0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Aug 2017 09:49:09 -0700 Subject: [PATCH 5/8] Move pivot_table.__doc__ into _shared_docs --- 
pandas/core/frame.py | 87 +++--------------------------------- pandas/core/reshape/pivot.py | 81 ++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 81 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index df88408b9d0c0..011a9939b2796 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -101,6 +101,7 @@ from pandas._libs import lib, algos as libalgos from pandas.core.config import get_option +from pandas.core.reshape import pivot # --------------------------------------------------------------------- # Docstring templates @@ -4146,89 +4147,15 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + @Substitution('') + @Appender(_shared_docs['pivot_table']) def pivot_table(self, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in - the pivot table will be stored in MultiIndex objects (hierarchical - indexes) on the index and columns of the result DataFrame - - Parameters - ---------- - data : DataFrame - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The - list can contain any of the other types (except list). - Keys to group by on the pivot table index. If an array is passed, - it is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The - list can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, - it is being used as the same manner as column values. 
- aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names - (inferred from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... 
# doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ - from pandas.core.reshape.pivot import pivot_table - return pivot_table(self, values=values, index=index, columns=columns, - aggfunc=aggfunc, fill_value=fill_value, - margins=margins, dropna=dropna, - margins_name=margins_name) + return pivot.pivot_table(self, values=values, index=index, + columns=columns, aggfunc=aggfunc, + fill_value=fill_value, margins=margins, + dropna=dropna, margins_name=margins_name) def stack(self, level=-1, dropna=True): """ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 14b1d4134d541..33813f6ee128d 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -5,6 +5,7 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.reshape.concat import concat +from pandas.core.generic import _shared_docs from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product @@ -12,13 +13,91 @@ from pandas.compat import range, lrange, zip from pandas import compat import pandas.core.common as com +from pandas.util._decorators import Appender, Substitution import numpy as np +_shared_docs['pivot_table'] = """ + Create a spreadsheet-style pivot table as a DataFrame. The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. 
If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... 
# doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + + +@Substitution('\ndata : DataFrame') +@Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - """ See DataFrame.pivot_table.__doc__ """ index = _convert_by(index) columns = _convert_by(columns) From fd179369fb5f32c13bd3e1aabc1b5d9ef7b4a432 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Aug 2017 10:47:15 -0700 Subject: [PATCH 6/8] Add pivot_table to _shared_docs in frame, then import frame before reshape.pivot. Going in the opposite order results in a circular import --- pandas/core/frame.py | 76 +++++++++++++++++++++++++++++++++- pandas/core/reshape/pivot.py | 80 ++---------------------------------- 2 files changed, 78 insertions(+), 78 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 011a9939b2796..910d78fc5ce26 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -101,7 +101,6 @@ from pandas._libs import lib, algos as libalgos from pandas.core.config import get_option -from pandas.core.reshape import pivot # --------------------------------------------------------------------- # Docstring templates @@ -4147,6 +4146,81 @@ def pivot(self, index=None, columns=None, values=None): from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) + _shared_docs['pivot_table'] = """ + Create a spreadsheet-style pivot table as a DataFrame. 
The levels in + the pivot table will be stored in MultiIndex objects (hierarchical + indexes) on the index and columns of the result DataFrame + + Parameters + ----------%s + values : column to aggregate, optional + index : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table index. If an array is passed, + it is being used as the same manner as column values. + columns : column, Grouper, array, or list of the previous + If an array is passed, it must be the same length as the data. The + list can contain any of the other types (except list). + Keys to group by on the pivot table column. If an array is passed, + it is being used as the same manner as column values. + aggfunc : function or list of functions, default numpy.mean + If list of functions passed, the resulting pivot table will have + hierarchical columns whose top level are the function names + (inferred from the function objects themselves) + fill_value : scalar, default None + Value to replace missing values with + margins : boolean, default False + Add all row / columns (e.g. for subtotal / grand totals) + dropna : boolean, default True + Do not include columns whose entries are all NaN + margins_name : string, default 'All' + Name of the row / column that will contain the totals + when margins is True. + + Examples + -------- + >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", + ... "bar", "bar", "bar", "bar"], + ... "B": ["one", "one", "one", "two", "two", + ... "one", "one", "two", "two"], + ... "C": ["small", "large", "large", "small", + ... "small", "large", "small", "small", + ... "large"], + ... 
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) + >>> df + A B C D + 0 foo one small 1 + 1 foo one large 2 + 2 foo one large 2 + 3 foo two small 3 + 4 foo two small 3 + 5 bar one large 4 + 6 bar one small 5 + 7 bar two small 6 + 8 bar two large 7 + + >>> table = pivot_table(df, values='D', index=['A', 'B'], + ... columns=['C'], aggfunc=np.sum) + >>> table + ... # doctest: +NORMALIZE_WHITESPACE + C large small + A B + bar one 4.0 5.0 + two 7.0 6.0 + foo one 4.0 1.0 + two NaN 6.0 + + Returns + ------- + table : DataFrame + + See also + -------- + DataFrame.pivot : pivot without aggregation that can handle + non-numeric data + """ + @Substitution('') @Appender(_shared_docs['pivot_table']) def pivot_table(self, values=None, index=None, columns=None, diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 33813f6ee128d..592d978fa9f26 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -5,7 +5,9 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.reshape.concat import concat -from pandas.core.generic import _shared_docs +from pandas.core.frame import _shared_docs +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product @@ -17,82 +19,6 @@ import numpy as np -_shared_docs['pivot_table'] = """ - Create a spreadsheet-style pivot table as a DataFrame. The levels in - the pivot table will be stored in MultiIndex objects (hierarchical - indexes) on the index and columns of the result DataFrame - - Parameters - ----------%s - values : column to aggregate, optional - index : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The - list can contain any of the other types (except list). - Keys to group by on the pivot table index. 
If an array is passed, - it is being used as the same manner as column values. - columns : column, Grouper, array, or list of the previous - If an array is passed, it must be the same length as the data. The - list can contain any of the other types (except list). - Keys to group by on the pivot table column. If an array is passed, - it is being used as the same manner as column values. - aggfunc : function or list of functions, default numpy.mean - If list of functions passed, the resulting pivot table will have - hierarchical columns whose top level are the function names - (inferred from the function objects themselves) - fill_value : scalar, default None - Value to replace missing values with - margins : boolean, default False - Add all row / columns (e.g. for subtotal / grand totals) - dropna : boolean, default True - Do not include columns whose entries are all NaN - margins_name : string, default 'All' - Name of the row / column that will contain the totals - when margins is True. - - Examples - -------- - >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo", - ... "bar", "bar", "bar", "bar"], - ... "B": ["one", "one", "one", "two", "two", - ... "one", "one", "two", "two"], - ... "C": ["small", "large", "large", "small", - ... "small", "large", "small", "small", - ... "large"], - ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]}) - >>> df - A B C D - 0 foo one small 1 - 1 foo one large 2 - 2 foo one large 2 - 3 foo two small 3 - 4 foo two small 3 - 5 bar one large 4 - 6 bar one small 5 - 7 bar two small 6 - 8 bar two large 7 - - >>> table = pivot_table(df, values='D', index=['A', 'B'], - ... columns=['C'], aggfunc=np.sum) - >>> table - ... 
# doctest: +NORMALIZE_WHITESPACE - C large small - A B - bar one 4.0 5.0 - two 7.0 6.0 - foo one 4.0 1.0 - two NaN 6.0 - - Returns - ------- - table : DataFrame - - See also - -------- - DataFrame.pivot : pivot without aggregation that can handle - non-numeric data - """ - - @Substitution('\ndata : DataFrame') @Appender(_shared_docs['pivot_table'], indents=1) def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', From bf0f4fdbf3baf623b37e5976fd0e64df03f075d7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 7 Aug 2017 11:30:56 -0700 Subject: [PATCH 7/8] Fix missing import in pivot_table --- pandas/core/frame.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 910d78fc5ce26..26de1a9c0b1d3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4226,10 +4226,11 @@ def pivot(self, index=None, columns=None, values=None): def pivot_table(self, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All'): - return pivot.pivot_table(self, values=values, index=index, - columns=columns, aggfunc=aggfunc, - fill_value=fill_value, margins=margins, - dropna=dropna, margins_name=margins_name) + from pandas.core.reshape.pivot import pivot_table + return pivot_table(self, values=values, index=index, columns=columns, + aggfunc=aggfunc, fill_value=fill_value, + margins=margins, dropna=dropna, + margins_name=margins_name) def stack(self, level=-1, dropna=True): """ From ea334c51614f945987d28a94d129e74ec6ef7e85 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 8 Aug 2017 18:24:58 -0700 Subject: [PATCH 8/8] Move import of _shared_docs to end of reshape.pivot imports --- pandas/core/reshape/pivot.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 592d978fa9f26..e61adf3aac30a 100644 --- a/pandas/core/reshape/pivot.py +++ 
b/pandas/core/reshape/pivot.py @@ -5,9 +5,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.reshape.concat import concat -from pandas.core.frame import _shared_docs -# Note: We need to make sure `frame` is imported before `pivot`, otherwise -# _shared_docs['pivot_table'] will not yet exist. from pandas.core.series import Series from pandas.core.groupby import Grouper from pandas.core.reshape.util import cartesian_product @@ -16,6 +13,11 @@ from pandas import compat import pandas.core.common as com from pandas.util._decorators import Appender, Substitution + +from pandas.core.frame import _shared_docs +# Note: We need to make sure `frame` is imported before `pivot`, otherwise +# _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency + import numpy as np