From 9793e3ce78818ac200e8e17c619887dd6b640a06 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 10 May 2014 18:38:01 -0500 Subject: [PATCH 1/3] API: Add return_type kwarg to boxplot update docs API: Let 'by' and groupby follow return_type Write all the docs --- doc/source/groupby.rst | 31 +++++++++ doc/source/release.rst | 7 ++ doc/source/v0.14.0.txt | 8 +++ doc/source/visualization.rst | 36 ++++++++++ pandas/core/frame.py | 20 +++++- pandas/tests/test_graphics.py | 121 ++++++++++++++++++++++++++++------ pandas/tools/plotting.py | 82 +++++++++++++++++++---- 7 files changed, 272 insertions(+), 33 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index c6fd216ebdc36..12684aeda8cea 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -909,6 +909,37 @@ To see the order in which each row appears within its group, use the df.groupby('A').cumcount(ascending=False) # kwarg only +Plotting +~~~~~~~~ + +Groupby also works with some plotting methods. For example, suppose we +suspect that some features in a DataFrame may differ by group, in this case, +the values in column 1 where the group is "B" are 3 higher on average. + +.. ipython:: python + + np.random.seed(1234) + df = DataFrame(np.random.randn(50, 2)) + df['g'] = np.random.choice(['A', 'B'], size=50) + df.loc[df['g'] == 'B', 1] += 3 + +We can easily visualize this with a boxplot: + +..ipython:: python + + @savefig groupby_boxplot.png + bp = df.groupby('g').boxplot() + +The result of calling ``boxplot`` is a dictionary whose keys are the values +of our grouping column ``g`` ("A" and "B"). The values of the resulting dictionary +can be controlled by the ``return_type`` keyword of ``boxplot``. +See the :ref:`visualization documentation` for more. + +.. warning:: + + For historical reasons, ``df.groupby("g").boxplot()`` is not equivalent + to ``df.boxplot(by="g")``. See :ref:`here`. 
+ Examples -------- diff --git a/doc/source/release.rst b/doc/source/release.rst index 973dfd73307f2..524548509e76c 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -208,6 +208,9 @@ API Changes returns a different Index (:issue:`7088`). Previously the index was unintentionally sorted. - arithmetic operations with **only** ``bool`` dtypes now raise an error (:issue:`7011`, :issue:`6762`, :issue:`7015`) +- :meth:`DataFrame.boxplot` has a new keyword argument, `return_type`. It accepts ``'dict'``, + ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib + axes and a dict of matplotlib Lines is returned. Deprecations ~~~~~~~~~~~~ @@ -258,6 +261,10 @@ Deprecations Use the `percentiles` keyword instead, which takes a list of percentiles to display. The default output is unchanged. +- The default return type of :func:`boxplot` will change from a dict to a matpltolib Axes + in a future release. You can use the future behavior now by passing ``return_type='dict'`` + to boxplot. + Prior Version Deprecations/Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index c033debbb6808..9304925a079fc 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -210,6 +210,10 @@ API changes # this now raises for arith ops like ``+``, ``*``, etc. NotImplementedError: operator '*' not implemented for bool dtypes +- :meth:`DataFrame.boxplot` has a new keyword argument, `return_type`. It accepts ``'dict'``, + ``'axes'``, or ``'both'``, in which case a namedtuple with the matplotlib + axes and a dict of matplotlib Lines is returned. + .. _whatsnew_0140.display: @@ -554,6 +558,10 @@ Deprecations Use the `percentiles` keyword instead, which takes a list of percentiles to display. The default output is unchanged. +- The default return type of :func:`boxplot` will change from a dict to a matpltolib Axes + in a future release. 
You can use the future behavior now by passing ``return_type='dict'`` + to boxplot. + .. _whatsnew_0140.enhancements: Enhancements diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index e5eee45894266..af8dc66c82a63 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -304,6 +304,42 @@ columns: plt.close('all') +.. _visualization.box.return: + +The return type of ``boxplot`` depends on two keyword arguments: ``by`` and ``return_type``. +When ``by`` is ``None``: + +* if ``return_type`` is ``'dict'``, a dictionary containing the :class:`matplotlib Lines ` is returned. The keys are "boxes", "caps", "fliers", "medians", and "whiskers". + This is the default. +* if ``return_type`` is ``'axes'``, a :class:`matplotlib Axes ` containing the boxplot is returned. +* if ``return_type`` is ``'both'`` a namedtuple containing the :class:`matplotlib Axes ` + and :class:`matplotlib Lines ` is returned. + +When ``by`` is some column of the DataFrame, a dict of ``return_type`` is returned, where +the keys are the columns of the DataFrame. The plot has a facet for each column of +the DataFrame, with a separate box for each value of ``by``. + +Finally, when calling boxplot on a :class:`Groupby` object, a dict of ``return_type`` +is returned, where the keys are the same as the Groupby object. The plot has a +facet for each key, with each facet containing a box for each column of the +DataFrame. + +.. ipython:: python + + np.random.seed(1234) + df_box = DataFrame(np.random.randn(50, 2)) + df_box['g'] = np.random.choice(['A', 'B'], size=50) + df_box.loc[df_box['g'] == 'B', 1] += 3 + +..ipython:: python + + @savefig(boxplot_groupby.png) + df_box.boxplot(by='g') + + @savefig groupby_boxplot_vis.png + df_box.groupby('g').boxplot() + + .. 
_visualization.area_plot: Area Plot diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1b77a87b0d94a..59a3a4e931485 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4857,7 +4857,8 @@ def _put_str(s, space): def boxplot(self, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, **kwds): + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): """ Make a box plot from DataFrame column/columns optionally grouped (stratified) by one or more columns @@ -4875,17 +4876,32 @@ def boxplot(self, column=None, by=None, ax=None, fontsize=None, Rotation for ticks grid : boolean, default None (matlab style default) Axis grid lines + layout : tuple (optional) + (rows, columns) for the layout of the plot + return_type : bool, default False + Whether to return a dict whose values are the lines of the boxplot + kwds : other plotting keyword arguments to be passed to matplotlib boxplot + function Returns ------- ax : matplotlib.axes.AxesSubplot + lines : dict (optional) + + Notes + ----- + Use ``return_dict=True`` when you want to modify the appearance + of the lines. In this case a named tuple is returned. 
""" import pandas.tools.plotting as plots import matplotlib.pyplot as plt ax = plots.boxplot(self, column=column, by=by, ax=ax, - fontsize=fontsize, grid=grid, rot=rot, **kwds) + fontsize=fontsize, grid=grid, rot=rot, + figsize=figsize, layout=layout, return_dict=return_dict, + **kwds) plt.draw_if_interactive() return ax + DataFrame.boxplot = boxplot ops.add_flex_arithmetic_methods(DataFrame, **ops.frame_flex_funcs) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index c158aee096429..0bd095fe2f94e 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1309,14 +1309,14 @@ def test_boxplot(self): df['indic'] = ['foo', 'bar'] * 3 df['indic2'] = ['foo', 'bar', 'foo'] * 2 - _check_plot_works(df.boxplot) - _check_plot_works(df.boxplot, column=['one', 'two']) + _check_plot_works(df.boxplot, return_type='dict') + _check_plot_works(df.boxplot, column=['one', 'two'], return_type='dict') _check_plot_works(df.boxplot, column=['one', 'two'], by='indic') _check_plot_works(df.boxplot, column='one', by=['indic', 'indic2']) _check_plot_works(df.boxplot, by='indic') _check_plot_works(df.boxplot, by=['indic', 'indic2']) - _check_plot_works(plotting.boxplot, df['one']) - _check_plot_works(df.boxplot, notch=1) + _check_plot_works(plotting.boxplot, df['one'], return_type='dict') + _check_plot_works(df.boxplot, notch=1, return_type='dict') _check_plot_works(df.boxplot, by='indic', notch=1) df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) @@ -1337,10 +1337,83 @@ def test_boxplot(self): # When by is None, check that all relevant lines are present in the dict fig, ax = self.plt.subplots() - d = df.boxplot(ax=ax) + d = df.boxplot(ax=ax, return_type='dict') lines = list(itertools.chain.from_iterable(d.values())) self.assertEqual(len(ax.get_lines()), len(lines)) + @slow + def test_boxplot_return_type(self): + # API change in https://github.com/pydata/pandas/pull/7096 + import matplotlib as mpl + + df = DataFrame(randn(6, 4), 
+ index=list(string.ascii_letters[:6]), + columns=['one', 'two', 'three', 'four']) + with tm.assertRaises(ValueError): + df.boxplot(return_type='NOTATYPE') + + with tm.assert_produces_warning(FutureWarning): + result = df.boxplot() + self.assertIsInstance(result, dict) # change to Axes in future + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type='dict') + self.assertIsInstance(result, dict) + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type='axes') + self.assertIsInstance(result, mpl.axes.Axes) + + with tm.assert_produces_warning(False): + result = df.boxplot(return_type='both') + self.assertIsInstance(result, tuple) + + @slow + def test_boxplot_return_type_by(self): + import matplotlib as mpl + + df = DataFrame(np.random.randn(10, 2)) + df['g'] = ['a'] * 5 + ['b'] * 5 + + # old style: return_type=None + result = df.boxplot(by='g') + self.assertIsInstance(result, np.ndarray) + self.assertIsInstance(result[0], mpl.axes.Axes) + + result = df.boxplot(by='g', return_type='dict') + self.assertIsInstance(result, dict) + self.assertIsInstance(result[0], dict) + + result = df.boxplot(by='g', return_type='axes') + self.assertIsInstance(result, dict) + self.assertIsInstance(result[0], mpl.axes.Axes) + + result = df.boxplot(by='g', return_type='both') + self.assertIsInstance(result, dict) + self.assertIsInstance(result[0], tuple) + self.assertIsInstance(result[0][0], mpl.axes.Axes) + self.assertIsInstance(result[0][1], dict) + + # now for groupby + with tm.assert_produces_warning(FutureWarning): + result = df.groupby('g').boxplot() + self.assertIsInstance(result, dict) + self.assertIsInstance(result['a'], dict) + + result = df.groupby('g').boxplot(return_type='dict') + self.assertIsInstance(result, dict) + self.assertIsInstance(result['a'], dict) + + result = df.groupby('g').boxplot(return_type='axes') + self.assertIsInstance(result, dict) + self.assertIsInstance(result['a'], mpl.axes.Axes) + + result = 
df.groupby('g').boxplot(return_type='both') + self.assertIsInstance(result, dict) + self.assertIsInstance(result['a'], tuple) + self.assertIsInstance(result['a'][0], mpl.axes.Axes) + self.assertIsInstance(result['a'][1], dict) + @slow def test_kde(self): _skip_if_no_scipy() @@ -2044,13 +2117,12 @@ class TestDataFrameGroupByPlots(TestPlotBase): @slow def test_boxplot(self): - # unable to check layout because boxplot doesn't return ndarray - # axes_num can be checked using gcf().axes grouped = self.hist_df.groupby(by='gender') - box = _check_plot_works(grouped.boxplot) + box = _check_plot_works(grouped.boxplot, return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=2) - box = _check_plot_works(grouped.boxplot, subplots=False) + box = _check_plot_works(grouped.boxplot, subplots=False, + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=2) tuples = lzip(string.ascii_letters[:10], range(10)) @@ -2058,17 +2130,19 @@ def test_boxplot(self): index=MultiIndex.from_tuples(tuples)) grouped = df.groupby(level=1) - box = _check_plot_works(grouped.boxplot) + box = _check_plot_works(grouped.boxplot, return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=10) - box = _check_plot_works(grouped.boxplot, subplots=False) + box = _check_plot_works(grouped.boxplot, subplots=False, + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=10) grouped = df.unstack(level=1).groupby(level=0, axis=1) - box = _check_plot_works(grouped.boxplot) + box = _check_plot_works(grouped.boxplot, return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) - box = _check_plot_works(grouped.boxplot, subplots=False) + box = _check_plot_works(grouped.boxplot, subplots=False, + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) def test_series_plot_color_kwargs(self): @@ -2133,31 +2207,38 @@ def test_grouped_box_layout(self): self.assertRaises(ValueError, df.boxplot, column=['weight', 
'height'], by=df.gender, layout=(1, 1)) self.assertRaises(ValueError, df.boxplot, column=['height', 'weight', 'category'], - layout=(2, 1)) + layout=(2, 1), return_type='dict') - box = _check_plot_works(df.groupby('gender').boxplot, column='height') + box = _check_plot_works(df.groupby('gender').boxplot, column='height', + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=2) - box = _check_plot_works(df.groupby('category').boxplot, column='height') + box = _check_plot_works(df.groupby('category').boxplot, column='height', + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4) # GH 6769 - box = _check_plot_works(df.groupby('classroom').boxplot, column='height') + box = _check_plot_works(df.groupby('classroom').boxplot, + column='height', return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) box = df.boxplot(column=['height', 'weight', 'category'], by='gender') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) - box = df.groupby('classroom').boxplot(column=['height', 'weight', 'category']) + box = df.groupby('classroom').boxplot( + column=['height', 'weight', 'category'], return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) - box = _check_plot_works(df.groupby('category').boxplot, column='height', layout=(3, 2)) + box = _check_plot_works(df.groupby('category').boxplot, column='height', + layout=(3, 2), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4) box = df.boxplot(column=['height', 'weight', 'category'], by='gender', layout=(4, 1)) self._check_axes_shape(self.plt.gcf().axes, axes_num=3) - box = df.groupby('classroom').boxplot(column=['height', 'weight', 'category'], layout=(1, 4)) + box = df.groupby('classroom').boxplot( + column=['height', 'weight', 'category'], layout=(1, 4), + return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3) @slow diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 
e9dca5d91c8fc..388b716b16a9d 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -3,6 +3,7 @@ import datetime import warnings import re +from collections import namedtuple from contextlib import contextmanager from distutils.version import LooseVersion @@ -2258,7 +2259,8 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, **kwds): + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): """ Make a box plot from DataFrame column optionally grouped by some columns or other inputs @@ -2270,20 +2272,49 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, Can be any valid input to groupby by : string or sequence Column in the DataFrame to group by - ax : Matplotlib axis object, optional + ax : Matplotlib axes object, optional fontsize : int or string rot : label rotation angle figsize : A tuple (width, height) in inches grid : Setting this to True will show the grid layout : tuple (optional) (rows, columns) for the layout of the plot + return_type : {'axes', 'dict', 'both'}, default 'dict' + The kind of object to return. 'dict' returns a dictionary + whose values are the matplotlib Lines of the boxplot; + 'axes' returns the matplotlib axes the boxplot is drawn on; + 'both' returns a namedtuple with the axes and dict. + + When grouping with ``by``, a dict mapping columns to ``return_type`` + is returned. + kwds : other plotting keyword arguments to be passed to matplotlib boxplot function Returns ------- - ax : matplotlib.axes.AxesSubplot + lines : dict + ax : matplotlib Axes + (ax, lines): namedtuple + + Notes + ----- + Use ``return_dict=True`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. 
""" + + +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + + # validate return_type: + valid_types = (None, 'axes', 'dict', 'both') + if return_type not in valid_types: + raise ValueError("return_type") + from pandas import Series, DataFrame if isinstance(data, Series): data = DataFrame({'x': data}) @@ -2310,6 +2341,7 @@ def plot_group(grouped, ax): else: ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize) maybe_color_bp(bp) + return bp colors = _get_colors() if column is None: @@ -2320,13 +2352,23 @@ def plot_group(grouped, ax): else: columns = [column] + BP = namedtuple("Boxplot", ['ax', 'lines']) # namedtuple to hold results + if by is not None: - fig, axes = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout) + fig, axes, d = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout) # Return axes in multiplot case, maybe revisit later # 985 - ret = axes + if return_type is None: + ret = axes + if return_type == 'axes': + ret = dict((k, ax) for k, ax in zip(d.keys(), axes)) + elif return_type == 'dict': + ret = d + elif return_type == 'both': + ret = dict((k, BP(ax=ax, lines=line)) for + (k, line), ax in zip(d.items(), axes)) else: if layout is not None: raise ValueError("The 'layout' keyword is not supported when " @@ -2354,7 +2396,20 @@ def plot_group(grouped, ax): ax.set_yticklabels(keys, rotation=rot, fontsize=fontsize) ax.grid(grid) - ret = bp + ret = ax + + if return_type is None: + msg = ("\nThe default value for 'return_type' will change to " + "'axes' in a future release.\n To use the future behavior " + "now, set return_type='axes'.\n To keep the previous " + "behavior and silence this warning, set " + "return_type='dict'.") + warnings.warn(msg, FutureWarning) + 
return_type = 'dict' + if return_type == 'dict': + ret = bp + elif return_type == 'both': + ret = BP(ax=ret, lines=bp) fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) return ret @@ -2692,7 +2747,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, figsize=None, sharex=True, sharey=True, layout=None, rot=0, ax=None, **kwargs): - from pandas.core.frame import DataFrame + from pandas import DataFrame # allow to specify mpl default with 'default' if figsize is None or figsize == 'default': @@ -2726,6 +2781,8 @@ def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, def _grouped_plot_by_column(plotf, data, columns=None, by=None, numeric_only=True, grid=False, figsize=None, ax=None, layout=None, **kwargs): + from pandas.core.frame import DataFrame + grouped = data.groupby(by) if columns is None: if not isinstance(by, (list, tuple)): @@ -2746,18 +2803,21 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, ravel_axes = _flatten(axes) + out_dict = compat.OrderedDict() + for i, col in enumerate(columns): ax = ravel_axes[i] gp_col = grouped[col] - plotf(gp_col, ax, **kwargs) + re_plotf = plotf(gp_col, ax, **kwargs) ax.set_title(col) ax.set_xlabel(com.pprint_thing(by)) ax.grid(grid) + out_dict[col] = re_plotf byline = by[0] if len(by) == 1 else by fig.suptitle('Boxplot grouped by %s' % byline) - return fig, axes + return fig, axes, out_dict def table(ax, data, rowLabels=None, colLabels=None, From 9233170550b5e82750da5b87ef586a109dfa9c07 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Thu, 15 May 2014 09:38:30 -0500 Subject: [PATCH 2/3] DOC: Use shared docstring --- doc/source/groupby.rst | 5 +++-- doc/source/release.rst | 2 +- doc/source/v0.14.0.txt | 2 +- doc/source/visualization.rst | 11 ++++++----- pandas/core/frame.py | 37 ++---------------------------------- pandas/tools/plotting.py | 12 ++++++------ 6 files 
changed, 19 insertions(+), 50 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 12684aeda8cea..1c6bc976d85e9 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -925,7 +925,7 @@ the values in column 1 where the group is "B" are 3 higher on average. We can easily visualize this with a boxplot: -..ipython:: python +.. ipython:: python @savefig groupby_boxplot.png bp = df.groupby('g').boxplot() @@ -938,7 +938,8 @@ See the :ref:`visualization documentation` for more. .. warning:: For historical reasons, ``df.groupby("g").boxplot()`` is not equivalent - to ``df.boxplot(by="g")``. See :ref:`here`. + to ``df.boxplot(by="g")``. See :ref:`here` for + an explanation. Examples -------- diff --git a/doc/source/release.rst b/doc/source/release.rst index 524548509e76c..739d8ba46ec4e 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -262,7 +262,7 @@ Deprecations default output is unchanged. - The default return type of :func:`boxplot` will change from a dict to a matpltolib Axes - in a future release. You can use the future behavior now by passing ``return_type='dict'`` + in a future release. You can use the future behavior now by passing ``return_type='axes'`` to boxplot. Prior Version Deprecations/Changes diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 9304925a079fc..ad38cfc091c88 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -559,7 +559,7 @@ Deprecations default output is unchanged. - The default return type of :func:`boxplot` will change from a dict to a matpltolib Axes - in a future release. You can use the future behavior now by passing ``return_type='dict'`` + in a future release. You can use the future behavior now by passing ``return_type='axes'`` to boxplot. .. 
_whatsnew_0140.enhancements: diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index af8dc66c82a63..72e657b6d3a23 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -331,14 +331,15 @@ DataFrame. df_box['g'] = np.random.choice(['A', 'B'], size=50) df_box.loc[df_box['g'] == 'B', 1] += 3 -..ipython:: python + @savefig boxplot_groupby.png + bp = df_box.boxplot(by='g') - @savefig(boxplot_groupby.png) - df_box.boxplot(by='g') +Compare to: - @savefig groupby_boxplot_vis.png - df_box.groupby('g').boxplot() +.. ipython:: python + @savefig groupby_boxplot_vis.png + bp = df_box.groupby('g').boxplot() .. _visualization.area_plot: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 59a3a4e931485..7ef15a9f36bf1 100755 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4856,48 +4856,15 @@ def _put_str(s, space): DataFrame.hist = gfx.hist_frame +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): - """ - Make a box plot from DataFrame column/columns optionally grouped - (stratified) by one or more columns - - Parameters - ---------- - data : DataFrame - column : column names or list of names, or vector - Can be any valid input to groupby - by : string or sequence - Column in the DataFrame to group by - ax : matplotlib axis object, default None - fontsize : int or string - rot : int, default None - Rotation for ticks - grid : boolean, default None (matlab style default) - Axis grid lines - layout : tuple (optional) - (rows, columns) for the layout of the plot - return_type : bool, default False - Whether to return a dict whose values are the lines of the boxplot - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - ax : matplotlib.axes.AxesSubplot - lines : dict (optional) - - Notes - ----- - Use ``return_dict=True`` 
when you want to modify the appearance - of the lines. In this case a named tuple is returned. - """ import pandas.tools.plotting as plots import matplotlib.pyplot as plt ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, grid=grid, rot=rot, - figsize=figsize, layout=layout, return_dict=return_dict, + figsize=figsize, layout=layout, return_type=return_type, **kwds) plt.draw_if_interactive() return ax diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 388b716b16a9d..b349e9f415ed5 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -11,6 +11,7 @@ from pandas.util.decorators import cache_readonly, deprecate_kwarg import pandas.core.common as com +from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import MultiIndex from pandas.core.series import Series, remove_na from pandas.tseries.index import DatetimeIndex @@ -19,6 +20,7 @@ from pandas.tseries.offsets import DateOffset from pandas.compat import range, lrange, lmap, map, zip, string_types import pandas.compat as compat +from pandas.util.decorators import Appender try: # mpl optional import pandas.tseries.converter as conv @@ -2258,16 +2260,13 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, return plot_obj.axes[0] -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - """ +_shared_docs['boxplot'] = """ Make a box plot from DataFrame column optionally grouped by some columns or other inputs Parameters ---------- - data : DataFrame or Series + data : the pandas object holding the data column : column name or list of names, or vector Can be any valid input to groupby by : string or sequence @@ -2299,7 +2298,7 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, Notes ----- - Use ``return_dict=True`` when you want to tweak the appearance + Use ``return_type='dict'`` when you want to tweak the 
appearance of the lines after plotting. In this case a dict containing the Lines making up the boxes, caps, fliers, medians, and whiskers is returned. """ @@ -2315,6 +2314,7 @@ def boxplot(data, column=None, by=None, ax=None, fontsize=None, if return_type not in valid_types: raise ValueError("return_type") + from pandas import Series, DataFrame if isinstance(data, Series): data = DataFrame({'x': data}) From 660c928ae1c5bac44db94e6082ebeb6ba83d9000 Mon Sep 17 00:00:00 2001 From: TomAugspurger Date: Thu, 15 May 2014 10:21:00 -0500 Subject: [PATCH 3/3] TST: restore matplotlib defaults before testing --- pandas/tests/test_graphics.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 0bd095fe2f94e..5d2934d9705cd 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -33,20 +33,25 @@ def _skip_if_no_scipy(): except ImportError: raise nose.SkipTest("no scipy") + @tm.mplskip class TestPlotBase(tm.TestCase): def setUp(self): + + import matplotlib as mpl + mpl.rcdefaults() + n = 100 with tm.RNGContext(42): gender = tm.choice(['Male', 'Female'], size=n) classroom = tm.choice(['A', 'B', 'C'], size=n) self.hist_df = DataFrame({'gender': gender, - 'classroom': classroom, - 'height': random.normal(66, 4, size=n), - 'weight': random.normal(161, 32, size=n), - 'category': random.randint(4, size=n)}) + 'classroom': classroom, + 'height': random.normal(66, 4, size=n), + 'weight': random.normal(161, 32, size=n), + 'category': random.randint(4, size=n)}) def tearDown(self): tm.close() @@ -120,7 +125,6 @@ def _check_visible(self, collections, visible=True): for patch in collections: self.assertEqual(patch.get_visible(), visible) - def _get_colors_mapped(self, series, colors): unique = series.unique() # unique and colors length can be differed @@ -338,6 +342,8 @@ class TestSeriesPlots(TestPlotBase): def setUp(self): TestPlotBase.setUp(self) import 
matplotlib as mpl + mpl.rcdefaults() + self.mpl_le_1_2_1 = str(mpl.__version__) <= LooseVersion('1.2.1') self.ts = tm.makeTimeSeries() self.ts.name = 'ts' @@ -706,6 +712,8 @@ class TestDataFramePlots(TestPlotBase): def setUp(self): TestPlotBase.setUp(self) import matplotlib as mpl + mpl.rcdefaults() + self.mpl_le_1_2_1 = str(mpl.__version__) <= LooseVersion('1.2.1') self.tdf = tm.makeTimeDataFrame() @@ -2100,7 +2108,7 @@ def test_errorbar_scatter(self): df = DataFrame(np.random.randn(5, 2), index=range(5), columns=['x', 'y']) df_err = DataFrame(np.random.randn(5, 2) / 5, index=range(5), columns=['x', 'y']) - + ax = _check_plot_works(df.plot, kind='scatter', x='x', y='y') self._check_has_errorbars(ax, xerr=0, yerr=0) ax = _check_plot_works(df.plot, kind='scatter', x='x', y='y', xerr=df_err)