From e8942ed194860f4c4aec5a63d70f4da4e8bf2df9 Mon Sep 17 00:00:00 2001
From: anomrake <anomrake@users.noreply.github.com>
Date: Thu, 24 Apr 2014 11:26:06 -0400
Subject: [PATCH 1/2] BUG: fix handling of color argument for variety of
 plotting functions

parallel_coordinates
- fix reordering of class column (from set) causing possible color/class
  mismatch
- deprecated use of argument colors in favor of color
radviz
- fix reordering of class column (from set) causing possible color/class
  mismatch
- added explicit color keyword argument (avoids multiple values 'color' being
  passed to plotting method)
andrews_curves
- added explicit color keyword argument (avoids multiple values 'color' being
  passed to plotting method)
---
 doc/source/release.rst        |   4 ++
 pandas/tests/test_graphics.py |  50 +++++++++++--
 pandas/tools/plotting.py      | 131 +++++++++++++++++-----------------
 3 files changed, 114 insertions(+), 71 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index a6aa842940bc0..88833f48f1659 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -229,6 +229,10 @@ Deprecations
   returned if possible, otherwise a copy will be made. Previously the user could think that ``copy=False`` would
   ALWAYS return a view. (:issue:`6894`)
 
+- The :func:`parallel_coordinates` function now takes argument ``color``
+  instead of ``colors``. A ``FutureWarning`` is raised  to alert that
+  the old ``colors`` argument will not be supported in a future release
+
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index 829b2b296155f..629c011b4dbde 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -1220,11 +1220,26 @@ def scat2(x, y, by=None, ax=None, figsize=None):
     def test_andrews_curves(self):
         from pandas import read_csv
         from pandas.tools.plotting import andrews_curves
-
+        from matplotlib import cm
+        
         path = os.path.join(curpath(), 'data', 'iris.csv')
         df = read_csv(path)
 
         _check_plot_works(andrews_curves, df, 'Name')
+        _check_plot_works(andrews_curves, df, 'Name',
+                          color=('#556270', '#4ECDC4', '#C7F464'))
+        _check_plot_works(andrews_curves, df, 'Name',
+                          color=['dodgerblue', 'aquamarine', 'seagreen'])
+        _check_plot_works(andrews_curves, df, 'Name', colormap=cm.jet)
+
+        colors = ['b', 'g', 'r']
+        df = DataFrame({"A": [1, 2, 3],
+                        "B": [1, 2, 3],
+                        "C": [1, 2, 3],
+                        "Name": colors})
+        ax = andrews_curves(df, 'Name', color=colors)
+        legend_colors = [l.get_color() for l in ax.legend().get_lines()]
+        self.assertEqual(colors, legend_colors)
 
     @slow
     def test_parallel_coordinates(self):
@@ -1235,13 +1250,9 @@ def test_parallel_coordinates(self):
         df = read_csv(path)
         _check_plot_works(parallel_coordinates, df, 'Name')
         _check_plot_works(parallel_coordinates, df, 'Name',
-                          colors=('#556270', '#4ECDC4', '#C7F464'))
-        _check_plot_works(parallel_coordinates, df, 'Name',
-                          colors=['dodgerblue', 'aquamarine', 'seagreen'])
+                          color=('#556270', '#4ECDC4', '#C7F464'))
         _check_plot_works(parallel_coordinates, df, 'Name',
-                          colors=('#556270', '#4ECDC4', '#C7F464'))
-        _check_plot_works(parallel_coordinates, df, 'Name',
-                          colors=['dodgerblue', 'aquamarine', 'seagreen'])
+                          color=['dodgerblue', 'aquamarine', 'seagreen'])
         _check_plot_works(parallel_coordinates, df, 'Name', colormap=cm.jet)
 
         df = read_csv(path, header=None, skiprows=1, names=[1, 2, 4, 8,
@@ -1249,6 +1260,15 @@ def test_parallel_coordinates(self):
         _check_plot_works(parallel_coordinates, df, 'Name', use_columns=True)
         _check_plot_works(parallel_coordinates, df, 'Name',
                           xticks=[1, 5, 25, 125])
+        
+        colors = ['b', 'g', 'r']
+        df = DataFrame({"A": [1, 2, 3],
+                        "B": [1, 2, 3],
+                        "C": [1, 2, 3],
+                        "Name": colors})
+        ax = parallel_coordinates(df, 'Name', color=colors)
+        legend_colors = [l.get_color() for l in ax.legend().get_lines()]
+        self.assertEqual(colors, legend_colors)
 
     @slow
     def test_radviz(self):
@@ -1259,8 +1279,24 @@ def test_radviz(self):
         path = os.path.join(curpath(), 'data', 'iris.csv')
         df = read_csv(path)
         _check_plot_works(radviz, df, 'Name')
+        _check_plot_works(radviz, df, 'Name',
+                          color=('#556270', '#4ECDC4', '#C7F464'))
+        _check_plot_works(radviz, df, 'Name',
+                          color=['dodgerblue', 'aquamarine', 'seagreen'])
         _check_plot_works(radviz, df, 'Name', colormap=cm.jet)
 
+        colors = [[0., 0., 1., 1.],
+                  [0., 0.5, 1., 1.],
+                  [1., 0., 0., 1.]]
+        df = DataFrame({"A": [1, 2, 3],
+                        "B": [2, 1, 3],
+                        "C": [3, 2, 1],
+                        "Name": ['b', 'g', 'r']})
+        ax = radviz(df, 'Name', color=colors)
+        legend_colors = [c.get_facecolor().squeeze().tolist()
+                         for c in ax.collections]
+        self.assertEqual(colors, legend_colors)
+
     @slow
     def test_plot_int_columns(self):
         df = DataFrame(randn(100, 4)).cumsum()
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index 4453b1db359e9..a7628f759132f 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 
-from pandas.util.decorators import cache_readonly
+from pandas.util.decorators import cache_readonly, deprecate_kwarg
 import pandas.core.common as com
 from pandas.core.index import MultiIndex
 from pandas.core.series import Series, remove_na
@@ -355,18 +355,22 @@ def _get_marker_compat(marker):
     return marker
 
 
-def radviz(frame, class_column, ax=None, colormap=None, **kwds):
+def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
     """RadViz - a multivariate data visualization algorithm
 
     Parameters:
     -----------
-    frame: DataFrame object
-    class_column: Column name that contains information about class membership
+    frame: DataFrame
+    class_column: str
+        Column name containing class names
     ax: Matplotlib axis object, optional
+    color: list or tuple, optional
+        Colors to use for the different classes
     colormap : str or matplotlib colormap object, default None
         Colormap to select colors from. If string, load colormap with that name
         from matplotlib.
-    kwds: Matplotlib scatter method keyword arguments, optional
+    kwds: keywords
+        Options to pass to matplotlib scatter plotting method
 
     Returns:
     --------
@@ -380,44 +384,42 @@ def normalize(series):
         b = max(series)
         return (series - a) / (b - a)
 
-    column_names = [column_name for column_name in frame.columns
-                    if column_name != class_column]
-
-    df = frame[column_names].apply(normalize)
+    n = len(frame)
+    classes = frame[class_column].drop_duplicates()
+    class_col = frame[class_column]
+    df = frame.drop(class_column, axis=1).apply(normalize)
 
     if ax is None:
         ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1])
 
-    classes = set(frame[class_column])
     to_plot = {}
-
     colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
-                                  color_type='random', color=kwds.get('color'))
+                                  color_type='random', color=color)
 
-    for class_ in classes:
-        to_plot[class_] = [[], []]
+    for kls in classes:
+        to_plot[kls] = [[], []]
 
-    n = len(frame.columns) - 1
-    s = np.array([(np.cos(t), np.sin(t))
-                  for t in [2.0 * np.pi * (i / float(n))
-                            for i in range(n)]])
+    m = len(frame.columns) - 1  # non-class column count; n stays the row count
+    s = np.array([(np.cos(t), np.sin(t))
+                  for t in [2.0 * np.pi * (i / float(m))
+                            for i in range(m)]])
 
-    for i in range(len(frame)):
-        row = df.irow(i).values
+    for i in range(n):
+        row = df.iloc[i].values
         row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
         y = (s * row_).sum(axis=0) / row.sum()
-        class_name = frame[class_column].iget(i)
-        to_plot[class_name][0].append(y[0])
-        to_plot[class_name][1].append(y[1])
+        kls = class_col.iat[i]
+        to_plot[kls][0].append(y[0])
+        to_plot[kls][1].append(y[1])
 
-    for i, class_ in enumerate(classes):
-        ax.scatter(to_plot[class_][0], to_plot[class_][1], color=colors[i],
-                   label=com.pprint_thing(class_), **kwds)
+    for i, kls in enumerate(classes):
+        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
+                   label=com.pprint_thing(kls), **kwds)
     ax.legend()
 
     ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))
 
-    for xy, name in zip(s, column_names):
+    for xy, name in zip(s, df.columns):
 
         ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))
 
@@ -438,20 +440,23 @@ def normalize(series):
     return ax
 
 
-def andrews_curves(data, class_column, ax=None, samples=200, colormap=None,
-                   **kwds):
+def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
+                   colormap=None, **kwds):
     """
     Parameters:
     -----------
-    data : DataFrame
+    frame : DataFrame
         Data to be plotted, preferably normalized to (0.0, 1.0)
     class_column : Name of the column containing class names
     ax : matplotlib axes object, default None
     samples : Number of points to plot in each curve
+    color: list or tuple, optional
+        Colors to use for the different classes
     colormap : str or matplotlib colormap object, default None
         Colormap to select colors from. If string, load colormap with that name
         from matplotlib.
-    kwds : Optional plotting arguments to be passed to matplotlib
+    kwds: keywords
+        Options to pass to matplotlib plotting method
 
     Returns:
     --------
@@ -475,30 +480,31 @@ def f(x):
             return result
         return f
 
-    n = len(data)
-    class_col = data[class_column]
-    uniq_class = class_col.drop_duplicates()
-    columns = [data[col] for col in data.columns if (col != class_column)]
+    n = len(frame)
+    class_col = frame[class_column]
+    classes = frame[class_column].drop_duplicates()
+    df = frame.drop(class_column, axis=1)
     x = [-pi + 2.0 * pi * (t / float(samples)) for t in range(samples)]
     used_legends = set([])
 
-    colors = _get_standard_colors(num_colors=len(uniq_class), colormap=colormap,
-                                  color_type='random', color=kwds.get('color'))
-    col_dict = dict([(klass, col) for klass, col in zip(uniq_class, colors)])
+    color_values = _get_standard_colors(num_colors=len(classes),
+                                        colormap=colormap, color_type='random',
+                                        color=color)
+    colors = dict(zip(classes, color_values))
     if ax is None:
         ax = plt.gca(xlim=(-pi, pi))
     for i in range(n):
-        row = [columns[c][i] for c in range(len(columns))]
+        row = df.iloc[i].values
         f = function(row)
         y = [f(t) for t in x]
-        label = None
-        if com.pprint_thing(class_col[i]) not in used_legends:
-            label = com.pprint_thing(class_col[i])
+        kls = class_col.iat[i]
+        label = com.pprint_thing(kls)
+        if label not in used_legends:
             used_legends.add(label)
-            ax.plot(x, y, color=col_dict[class_col[i]], label=label, **kwds)
+            ax.plot(x, y, color=colors[kls], label=label, **kwds)
         else:
-            ax.plot(x, y, color=col_dict[class_col[i]], **kwds)
-
+            ax.plot(x, y, color=colors[kls], **kwds)
+    
     ax.legend(loc='upper right')
     ax.grid()
     return ax
@@ -564,22 +570,22 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
         plt.setp(axis.get_yticklabels(), fontsize=8)
     return fig
 
-
-def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None,
-                         use_columns=False, xticks=None, colormap=None, **kwds):
+@deprecate_kwarg(old_arg_name='colors', new_arg_name='color')
+def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
+                         use_columns=False, xticks=None, colormap=None,
+                         **kwds):
     """Parallel coordinates plotting.
 
     Parameters
     ----------
-    data: DataFrame
-        A DataFrame containing data to be plotted
+    frame: DataFrame
     class_column: str
         Column name containing class names
     cols: list, optional
         A list of column names to use
     ax: matplotlib.axis, optional
         matplotlib axis object
-    colors: list or tuple, optional
+    color: list or tuple, optional
         Colors to use for the different classes
     use_columns: bool, optional
         If true, columns will be used as xticks
@@ -587,8 +593,8 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None,
         A list of values to use for xticks
     colormap: str or matplotlib colormap, default None
         Colormap to use for line colors.
-    kwds: list, optional
-        A list of keywords for matplotlib plot method
+    kwds: keywords
+        Options to pass to matplotlib plotting method
 
     Returns
     -------
@@ -600,20 +606,19 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None,
     >>> from pandas.tools.plotting import parallel_coordinates
     >>> from matplotlib import pyplot as plt
     >>> df = read_csv('https://raw.github.com/pydata/pandas/master/pandas/tests/data/iris.csv')
-    >>> parallel_coordinates(df, 'Name', colors=('#556270', '#4ECDC4', '#C7F464'))
+    >>> parallel_coordinates(df, 'Name', color=('#556270', '#4ECDC4', '#C7F464'))
     >>> plt.show()
     """
     import matplotlib.pyplot as plt
 
-
-    n = len(data)
-    classes = set(data[class_column])
-    class_col = data[class_column]
+    n = len(frame)
+    classes = frame[class_column].drop_duplicates()
+    class_col = frame[class_column]
 
     if cols is None:
-        df = data.drop(class_column, axis=1)
+        df = frame.drop(class_column, axis=1)
     else:
-        df = data[cols]
+        df = frame[cols]
 
     used_legends = set([])
 
@@ -638,19 +643,17 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None,
 
     color_values = _get_standard_colors(num_colors=len(classes),
                                         colormap=colormap, color_type='random',
-                                        color=colors)
+                                        color=color)
 
     colors = dict(zip(classes, color_values))
 
     for i in range(n):
-        row = df.irow(i).values
-        y = row
-        kls = class_col.iget_value(i)
-        if com.pprint_thing(kls) not in used_legends:
-            label = com.pprint_thing(kls)
+        y = df.iloc[i].values
+        kls = class_col.iat[i]
+        label = com.pprint_thing(kls)
+        if label not in used_legends:
             used_legends.add(label)
-            ax.plot(x, y, color=colors[kls],
-                    label=label, **kwds)
+            ax.plot(x, y, color=colors[kls], label=label, **kwds)
         else:
             ax.plot(x, y, color=colors[kls], **kwds)
 

From 1980c7a804ff122072ffc594f106037f39e507fc Mon Sep 17 00:00:00 2001
From: anomrake <anomrake@users.noreply.github.com>
Date: Thu, 1 May 2014 14:58:07 -0400
Subject: [PATCH 2/2] TST: add tests for deprecation warnings from plotting
 functions

parallel_coordinates/andrews_curves
- added deprecate_kwarg decorator for using frame argument instead of data
- added tests to check that FutureWarning is raised properly
---
 doc/source/release.rst        | 8 ++++++++
 doc/source/v0.14.0.txt        | 8 ++++++++
 pandas/tests/test_graphics.py | 9 +++++++++
 pandas/tools/plotting.py      | 4 ++--
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/doc/source/release.rst b/doc/source/release.rst
index 88833f48f1659..4e4d61c3e971c 100644
--- a/doc/source/release.rst
+++ b/doc/source/release.rst
@@ -233,6 +233,10 @@ Deprecations
   instead of ``colors``. A ``FutureWarning`` is raised  to alert that
   the old ``colors`` argument will not be supported in a future release
 
+- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take
+  positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is
+  raised if the old ``data`` argument is used by name.
+
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -464,6 +468,10 @@ Bug Fixes
 - Bug in timeseries-with-frequency plot cursor display (:issue:`5453`)
 - Bug surfaced in groupby.plot when using a ``Float64Index`` (:issue:`7025`)
 - Stopped tests from failing if options data isn't able to be downloaded from Yahoo (:issue:`7034`)
+- Bug in ``parallel_coordinates`` and ``radviz`` where reordering of the class
+  column could cause a color/class mismatch
+- Bug in ``radviz`` and ``andrews_curves`` where multiple values for the
+  ``color`` keyword were passed to the plotting method
 
 pandas 0.13.1
 -------------
diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
index b5df39df3b617..f5e018b6141fe 100644
--- a/doc/source/v0.14.0.txt
+++ b/doc/source/v0.14.0.txt
@@ -382,6 +382,14 @@ Plotting
 
   Because of the default `align` value changes, coordinates of bar plots are now located on integer values (0.0, 1.0, 2.0 ...). This is intended to make bar plot be located on the same coodinates as line plot. However, bar plot may differs unexpectedly when you manually adjust the bar location or drawing area, such as using `set_xlim`, `set_ylim`, etc. In this cases, please modify your script to meet with new coordinates.
 
+- The :func:`parallel_coordinates` function now takes argument ``color``
+  instead of ``colors``. A ``FutureWarning`` is raised to alert that
+  the old ``colors`` argument will not be supported in a future release.
+
+- The :func:`parallel_coordinates` and :func:`andrews_curves` functions now take
+  positional argument ``frame`` instead of ``data``. A ``FutureWarning`` is
+  raised if the old ``data`` argument is used by name.
+  
 .. _whatsnew_0140.prior_deprecations:
 
 Prior Version Deprecations/Changes
diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
index 629c011b4dbde..e3f49e14400d1 100644
--- a/pandas/tests/test_graphics.py
+++ b/pandas/tests/test_graphics.py
@@ -1240,6 +1240,9 @@ def test_andrews_curves(self):
         ax = andrews_curves(df, 'Name', color=colors)
         legend_colors = [l.get_color() for l in ax.legend().get_lines()]
         self.assertEqual(colors, legend_colors)
+        
+        with tm.assert_produces_warning(FutureWarning):
+            andrews_curves(data=df, class_column='Name')
 
     @slow
     def test_parallel_coordinates(self):
@@ -1269,6 +1272,12 @@ def test_parallel_coordinates(self):
         ax = parallel_coordinates(df, 'Name', color=colors)
         legend_colors = [l.get_color() for l in ax.legend().get_lines()]
         self.assertEqual(colors, legend_colors)
+        
+        with tm.assert_produces_warning(FutureWarning):
+            parallel_coordinates(df, 'Name', colors=colors)
+        
+        with tm.assert_produces_warning(FutureWarning):
+            parallel_coordinates(data=df, class_column='Name')
 
     @slow
     def test_radviz(self):
diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
index a7628f759132f..b11d71f48baf2 100644
--- a/pandas/tools/plotting.py
+++ b/pandas/tools/plotting.py
@@ -354,7 +354,6 @@ def _get_marker_compat(marker):
         return 'o'
     return marker
 
-
 def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
     """RadViz - a multivariate data visualization algorithm
 
@@ -439,7 +438,7 @@ def normalize(series):
     ax.axis('equal')
     return ax
 
-
+@deprecate_kwarg(old_arg_name='data', new_arg_name='frame')
 def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                    colormap=None, **kwds):
     """
@@ -571,6 +570,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
     return fig
 
 @deprecate_kwarg(old_arg_name='colors', new_arg_name='color')
+@deprecate_kwarg(old_arg_name='data', new_arg_name='frame')
 def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                          use_columns=False, xticks=None, colormap=None,
                          **kwds):