From ccaea6313369cbe7f70654428cb16af55d61ac01 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 27 Sep 2018 15:21:11 -0700 Subject: [PATCH 01/11] Added initial test --- pandas/tests/indexes/multi/test_conversion.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 8c9566b7e651f..e4f7d898bbd68 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -169,3 +169,10 @@ def test_to_series_with_arguments(idx): assert s.values is not idx.values assert s.index is not idx assert s.name != idx.name + + +def test_to_index(idx): + expected = pd.Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')]) + result = idx.to_index() + tm.assert_index_equal(result, expected) From c9b26295bed1e70c7ef2f03dc3e265d74848ec1e Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 27 Sep 2018 15:36:38 -0700 Subject: [PATCH 02/11] Method implementation --- pandas/core/indexes/multi.py | 17 +++++++++++++++++ pandas/tests/indexes/multi/test_conversion.py | 5 +++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3e6b934e1e863..488097d2669b5 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -193,6 +193,7 @@ class MultiIndex(Index): set_levels set_labels to_frame + to_index is_lexsorted sortlevel droplevel @@ -1196,6 +1197,22 @@ def to_frame(self, index=True, name=None): result.index = self return result + def to_index(self): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + .. versionadded:: 0.24.0 + + Returns + ------- + Index : an Index with the MultiIndex data represented in Tuples. + + See also + -------- + Index + """ + return Index(self.values, tupleize_cols=False) + def to_hierarchical(self, n_repeat, n_shuffle=1): """ .. 
deprecated:: 0.24.0 diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index e4f7d898bbd68..bc38d45e0daa7 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -172,7 +172,8 @@ def test_to_series_with_arguments(idx): def test_to_index(idx): - expected = pd.Index([('foo', 'one'), ('foo', 'two'), ('bar', 'one'), - ('baz', 'two'), ('qux', 'one'), ('qux', 'two')]) + expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'one'), ('qux', 'two')), + tupleize_cols=False) result = idx.to_index() tm.assert_index_equal(result, expected) From b7150bb78155ad9659ffcf231043e362c126f393 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 1 Oct 2018 12:10:21 -0700 Subject: [PATCH 03/11] Stubbed out sep parameter --- pandas/core/indexes/multi.py | 12 +++++++++--- pandas/tests/indexes/multi/test_conversion.py | 6 ++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 488097d2669b5..afcbafdf1b195 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1197,7 +1197,7 @@ def to_frame(self, index=True, name=None): result.index = self return result - def to_index(self): + def to_index(self, sep=None): """ Convert a MultiIndex to an Index of Tuples containing the level values. @@ -1205,13 +1205,19 @@ def to_index(self): Returns ------- - Index : an Index with the MultiIndex data represented in Tuples. + pd.Index : an Index with the MultiIndex data represented in Tuples. 
See also -------- Index """ - return Index(self.values, tupleize_cols=False) + if sep is not None: + # TODO: Add support for separator to return strs instad of tuples + raise NotImplementedError + else: + idx = Index(self.values, tupleize_cols=False) + + return idx def to_hierarchical(self, n_repeat, n_shuffle=1): """ diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index bc38d45e0daa7..946de12c51602 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd import pandas.util.testing as tm +import pytest from pandas import DataFrame, MultiIndex, date_range from pandas.compat import range @@ -177,3 +178,8 @@ def test_to_index(idx): tupleize_cols=False) result = idx.to_index() tm.assert_index_equal(result, expected) + + +def test_to_index_sep_raises(idx): + with pytest.raises(NotImplementedError): + idx.to_index(sep="") From d96f293ae64d78e6d5b0144225cecba06456fbac Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 1 Oct 2018 12:15:00 -0700 Subject: [PATCH 04/11] Documentation updates --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.24.0.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index 073ed8a082a11..cb362d146abf6 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1700,6 +1700,7 @@ MultiIndex Components MultiIndex.set_levels MultiIndex.set_labels MultiIndex.to_hierarchical + MultiIndex.to_index MultiIndex.to_frame MultiIndex.is_lexsorted MultiIndex.sortlevel diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b71edcf1f6f51..1629302445c7d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -194,6 +194,7 @@ Other Enhancements - :meth:`Index.to_frame` now supports overriding column name(s) (:issue:`22580`). 
- New attribute :attr:`__git_version__` will return git commit sha of current build (:issue:`21295`). - Compatibility with Matplotlib 3.0 (:issue:`22790`). +- :meth:`MultiIndex.to_index` has been added to flatten multiple levels into a single-level :class:`Index` object. .. _whatsnew_0240.api_breaking: From 9e5a3051b17fd92954375843f9ff76a255c0036d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 26 Oct 2018 16:27:56 -0700 Subject: [PATCH 05/11] Moved to_index to base --- pandas/core/indexes/base.py | 22 +++++++++++++++++++ pandas/core/indexes/multi.py | 22 ------------------- pandas/tests/indexes/multi/test_conversion.py | 5 ----- pandas/tests/indexes/test_base.py | 12 ++++++++++ 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e9b0b087179c9..8a177f42ef159 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1105,6 +1105,28 @@ def _format_attrs(self): """ return format_object_attrs(self) + def to_index(self, sep=None): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + .. versionadded:: 0.24.0 + + Returns + ------- + pd.Index : an Index with the MultiIndex data represented in Tuples. + + See also + -------- + Index + """ + if sep is not None: + # TODO: Add support for separator to return strs instad of tuples + raise NotImplementedError + else: + idx = Index(self.values, tupleize_cols=False) + + return idx + def to_series(self, index=None, name=None): """ Create a Series with both index and values equal to the index keys diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 967c787fbb594..5cc5e97b524b2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1202,28 +1202,6 @@ def to_frame(self, index=True, name=None): result.index = self return result - def to_index(self, sep=None): - """ - Convert a MultiIndex to an Index of Tuples containing the level values. - - .. 
versionadded:: 0.24.0 - - Returns - ------- - pd.Index : an Index with the MultiIndex data represented in Tuples. - - See also - -------- - Index - """ - if sep is not None: - # TODO: Add support for separator to return strs instad of tuples - raise NotImplementedError - else: - idx = Index(self.values, tupleize_cols=False) - - return idx - def to_hierarchical(self, n_repeat, n_shuffle=1): """ .. deprecated:: 0.24.0 diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 946de12c51602..bbbdcbbe6d3a1 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -178,8 +178,3 @@ def test_to_index(idx): tupleize_cols=False) result = idx.to_index() tm.assert_index_equal(result, expected) - - -def test_to_index_sep_raises(idx): - with pytest.raises(NotImplementedError): - idx.to_index(sep="") diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 921bcda62794d..af991ce8d1e2d 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2201,6 +2201,18 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('idx.', 4)) + def test_to_index(self, indices): + # 22866 + if isinstance(indices, MultiIndex): + pytest.skip("Separate expectation for MultiIndex") + + result = indices.to_index() + tm.assert_index_equal(result, indices) + + def test_to_index_sep_raises(self, indices): + with pytest.raises(NotImplementedError): + indices.to_index(sep="") + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ From bc60292b555102f768ed22c5cafd4ab7dd90935b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Fri, 26 Oct 2018 16:32:51 -0700 Subject: [PATCH 06/11] Removed unused import --- pandas/tests/indexes/multi/test_conversion.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/multi/test_conversion.py 
b/pandas/tests/indexes/multi/test_conversion.py index bbbdcbbe6d3a1..bc38d45e0daa7 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd import pandas.util.testing as tm -import pytest from pandas import DataFrame, MultiIndex, date_range from pandas.compat import range From 380595f6b2ba1a2598c92baaf99ed05a139d38be Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 27 Oct 2018 11:57:34 -0700 Subject: [PATCH 07/11] Returned identity instead of copy for non MI --- pandas/core/indexes/base.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8a177f42ef159..60022316c5ae3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1115,15 +1115,23 @@ def to_index(self, sep=None): ------- pd.Index : an Index with the MultiIndex data represented in Tuples. - See also + Notes + ----- + This method will simply return the caller if called by anything other + than a MultiIndex. + + See Also -------- Index """ if sep is not None: # TODO: Add support for separator to return strs instad of tuples raise NotImplementedError - else: - idx = Index(self.values, tupleize_cols=False) + + if not isinstance(self, ABCMultiIndex): + return self + + idx = Index(self.values, tupleize_cols=False) return idx From be06e7d9470e2d4b549c4bedd91cc0cafcfa9b3f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Sat, 27 Oct 2018 12:05:31 -0700 Subject: [PATCH 08/11] Fixed up docstring --- pandas/core/indexes/base.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 60022316c5ae3..8eb1ea6429a49 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1111,6 +1111,11 @@ def to_index(self, sep=None): .. 
versionadded:: 0.24.0 + Parameters + ---------- + sep : str, optional + Not yet implemented. + Returns ------- pd.Index : an Index with the MultiIndex data represented in Tuples. @@ -1122,7 +1127,18 @@ def to_index(self, sep=None): See Also -------- - Index + to_series : Similar method to construct a Series. + to_frame : Similar method to construct a DataFrame. + + Examples + -------- + >>> index = pd.MultiIndex.from_product( + ... [['foo', 'bar'], ['baz', 'qux']], + ... names=['a', 'b']) + >>> index.to_index() + Index([('foo', 'baz'), ('foo', 'qux'), + ('bar', 'baz'), ('bar', 'qux')], + dtype='object') """ if sep is not None: # TODO: Add support for separator to return strs instad of tuples From 1ac62004811701a14d718c2296f13c153d96232f Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 12 Nov 2018 21:41:49 -0800 Subject: [PATCH 09/11] Changed method name; removed sep parameter --- doc/source/api.rst | 2 +- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/indexes/base.py | 21 ++++--------------- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/test_conversion.py | 4 ++-- pandas/tests/indexes/test_base.py | 4 ---- 6 files changed, 9 insertions(+), 26 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 2a1c38a7716bb..81bb420c47a99 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1724,7 +1724,7 @@ MultiIndex Components MultiIndex.set_levels MultiIndex.set_labels MultiIndex.to_hierarchical - MultiIndex.to_index + MultiIndex.to_flat_index MultiIndex.to_frame MultiIndex.is_lexsorted MultiIndex.sortlevel diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 4602a70c92700..46abc16f1b96a 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -238,7 +238,7 @@ Other Enhancements - :func:`~DataFrame.to_parquet` now supports writing a ``DataFrame`` as a directory of parquet files partitioned by a subset of the columns when ``engine = 'pyarrow'`` (:issue:`23283`) - 
:meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` have gained the ``nonexistent`` argument for alternative handling of nonexistent times. See :ref:`timeseries.timezone_nonexistent` (:issue:`8917`) - :meth:`read_excel()` now accepts ``usecols`` as a list of column names or callable (:issue:`18273`) -- :meth:`MultiIndex.to_index` has been added to flatten multiple levels into a single-level :class:`Index` object. +- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object. .. _whatsnew_0240.api_breaking: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ccd64c445c31a..a84064a7782af 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1113,45 +1113,32 @@ def _format_attrs(self): """ return format_object_attrs(self) - def to_index(self, sep=None): + def to_flat_index(self): """ Convert a MultiIndex to an Index of Tuples containing the level values. .. versionadded:: 0.24.0 - Parameters - ---------- - sep : str, optional - Not yet implemented. - Returns ------- - pd.Index : an Index with the MultiIndex data represented in Tuples. + pd.Index + Index with the MultiIndex data represented in Tuples. Notes ----- This method will simply return the caller if called by anything other than a MultiIndex. - See Also - -------- - to_series : Similar method to construct a Series. - to_frame : Similar method to construct a DataFrame. - Examples -------- >>> index = pd.MultiIndex.from_product( ... [['foo', 'bar'], ['baz', 'qux']], ... 
names=['a', 'b']) - >>> index.to_index() + >>> index.to_flat_index() Index([('foo', 'baz'), ('foo', 'qux'), ('bar', 'baz'), ('bar', 'qux')], dtype='object') """ - if sep is not None: - # TODO: Add support for separator to return strs instad of tuples - raise NotImplementedError - if not isinstance(self, ABCMultiIndex): return self diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1065369ea302c..ca546a21c05ec 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -193,7 +193,7 @@ class MultiIndex(Index): set_levels set_labels to_frame - to_index + to_flat_index is_lexsorted sortlevel droplevel diff --git a/pandas/tests/indexes/multi/test_conversion.py b/pandas/tests/indexes/multi/test_conversion.py index 4013ceec85c02..fb734b016518e 100644 --- a/pandas/tests/indexes/multi/test_conversion.py +++ b/pandas/tests/indexes/multi/test_conversion.py @@ -172,9 +172,9 @@ def test_to_series_with_arguments(idx): assert s.name != idx.name -def test_to_index(idx): +def test_to_flat_index(idx): expected = pd.Index((('foo', 'one'), ('foo', 'two'), ('bar', 'one'), ('baz', 'two'), ('qux', 'one'), ('qux', 'two')), tupleize_cols=False) - result = idx.to_index() + result = idx.to_flat_index() tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ddf2970057f6e..d285900c8c216 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2274,10 +2274,6 @@ def test_to_index(self, indices): result = indices.to_index() tm.assert_index_equal(result, indices) - def test_to_index_sep_raises(self, indices): - with pytest.raises(NotImplementedError): - indices.to_index(sep="") - class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ From f7a2342dc706832cdb5e1f9073ee4af8190ff1a8 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 12 Nov 2018 21:47:35 -0800 Subject: [PATCH 10/11] Fixed broken test --- 
pandas/tests/indexes/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d285900c8c216..619f60a42e0be 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -2266,12 +2266,12 @@ def test_tab_complete_warning(self, ip): with provisionalcompleter('ignore'): list(ip.Completer.completions('idx.', 4)) - def test_to_index(self, indices): + def test_to_flat_index(self, indices): # 22866 if isinstance(indices, MultiIndex): pytest.skip("Separate expectation for MultiIndex") - result = indices.to_index() + result = indices.to_flat_index() tm.assert_index_equal(result, indices) From bea4e85520a030ba5dcc8e98f73077d284641da0 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 13 Nov 2018 07:32:18 -0800 Subject: [PATCH 11/11] Moved to inheritance model --- pandas/core/indexes/base.py | 31 +++++++++---------------------- pandas/core/indexes/multi.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a84064a7782af..ff2562a4480bc 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1115,36 +1115,23 @@ def _format_attrs(self): def to_flat_index(self): """ - Convert a MultiIndex to an Index of Tuples containing the level values. + Identity method. .. versionadded:: 0.24.0 + This is implemented for compatibility with subclass implementations + when chaining. + Returns ------- pd.Index - Index with the MultiIndex data represented in Tuples. - - Notes - ----- - This method will simply return the caller if called by anything other - than a MultiIndex. + Caller. - Examples + See Also -------- - >>> index = pd.MultiIndex.from_product( - ... [['foo', 'bar'], ['baz', 'qux']], - ... 
names=['a', 'b']) - >>> index.to_flat_index() - Index([('foo', 'baz'), ('foo', 'qux'), - ('bar', 'baz'), ('bar', 'qux')], - dtype='object') - """ - if not isinstance(self, ABCMultiIndex): - return self - - idx = Index(self.values, tupleize_cols=False) - - return idx + MultiIndex.to_flat_index : Subclass implementation. + """ + return self def to_series(self, index=None, name=None): """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ca546a21c05ec..310e7c2bd95d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1247,6 +1247,34 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): FutureWarning, stacklevel=2) return MultiIndex(levels=levels, labels=labels, names=names) + def to_flat_index(self): + """ + Convert a MultiIndex to an Index of Tuples containing the level values. + + .. versionadded:: 0.24.0 + + Returns + ------- + pd.Index + Index with the MultiIndex data represented in Tuples. + + Notes + ----- + On any Index other than a MultiIndex, :meth:`Index.to_flat_index` + simply returns the caller. + + Examples + -------- + >>> index = pd.MultiIndex.from_product( + ... [['foo', 'bar'], ['baz', 'qux']], + ... names=['a', 'b']) + >>> index.to_flat_index() + Index([('foo', 'baz'), ('foo', 'qux'), + ('bar', 'baz'), ('bar', 'qux')], + dtype='object') + """ + return Index(self.values, tupleize_cols=False) + @property def is_all_dates(self): return False