Skip to content

Commit f1b00b8

Browse files
authored
BUG: Fix MultiIndex from_tuples on tuples with NaNs (#60944)
1 parent 0acb9a0 commit f1b00b8

File tree

5 files changed

+55
-21
lines changed

5 files changed

+55
-21
lines changed

doc/source/whatsnew/v3.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -714,7 +714,7 @@ MultiIndex
714714
- Bug in :func:`MultiIndex.get_level_values` where accessing a :class:`DatetimeIndex` level did not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
715715
- Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`)
716716
- Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`)
717-
-
717+
- Bug in :meth:`MultiIndex.from_tuples` producing incorrect output when the input tuples contain NaN values or have differing lengths (:issue:`60695`, :issue:`60988`)
718718

719719
I/O
720720
^^^

pandas/core/algorithms.py

+6
Original file line numberDiff line numberDiff line change
@@ -1647,6 +1647,8 @@ def map_array(
16471647
If the function returns a tuple with more than one element
16481648
a MultiIndex will be returned.
16491649
"""
1650+
from pandas import Index
1651+
16501652
if na_action not in (None, "ignore"):
16511653
msg = f"na_action must either be 'ignore' or None, {na_action} was passed"
16521654
raise ValueError(msg)
@@ -1676,6 +1678,10 @@ def map_array(
16761678

16771679
if len(mapper) == 0:
16781680
mapper = Series(mapper, dtype=np.float64)
1681+
elif isinstance(mapper, dict):
1682+
mapper = Series(
1683+
mapper.values(), index=Index(mapper.keys(), tupleize_cols=False)
1684+
)
16791685
else:
16801686
mapper = Series(mapper)
16811687

pandas/core/indexes/multi.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Sequence,
1010
)
1111
from functools import wraps
12+
from itertools import zip_longest
1213
from sys import getsizeof
1314
from typing import (
1415
TYPE_CHECKING,
@@ -588,7 +589,7 @@ def from_tuples(
588589
elif isinstance(tuples, list):
589590
arrays = list(lib.to_object_array_tuples(tuples).T)
590591
else:
591-
arrs = zip(*tuples)
592+
arrs = zip_longest(*tuples, fillvalue=np.nan)
592593
arrays = cast(list[Sequence[Hashable]], arrs)
593594

594595
return cls.from_arrays(arrays, sortorder=sortorder, names=names)

pandas/tests/indexes/multi/test_constructors.py

+13
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,19 @@ def test_from_tuples_with_tuple_label():
410410
tm.assert_frame_equal(expected, result)
411411

412412

413+
@pytest.mark.parametrize(
414+
"keys, expected",
415+
[
416+
((("l1",), ("l1", "l2")), (("l1", np.nan), ("l1", "l2"))),
417+
((("l1", "l2"), ("l1",)), (("l1", "l2"), ("l1", np.nan))),
418+
],
419+
)
420+
def test_from_tuples_with_various_tuple_lengths(keys, expected):
421+
# GH 60695
422+
idx = MultiIndex.from_tuples(keys)
423+
assert tuple(idx) == expected
424+
425+
413426
# ----------------------------------------------------------------------------
414427
# from_product
415428
# ----------------------------------------------------------------------------

pandas/tests/series/test_constructors.py

+33-19
Original file line numberDiff line numberDiff line change
@@ -1441,10 +1441,17 @@ def test_constructor_tuple_of_tuples(self):
14411441
s = Series(data)
14421442
assert tuple(s) == data
14431443

1444-
def test_constructor_dict_of_tuples(self):
1445-
data = {(1, 2): 3, (None, 5): 6}
1444+
@pytest.mark.parametrize(
1445+
"data, expected_values, expected_index",
1446+
[
1447+
({(1, 2): 3, (None, 5): 6}, [3, 6], [(1, 2), (None, 5)]),
1448+
({(1,): 3, (4, 5): 6}, [3, 6], [(1, None), (4, 5)]),
1449+
],
1450+
)
1451+
def test_constructor_dict_of_tuples(self, data, expected_values, expected_index):
1452+
# GH 60695
14461453
result = Series(data).sort_values()
1447-
expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)]))
1454+
expected = Series(expected_values, index=MultiIndex.from_tuples(expected_index))
14481455
tm.assert_series_equal(result, expected)
14491456

14501457
# https://github.com/pandas-dev/pandas/issues/22698
@@ -1860,23 +1867,30 @@ class A(OrderedDict):
18601867
series = Series(A(data))
18611868
tm.assert_series_equal(series, expected)
18621869

1863-
def test_constructor_dict_multiindex(self):
1864-
d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
1865-
_d = sorted(d.items())
1866-
result = Series(d)
1867-
expected = Series(
1868-
[x[1] for x in _d], index=MultiIndex.from_tuples([x[0] for x in _d])
1869-
)
1870-
tm.assert_series_equal(result, expected)
1870+
@pytest.mark.parametrize(
1871+
"data, expected_index_multi",
1872+
[
1873+
({("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, True),
1874+
({("a",): 0.0, ("a", "b"): 1.0}, True),
1875+
({"z": 111.0, ("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}, False),
1876+
],
1877+
)
1878+
def test_constructor_dict_multiindex(self, data, expected_index_multi):
1879+
# GH#60695
1880+
result = Series(data)
18711881

1872-
d["z"] = 111.0
1873-
_d.insert(0, ("z", d["z"]))
1874-
result = Series(d)
1875-
expected = Series(
1876-
[x[1] for x in _d], index=Index([x[0] for x in _d], tupleize_cols=False)
1877-
)
1878-
result = result.reindex(index=expected.index)
1879-
tm.assert_series_equal(result, expected)
1882+
if expected_index_multi:
1883+
expected = Series(
1884+
list(data.values()),
1885+
index=MultiIndex.from_tuples(list(data.keys())),
1886+
)
1887+
tm.assert_series_equal(result, expected)
1888+
else:
1889+
expected = Series(
1890+
list(data.values()),
1891+
index=Index(list(data.keys())),
1892+
)
1893+
tm.assert_series_equal(result, expected)
18801894

18811895
def test_constructor_dict_multiindex_reindex_flat(self):
18821896
# construction involves reindexing with a MultiIndex corner case

0 commit comments

Comments
 (0)