Skip to content

Commit 07dc117

Browse files
gfyoung authored and jreback committed
BUG: Allow merging on Index vectors (#19073)
* BUG: Allow merging on Index vectors

  This behavior used to work in v0.19.0 and is consistent with the documentation.

  Closes gh-19038

* ENH: Add is_array_like method

  Used for abstracting checks in DataFrame.merge, but the function itself can be quite useful.
1 parent c0e3767 commit 07dc117

File tree

7 files changed

+84
-7
lines changed

7 files changed

+84
-7
lines changed

doc/source/whatsnew/v0.23.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ Reshaping
407407
- Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`)
408408
- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
409409
- Bug in :func:`DataFrame.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`)
410-
410+
- Bug in :func:`DataFrame.merge` in which merging using ``Index`` objects as vectors raised an Exception (:issue:`19038`)
411411

412412
Numeric
413413
^^^^^^^

pandas/core/dtypes/api.py

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
is_dict_like,
5656
is_iterator,
5757
is_file_like,
58+
is_array_like,
5859
is_list_like,
5960
is_hashable,
6061
is_named_tuple)

pandas/core/dtypes/inference.py

+33
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,39 @@ def is_list_like(obj):
267267
not isinstance(obj, string_and_binary_types))
268268

269269

270+
def is_array_like(obj):
    """
    Check if the object is array-like.

    For an object to be considered array-like, it must be list-like and
    have a `dtype` attribute.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    is_array_like : bool
        Whether `obj` has array-like properties.

    Examples
    --------
    >>> is_array_like(np.array([1, 2, 3]))
    True
    >>> is_array_like(pd.Series(["a", "b"]))
    True
    >>> is_array_like(pd.Index(["2016-01-01"]))
    True
    >>> is_array_like([1, 2, 3])
    False
    >>> is_array_like(("a", "b"))
    False
    """
    # Plain lists and tuples are list-like but carry no ``dtype``, so the
    # attribute check is what separates them from arrays, Series and Index.
    return is_list_like(obj) and hasattr(obj, "dtype")
301+
302+
270303
def is_nested_list_like(obj):
271304
"""
272305
Check if the object is list-like, and that all of its elements

pandas/core/reshape/merge.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
from pandas.compat import range, lzip, zip, map, filter
1111
import pandas.compat as compat
1212

13-
from pandas import (Categorical, Series, DataFrame,
13+
from pandas import (Categorical, DataFrame,
1414
Index, MultiIndex, Timedelta)
1515
from pandas.core.frame import _merge_doc
1616
from pandas.core.dtypes.common import (
1717
is_datetime64tz_dtype,
1818
is_datetime64_dtype,
1919
needs_i8_conversion,
2020
is_int64_dtype,
21+
is_array_like,
2122
is_categorical_dtype,
2223
is_integer_dtype,
2324
is_float_dtype,
@@ -814,13 +815,12 @@ def _get_merge_keys(self):
814815
join_names = []
815816
right_drop = []
816817
left_drop = []
818+
817819
left, right = self.left, self.right
818820
stacklevel = 5 # Number of stack levels from df.merge
819821

820-
is_lkey = lambda x: isinstance(
821-
x, (np.ndarray, Series)) and len(x) == len(left)
822-
is_rkey = lambda x: isinstance(
823-
x, (np.ndarray, Series)) and len(x) == len(right)
822+
is_lkey = lambda x: is_array_like(x) and len(x) == len(left)
823+
is_rkey = lambda x: is_array_like(x) and len(x) == len(right)
824824

825825
# Note that pd.merge_asof() has separate 'on' and 'by' parameters. A
826826
# user could, for example, request 'left_index' and 'left_by'. In a

pandas/tests/api/test_types.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ class TestTypes(Base):
3030
'is_period_dtype', 'is_interval', 'is_interval_dtype',
3131
'is_re', 'is_re_compilable',
3232
'is_dict_like', 'is_iterator', 'is_file_like',
33-
'is_list_like', 'is_hashable',
33+
'is_list_like', 'is_hashable', 'is_array_like',
3434
'is_named_tuple',
3535
'pandas_dtype', 'union_categoricals', 'infer_dtype']
3636
deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence']

pandas/tests/dtypes/test_inference.py

+17
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,23 @@ def test_is_list_like_fails(ll):
7878
assert not inference.is_list_like(ll)
7979

8080

81+
def test_is_array_like():
    """Check ``inference.is_array_like`` on array-likes and non-array-likes."""
    assert inference.is_array_like(Series([]))
    assert inference.is_array_like(Series([1, 2]))
    assert inference.is_array_like(np.array(["a", "b"]))
    assert inference.is_array_like(Index(["2016-01-01"]))

    class DtypeList(list):
        # A list subclass that exposes ``dtype`` should count as array-like.
        dtype = "special"

    assert inference.is_array_like(DtypeList())

    # List-likes without a ``dtype`` and non-list-likes are rejected.
    assert not inference.is_array_like([1, 2, 3])
    assert not inference.is_array_like(tuple())
    assert not inference.is_array_like("foo")
    assert not inference.is_array_like(123)
96+
97+
8198
@pytest.mark.parametrize('inner', [
8299
[], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]),
83100
Series([]), Series(['a']).str, (x for x in range(5))

pandas/tests/reshape/merge/test_merge.py

+26
Original file line numberDiff line numberDiff line change
@@ -1370,6 +1370,32 @@ def f():
13701370
household.join(log_return, how='outer')
13711371
pytest.raises(NotImplementedError, f)
13721372

1373+
@pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
def test_merge_datetime_index(self, klass):
    """Merge a frame with itself using a vector derived from its index."""
    # see gh-19038
    df = DataFrame([1, 2, 3],
                   ["2016-01-01", "2017-01-01", "2018-01-01"],
                   columns=["a"])
    df.index = pd.to_datetime(df.index)
    on_vector = df.index.year

    # ``None`` means "use the value returned by ``df.index.year`` as-is";
    # otherwise wrap it in the requested container type.
    if klass is not None:
        on_vector = klass(on_vector)

    expected = DataFrame({"a": [1, 2, 3]})

    # Identity check: ``klass`` is either ``None`` or one of the callables
    # listed in the parametrization above.
    if klass is np.asarray:
        # The join key is added for ndarray.
        expected["key_1"] = [2016, 2017, 2018]

    result = df.merge(df, on=["a", on_vector], how="inner")
    tm.assert_frame_equal(result, expected)

    expected = DataFrame({"a_x": [1, 2, 3],
                          "a_y": [1, 2, 3]})
    result = df.merge(df, on=[df.index.year], how="inner")
    tm.assert_frame_equal(result, expected)
1398+
13731399

13741400
class TestMergeDtypes(object):
13751401

0 commit comments

Comments
 (0)