Skip to content

Commit c0a4964

Browse files
h-vetinari and jreback
authored and committed
DEPR: join_axes-kwarg in pd.concat (#22318)
1 parent 9ac75ca commit c0a4964

File tree

7 files changed

+63
-28
lines changed

7 files changed

+63
-28
lines changed

doc/source/user_guide/merging.rst

+10-8
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,8 @@ some configurable handling of "what to do with the other axes":
7070

7171
::
7272

73-
pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
74-
keys=None, levels=None, names=None, verify_integrity=False,
75-
copy=True)
73+
pd.concat(objs, axis=0, join='outer', ignore_index=False, keys=None,
74+
levels=None, names=None, verify_integrity=False, copy=True)
7675

7776
* ``objs`` : a sequence or mapping of Series or DataFrame objects. If a
7877
dict is passed, the sorted keys will be used as the `keys` argument, unless
@@ -87,8 +86,6 @@ some configurable handling of "what to do with the other axes":
8786
n - 1. This is useful if you are concatenating objects where the
8887
concatenation axis does not have meaningful indexing information. Note
8988
the index values on the other axes are still respected in the join.
90-
* ``join_axes`` : list of Index objects. Specific indexes to use for the other
91-
n - 1 axes instead of performing inner/outer set logic.
9289
* ``keys`` : sequence, default None. Construct hierarchical index using the
9390
passed keys as the outermost level. If multiple levels passed, should
9491
contain tuples.
@@ -147,12 +144,11 @@ Set logic on the other axes
147144

148145
When gluing together multiple DataFrames, you have a choice of how to handle
149146
the other axes (other than the one being concatenated). This can be done in
150-
the following three ways:
147+
the following two ways:
151148

152149
* Take the union of them all, ``join='outer'``. This is the default
153150
option as it results in zero information loss.
154151
* Take the intersection, ``join='inner'``.
155-
* Use a specific index, as passed to the ``join_axes`` argument.
156152

157153
Here is an example of each of these methods. First, the default ``join='outer'``
158154
behavior:
@@ -202,7 +198,13 @@ DataFrame:
202198

203199
.. ipython:: python
204200
205-
result = pd.concat([df1, df4], axis=1, join_axes=[df1.index])
201+
result = pd.concat([df1, df4], axis=1).reindex(df1.index)
202+
203+
Similarly, we could reindex before the concatenation:
204+
205+
.. ipython:: python
206+
207+
pd.concat([df1, df4.reindex(df1.index)], axis=1)
206208
207209
.. ipython:: python
208210
:suppress:

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -809,6 +809,7 @@ Other deprecations
809809
810810
- The deprecated ``.ix[]`` indexer now raises a more visible ``FutureWarning`` instead of ``DeprecationWarning`` (:issue:`26438`).
811811
- Deprecated the ``units=M`` (months) and ``units=Y`` (year) parameters for ``units`` of :func:`pandas.to_timedelta`, :func:`pandas.Timedelta` and :func:`pandas.TimedeltaIndex` (:issue:`16344`)
812+
- :func:`pandas.concat` has deprecated the ``join_axes`` keyword. Instead, use :meth:`DataFrame.reindex` or :meth:`DataFrame.reindex_like` on the result or on the inputs (:issue:`21951`)
812813
- The :attr:`SparseArray.values` attribute is deprecated. You can use ``np.asarray(...)`` or
813814
the :meth:`SparseArray.to_dense` method instead (:issue:`26421`).
814815
- The functions :func:`pandas.to_datetime` and :func:`pandas.to_timedelta` have deprecated the ``box`` keyword. Instead, use :meth:`to_numpy` or :meth:`Timestamp.to_datetime64` or :meth:`Timedelta.to_timedelta64`. (:issue:`24416`)

pandas/core/frame.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -6806,12 +6806,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
68066806
# join indexes only using concat
68076807
if can_concat:
68086808
if how == 'left':
6809-
how = 'outer'
6810-
join_axes = [self.index]
6809+
res = concat(frames, axis=1, join='outer',
6810+
verify_integrity=True)
6811+
return res.reindex(self.index, copy=False)
68116812
else:
6812-
join_axes = None
6813-
return concat(frames, axis=1, join=how, join_axes=join_axes,
6814-
verify_integrity=True)
6813+
return concat(frames, axis=1, join=how,
6814+
verify_integrity=True)
68156815

68166816
joined = frames[0]
68176817

pandas/core/generic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -9716,7 +9716,8 @@ def describe_1d(data):
97169716
if name not in names:
97179717
names.append(name)
97189718

9719-
d = pd.concat(ldesc, join_axes=pd.Index([names]), axis=1)
9719+
d = pd.concat([x.reindex(names, copy=False) for x in ldesc],
9720+
axis=1, sort=False)
97209721
d.columns = data.columns.copy()
97219722
return d
97229723

pandas/core/groupby/generic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -562,8 +562,10 @@ def _transform_general(self, func, *args, **kwargs):
562562
applied.append(res)
563563

564564
concat_index = obj.columns if self.axis == 0 else obj.index
565-
concatenated = concat(applied, join_axes=[concat_index],
566-
axis=self.axis, verify_integrity=False)
565+
other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1
566+
concatenated = concat(applied, axis=self.axis, verify_integrity=False)
567+
concatenated = concatenated.reindex(concat_index, axis=other_axis,
568+
copy=False)
567569
return self._set_result_index_ordered(concatenated)
568570

569571
@Substitution(klass='DataFrame', selected='')

pandas/core/reshape/concat.py

+21-12
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
concat routines
33
"""
44

5+
import warnings
6+
57
import numpy as np
68

79
import pandas.core.dtypes.concat as _concat
@@ -44,8 +46,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
4446
join : {'inner', 'outer'}, default 'outer'
4547
How to handle indexes on other axis (or axes).
4648
join_axes : list of Index objects
49+
.. deprecated:: 0.25.0
50+
4751
Specific indexes to use for the other n - 1 axes instead of performing
48-
inner/outer set logic.
52+
inner/outer set logic. Use .reindex() before or after concatenation
53+
as a replacement.
4954
ignore_index : bool, default False
5055
If True, do not use the index values along the concatenation axis. The
5156
resulting axis will be labeled 0, ..., n - 1. This is useful if you are
@@ -221,11 +226,11 @@ def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
221226
...
222227
ValueError: Indexes have overlapping values: ['a']
223228
"""
224-
op = _Concatenator(objs, axis=axis, join_axes=join_axes,
225-
ignore_index=ignore_index, join=join,
226-
keys=keys, levels=levels, names=names,
227-
verify_integrity=verify_integrity,
229+
op = _Concatenator(objs, axis=axis, ignore_index=ignore_index, join=join,
230+
join_axes=join_axes, keys=keys, levels=levels,
231+
names=names, verify_integrity=verify_integrity,
228232
copy=copy, sort=sort)
233+
229234
return op.get_result()
230235

231236

@@ -234,10 +239,9 @@ class _Concatenator:
234239
Orchestrates a concatenation operation for BlockManagers
235240
"""
236241

237-
def __init__(self, objs, axis=0, join='outer', join_axes=None,
238-
keys=None, levels=None, names=None,
239-
ignore_index=False, verify_integrity=False, copy=True,
240-
sort=False):
242+
def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None,
243+
levels=None, names=None, ignore_index=False,
244+
verify_integrity=False, copy=True, sort=False):
241245
if isinstance(objs, (NDFrame, str)):
242246
raise TypeError('first argument must be an iterable of pandas '
243247
'objects, you passed an object of type '
@@ -310,9 +314,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
310314
if sum(obj.shape) > 0 or isinstance(obj, Series)]
311315

312316
if (len(non_empties) and (keys is None and names is None and
313-
levels is None and
314-
join_axes is None and
315-
not self.intersect)):
317+
levels is None and not self.intersect)):
316318
objs = non_empties
317319
sample = objs[0]
318320

@@ -446,7 +448,14 @@ def _get_new_axes(self):
446448
if i == self.axis:
447449
continue
448450
new_axes[i] = self._get_comb_axis(i)
451+
449452
else:
453+
# GH 21951
454+
warnings.warn(
455+
'The join_axes-keyword is deprecated. Use .reindex or '
456+
'.reindex_like on the result to achieve the same '
457+
'functionality.', FutureWarning, stacklevel=4)
458+
450459
if len(self.join_axes) != ndim - 1:
451460
raise AssertionError("length of join_axes must be equal "
452461
"to {length}".format(length=ndim - 1))

pandas/tests/reshape/test_concat.py

+20
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,26 @@ def test_concat_categorical_empty(self):
722722
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
723723
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
724724

725+
def test_concat_join_axes_deprecated(self, axis):
726+
# GH21951
727+
one = pd.DataFrame([[0., 1.], [2., 3.]], columns=list('ab'))
728+
two = pd.DataFrame([[10., 11.], [12., 13.]], index=[1, 2],
729+
columns=list('bc'))
730+
731+
expected = pd.concat([one, two],
732+
axis=1, sort=False).reindex(index=two.index)
733+
with tm.assert_produces_warning(expected_warning=FutureWarning):
734+
result = pd.concat([one, two],
735+
axis=1, sort=False, join_axes=[two.index])
736+
tm.assert_frame_equal(result, expected)
737+
738+
expected = pd.concat([one, two],
739+
axis=0, sort=False).reindex(columns=two.columns)
740+
with tm.assert_produces_warning(expected_warning=FutureWarning):
741+
result = pd.concat([one, two],
742+
axis=0, sort=False, join_axes=[two.columns])
743+
tm.assert_frame_equal(result, expected)
744+
725745

726746
class TestAppend:
727747

0 commit comments

Comments
 (0)