Skip to content

Commit ad2e98c

Browse files
mroeschke authored and jreback committed
ERR/TST: Raise NotImplementedError in to_hdf for extension dtypes in MultiIndex (#27144)
1 parent b870dee commit ad2e98c

File tree

6 files changed

+96
-6
lines changed

6 files changed

+96
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ Other API changes
566566
- Removed support of gtk package for clipboards (:issue:`26563`)
567567
- Using an unsupported version of Beautiful Soup 4 will now raise an ``ImportError`` instead of a ``ValueError`` (:issue:`27063`)
568568
- :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` will now raise a ``ValueError`` when saving timezone aware data. (:issue:`27008`, :issue:`7056`)
569+
- :meth:`DataFrame.to_hdf` and :meth:`Series.to_hdf` will now raise a ``NotImplementedError`` when saving a :class:`MultiIndex` with extension data types for a ``fixed`` format. (:issue:`7775`)
569570

570571
.. _whatsnew_0250.deprecations:
571572

@@ -719,6 +720,7 @@ Timezones
719720
- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`)
720721
- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`)
721722
- Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`)
723+
- Bug in :meth:`DatetimeIndex.union` when combining a timezone aware and timezone unaware :class:`DatetimeIndex` (:issue:`21671`)
722724

723725
Numeric
724726
^^^^^^^
@@ -814,6 +816,7 @@ I/O
814816
- :func:`read_excel` now raises a ``ValueError`` when input is of type :class:`pandas.io.excel.ExcelFile` and ``engine`` param is passed since :class:`pandas.io.excel.ExcelFile` has an engine defined (:issue:`26566`)
815817
- Bug while selecting from :class:`HDFStore` with ``where=''`` specified (:issue:`26610`).
816818
- Fixed bug in :meth:`DataFrame.to_excel` where custom objects (i.e. ``PeriodIndex``) inside merged cells were not being converted into types safe for the Excel writer (:issue:`27006`)
819+
- Bug in :func:`read_hdf` where reading a timezone aware :class:`DatetimeIndex` would raise a ``TypeError`` (:issue:`11926`)
817820

818821
Plotting
819822
^^^^^^^^
@@ -868,6 +871,7 @@ Reshaping
868871
- Bug in :meth:`Series.nlargest` where ``True`` was treated as smaller than ``False`` (:issue:`26154`)
869872
- Bug in :func:`DataFrame.pivot_table` with a :class:`IntervalIndex` as pivot index would raise ``TypeError`` (:issue:`25814`)
870873
- Bug in :meth:`DataFrame.transpose` where transposing a DataFrame with a timezone-aware datetime column would incorrectly raise ``ValueError`` (:issue:`26825`)
874+
- Bug in :func:`pivot_table` when pivoting a timezone aware column as the ``values`` would remove timezone information (:issue:`14948`)
871875

872876
Sparse
873877
^^^^^^

pandas/io/pytables.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323

2424
from pandas.core.dtypes.common import (
2525
ensure_object, is_categorical_dtype, is_datetime64_dtype,
26-
is_datetime64tz_dtype, is_list_like, is_timedelta64_dtype)
26+
is_datetime64tz_dtype, is_extension_type, is_list_like,
27+
is_timedelta64_dtype)
2728
from pandas.core.dtypes.missing import array_equivalent
2829

2930
from pandas import (
@@ -2647,6 +2648,9 @@ def write_multi_index(self, key, index):
26472648
index.codes,
26482649
index.names)):
26492650
# write the level
2651+
if is_extension_type(lev):
2652+
raise NotImplementedError("Saving a MultiIndex with an "
2653+
"extension dtype is not supported.")
26502654
level_key = '{key}_level{idx}'.format(key=key, idx=i)
26512655
conv_level = _convert_index(lev, self.encoding, self.errors,
26522656
self.format_type).set_name(level_key)

pandas/tests/indexes/datetimes/test_timezones.py

+12
Original file line numberDiff line numberDiff line change
@@ -1094,6 +1094,18 @@ def test_dti_union_aware(self):
10941094
assert result[0].tz.zone == 'US/Central'
10951095
assert result[-1].tz.zone == 'US/Eastern'
10961096

1097+
def test_dti_union_mixed(self):
1098+
# GH 21671
1099+
rng = DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT])
1100+
rng2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02'], tz='Asia/Tokyo')
1101+
result = rng.union(rng2)
1102+
expected = Index([pd.Timestamp('2011-01-01'),
1103+
pd.NaT,
1104+
pd.Timestamp('2012-01-01', tz='Asia/Tokyo'),
1105+
pd.Timestamp('2012-01-02', tz='Asia/Tokyo')],
1106+
dtype=object)
1107+
tm.assert_index_equal(result, expected)
1108+
10971109
@pytest.mark.parametrize('tz', [None, 'UTC', "US/Central",
10981110
dateutil.tz.tzoffset(None, -28800)])
10991111
@pytest.mark.usefixtures("datetime_tz_utc")

pandas/tests/indexes/multi/test_format.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
def test_dtype_str(indices):
1111
with tm.assert_produces_warning(FutureWarning):
1212
dtype = indices.dtype_str
13-
assert isinstance(dtype, str)
14-
assert dtype == str(indices.dtype)
13+
assert isinstance(dtype, str)
14+
assert dtype == str(indices.dtype)
1515

1616

1717
def test_format(idx):

pandas/tests/io/pytables/test_pytables.py

+33-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717

1818
import pandas as pd
1919
from pandas import (
20-
Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex,
21-
RangeIndex, Series, Timestamp, bdate_range, concat, date_range, isna,
22-
timedelta_range)
20+
Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, Int64Index,
21+
MultiIndex, RangeIndex, Series, Timestamp, bdate_range, concat, date_range,
22+
isna, timedelta_range)
2323
import pandas.util.testing as tm
2424
from pandas.util.testing import (
2525
assert_frame_equal, assert_series_equal, set_timezone)
@@ -4749,6 +4749,19 @@ def test_select_empty_where(self, where):
47494749
result = pd.read_hdf(store, "df", where=where)
47504750
assert_frame_equal(result, df)
47514751

4752+
@pytest.mark.parametrize('idx', [
4753+
date_range('2019', freq='D', periods=3, tz='UTC'),
4754+
CategoricalIndex(list('abc'))
4755+
])
4756+
def test_to_hdf_multiindex_extension_dtype(self, idx):
4757+
# GH 7775
4758+
mi = MultiIndex.from_arrays([idx, idx])
4759+
df = pd.DataFrame(0, index=mi, columns=['a'])
4760+
with ensure_clean_path(self.path) as path:
4761+
with pytest.raises(NotImplementedError,
4762+
match="Saving a MultiIndex"):
4763+
df.to_hdf(path, 'df')
4764+
47524765

47534766
class TestHDFComplexValues(Base):
47544767
# GH10447
@@ -5170,3 +5183,20 @@ def test_dst_transitions(self):
51705183
store.append('df', df)
51715184
result = store.select('df')
51725185
assert_frame_equal(result, df)
5186+
5187+
def test_read_with_where_tz_aware_index(self):
5188+
# GH 11926
5189+
periods = 10
5190+
dts = pd.date_range('20151201', periods=periods,
5191+
freq='D', tz='UTC')
5192+
mi = pd.MultiIndex.from_arrays([dts, range(periods)],
5193+
names=['DATE', 'NO'])
5194+
expected = pd.DataFrame({'MYCOL': 0}, index=mi)
5195+
5196+
key = 'mykey'
5197+
with ensure_clean_path(self.path) as path:
5198+
with pd.HDFStore(path) as store:
5199+
store.append(key, expected, format='table', append=True)
5200+
result = pd.read_hdf(path, key,
5201+
where="DATE > 20151130")
5202+
assert_frame_equal(result, expected)

pandas/tests/reshape/test_pivot.py

+40
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,46 @@ def test_pivot_with_tz(self, method):
429429
pv = pd.pivot(df, index='dt1', columns='dt2', values='data1')
430430
tm.assert_frame_equal(pv, expected)
431431

432+
def test_pivot_tz_in_values(self):
433+
# GH 14948
434+
df = pd.DataFrame([{'uid': u'aa',
435+
'ts': pd.Timestamp('2016-08-12 13:00:00-0700',
436+
tz='US/Pacific')},
437+
{'uid': u'aa',
438+
'ts': pd.Timestamp('2016-08-12 08:00:00-0700',
439+
tz='US/Pacific')},
440+
{'uid': u'aa',
441+
'ts': pd.Timestamp('2016-08-12 14:00:00-0700',
442+
tz='US/Pacific')},
443+
{'uid': u'aa',
444+
'ts': pd.Timestamp('2016-08-25 11:00:00-0700',
445+
tz='US/Pacific')},
446+
{'uid': u'aa',
447+
'ts': pd.Timestamp('2016-08-25 13:00:00-0700',
448+
tz='US/Pacific')}])
449+
450+
df = df.set_index('ts').reset_index()
451+
mins = df.ts.map(lambda x: x.replace(hour=0, minute=0,
452+
second=0, microsecond=0))
453+
454+
result = pd.pivot_table(df.set_index('ts').reset_index(),
455+
values='ts', index=['uid'], columns=[mins],
456+
aggfunc=np.min)
457+
expected = pd.DataFrame(
458+
[
459+
[pd.Timestamp('2016-08-12 08:00:00-0700', tz='US/Pacific'),
460+
pd.Timestamp('2016-08-25 11:00:00-0700', tz='US/Pacific')]
461+
],
462+
index=pd.Index(['aa'], name='uid'),
463+
columns=pd.DatetimeIndex(
464+
[
465+
pd.Timestamp('2016-08-12 00:00:00', tz='US/Pacific'),
466+
pd.Timestamp('2016-08-25 00:00:00', tz='US/Pacific')
467+
],
468+
name='ts')
469+
)
470+
tm.assert_frame_equal(result, expected)
471+
432472
@pytest.mark.parametrize('method', [True, False])
433473
def test_pivot_periods(self, method):
434474
df = DataFrame({'p1': [pd.Period('2013-01-01', 'D'),

0 commit comments

Comments
 (0)