Skip to content

Commit c85a210

Browse files
authored
REF: de-duplicate tzinfo-awareness mismatch checks (#58171)
* REF: de-duplicate tzinfo-awareness mismatch checks
* function->method
1 parent d0da576 commit c85a210

File tree

4 files changed

+83
-85
lines changed

4 files changed

+83
-85
lines changed

Diff for: pandas/_libs/tslib.pyx

+4-43
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ from pandas._libs.tslibs.nattype cimport (
7474
c_nat_strings as nat_strings,
7575
)
7676
from pandas._libs.tslibs.timestamps cimport _Timestamp
77-
from pandas._libs.tslibs.timezones cimport tz_compare
7877

7978
from pandas._libs.tslibs import (
8079
Resolution,
@@ -452,13 +451,9 @@ cpdef array_to_datetime(
452451
ndarray[int64_t] iresult
453452
npy_datetimestruct dts
454453
bint utc_convert = bool(utc)
455-
bint seen_datetime_offset = False
456454
bint is_raise = errors == "raise"
457455
bint is_coerce = errors == "coerce"
458-
bint is_same_offsets
459456
_TSObject tsobj
460-
float tz_offset
461-
set out_tzoffset_vals = set()
462457
tzinfo tz, tz_out = None
463458
cnp.flatiter it = cnp.PyArray_IterNew(values)
464459
NPY_DATETIMEUNIT item_reso
@@ -568,12 +563,12 @@ cpdef array_to_datetime(
568563
# dateutil timezone objects cannot be hashed, so
569564
# store the UTC offsets in seconds instead
570565
nsecs = tz.utcoffset(None).total_seconds()
571-
out_tzoffset_vals.add(nsecs)
572-
seen_datetime_offset = True
566+
state.out_tzoffset_vals.add(nsecs)
567+
state.found_aware_str = True
573568
else:
574569
# Add a marker for naive string, to track if we are
575570
# parsing mixed naive and aware strings
576-
out_tzoffset_vals.add("naive")
571+
state.out_tzoffset_vals.add("naive")
577572
state.found_naive_str = True
578573

579574
else:
@@ -588,41 +583,7 @@ cpdef array_to_datetime(
588583
raise
589584
return values, None
590585

591-
if seen_datetime_offset and not utc_convert:
592-
# GH#17697, GH#57275
593-
# 1) If all the offsets are equal, return one offset for
594-
# the parsed dates to (maybe) pass to DatetimeIndex
595-
# 2) If the offsets are different, then do not force the parsing
596-
# and raise a ValueError: "cannot parse datetimes with
597-
# mixed time zones unless `utc=True`" instead
598-
is_same_offsets = len(out_tzoffset_vals) == 1
599-
if not is_same_offsets:
600-
raise ValueError(
601-
"Mixed timezones detected. Pass utc=True in to_datetime "
602-
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
603-
)
604-
elif state.found_naive or state.found_other:
605-
# e.g. test_to_datetime_mixed_awareness_mixed_types
606-
raise ValueError("Cannot mix tz-aware with tz-naive values")
607-
elif tz_out is not None:
608-
# GH#55693
609-
tz_offset = out_tzoffset_vals.pop()
610-
tz_out2 = timezone(timedelta(seconds=tz_offset))
611-
if not tz_compare(tz_out, tz_out2):
612-
# e.g. test_to_datetime_mixed_tzs_mixed_types
613-
raise ValueError(
614-
"Mixed timezones detected. Pass utc=True in to_datetime "
615-
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
616-
)
617-
# e.g. test_to_datetime_mixed_types_matching_tzs
618-
else:
619-
tz_offset = out_tzoffset_vals.pop()
620-
tz_out = timezone(timedelta(seconds=tz_offset))
621-
elif not utc_convert:
622-
if tz_out and (state.found_other or state.found_naive_str):
623-
# found_other indicates a tz-naive int, float, dt64, or date
624-
# e.g. test_to_datetime_mixed_awareness_mixed_types
625-
raise ValueError("Cannot mix tz-aware with tz-naive values")
586+
tz_out = state.check_for_mixed_inputs(tz_out, utc)
626587

627588
if infer_reso:
628589
if state.creso_ever_changed:

Diff for: pandas/_libs/tslibs/strptime.pxd

+3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@ cdef class DatetimeParseState:
1818
bint found_tz
1919
bint found_naive
2020
bint found_naive_str
21+
bint found_aware_str
2122
bint found_other
2223
bint creso_ever_changed
2324
NPY_DATETIMEUNIT creso
25+
set out_tzoffset_vals
2426

2527
cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert)
2628
cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept
29+
cdef tzinfo check_for_mixed_inputs(self, tzinfo tz_out, bint utc)

Diff for: pandas/_libs/tslibs/strptime.pyx

+62-38
Original file line numberDiff line numberDiff line change
@@ -252,8 +252,11 @@ cdef class DatetimeParseState:
252252
# found_naive_str refers to a string that was parsed to a timezone-naive
253253
# datetime.
254254
self.found_naive_str = False
255+
self.found_aware_str = False
255256
self.found_other = False
256257

258+
self.out_tzoffset_vals = set()
259+
257260
self.creso = creso
258261
self.creso_ever_changed = False
259262

@@ -292,6 +295,58 @@ cdef class DatetimeParseState:
292295
"tz-naive values")
293296
return tz
294297

298+
cdef tzinfo check_for_mixed_inputs(
299+
self,
300+
tzinfo tz_out,
301+
bint utc,
302+
):
303+
cdef:
304+
bint is_same_offsets
305+
float tz_offset
306+
307+
if self.found_aware_str and not utc:
308+
# GH#17697, GH#57275
309+
# 1) If all the offsets are equal, return one offset for
310+
# the parsed dates to (maybe) pass to DatetimeIndex
311+
# 2) If the offsets are different, then do not force the parsing
312+
# and raise a ValueError: "Mixed timezones detected. Pass utc=True
313+
# in to_datetime or tz='UTC' in DatetimeIndex ..." instead
314+
is_same_offsets = len(self.out_tzoffset_vals) == 1
315+
if not is_same_offsets or (self.found_naive or self.found_other):
316+
# e.g. test_to_datetime_mixed_awareness_mixed_types (array_to_datetime)
317+
raise ValueError(
318+
"Mixed timezones detected. Pass utc=True in to_datetime "
319+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
320+
)
321+
elif tz_out is not None:
322+
# GH#55693
323+
tz_offset = self.out_tzoffset_vals.pop()
324+
tz_out2 = timezone(timedelta(seconds=tz_offset))
325+
if not tz_compare(tz_out, tz_out2):
326+
# e.g. (array_strptime)
327+
# test_to_datetime_mixed_offsets_with_utc_false_removed
328+
# e.g. test_to_datetime_mixed_tzs_mixed_types (array_to_datetime)
329+
raise ValueError(
330+
"Mixed timezones detected. Pass utc=True in to_datetime "
331+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
332+
)
333+
# e.g. (array_strptime)
334+
# test_guess_datetime_format_with_parseable_formats
335+
# e.g. test_to_datetime_mixed_types_matching_tzs (array_to_datetime)
336+
else:
337+
# e.g. test_to_datetime_iso8601_with_timezone_valid (array_strptime)
338+
tz_offset = self.out_tzoffset_vals.pop()
339+
tz_out = timezone(timedelta(seconds=tz_offset))
340+
elif not utc:
341+
if tz_out and (self.found_other or self.found_naive_str):
342+
# found_other indicates a tz-naive int, float, dt64, or date
343+
# e.g. test_to_datetime_mixed_awareness_mixed_types (array_to_datetime)
344+
raise ValueError(
345+
"Mixed timezones detected. Pass utc=True in to_datetime "
346+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
347+
)
348+
return tz_out
349+
295350

296351
def array_strptime(
297352
ndarray[object] values,
@@ -319,11 +374,8 @@ def array_strptime(
319374
npy_datetimestruct dts
320375
int64_t[::1] iresult
321376
object val
322-
bint seen_datetime_offset = False
323377
bint is_raise = errors=="raise"
324378
bint is_coerce = errors=="coerce"
325-
bint is_same_offsets
326-
set out_tzoffset_vals = set()
327379
tzinfo tz, tz_out = None
328380
bint iso_format = format_is_iso(fmt)
329381
NPY_DATETIMEUNIT out_bestunit, item_reso
@@ -418,15 +470,15 @@ def array_strptime(
418470
) from err
419471
if out_local == 1:
420472
nsecs = out_tzoffset * 60
421-
out_tzoffset_vals.add(nsecs)
422-
seen_datetime_offset = True
473+
state.out_tzoffset_vals.add(nsecs)
474+
state.found_aware_str = True
423475
tz = timezone(timedelta(minutes=out_tzoffset))
424476
value = tz_localize_to_utc_single(
425477
value, tz, ambiguous="raise", nonexistent=None, creso=creso
426478
)
427479
else:
428480
tz = None
429-
out_tzoffset_vals.add("naive")
481+
state.out_tzoffset_vals.add("naive")
430482
state.found_naive_str = True
431483
iresult[i] = value
432484
continue
@@ -475,12 +527,12 @@ def array_strptime(
475527
elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
476528
nsecs = nsecs // 10**3
477529

478-
out_tzoffset_vals.add(nsecs)
479-
seen_datetime_offset = True
530+
state.out_tzoffset_vals.add(nsecs)
531+
state.found_aware_str = True
480532
else:
481533
state.found_naive_str = True
482534
tz = None
483-
out_tzoffset_vals.add("naive")
535+
state.out_tzoffset_vals.add("naive")
484536

485537
except ValueError as ex:
486538
ex.args = (
@@ -499,35 +551,7 @@ def array_strptime(
499551
raise
500552
return values, None
501553

502-
if seen_datetime_offset and not utc:
503-
is_same_offsets = len(out_tzoffset_vals) == 1
504-
if not is_same_offsets or (state.found_naive or state.found_other):
505-
raise ValueError(
506-
"Mixed timezones detected. Pass utc=True in to_datetime "
507-
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
508-
)
509-
elif tz_out is not None:
510-
# GH#55693
511-
tz_offset = out_tzoffset_vals.pop()
512-
tz_out2 = timezone(timedelta(seconds=tz_offset))
513-
if not tz_compare(tz_out, tz_out2):
514-
# e.g. test_to_datetime_mixed_offsets_with_utc_false_removed
515-
raise ValueError(
516-
"Mixed timezones detected. Pass utc=True in to_datetime "
517-
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
518-
)
519-
# e.g. test_guess_datetime_format_with_parseable_formats
520-
else:
521-
# e.g. test_to_datetime_iso8601_with_timezone_valid
522-
tz_offset = out_tzoffset_vals.pop()
523-
tz_out = timezone(timedelta(seconds=tz_offset))
524-
elif not utc:
525-
if tz_out and (state.found_other or state.found_naive_str):
526-
# found_other indicates a tz-naive int, float, dt64, or date
527-
raise ValueError(
528-
"Mixed timezones detected. Pass utc=True in to_datetime "
529-
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
530-
)
554+
tz_out = state.check_for_mixed_inputs(tz_out, utc)
531555

532556
if infer_reso:
533557
if state.creso_ever_changed:

Diff for: pandas/tests/tools/test_to_datetime.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -3545,19 +3545,27 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
35453545
# issued in _array_to_datetime_object
35463546
both_strs = isinstance(aware_val, str) and isinstance(naive_val, str)
35473547
has_numeric = isinstance(naive_val, (int, float))
3548+
both_datetime = isinstance(naive_val, datetime) and isinstance(aware_val, datetime)
3549+
3550+
mixed_msg = (
3551+
"Mixed timezones detected. Pass utc=True in to_datetime or tz='UTC' "
3552+
"in DatetimeIndex to convert to a common timezone"
3553+
)
35483554

35493555
first_non_null = next(x for x in vec if x != "")
35503556
# if first_non_null is not a string, _guess_datetime_format_for_array
35513557
# doesn't guess a format so we don't go through array_strptime
35523558
if not isinstance(first_non_null, str):
35533559
# that case goes through array_strptime which has different behavior
3554-
msg = "Cannot mix tz-aware with tz-naive values"
3560+
msg = mixed_msg
35553561
if naive_first and isinstance(aware_val, Timestamp):
35563562
if isinstance(naive_val, Timestamp):
35573563
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
35583564
with pytest.raises(ValueError, match=msg):
35593565
to_datetime(vec)
35603566
else:
3567+
if not naive_first and both_datetime:
3568+
msg = "Cannot mix tz-aware with tz-naive values"
35613569
with pytest.raises(ValueError, match=msg):
35623570
to_datetime(vec)
35633571

@@ -3586,21 +3594,21 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
35863594
to_datetime(vec, utc=True)
35873595

35883596
else:
3589-
msg = "Mixed timezones detected. Pass utc=True in to_datetime"
3597+
msg = mixed_msg
35903598
with pytest.raises(ValueError, match=msg):
35913599
to_datetime(vec)
35923600

35933601
# No warning/error with utc=True
35943602
to_datetime(vec, utc=True)
35953603

35963604
if both_strs:
3597-
msg = "Mixed timezones detected. Pass utc=True in to_datetime"
3605+
msg = mixed_msg
35983606
with pytest.raises(ValueError, match=msg):
35993607
to_datetime(vec, format="mixed")
36003608
with pytest.raises(ValueError, match=msg):
36013609
DatetimeIndex(vec)
36023610
else:
3603-
msg = "Cannot mix tz-aware with tz-naive values"
3611+
msg = mixed_msg
36043612
if naive_first and isinstance(aware_val, Timestamp):
36053613
if isinstance(naive_val, Timestamp):
36063614
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
@@ -3609,6 +3617,8 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
36093617
with pytest.raises(ValueError, match=msg):
36103618
DatetimeIndex(vec)
36113619
else:
3620+
if not naive_first and both_datetime:
3621+
msg = "Cannot mix tz-aware with tz-naive values"
36123622
with pytest.raises(ValueError, match=msg):
36133623
to_datetime(vec, format="mixed")
36143624
with pytest.raises(ValueError, match=msg):

0 commit comments

Comments
 (0)