24
24
from xarray .core .common import contains_cftime_datetimes , is_np_datetime_like
25
25
from xarray .core .duck_array_ops import asarray , ravel , reshape
26
26
from xarray .core .formatting import first_n_items , format_timestamp , last_item
27
- from xarray .core .pdcompat import nanosecond_precision_timestamp
27
+ from xarray .core .pdcompat import nanosecond_precision_timestamp , timestamp_as_unit
28
28
from xarray .core .utils import attempt_import , emit_user_level_warning
29
29
from xarray .core .variable import Variable
30
30
from xarray .namedarray .parallelcompat import T_ChunkedArray , get_chunked_array_type
36
36
except ImportError :
37
37
cftime = None
38
38
39
- from xarray .core .types import CFCalendar , NPDatetimeUnitOptions , T_DuckArray
39
+ from xarray .core .types import (
40
+ CFCalendar ,
41
+ NPDatetimeUnitOptions ,
42
+ T_DuckArray ,
43
+ )
40
44
41
45
T_Name = Union [Hashable , None ]
42
46
@@ -259,18 +263,26 @@ def _parse_iso8601(date_type, timestr):
259
263
return default .replace (** replace ), resolution
260
264
261
265
262
- def _unpack_time_units_and_ref_date (units : str ) -> tuple [str , pd .Timestamp ]:
266
+ def _maybe_strip_tz_from_timestamp (date : pd .Timestamp ) -> pd .Timestamp :
267
+ # If the ref_date Timestamp is timezone-aware, convert to UTC and
268
+ # make it timezone-naive (GH 2649).
269
+ if date .tz is not None :
270
+ return date .tz_convert ("UTC" ).tz_convert (None )
271
+ return date
272
+
273
+
274
def _unpack_time_unit_and_ref_date(
    units: str,
) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]:
    """Split a CF "<units> since <date>" string into a numpy time unit
    and a finalized reference Timestamp.

    Same as _unpack_netcdf_time_units but finalizes ref_date for
    processing in encode_cf_datetime: the unit is translated to a numpy
    code and the reference date is made timezone-naive.
    """
    time_unit, _ref_date = _unpack_netcdf_time_units(units)
    # translate netCDF unit names (e.g. "seconds") to numpy codes (e.g. "s")
    time_unit = _netcdf_to_numpy_timeunit(time_unit)
    # TODO: the strict enforcement of nanosecond precision Timestamps can be
    # relaxed when addressing GitHub issue #7493.
    ref_date = nanosecond_precision_timestamp(_ref_date)
    # drop any timezone (converting to UTC first) per GH 2649
    ref_date = _maybe_strip_tz_from_timestamp(ref_date)
    return time_unit, ref_date
274
286
275
287
276
288
def _decode_cf_datetime_dtype (
@@ -317,6 +329,30 @@ def _decode_datetime_with_cftime(
317
329
return np .array ([], dtype = object )
318
330
319
331
332
+ def _check_date_for_units_since_refdate (
333
+ date , unit : str , ref_date : pd .Timestamp
334
+ ) -> pd .Timestamp :
335
+ # check for out-of-bounds floats and raise
336
+ if date > np .iinfo ("int64" ).max or date < np .iinfo ("int64" ).min :
337
+ raise OutOfBoundsTimedelta (
338
+ f"Value { date } can't be represented as Datetime/Timedelta."
339
+ )
340
+ delta = date * np .timedelta64 (1 , unit )
341
+ if not np .isnan (delta ):
342
+ # this will raise on dtype overflow for integer dtypes
343
+ if date .dtype .kind in "u" and not np .int64 (delta ) == date :
344
+ raise OutOfBoundsTimedelta (
345
+ "DType overflow in Datetime/Timedelta calculation."
346
+ )
347
+ # this will raise on overflow if ref_date + delta
348
+ # can't be represented in the current ref_date resolution
349
+ return timestamp_as_unit (ref_date + delta , ref_date .unit )
350
+ else :
351
+ # if date is exactly NaT (np.iinfo("int64").min) return NaT
352
+ # to make follow-up checks work
353
+ return pd .Timestamp ("NaT" )
354
+
355
+
320
356
def _decode_datetime_with_pandas (
321
357
flat_num_dates : np .ndarray , units : str , calendar : str
322
358
) -> np .ndarray :
@@ -335,12 +371,8 @@ def _decode_datetime_with_pandas(
335
371
elif flat_num_dates .dtype .kind == "u" :
336
372
flat_num_dates = flat_num_dates .astype (np .uint64 )
337
373
338
- time_units , ref_date_str = _unpack_netcdf_time_units (units )
339
- time_units = _netcdf_to_numpy_timeunit (time_units )
340
374
try :
341
- # TODO: the strict enforcement of nanosecond precision Timestamps can be
342
- # relaxed when addressing GitHub issue #7493.
343
- ref_date = nanosecond_precision_timestamp (ref_date_str )
375
+ time_unit , ref_date = _unpack_time_unit_and_ref_date (units )
344
376
except ValueError as err :
345
377
# ValueError is raised by pd.Timestamp for non-ISO timestamp
346
378
# strings, in which case we fall back to using cftime
@@ -350,8 +382,12 @@ def _decode_datetime_with_pandas(
350
382
warnings .filterwarnings ("ignore" , "invalid value encountered" , RuntimeWarning )
351
383
if flat_num_dates .size > 0 :
352
384
# avoid size 0 datetimes GH1329
353
- pd .to_timedelta (flat_num_dates .min (), time_units ) + ref_date
354
- pd .to_timedelta (flat_num_dates .max (), time_units ) + ref_date
385
+ _check_date_for_units_since_refdate (
386
+ flat_num_dates .min (), time_unit , ref_date
387
+ )
388
+ _check_date_for_units_since_refdate (
389
+ flat_num_dates .max (), time_unit , ref_date
390
+ )
355
391
356
392
# To avoid integer overflow when converting to nanosecond units for integer
357
393
# dtypes smaller than np.int64 cast all integer and unsigned integer dtype
@@ -364,20 +400,24 @@ def _decode_datetime_with_pandas(
364
400
elif flat_num_dates .dtype .kind in "f" :
365
401
flat_num_dates = flat_num_dates .astype (np .float64 )
366
402
367
- # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
368
- # works much faster when dealing with integers (GH 1399).
369
- # properly handle NaN/NaT to prevent casting NaN to int
403
+ # keep NaT/nan mask
370
404
nan = np .isnan (flat_num_dates ) | (flat_num_dates == np .iinfo (np .int64 ).min )
371
- flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA [time_units ]
372
- flat_num_dates_ns_int = np .zeros_like (flat_num_dates , dtype = np .int64 )
373
- flat_num_dates_ns_int [nan ] = np .iinfo (np .int64 ).min
374
- flat_num_dates_ns_int [~ nan ] = flat_num_dates [~ nan ].astype (np .int64 )
405
+ # in case we need to change the unit, we fix the numbers here
406
+ # this should be safe, as errors would have been raised above
407
+ ns_time_unit = _NS_PER_TIME_DELTA [time_unit ]
408
+ ns_ref_date_unit = _NS_PER_TIME_DELTA [ref_date .unit ]
409
+ if ns_time_unit > ns_ref_date_unit :
410
+ flat_num_dates *= np .int64 (ns_time_unit / ns_ref_date_unit )
411
+ time_unit = ref_date .unit
375
412
376
- # Use pd.to_timedelta to safely cast integer values to timedeltas,
377
- # and add those to a Timestamp to safely produce a DatetimeIndex. This
378
- # ensures that we do not encounter integer overflow at any point in the
379
- # process without raising OutOfBoundsDatetime.
380
- return (pd .to_timedelta (flat_num_dates_ns_int , "ns" ) + ref_date ).values
413
+ # Cast input ordinals to integers and properly handle NaN/NaT
414
+ # to prevent casting NaN to int
415
+ flat_num_dates_int = np .zeros_like (flat_num_dates , dtype = np .int64 )
416
+ flat_num_dates_int [nan ] = np .iinfo (np .int64 ).min
417
+ flat_num_dates_int [~ nan ] = flat_num_dates [~ nan ].astype (np .int64 )
418
+
419
+ # cast to timedelta64[time_unit] and add to ref_date
420
+ return ref_date + flat_num_dates_int .astype (f"timedelta64[{ time_unit } ]" )
381
421
382
422
383
423
def decode_cf_datetime (
@@ -409,11 +449,15 @@ def decode_cf_datetime(
409
449
dates = _decode_datetime_with_cftime (
410
450
flat_num_dates .astype (float ), units , calendar
411
451
)
412
-
413
- if (
414
- dates [np .nanargmin (num_dates )].year < 1678
415
- or dates [np .nanargmax (num_dates )].year >= 2262
416
- ):
452
+ # retrieve cftype
453
+ dates_min = dates [np .nanargmin (num_dates )]
454
+ cftype = type (dates_min )
455
+ # "ns" borders
456
+ # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807']
457
+ lower = cftype (1677 , 9 , 21 , 0 , 12 , 43 , 145224 )
458
+ upper = cftype (2262 , 4 , 11 , 23 , 47 , 16 , 854775 )
459
+
460
+ if dates_min < lower or dates [np .nanargmax (num_dates )] > upper :
417
461
if _is_standard_calendar (calendar ):
418
462
warnings .warn (
419
463
"Unable to decode time axis into full "
@@ -833,8 +877,8 @@ def _eagerly_encode_cf_datetime(
833
877
raise OutOfBoundsDatetime
834
878
assert dates .dtype == "datetime64[ns]"
835
879
836
- time_units , ref_date = _unpack_time_units_and_ref_date (units )
837
- time_delta = _time_units_to_timedelta64 ( time_units )
880
+ time_unit , ref_date = _unpack_time_unit_and_ref_date (units )
881
+ time_delta = np . timedelta64 ( 1 , time_unit )
838
882
839
883
# Wrap the dates in a DatetimeIndex to do the subtraction to ensure
840
884
# an OverflowError is raised if the ref_date is too far away from
@@ -843,16 +887,17 @@ def _eagerly_encode_cf_datetime(
843
887
time_deltas = dates_as_index - ref_date
844
888
845
889
# retrieve needed units to faithfully encode to int64
846
- needed_units , data_ref_date = _unpack_time_units_and_ref_date (data_units )
890
+ needed_unit , data_ref_date = _unpack_time_unit_and_ref_date (data_units )
891
+ needed_units = _numpy_to_netcdf_timeunit (needed_unit )
847
892
if data_units != units :
848
893
# this accounts for differences in the reference times
849
894
ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
850
- data_delta = _time_units_to_timedelta64 ( needed_units )
895
+ data_delta = np . timedelta64 ( 1 , needed_unit )
851
896
if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
852
897
needed_units = _infer_time_units_from_diff (ref_delta )
853
898
854
899
# needed time delta to encode faithfully to int64
855
- needed_time_delta = _time_units_to_timedelta64 (needed_units )
900
+ needed_time_delta = _unit_timedelta_numpy (needed_units )
856
901
857
902
floor_division = np .issubdtype (dtype , np .integer ) or dtype is None
858
903
if time_delta > needed_time_delta :
@@ -865,6 +910,7 @@ def _eagerly_encode_cf_datetime(
865
910
f"Set encoding['dtype'] to floating point dtype to silence this warning."
866
911
)
867
912
elif np .issubdtype (dtype , np .integer ) and allow_units_modification :
913
+ floor_division = True
868
914
new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
869
915
emit_user_level_warning (
870
916
f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
@@ -874,9 +920,12 @@ def _eagerly_encode_cf_datetime(
874
920
)
875
921
units = new_units
876
922
time_delta = needed_time_delta
877
- floor_division = True
878
923
879
- num = _division (time_deltas , time_delta , floor_division )
924
+ # get resolution of TimedeltaIndex and align time_delta
925
+ # todo: check, if this works in any case
926
+ num = _division (
927
+ time_deltas , time_delta .astype (f"=m8[{ time_deltas .unit } ]" ), floor_division
928
+ )
880
929
num = reshape (num .values , dates .shape )
881
930
882
931
except (OutOfBoundsDatetime , OverflowError , ValueError ):
0 commit comments