diff --git a/cime_config/SystemTests/rxcropmaturity.py b/cime_config/SystemTests/rxcropmaturity.py
index 589fe38fab..9a73e2c825 100644
--- a/cime_config/SystemTests/rxcropmaturity.py
+++ b/cime_config/SystemTests/rxcropmaturity.py
@@ -492,15 +492,8 @@ def _run_interpolate_gdds(self):
)
def _get_conda_env(self):
- conda_setup_commands = stu.cmds_to_setup_conda(self._get_caseroot())
-
- # If npl conda environment is available, use that (It has dask, which
- # enables chunking, which makes reading daily 1-degree netCDF files
- # much more efficient.
- if "npl " in os.popen(conda_setup_commands + "conda env list").read():
- self._this_conda_env = "npl"
- else:
- self._this_conda_env = "ctsm_pylib"
+ stu.cmds_to_setup_conda(self._get_caseroot())
+ self._this_conda_env = "ctsm_pylib"
def _append_to_user_nl_clm(self, additions):
caseroot = self._get_caseroot()
diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml
index fed5e8da74..1e82513737 100644
--- a/cime_config/testdefs/ExpectedTestFails.xml
+++ b/cime_config/testdefs/ExpectedTestFails.xml
@@ -36,27 +36,6 @@
-
-
- FAIL
- #3740
-
-
-
-
-
- FAIL
- #3740
-
-
-
-
-
- FAIL
- #3740
-
-
-
FAIL
diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml
index b2701a333f..27c52a3510 100644
--- a/cime_config/testdefs/testlist_clm.xml
+++ b/cime_config/testdefs/testlist_clm.xml
@@ -4510,22 +4510,14 @@
-
-
-
-
-
-
-
-
-
+
-
+
diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py
index 0e218c5bfa..7101b5877c 100644
--- a/python/ctsm/crop_calendars/check_constant_vars.py
+++ b/python/ctsm/crop_calendars/check_constant_vars.py
@@ -147,8 +147,8 @@ def ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches):
incl_patches = np.sort(incl_patches)
if not np.array_equal(incl_patches, np.unique(incl_patches)):
raise RuntimeError("Patch(es) checked but also all-NaN??")
- if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])):
- for patch in np.arange(this_ds.dims["patch"]):
+ if not np.array_equal(incl_patches, np.arange(this_ds.sizes["patch"])):
+ for patch in np.arange(this_ds.sizes["patch"]):
if patch not in incl_patches:
raise RuntimeError(
f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}"
@@ -178,7 +178,7 @@ def check_one_constant_var_loop_through_timesteps(
In check_one_constant_var(), loop through timesteps
"""
found_in_rx = None
- for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]):
+ for timestep in np.arange(time_1 + 1, this_ds.sizes[time_coord]):
t_yr = this_ds[time_coord].values[timestep]
t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values)
ok_p = t1_vals == t_vals
@@ -267,7 +267,7 @@ def check_one_constant_var(
bad_patches,
) = check_one_constant_var_setup(this_ds, case, var)
- for time_1 in np.arange(this_ds.dims[time_coord] - 1):
+ for time_1 in np.arange(this_ds.sizes[time_coord] - 1):
condn = ~np.isnan(ra_sp[time_1, ...])
if time_1 > 0:
condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0))
@@ -324,12 +324,12 @@ def check_one_constant_var(
if not any_bad:
if any_bad_before_checking_rx:
print(
- f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing "
+ f"✅ CLM output {var} do not vary through {this_ds.sizes[time_coord]} growing "
+ "seasons of output (except for patch(es) with missing rx)."
)
else:
print(
- f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing "
+ f"✅ CLM output {var} do not vary through {this_ds.sizes[time_coord]} growing "
+ "seasons of output."
)
diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py
index 3fa357b685..c0a7089d03 100644
--- a/python/ctsm/crop_calendars/convert_axis_time2gs.py
+++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py
@@ -48,13 +48,13 @@ def convert_axis_time2gs_setup(this_ds, verbose):
Various setup steps for convert_axis_time2gs_setup()
"""
# How many non-NaN patch-seasons do we expect to have once we're done organizing things?
- n_patch = this_ds.dims["patch"]
+ n_patch = this_ds.sizes["patch"]
# Because some patches will be planted in the last year but not complete, we have to ignore any
# finalyear-planted seasons that do complete.
- n_gs = this_ds.dims["time"] - 1
+ n_gs = this_ds.sizes["time"] - 1
expected_valid = n_patch * n_gs
- mxharvests = this_ds.dims["mxharvests"]
+ mxharvests = this_ds.sizes["mxharvests"]
if verbose:
print(
@@ -377,7 +377,7 @@ def ignore_harvests_planted_in_final_year(
)
is_valid = ~np.isnan(hdates_pg2)
is_fake = np.isneginf(hdates_pg2)
- is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs))
+ is_fake = np.reshape(is_fake[is_valid], (this_ds.sizes["patch"], n_gs))
discrepancy = np.sum(is_valid) - expected_valid
unique_n_seasons = np.unique(np.sum(is_valid, axis=1))
if verbose:
@@ -435,8 +435,8 @@ def create_dataset(
# Remove the nans and reshape to patches*growingseasons
da_pyh = da_yhp.transpose("patch", "time", "mxharvests")
- ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1))
- ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs))
+ ar_pg = np.reshape(da_pyh.values, (this_ds.sizes["patch"], -1))
+ ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.sizes["patch"], n_gs))
# Change -infs to nans
ar_valid_pg[is_fake] = np.nan
# Save as DataArray to new Dataset, stripping _PERHARV from variable name
diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py
index 976d022e85..430badfb48 100644
--- a/python/ctsm/crop_calendars/cropcal_module.py
+++ b/python/ctsm/crop_calendars/cropcal_module.py
@@ -42,7 +42,7 @@ def check_and_trim_years(year_1, year_n, ds_in):
# Make sure you have the expected number of timesteps (including extra year)
n_years_expected = year_n - year_1 + 2
- if ds_in.dims["time"] != n_years_expected:
+ if ds_in.sizes["time"] != n_years_expected:
raise RuntimeError(
f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}"
)
@@ -208,7 +208,7 @@ def import_max_gs_length(paramfile):
Import maximum growing season length
"""
# Get parameter file
- paramfile_ds = xr.open_dataset(paramfile)
+ paramfile_ds = xr.open_dataset(paramfile, decode_timedelta=True)
# Import max growing season length (stored in netCDF as nanoseconds!)
paramfile_mxmats = paramfile_ds["mxmat"].values / np.timedelta64(1, "D")
diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py
index 196d7a96da..dd736add54 100644
--- a/python/ctsm/crop_calendars/generate_gdds.py
+++ b/python/ctsm/crop_calendars/generate_gdds.py
@@ -4,6 +4,7 @@
import os
import sys
+
import pickle
import datetime as dt
import argparse
@@ -29,6 +30,9 @@
# fixed. For now, we'll just disable the warning.
# pylint: disable=too-many-positional-arguments
+# Mapping of history tape number to output frequency
+H_FREQ_DICT = {1: "annual", 2: "daily"}
+
def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfile, cushion):
"""
@@ -45,19 +49,31 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
return mxmats
-def _get_history_yr_range(first_season, last_season):
+def _get_history_yr_range(first_season, last_season, freq):
"""
- Get a range object that can be used for looping over all years we need to process timestamps
- from.
+ Get range objects that can be used for looping over all years we need to process timestamps
+ from. Different handling for annual vs. daily history files. Assumption is that all history
+ files are instantaneous.
"""
- # Saving at the end of a year receive the timestamp of the END of the year's final timestep,
- # which means it will actually be 00:00 of Jan. 1 of the next year.
- first_history_yr = first_season + 1
- # Same deal for the last history timestep, but we have to read an extra year in that case,
- # because in some places the last growing season won't complete until the year after it was
- # planted.
- last_history_yr = last_season + 2
+ if freq == "annual":
+ # Saving at the end of a year gives the timestamp of the END of the year's final timestep,
+ # which means it will actually be 00:00 of Jan. 1 of the next year.
+ first_history_yr = first_season + 1
+ last_history_yr = last_season + 1
+ elif freq == "daily":
+ # Saving at the end of a day/beginning of the next day (i.e., 00:00:00) means that the year
+ # will be correct for all but the Dec. 31 save, which will receive a timestamp of Jan. 1
+ # 00:00:00. That will be handled in _get_time_slice_lists(), so we don't need to account for
+ # it here.
+ first_history_yr = first_season
+ last_history_yr = last_season
+ else:
+ raise NotImplementedError(f"Not sure how to handle freq '{freq}'")
+
+ # For the last season, we have to read an extra year, because in some places the last growing
+ # season won't complete until the year after it was planted.
+ last_history_yr += 1
# last_history_yr + 1 because range() will iterate up to but not including the second value.
history_yr_range = range(first_history_yr, last_history_yr + 1)
@@ -65,11 +81,10 @@ def _get_history_yr_range(first_season, last_season):
return history_yr_range
-def _get_time_slice_list(first_season, last_season):
+def _get_time_slice_lists(first_season, last_season):
"""
- Given the requested first and last seasons, get the list of time slices that the script should
- look for. The assumptions here, as in import_and_process_1yr and as instructed in the docs, are
- that the user (a) is saving instantaneous annual files and (b) started on Jan. 1.
+ Given the requested first and last seasons, get the list of instantaneous time slices that the
+ script should look for.
"""
# Input checks
@@ -78,32 +93,56 @@ def _get_time_slice_list(first_season, last_season):
if first_season > last_season:
raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")
- slice_list = []
- for history_yr in _get_history_yr_range(first_season, last_season):
- slice_start = f"{history_yr}-01-01"
- # Stop could probably be the same as start, since there should just be one value saved per
- # year and that should get the Jan. 1 timestamp.
- slice_stop = f"{history_yr}-12-31"
- slice_list.append(slice(slice_start, slice_stop))
-
- # We should be reading one more than the total number of years in [first_season, last_season].
- assert len(slice_list) == last_season - first_season + 2
-
- return slice_list
-
-
-def _get_file_lists(input_dir, time_slice_list, logger):
+ # Initialize list with None for each history file. Could avoid by starting with empty list and
+ # doing .append(), but pylint gets confused by that for some reason.
+ slice_lists_list = [None for x in range(len(H_FREQ_DICT))]
+
+ # Get time slice for each required history year in each history file.
+ for i, h in enumerate(list(H_FREQ_DICT.keys())):
+ slice_list = []
+ freq = H_FREQ_DICT[h]
+ for history_yr in _get_history_yr_range(first_season, last_season, freq):
+ if h == 1:
+ # Annual timesteps
+ slice_start = f"{history_yr}-01-01"
+ slice_stop = f"{history_yr}-01-01"
+ elif h == 2:
+ # Daily timesteps of instantaneous variables will go from Jan. 2 through Jan. 1
+ # because they will get the time at the end of each timestep.
+ slice_start = f"{history_yr}-01-02"
+ slice_stop = f"{history_yr + 1}-01-01"
+ else:
+ raise NotImplementedError(f"What frequency are h{h}i files saved at?")
+ slice_list.append(slice(slice_start, slice_stop))
+
+ # We should be reading one more than the total number of years in
+ # [first_season, last_season].
+ ns_exp = last_season - first_season + 2
+ ns_actual = len(slice_list)
+ msg = f"Expected {ns_exp} time slices in list for h{h}; got {ns_actual}"
+ assert ns_exp == ns_actual, msg
+
+ # Save
+ slice_lists_list[i] = slice_list
+
+ return tuple(slice_lists_list)
+
+
+def _get_file_lists(input_dir, time_slice_lists_list, logger):
"""
For each time slice in a list, find the file(s) that need to be read to get all history
timesteps in the slice. Returns both h1i and h2i file lists.
"""
output_file_lists_list = [None, None]
for i, h in enumerate([1, 2]):
+ time_slice_list = time_slice_lists_list[i]
all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger)
h_file_lists = []
for time_slice in time_slice_list:
try:
- h_file_lists.append(get_files_in_time_slice(all_h_files, time_slice, logger=logger))
+ h_file_lists.append(
+ get_files_in_time_slice(all_h_files, time_slice, logger=logger, quiet=True)
+ )
except FileNotFoundError as e:
raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e
output_file_lists_list[i] = h_file_lists
@@ -184,17 +223,6 @@ def main(
##########################
if not only_make_figs:
- # Keep 1 extra year to avoid incomplete final growing season for crops
- # harvested after Dec. 31.
- yr_1_import_str = f"{first_season+1}-01-01"
- yr_n_import_str = f"{last_season+2}-01-01"
-
- log(
- logger,
- f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} "
- + "(years are +1 because of CTSM output naming)",
- )
-
# This script uses pickle to save work in progress. In case of interruption, when the script
# is resumed, it will look for a pickle file. It will resume from the year after
# pickle_year, which is the last processed year in the pickle file.
@@ -205,24 +233,22 @@ def main(
(
first_season,
last_season,
- pickle_year,
+ pickle_season,
gddaccum_yp_list,
gddharv_yp_list,
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
- incorrectly_daily,
save_figs,
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
skip_crops,
) = pickle.load(file)
- print(f"Will resume import at {pickle_year+1}")
+ log(logger, f"Will resume import at season {pickle_season+1}")
h2_ds = None
else:
- incorrectly_daily = False
skip_patches_for_isel_nan_lastyear = np.ndarray([])
- pickle_year = -np.inf
+ pickle_season = -np.inf
gddaccum_yp_list = []
gddharv_yp_list = []
incl_vegtypes_str = None
@@ -235,19 +261,38 @@ def main(
)
# Get lists of history timesteps and files to read
- time_slice_list = _get_time_slice_list(first_season, last_season)
- h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger)
+ log(logger, "Getting lists of history timesteps and files to read")
+ h1_time_slices, h2_time_slices = _get_time_slice_lists(first_season, last_season)
+ h1_file_lists, h2_file_lists = _get_file_lists(
+ input_dir, (h1_time_slices, h2_time_slices), logger
+ )
+ history_yr_range_h1 = _get_history_yr_range(first_season, last_season, H_FREQ_DICT[1])
+ history_yr_range_h2 = _get_history_yr_range(first_season, last_season, H_FREQ_DICT[2])
+ # Check
+ assert len(history_yr_range_h1) == len(history_yr_range_h2)
+ log(logger, "Checking h1 files")
+ gddfn.check_file_lists(
+ history_yr_range_h1, h1_file_lists, h1_time_slices, H_FREQ_DICT[1], logger
+ )
+ log(logger, "Checking h2 files")
+ gddfn.check_file_lists(
+ history_yr_range_h2, h2_file_lists, h2_time_slices, H_FREQ_DICT[2], logger
+ )
+ log(logger, "Done")
- for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)):
+ for y, history_yr_h1 in enumerate(history_yr_range_h1):
# If resuming from a pickled file, we continue until we reach a year that hasn't yet
# been processed.
- if this_yr <= pickle_year:
+ if history_yr_h1 <= pickle_season:
continue
- log(logger, f"netCDF year {this_yr}...")
+ log(logger, f"History year {history_yr_h1}...")
- # Get h1 and h2 files to read for this year
- h1_file_list = h1_file_lists[yr_index] # pylint: disable=unsubscriptable-object
- h2_file_list = h2_file_lists[yr_index] # pylint: disable=unsubscriptable-object
+ # Get time slice and files to read for this year
+ h1_time_slice = h1_time_slices[y] # pylint: disable=unsubscriptable-object
+ h2_time_slice = h2_time_slices[y] # pylint: disable=unsubscriptable-object
+ h1_file_list = h1_file_lists[y] # pylint: disable=unsubscriptable-object
+ h2_file_list = h2_file_lists[y] # pylint: disable=unsubscriptable-object
+ history_yr_h2 = list(history_yr_range_h2)[y]
(
h2_ds,
@@ -257,21 +302,19 @@ def main(
gddharv_yp_list,
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
- incorrectly_daily,
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
) = gddfn.import_and_process_1yr(
first_season,
last_season,
- yr_index,
+ y,
sdates_rx,
hdates_rx,
gddaccum_yp_list,
gddharv_yp_list,
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
- incorrectly_daily,
incl_vegtypes_str,
h2_ds_file,
mxmats,
@@ -279,8 +322,12 @@ def main(
skip_crops,
outdir_figs,
logger,
+ history_yr_h1,
+ history_yr_h2,
h1_file_list,
h2_file_list,
+ h1_time_slice,
+ h2_time_slice,
)
log(logger, f" Saving pickle file ({pickle_file})...")
@@ -289,12 +336,11 @@ def main(
[
first_season,
last_season,
- this_yr,
+ history_yr_h1,
gddaccum_yp_list,
gddharv_yp_list,
skip_patches_for_isel_nan_lastyear,
lastyear_active_patch_indices_list,
- incorrectly_daily,
save_figs,
incl_vegtypes_str,
incl_patches1d_itype_veg,
@@ -403,7 +449,7 @@ def save_gdds(sdates_file, hdates_file, outfile, gdd_maps_ds, sdates_rx):
template_ds = xr.open_dataset(sdates_file, decode_times=True)
for var in template_ds:
if "sdate" in var:
- template_ds = template_ds.drop(var)
+ template_ds = template_ds.drop_vars(var)
template_ds.to_netcdf(path=outfile, format="NETCDF4_CLASSIC")
template_ds.close()
diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py
index ed271bdd8d..d2b47f66cf 100644
--- a/python/ctsm/crop_calendars/generate_gdds_functions.py
+++ b/python/ctsm/crop_calendars/generate_gdds_functions.py
@@ -5,8 +5,11 @@
# pylint: disable=too-many-lines,too-many-statements,abstract-class-instantiated
import warnings
import os
+import sys
import glob
+from datetime import timedelta
from importlib import util as importlib_util
+
import numpy as np
import xarray as xr
@@ -56,6 +59,72 @@
CAN_PLOT = False
+def gen_inst_daily_year(year, cal_type):
+ """
+ For a given history year, return the timesteps we expect to see for daily instantaneous files
+ """
+
+ # Generate list of datetimes
+ start_date = cal_type(year, 1, 2) # Jan. 2 of given year
+ end_date = cal_type(year + 1, 1, 1) # Jan. 1 of next year
+ date_list = [start_date]
+ d = start_date
+ while d < end_date:
+ d += timedelta(days=1)
+ date_list.append(d)
+
+ # Convert to DataArray
+ date_da = xr.DataArray(data=date_list, dims=["time"], coords={"time": date_list})
+
+ return date_da
+
+
+def check_file_lists(history_yr_range, h_file_lists, time_slice_list, freq, logger):
+ """
+ For each history year, check that the corresponding list of files has the requested time steps.
+ Could theoretically make this faster by starting with checks of the first and last history
+ years, but it's really fast already.
+ """
+ for history_yr, h_file_list, time_slice in zip(
+ list(history_yr_range), h_file_lists, time_slice_list
+ ):
+ # Get time across all included files
+ time_da = None
+ for h_file in h_file_list:
+ ds = xr.open_dataset(h_file).sel(time=time_slice)
+ if time_da is None:
+ time_da = ds["time"]
+ else:
+ time_da = xr.concat([time_da, ds["time"]], dim="time")
+
+ # Check whether the time variable exactly matches what we would expect based on frequency.
+ _check_time_da(freq, history_yr, time_da, logger)
+
+
+def _check_time_da(freq, history_yr, time_da, logger):
+ """
+ Check whether a given time variable exactly matches what we would expect based on frequency.
+ """
+ # Create the expected DataArray
+ cal_type = type(np.atleast_1d(time_da.values)[0])
+ if freq == "annual":
+ date_list = [cal_type(history_yr, 1, 1)]
+ time_da_exp = xr.DataArray(data=date_list, dims=["time"], coords={"time": date_list})
+ elif freq == "daily":
+ time_da_exp = gen_inst_daily_year(history_yr, cal_type)
+ else:
+ msg = f"What should the time DataArray look like for freq '{freq}'?"
+ error(logger, msg, error_type=NotImplementedError)
+ sys.exit() # Because pylint doesn't know the script stops on error()
+
+ # Compare actual vs. expected
+ if not time_da.equals(time_da_exp):
+ log(logger, f"expected: {time_da_exp}")
+ log(logger, f"actual: {time_da}")
+ msg = "Unexpected time data after slicing; see logs above."
+ error(logger, msg, error_type=AssertionError)
+
+
def check_grid_match(grid0, grid1, tol=GRID_TOL_DEG):
"""
Check whether latitude or longitude values match
@@ -295,7 +364,6 @@ def import_and_process_1yr(
gddharv_yp_list,
skip_patches_for_isel_nan_last_year,
last_year_active_patch_indices_list,
- incorrectly_daily,
incl_vegtypes_str_in,
h2_ds_file,
mxmats,
@@ -303,8 +371,12 @@ def import_and_process_1yr(
skip_crops,
outdir_figs,
logger,
+ history_yr_h1,
+ history_yr_h2,
h1_filelist,
h2_filelist,
+ h1_time_slice,
+ h2_time_slice,
):
"""
Import one year of CLM output data for GDD generation
@@ -330,21 +402,14 @@ def import_and_process_1yr(
my_vegtypes=crops_to_read,
chunks=chunks,
logger=logger,
+ time_slice=h1_time_slice,
)
- for timestep in dates_ds["time"].values:
- print(timestep)
- if dates_ds.dims["time"] > 1:
- if dates_ds.dims["time"] == 365:
- if not incorrectly_daily:
- log(
- logger,
- " ℹ️ You saved SDATES and HDATES daily, but you only needed annual. Fixing.",
- )
- incorrectly_daily = True
- dates_ds = dates_ds.isel(time=-1)
- else:
- dates_ds = dates_ds.isel(time=0)
+ # Check included timesteps
+ _check_time_da("annual", history_yr_h1, dates_ds["time"], logger)
+
+ # Should now just be one timestep, so select it to remove dimension.
+ dates_ds = dates_ds.isel(time=0)
# Make sure NaN masks match
sdates_all_nan = (
@@ -405,41 +470,23 @@ def import_and_process_1yr(
if np.sum(~np.isnan(dates_incl_ds.SDATES.values)) == 0:
error(logger, "All SDATES are NaN after ignoring those patches!")
- # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving
- # SDATES/HDATES daily.
- mxsowings = dates_ds.dims["mxsowings"]
- mxsowings_dim = dates_ds.SDATES.dims.index("mxsowings")
+ # Some patches can have -1 sowing date??
+ mxsowings = dates_ds.sizes["mxsowings"]
skip_patches_for_isel_sdatelt1 = np.where(dates_incl_ds.SDATES.values < 1)[1]
skipping_patches_for_isel_sdatelt1 = len(skip_patches_for_isel_sdatelt1) > 0
if skipping_patches_for_isel_sdatelt1:
unique_hdates = np.unique(
dates_incl_ds.HDATES.isel(mxharvests=0, patch=skip_patches_for_isel_sdatelt1).values
)
- if incorrectly_daily and list(unique_hdates) == [364]:
- log(
- logger,
- f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this"
- + "might have just been because of incorrectly daily outputs. Setting them to 365.",
- )
- new_sdates_ar = dates_incl_ds.SDATES.values
- if mxsowings_dim != 0:
- error(logger, "Code this up")
- new_sdates_ar[0, skip_patches_for_isel_sdatelt1] = 365
- dates_incl_ds["SDATES"] = xr.DataArray(
- data=new_sdates_ar,
- coords=dates_incl_ds["SDATES"].coords,
- attrs=dates_incl_ds["SDATES"].attrs,
- )
- else:
- error(
- logger,
- f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. "
- + f"Unique affected hdates: {unique_hdates}",
- )
+ error(
+ logger,
+ f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. "
+ + f"Unique affected hdates: {unique_hdates}",
+ )
# Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving
# SDATES/HDATES daily. Can also happen if patch wasn't active last year
- mxharvests = dates_ds.dims["mxharvests"]
+ mxharvests = dates_ds.sizes["mxharvests"]
mxharvests_dim = dates_ds.HDATES.dims.index("mxharvests")
# If a patch was inactive last year but was either (a) harvested the last time it was active or
# (b) was never active, it will have -1 as its harvest date this year. Such instances are okay.
@@ -481,30 +528,13 @@ def import_and_process_1yr(
unique_sdates = np.unique(
dates_incl_ds.SDATES.isel(patch=skip_patches_for_isel_hdatelt1).values
)
- if incorrectly_daily and list(unique_sdates) == [1]:
- log(
- logger,
- f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like "
- + "this might have just been because of incorrectly daily outputs; setting them to "
- + "365.",
- )
- new_hdates_ar = dates_incl_ds.HDATES.values
- if mxharvests_dim != 0:
- error(logger, "Code this up")
- new_hdates_ar[0, skip_patches_for_isel_hdatelt1] = 365
- dates_incl_ds["HDATES"] = xr.DataArray(
- data=new_hdates_ar,
- coords=dates_incl_ds["HDATES"].coords,
- attrs=dates_incl_ds["HDATES"].attrs,
- )
- else:
- error(
- logger,
- f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n"
- + "* Not using constant crop areas (e.g., flanduse_timeseries from "
- + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n"
- + f"Unique affected sdates: {unique_sdates}",
- )
+ error(
+ logger,
+ f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n"
+ + "* Not using constant crop areas (e.g., flanduse_timeseries from "
+ + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n"
+ + f"Unique affected sdates: {unique_sdates}",
+ )
# Make sure there was only one harvest per year
n_extra_harv = np.sum(
@@ -560,7 +590,7 @@ def import_and_process_1yr(
# Limit growing season to CLM max growing season length, if needed
if mxmats and (imported_sdates or imported_hdates):
- print(" Limiting growing season length...")
+ log(logger, " Limiting growing season length...")
hdates_rx = hdates_rx_orig.copy()
for var in hdates_rx_orig:
if var == "time_bounds":
@@ -578,14 +608,14 @@ def import_and_process_1yr(
mxmat = mxmats[vegtype_str]
if np.isinf(mxmat):
- print(f" Not limiting {vegtype_str}: No mxmat value")
+ log(logger, f" Not limiting {vegtype_str}: No mxmat value")
continue
# Get "prescribed" growing season length
gs_len_rx_da = get_gs_len_da(hdates_rx_orig[var] - sdates_rx[var])
not_ok = gs_len_rx_da.values > mxmat
if not np.any(not_ok):
- print(f" Not limiting {vegtype_str}: No rx season > {mxmat} days")
+ log(logger, f" Not limiting {vegtype_str}: No rx season > {mxmat} days")
continue
hdates_limited = hdates_rx_orig[var].copy().values
@@ -600,10 +630,11 @@ def import_and_process_1yr(
coords=hdates_rx_orig[var].coords,
attrs=hdates_rx_orig[var].attrs,
)
- print(
+ log(
+ logger,
f" Limited {vegtype_str} growing season length to {mxmat}. Longest was "
+ f"{int(np.max(gs_len_rx_da.values))}, now "
- + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}."
+ + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}.",
)
else:
hdates_rx = hdates_rx_orig
@@ -617,8 +648,12 @@ def import_and_process_1yr(
my_vegtypes=crops_to_read,
chunks=chunks,
logger=logger,
+ time_slice=h2_time_slice,
)
+ # Check included timesteps
+ _check_time_da("daily", history_yr_h2, h2_ds["time"], logger)
+
# Restrict to patches we're including
if skipping_patches_for_isel_nan:
if not np.array_equal(dates_ds.patch.values, h2_ds.patch.values):
@@ -736,44 +771,20 @@ def import_and_process_1yr(
this_year_active_patch_indices[x] for x in where_gs_lastyr
]
if not np.array_equal(last_year_active_patch_indices, this_year_active_patch_indices):
- if incorrectly_daily:
- log(
- logger,
- " ❗ This year's active patch indices differ from last year's. "
- + "Allowing because this might just be an artifact of incorrectly daily "
- + "outputs, BUT RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(logger, "This year's active patch indices differ from last year's.")
+ error(logger, "This year's active patch indices differ from last year's.")
# Make sure we're not about to overwrite any existing values.
if np.any(
~np.isnan(
gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices]
)
):
- if incorrectly_daily:
- log(
- logger,
- " ❗ Unexpected non-NaN for last season's GDD accumulation. "
- + "Allowing because this might just be an artifact of incorrectly daily "
- + "outputs, BUT RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(logger, "Unexpected non-NaN for last season's GDD accumulation")
+ error(logger, "Unexpected non-NaN for last season's GDD accumulation")
if save_figs and np.any(
~np.isnan(
gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices]
)
):
- if incorrectly_daily:
- log(
- logger,
- " ❗ Unexpected non-NaN for last season's GDDHARV. Allowing "
- + "because this might just be an artifact of incorrectly daily outputs, "
- + "BUT RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(logger, "Unexpected non-NaN for last season's GDDHARV")
+ error(logger, "Unexpected non-NaN for last season's GDDHARV")
# Fill.
gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] = (
gddaccum_atharv_p[where_gs_lastyr]
@@ -788,15 +799,7 @@ def import_and_process_1yr(
gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices]
)
):
- if incorrectly_daily:
- log(
- logger,
- " ❗ Unexpected NaN for last season's GDD accumulation. Allowing "
- + "because this might just be an artifact of incorrectly daily outputs, "
- + "BUT RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(logger, "Unexpected NaN for last season's GDD accumulation.")
+ error(logger, "Unexpected NaN for last season's GDD accumulation.")
if (
save_figs
and check_gddharv
@@ -808,15 +811,7 @@ def import_and_process_1yr(
)
)
):
- if incorrectly_daily:
- log(
- logger,
- " ❗ Unexpected NaN for last season's GDDHARV. Allowing because "
- + "this might just be an artifact of incorrectly daily outputs, BUT "
- + "RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(logger, "Unexpected NaN for last season's GDDHARV.")
+ error(logger, "Unexpected NaN for last season's GDDHARV.")
gddaccum_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddaccum
if save_figs:
gddharv_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddharv
@@ -831,19 +826,19 @@ def import_and_process_1yr(
)
nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[var][year_index - 1, :])
if not np.array_equal(nanmask_output_gdds_lastyr, nanmask_output_sdates):
- if incorrectly_daily:
- log(
- logger,
- " ❗ NaN masks differ between this year's sdates and 'filled-out' "
- + "GDDs from last year. Allowing because this might just be an artifact of "
- + "incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.",
- )
- else:
- error(
- logger,
- "NaN masks differ between this year's sdates and 'filled-out' GDDs from "
- + "last year",
- )
+ n_lastyr_where_not_sdates = np.sum(
+ nanmask_output_gdds_lastyr & ~nanmask_output_sdates
+ )
+ n_sdates_where_not_lastyr = np.sum(
+ ~nanmask_output_gdds_lastyr & nanmask_output_sdates
+ )
+ msg = (
+ "NaN masks differ between this year's sdates and 'filled-out' GDDs from last"
+ f" year. {n_lastyr_where_not_sdates} NaN last year after filling out where not"
+ f" NaN in sdates; {n_sdates_where_not_lastyr} vice versa. Out of size"
+ f" {n_lastyr_where_not_sdates.size}."
+ )
+ error(logger, msg)
last_year_active_patch_indices_list[var] = this_year_active_patch_indices
skip_patches_for_isel_nan_last_year = skip_patches_for_isel_nan
@@ -860,24 +855,19 @@ def import_and_process_1yr(
gddharv_yp_list,
skip_patches_for_isel_nan_last_year,
last_year_active_patch_indices_list,
- incorrectly_daily,
incl_vegtypes_str,
incl_patches1d_itype_veg,
mxsowings,
)
-def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
+def find_inst_hist_files(indir, *, h, logger=None):
"""
- Find all the instantaneous history files for a given tape number, optionally looking just for
- one year in filename.
+ Find all the instantaneous history files for a given tape number.
Args:
indir: Directory to search for history files
h: History tape number (must be an integer, e.g., 1 for h1, 2 for h2)
- this_year: Optional year to filter files by. If provided, only files with dates starting
- with "{this_year}-01" will be returned. If None, all files matching the
- history tape number will be returned.
logger: Optional logger for error messages. If None, errors are raised without logging.
Returns:
@@ -887,26 +877,19 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
TypeError: If h is not an integer
FileNotFoundError: If no files matching the patterns are found
RuntimeError: If files from multiple case names are found (indicates mixed output from
- different simulations, which is pathological)
+ different simulations)
Notes:
- Searches for files matching patterns: "*h{h}i.*.nc" or "*h{h}i.*.nc.base"
- - When this_year is specified, searches for: "*h{h}i.{this_year}-01*.nc" or
- "*h{h}i.{this_year}-01*.nc.base"
- Prefers .nc files over .nc.base files (searches .nc pattern first)
- All returned files must be from the same case name (extracted from filename before
".clm2.h#i.")
"""
- if this_year is None:
- patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"]
- else:
- if not isinstance(h, int):
- err_msg = f"h ({h}) must be an integer, not {type(h)}"
- err_type = TypeError
- if logger:
- error(logger, err_msg, error_type=err_type)
- raise err_type(err_msg)
- patterns = [f"*h{h}i.{this_year}-01*.nc", f"*h{h}i.{this_year}-01*.nc.base"]
+ if not isinstance(h, int):
+ err_msg = f"h ({h}) must be an integer, not {type(h)}"
+ error(logger, err_msg, error_type=TypeError)
+
+ patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"]
for pat in patterns:
pattern = os.path.join(indir, pat)
file_list = glob.glob(pattern)
@@ -914,10 +897,7 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
break
if not file_list:
err_msg = f"No files found matching patterns: {patterns}"
- err_type = FileNotFoundError
- if logger:
- error(logger, err_msg, error_type=err_type)
- raise err_type(err_msg)
+ error(logger, err_msg, error_type=FileNotFoundError)
# Error if files found from multiple cases
case_names = set()
@@ -930,10 +910,7 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None):
case_names.add(case_name)
if len(case_names) > 1:
err_msg = f"Found files from multiple case names: {sorted(case_names)}"
- err_type = RuntimeError
- if logger:
- error(logger, err_msg, error_type=err_type)
- raise err_type(err_msg)
+ error(logger, err_msg, error_type=RuntimeError)
return file_list
diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py
index 656d10985e..34d2100d82 100644
--- a/python/ctsm/crop_calendars/import_ds.py
+++ b/python/ctsm/crop_calendars/import_ds.py
@@ -6,13 +6,14 @@
vegetation types.
"""
+import os
import re
import warnings
from importlib.util import find_spec
import numpy as np
import xarray as xr
from ctsm.utils import is_instantaneous
-from ctsm.ctsm_logging import log
+from ctsm.ctsm_logging import log, error
import ctsm.crop_calendars.cropcal_utils as utils
from ctsm.crop_calendars.xr_flexsel import xr_flexsel
@@ -32,7 +33,7 @@ def compute_derived_vars(ds_in, var, logger=None):
hyears = ds_in["HDATES"].copy()
hyears.values = np.tile(
np.expand_dims(year_list, (1, 2)),
- (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]),
+ (1, ds_in.sizes["mxharvests"], ds_in.sizes["patch"]),
)
with np.errstate(invalid="ignore"):
is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0)
@@ -310,21 +311,23 @@ def import_ds(
return this_ds
-def get_files_in_time_slice(filelist, time_slice, logger=None):
+def get_files_in_time_slice(filelist, time_slice, logger=None, quiet=False):
"""
For a given list of files, find the files that need to be read in order to get all history
timesteps in the slice.
"""
new_filelist = []
for file in sorted(filelist):
- if logger:
- log(logger, f"Getting filetime from file: {file}")
filetime = xr.open_dataset(file).time
filetime_sel = utils.safer_timeslice(filetime, time_slice)
include_this_file = filetime_sel.size
if include_this_file:
- if logger:
- log(logger, f"Including filetime : {filetime_sel['time'].values}")
+ if not quiet:
+ f = os.path.basename(file)
+ first_str = str(filetime_sel["time"].isel(time=0).values)
+ last_str = str(filetime_sel["time"].isel(time=-1).values)
+ msg = f"From {f}, including {first_str} through {last_str}"
+ log(logger, msg)
new_filelist.append(file)
# If you found some matching files, but then you find one that doesn't, stop going
@@ -332,5 +335,6 @@ def get_files_in_time_slice(filelist, time_slice, logger=None):
elif new_filelist:
break
if not new_filelist:
- raise FileNotFoundError(f"No files found in time_slice {time_slice}")
+ err_msg = f"No files found in time_slice {time_slice}"
+ error(logger, err_msg, error_type=FileNotFoundError)
return new_filelist
diff --git a/python/ctsm/test/test_unit_generate_gdds.py b/python/ctsm/test/test_unit_generate_gdds.py
index 5c0219cd09..5efee497cc 100755
--- a/python/ctsm/test/test_unit_generate_gdds.py
+++ b/python/ctsm/test/test_unit_generate_gdds.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
-Unit tests for generate_gdds.py and generate_gdds_functions.py
+Unit tests for generate_gdds.py
"""
import unittest
@@ -9,7 +9,6 @@
import argparse
import tempfile
import shutil
-import logging
import re
import numpy as np
@@ -18,7 +17,6 @@
from ctsm import unit_testing
from ctsm.crop_calendars import generate_gdds as gg
-from ctsm.crop_calendars import generate_gdds_functions as gf
# Allow test names that pylint doesn't like; otherwise hard to make them
# readable
@@ -237,327 +235,138 @@ def test_generate_gdds_get_mxmats_cushionneg14(self):
self.assertEqual(mxmats["miscanthus"], 210 - cushion)
-class TestGetTimeSliceList(unittest.TestCase):
- """Tests for _get_time_slice_list()"""
+class TestGetTimeSliceLists(unittest.TestCase):
+ """Tests for _get_time_slice_lists()"""
- def test_generate_gdds_get_time_slice_list(self):
- """Test that _get_time_slice_list works with two different years"""
- season_list = [1986, 1987]
- result = gg._get_time_slice_list(season_list[0], season_list[-1])
- expected = [
- slice("1987-01-01", "1987-12-31"),
- slice("1988-01-01", "1988-12-31"),
- slice("1989-01-01", "1989-12-31"),
+ def test_generate_gdds_get_time_slice_lists(self):
+ """Test that _get_time_slice_lists works with two different years"""
+ h1_slices, h2_slices = gg._get_time_slice_lists(1986, 1987)
+
+ # Check h1 slices (annual timesteps - single day)
+ expected_h1 = [
+ slice("1987-01-01", "1987-01-01"),
+ slice("1988-01-01", "1988-01-01"),
+ slice("1989-01-01", "1989-01-01"),
]
- assert result == expected
-
- def test_generate_gdds_get_time_slice_list_1yr(self):
- """Test that _get_time_slice_list works with the same year"""
- result = gg._get_time_slice_list(1987, 1987)
- expected = [
- slice("1988-01-01", "1988-12-31"),
- slice("1989-01-01", "1989-12-31"),
+ self.assertEqual(h1_slices, expected_h1)
+
+ # Check h2 slices (daily timesteps - full year)
+ # For daily, starts at first_season (1986), not first_season + 1
+ expected_h2 = [
+ slice("1986-01-02", "1987-01-01"),
+ slice("1987-01-02", "1988-01-01"),
+ slice("1988-01-02", "1989-01-01"),
]
- assert result == expected
+ self.assertEqual(h2_slices, expected_h2)
+
+ def test_generate_gdds_get_time_slice_lists_1yr(self):
+ """Test that _get_time_slice_lists works with the same year"""
+ h1_slices, h2_slices = gg._get_time_slice_lists(1987, 1987)
+
+ # Check h1 slices
+ expected_h1 = [
+ slice("1988-01-01", "1988-01-01"),
+ slice("1989-01-01", "1989-01-01"),
+ ]
+ self.assertEqual(h1_slices, expected_h1)
+
+ # Check h2 slices
+ # For daily, starts at first_season (1987), not first_season + 1
+ expected_h2 = [
+ slice("1987-01-02", "1988-01-01"),
+ slice("1988-01-02", "1989-01-01"),
+ ]
+ self.assertEqual(h2_slices, expected_h2)
def test_generate_gdds_get_time_slice_list_valueerror(self):
"""Test that _get_time_slice_list raises ValueError if last < first"""
with self.assertRaisesRegex(ValueError, "first_season.* > last_season"):
- gg._get_time_slice_list(1987, 1986)
+ gg._get_time_slice_lists(1987, 1986)
- def test_generate_gdds_get_time_slice_list_typeerror_first(self):
- """Test that _get_time_slice_list raises TypeError if not given integer first season"""
+ def test_generate_gdds_get_time_slice_lists_typeerror_first(self):
+ """Test that _get_time_slice_lists raises TypeError if not given integer first season"""
with self.assertRaisesRegex(
TypeError, r"_get_time_slice_list\(\) arguments must be integers"
):
- gg._get_time_slice_list(1986.3, 1987)
+ gg._get_time_slice_lists(1986.3, 1987)
- def test_generate_gdds_get_time_slice_list_typeerror_last(self):
- """Test that _get_time_slice_list raises TypeError if not given integer last season"""
+ def test_generate_gdds_get_time_slice_lists_typeerror_last(self):
+ """Test that _get_time_slice_lists raises TypeError if not given integer last season"""
with self.assertRaisesRegex(
TypeError, r"_get_time_slice_list\(\) arguments must be integers"
):
- gg._get_time_slice_list(1986, None)
-
-
-class TestCheckGridMatch(unittest.TestCase):
- """Tests check_grid_match()"""
-
- def test_check_grid_match_true_npnp(self):
- """Test check_grid_match() with two matching numpy arrays"""
- np0 = np.array([0, 1, 2, np.pi])
- match, max_abs_diff = gf.check_grid_match(np0, np0)
- self.assertTrue(match)
- self.assertEqual(max_abs_diff, 0.0)
-
- def test_check_grid_match_true_dada(self):
- """Test check_grid_match() with two matching DataArrays"""
- np0 = np.array([0, 1, 2, np.pi])
- da0 = xr.DataArray(data=np0)
- match, max_abs_diff = gf.check_grid_match(da0, da0)
- self.assertTrue(match)
- self.assertEqual(max_abs_diff, 0.0)
-
- def test_check_grid_match_false_npnp(self):
- """Test check_grid_match() with two non-matching numpy arrays"""
- np0 = np.array([0, 1, 2, np.pi])
- np1 = np0.copy()
- diff = 2 * gf.GRID_TOL_DEG
- np1[0] = np0[0] + diff
- match, max_abs_diff = gf.check_grid_match(np0, np1)
- self.assertFalse(match)
- self.assertEqual(max_abs_diff, diff)
-
- def test_check_grid_match_false_dada(self):
- """Test check_grid_match() with two non-matching DataArrays"""
- np0 = np.array([0, 1, 2, np.pi])
- np1 = np0.copy()
- diff = 2 * gf.GRID_TOL_DEG
- np1[0] = np0[0] + diff
- da0 = xr.DataArray(data=np0)
- da1 = xr.DataArray(data=np1)
- match, max_abs_diff = gf.check_grid_match(da0, da1)
- self.assertFalse(match)
- self.assertEqual(max_abs_diff, diff)
-
- def test_check_grid_match_falseneg_npnp(self):
- """As test_check_grid_match_false_npnp, but with diff in negative direction"""
- np0 = np.array([0, 1, 2, np.pi])
- np1 = np0.copy()
- diff = -2 * gf.GRID_TOL_DEG
- np1[0] = np0[0] + diff
- match, max_abs_diff = gf.check_grid_match(np0, np1)
- self.assertFalse(match)
- self.assertEqual(max_abs_diff, abs(diff))
-
- def test_check_grid_match_matchnans_true_npnp(self):
- """Test check_grid_match() with two numpy arrays that have nans and match"""
- np0 = np.array([np.nan, 1, 2, np.pi])
- with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"):
- match, max_abs_diff = gf.check_grid_match(np0, np0)
- self.assertTrue(match)
- self.assertEqual(max_abs_diff, 0.0)
-
- def test_check_grid_match_matchnans_true_dada(self):
- """Test check_grid_match() with two DataArrays that have nans and match"""
- np0 = np.array([np.nan, 1, 2, np.pi])
- da0 = xr.DataArray(data=np0)
- with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"):
- match, max_abs_diff = gf.check_grid_match(da0, da0)
- self.assertTrue(match)
- self.assertEqual(max_abs_diff, 0.0)
-
- def test_check_grid_match_matchnans_false_npnp(self):
- """Test check_grid_match() with two numpy arrays with nans that DON'T match"""
- np0 = np.array([np.nan, 1, 2, np.pi])
- np1 = np.array([np.nan, 1, np.nan, np.pi])
- with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid don't match"):
- match, max_abs_diff = gf.check_grid_match(np0, np1)
- self.assertFalse(match)
- self.assertIsNone(max_abs_diff)
-
- def test_check_grid_match_matchnans_falseshape_npnp(self):
- """Test check_grid_match() with two numpy arrays that have different shapes"""
- np0 = np.array([0, 1, 2, np.pi])
- np1 = np.array([0, 1, 2, np.pi, 4])
- match, max_abs_diff = gf.check_grid_match(np0, np1)
- self.assertFalse(match)
- self.assertIsNone(max_abs_diff)
-
- def test_check_grid_match_matchnans_falseshape_dada(self):
- """Test check_grid_match() with two DataArrays that have different shapes"""
- np0 = np.array([0, 1, 2, np.pi])
- np1 = np.array([0, 1, 2, np.pi, 4])
- da0 = xr.DataArray(data=np0)
- da1 = xr.DataArray(data=np1)
- match, max_abs_diff = gf.check_grid_match(da0, da1)
- self.assertFalse(match)
- self.assertIsNone(max_abs_diff)
-
-
-class TestFindInstHistFiles(unittest.TestCase):
- """Tests of find_inst_hist_files()"""
-
- def setUp(self):
- """
- Set up and change to temporary directory
- """
- self.prev_dir = os.getcwd()
- self.temp_dir = tempfile.mkdtemp()
- os.chdir(self.temp_dir)
-
- def tearDown(self):
- """
- Delete temporary directory
- """
- os.chdir(self.prev_dir)
- shutil.rmtree(self.temp_dir, ignore_errors=True)
-
- def _create_test_file(self, filename):
- """Helper to create an empty test file"""
- filepath = os.path.join(self.temp_dir, filename)
- with open(filepath, "a", encoding="utf-8"):
- pass
- return filepath
-
- def test_find_inst_hist_files_h1_no_year(self):
- """Test finding h1 files without specifying year"""
- # Create test files
- file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- file2 = self._create_test_file("test.clm2.h1i.2000-02-01-00000.nc")
- file3 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
-
- # Should find all h1i files
- self.assertEqual(len(result), 3)
- self.assertIn(file1, result)
- self.assertIn(file2, result)
- self.assertIn(file3, result)
-
- def test_find_inst_hist_files_h2_no_year(self):
- """Test finding h2 files without specifying year"""
- # Create test files
- file1 = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
- file2 = self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc")
- # Create h1 file that should not be found
- self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=2, this_year=None)
-
- # Should find only h2i files
- self.assertEqual(len(result), 2)
- self.assertIn(file1, result)
- self.assertIn(file2, result)
-
- def test_find_inst_hist_files_with_year(self):
- """Test finding files for a specific year"""
- # Create test files
- file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
- file_2002 = self._create_test_file("test.clm2.h1i.2002-01-01-00000.nc")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2001)
-
- # Should find only 2001 file
- self.assertEqual(len(result), 1)
- self.assertIn(file_2001, result)
- self.assertNotIn(file_2000, result)
- self.assertNotIn(file_2002, result)
-
- def test_find_inst_hist_files_base_extension(self):
- """Test finding files with .nc.base extension"""
- # Create test files with .nc.base extension
- file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base")
- file2 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
-
- # Should find .nc.base files
- self.assertEqual(len(result), 2)
- self.assertIn(file1, result)
- self.assertIn(file2, result)
-
- def test_find_inst_hist_files_prefer_nc_over_base(self):
- """Test that .nc files are preferred over .nc.base files"""
- # Create both .nc and .nc.base files
- file_nc = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- file_nc_base = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
-
- # Should find .nc files first (pattern order preference)
- self.assertIn(file_nc, result)
- self.assertNotIn(file_nc_base, result)
-
- def test_find_inst_hist_files_multiple_months_same_year(self):
- """Test finding multiple files from the same year"""
- # Create multiple files from 2000
- file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- file2 = self._create_test_file("test.clm2.h1i.2000-01-15-00000.nc")
- file3 = self._create_test_file("test.clm2.h1i.2000-01-31-00000.nc")
- # Create file from different year
- self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
-
- result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000)
-
- # Should find all January 2000 files
- self.assertEqual(len(result), 3)
- self.assertIn(file1, result)
- self.assertIn(file2, result)
- self.assertIn(file3, result)
-
- def test_find_inst_hist_files_no_files_found(self):
- """Test error when no matching files are found"""
- # Create a non-matching file
- self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
-
- # Should raise a FileNotFoundError error
- with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
- gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None)
-
- def test_find_inst_hist_files_different_case_names(self):
- """Test that RuntimeError is raised when files from different case names are found"""
- # Create files with different case names
- self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc")
-
- # Should raise RuntimeError due to multiple case names
- with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
- gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000)
-
- def test_find_inst_hist_files_different_case_names_with_logger(self):
- """
- Test that RuntimeError is raised when files from different case names are found, with logger
- """
- # Create a logger
- logger = logging.getLogger("test_logger_case_names")
- logger.setLevel(logging.DEBUG)
-
- # Create files with different case names
- self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc")
-
- # Should raise RuntimeError due to multiple case names, even with logger
- with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
- gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000, logger=logger)
-
- def test_find_inst_hist_files_no_files_found_with_logger(self):
- """Test error when no matching files are found, with logger"""
- # Create a logger
- logger = logging.getLogger("test_logger_no_files")
- logger.setLevel(logging.DEBUG)
-
- # Create a non-matching file
- self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
-
- # Should raise a FileNotFoundError even with logger
- with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
- gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None, logger=logger)
-
- def test_find_inst_hist_files_h_str_with_logger(self):
- """Test that TypeError is raised when h is a string, with logger"""
- # Create a logger
- logger = logging.getLogger("test_logger_h_str")
- logger.setLevel(logging.DEBUG)
-
- self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
-
- with self.assertRaisesRegex(TypeError, "must be an integer, not"):
- gf.find_inst_hist_files(self.temp_dir, h="1", this_year=2000, logger=logger)
-
- def test_find_inst_hist_files_h_float_with_logger(self):
- """Test that TypeError is raised when h is a float, with logger"""
- # Create a logger
- logger = logging.getLogger("test_logger_h_float")
- logger.setLevel(logging.DEBUG)
-
- self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
-
- with self.assertRaisesRegex(TypeError, "must be an integer, not"):
- gf.find_inst_hist_files(self.temp_dir, h=1.0, this_year=2000, logger=logger)
+ gg._get_time_slice_lists(1986, None)
+
+ def test_generate_gdds_get_time_slice_lists_lengths_match(self):
+ """Test that h1 and h2 slice lists have the same length"""
+ h1_slices, h2_slices = gg._get_time_slice_lists(2000, 2005)
+ self.assertEqual(len(h1_slices), len(h2_slices))
+ # Should be last_season - first_season + 2
+ self.assertEqual(len(h1_slices), 2005 - 2000 + 2)
+
+ def test_generate_gdds_get_time_slice_lists_h1_start_equals_stop(self):
+ """Test that h1 slices are single-day (start == stop)"""
+ h1_slices, _ = gg._get_time_slice_lists(2000, 2002)
+ for s in h1_slices: # pylint: disable=not-an-iterable
+ self.assertEqual(s.start, s.stop)
+
+ def test_generate_gdds_get_time_slice_lists_h2_year_long(self):
+ """Test that h2 slices span one year"""
+ _, h2_slices = gg._get_time_slice_lists(2000, 2002)
+ for s in h2_slices: # pylint: disable=not-an-iterable
+ # Start should be Jan 2 of year Y
+ self.assertIn("-01-02", s.start)
+ # Stop should be Jan 1 of year Y+1
+ self.assertIn("-01-01", s.stop)
+ # Extract years and verify they're consecutive
+ start_year = int(s.start[:4])
+ stop_year = int(s.stop[:4])
+ self.assertEqual(stop_year, start_year + 1)
+
+
+class TestGetHistoryYrRange(unittest.TestCase):
+ """Tests for _get_history_yr_range()"""
+
+ def test_get_history_yr_range_annual(self):
+ """Test _get_history_yr_range with annual frequency"""
+ result = gg._get_history_yr_range(1986, 1987, "annual")
+ # For annual: first_season + 1 through last_season + 2
+ expected = range(1987, 1990)
+ self.assertEqual(result, expected)
+
+ def test_get_history_yr_range_daily(self):
+ """Test _get_history_yr_range with daily frequency"""
+ result = gg._get_history_yr_range(1986, 1987, "daily")
+ # For daily: first_season through last_season + 1
+ expected = range(1986, 1989)
+ self.assertEqual(result, expected)
+
+ def test_get_history_yr_range_annual_single_year(self):
+ """Test _get_history_yr_range with annual frequency and single year"""
+ result = gg._get_history_yr_range(2000, 2000, "annual")
+ # Should give 2001, 2002
+ expected = range(2001, 2003)
+ self.assertEqual(result, expected)
+
+ def test_get_history_yr_range_daily_single_year(self):
+ """Test _get_history_yr_range with daily frequency and single year"""
+ result = gg._get_history_yr_range(2000, 2000, "daily")
+ # Should give 2000, 2001
+ expected = range(2000, 2002)
+ self.assertEqual(result, expected)
+
+ def test_get_history_yr_range_unknown_freq(self):
+ """Test _get_history_yr_range with unknown frequency"""
+ with self.assertRaises(NotImplementedError):
+ gg._get_history_yr_range(2000, 2001, "monthly")
+
+ def test_get_history_yr_range_lengths_match(self):
+ """Test that annual and daily ranges have the same length"""
+ annual_range = gg._get_history_yr_range(2000, 2005, "annual")
+ daily_range = gg._get_history_yr_range(2000, 2005, "daily")
+ self.assertEqual(len(annual_range), len(daily_range))
+ # Should be last_season - first_season + 2
+ self.assertEqual(len(annual_range), 2005 - 2000 + 2)
class TestGetFileLists(unittest.TestCase):
@@ -599,47 +408,55 @@ def _create_test_file(self, filename):
def test_get_file_lists_single_year(self):
"""Test _get_file_lists with a single year of data"""
- # Create h1 and h2 files for 2000
- h1_file = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- h2_file = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+ # Create h1 and h2 files for 2000 and 2001
+ # Also need h2 file for 1999 since daily starts at first_season
+ h2_file_1999 = self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc")
+ h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+ h2_file_2000 = self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc")
+ h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
- time_slice_list = [slice("2000-01-01", "2000-12-31")]
+ # Get time slice lists for first_season=1999, last_season=1999
+ # This will give us slices for 2000 and 2001 (h1), and slices starting in 1999 and 2000 (h2)
+ time_slice_lists_list = gg._get_time_slice_lists(1999, 1999)
h1_file_lists, h2_file_lists = gg._get_file_lists(
- self.temp_dir, time_slice_list, logger=None
+ self.temp_dir, time_slice_lists_list, logger=None
)
- # Should have one list for each time slice
- self.assertEqual(len(h1_file_lists), 1)
- self.assertEqual(len(h2_file_lists), 1)
+ # Should have two lists (one for each year: 2000, 2001)
+ self.assertEqual(len(h1_file_lists), 2)
+ self.assertEqual(len(h2_file_lists), 2)
# Check contents of file lists
# pylint: disable=unsubscriptable-object
self.assertEqual(len(h1_file_lists[0]), 1)
+ self.assertEqual(h1_file_lists[0], [h1_file_2000])
self.assertEqual(len(h2_file_lists[0]), 1)
- self.assertEqual(h1_file_lists[0], [h1_file])
- self.assertEqual(h2_file_lists[0], [h2_file])
+ self.assertEqual(h2_file_lists[0], [h2_file_1999])
+ self.assertEqual(len(h1_file_lists[1]), 1)
+ self.assertEqual(h1_file_lists[1], [h1_file_2001])
+ self.assertEqual(len(h2_file_lists[1]), 1)
+ self.assertEqual(h2_file_lists[1], [h2_file_2000])
def test_get_file_lists_multiple_years(self):
"""Test _get_file_lists with multiple years of data"""
# Create h1 and h2 files for 2000-2002
+ # Also need h2 file for 1999 since daily starts at first_season
h1_files = []
- h2_files = []
+ h2_files = [self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc")]
for year in [2000, 2001, 2002]:
h1_files.append(self._create_test_file(f"test.clm2.h1i.{year}-01-01-00000.nc"))
- h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-01-00000.nc"))
+ h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-02-00000.nc"))
- time_slice_list = [
- slice("2000-01-01", "2000-12-31"),
- slice("2001-01-01", "2001-12-31"),
- slice("2002-01-01", "2002-12-31"),
- ]
+ # Get time slice lists for first_season=1999, last_season=2000
+ # This will give us slices for 2000, 2001, 2002 (h1) and 1999, 2000, 2001 (h2)
+ time_slice_lists_list = gg._get_time_slice_lists(1999, 2000)
h1_file_lists, h2_file_lists = gg._get_file_lists(
- self.temp_dir, time_slice_list, logger=None
+ self.temp_dir, time_slice_lists_list, logger=None
)
- # Should have one list for each time slice
+ # Should have three lists (one for each year: 2000, 2001, 2002)
self.assertEqual(len(h1_file_lists), 3)
self.assertEqual(len(h2_file_lists), 3)
@@ -653,93 +470,151 @@ def test_get_file_lists_multiple_years(self):
def test_get_file_lists_multiple_files_per_slice(self):
"""Test _get_file_lists when multiple files fall within a time slice"""
- # Create multiple h1 and h2 files for 2000
- h1_files = []
- h2_files = []
+ # Create h1 files for 2000 and 2001 (annual)
+ h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+ h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
+
+ # Create multiple h2 files for 1999 and 2000 (daily throughout the year)
+ h2_files_1999 = []
+ for month in ["01", "06", "12"]:
+ h2_files_1999.append(self._create_test_file(f"test.clm2.h2i.1999-{month}-15-00000.nc"))
+
+ h2_files_2000 = []
for month in ["01", "06", "12"]:
- h1_files.append(self._create_test_file(f"test.clm2.h1i.2000-{month}-01-00000.nc"))
- h2_files.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-01-00000.nc"))
+ h2_files_2000.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-15-00000.nc"))
- time_slice_list = [slice("2000-01-01", "2000-12-31")]
+ # Get time slice lists for first_season=1999, last_season=1999
+ time_slice_lists_list = gg._get_time_slice_lists(1999, 1999)
h1_file_lists, h2_file_lists = gg._get_file_lists(
- self.temp_dir, time_slice_list, logger=None
+ self.temp_dir, time_slice_lists_list, logger=None
)
- # Should have one list for the time slice
- self.assertEqual(len(h1_file_lists), 1)
- self.assertEqual(len(h2_file_lists), 1)
+ # Should have two lists (for 2000 and 2001)
+ self.assertEqual(len(h1_file_lists), 2)
+ self.assertEqual(len(h2_file_lists), 2)
- # Check contents of file lists (should be sorted)
+ # Check contents of file lists
# pylint: disable=unsubscriptable-object
- self.assertEqual(len(h1_file_lists[0]), 3)
+ self.assertEqual(len(h1_file_lists[0]), 1)
+ self.assertEqual(h1_file_lists[0], [h1_file_2000])
self.assertEqual(len(h2_file_lists[0]), 3)
- self.assertEqual(h1_file_lists[0], sorted(h1_files))
- self.assertEqual(h2_file_lists[0], sorted(h2_files))
+ self.assertEqual(h2_file_lists[0], sorted(h2_files_1999))
+
+ # Check second year (2001)
+ self.assertEqual(len(h1_file_lists[1]), 1)
+ self.assertEqual(h1_file_lists[1], [h1_file_2001])
+ self.assertEqual(len(h2_file_lists[1]), 3)
+ self.assertEqual(h2_file_lists[1], sorted(h2_files_2000))
def test_get_file_lists_no_h1_files(self):
"""Test _get_file_lists when h1 files are missing"""
- # Create only h2 files
- self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
- time_slice_list = [slice("2000-01-01", "2000-12-31")]
+ time_slice_lists_list = gg._get_time_slice_lists(1999, 1999)
# Should raise FileNotFoundError when h1 files are not found
- with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
- gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+ with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns.*h1.*"):
+ gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None)
def test_get_file_lists_no_h2_files(self):
"""Test _get_file_lists when h2 files are missing"""
- # Create only h1 files
- self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+ growing_season = 2000
+
+ # Create h1 files with data from growing_season year and the next one
+ self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc")
+ self._create_test_file(f"test.clm2.h1i.{growing_season + 2}-01-01-00000.nc")
+ # But don't create h2 files
- time_slice_list = [slice("2000-01-01", "2000-12-31")]
+ time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season)
# Should raise FileNotFoundError when h2 files are not found
- with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
- gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+ with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns.*h2.*"):
+ gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None)
- def test_get_file_lists_h1_outside_time_slice(self):
- """Test _get_file_lists when h1 files exist but have no timesteps in the slice"""
- # Create h1 files for 2000 and h2 files for 2001
- self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc")
+ def test_get_file_lists_h1_missing_a_time_slice(self):
+ """
+ Test _get_file_lists when h1 files exist but not for one of the needed time slices
+
+ We will be simulating a need for processing the 2000 growing season only. This will require
+ data from 2000 and also, because seasons can extend into the next calendar year, 2001.
+
+ Because of how CESM timestamps annual output files, it will be looking for h1 timestamps
+ with the years 2001-01-01 (data from 2000) and 2002-01-01 (data from 2001).
- # Request time slice for 2001 (h1 files exist but are outside the slice)
- time_slice_list = [slice("2001-01-01", "2001-12-31")]
+ In this test, we will only create one file for h1. We expect an error to be thrown about
+ that before the check of the available h2 time steps happens.
+ """
+ growing_season = 2000
+
+ # Create h1 files with data from growing_season year BUT NOT the next one
+ self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc")
+
+ # Get required time slice lists for h1 and h2 files
+ time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season)
+
+ # Make sure the required time slice list is what we expect
+ assert time_slice_lists_list[0] == [
+ slice(f"{growing_season+1}-01-01", f"{growing_season+1}-01-01", None),
+ slice(f"{growing_season+2}-01-01", f"{growing_season+2}-01-01", None),
+ ]
# Should raise FileNotFoundError when h1 files have no timesteps in slice
with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"):
- gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+ gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None)
- def test_get_file_lists_h2_outside_time_slice(self):
- """Test _get_file_lists when h2 files exist but have no timesteps in the slice"""
- # Create h1 files for 2001 and h2 files for 2000
- self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
- self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+ def test_get_file_lists_h2_missing_a_time_slice(self):
+ """
+ Test _get_file_lists when h2 files exist but not for one of the needed time slices
+
+ We will be simulating a need for processing the 2000 growing season only. This will require
+ data from 2000 and also, because seasons can extend into the next calendar year, 2001.
- # Request time slice for 2001 (h2 files exist but are outside the slice)
- time_slice_list = [slice("2001-01-01", "2001-12-31")]
+ Because of how CESM timestamps daily output files, it will be looking for h2 timestamps
+ starting 2000-01-02 (data from 2000-01-01) through 2002-01-01 (data from 2001-12-31).
+
+ In this test, we will create all necessary files for h1 but only one of the two required for
+ h2.
+
+ (As test_get_file_lists_h1_missing_a_time_slice but for h2 instead.)
+ """
+ growing_season = 2000
+
+ # Create h1 files with data from growing_season year and the next one
+ self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc")
+ self._create_test_file(f"test.clm2.h1i.{growing_season + 2}-01-01-00000.nc")
+
+ # Create h2 file with data from growing_season year BUT NOT the next one
+ self._create_test_file(f"test.clm2.h2i.{growing_season+1}-01-02-00000.nc")
+
+ # Get required time slice lists for h1 and h2 files
+ time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season)
+
+ # Make sure the required time slice lists are what we expect
+ assert time_slice_lists_list[0] == [
+ slice(f"{growing_season+1}-01-01", f"{growing_season+1}-01-01", None),
+ slice(f"{growing_season+2}-01-01", f"{growing_season+2}-01-01", None),
+ ]
+ assert time_slice_lists_list[1] == [
+ slice(f"{growing_season}-01-02", f"{growing_season+1}-01-01", None),
+ slice(f"{growing_season+1}-01-02", f"{growing_season+2}-01-01", None),
+ ]
# Should raise FileNotFoundError when h2 files have no timesteps in slice
with self.assertRaisesRegex(FileNotFoundError, "No h2 timesteps found in"):
- gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+ gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None)
def test_get_file_lists_partial_overlap(self):
"""Test _get_file_lists when some time slices have files and others don't"""
# Create h1 and h2 files for 2000 only
self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
- self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+ self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc")
- # Request time slices for 2000 and 2001
- time_slice_list = [
- slice("2000-01-01", "2000-12-31"),
- slice("2001-01-01", "2001-12-31"),
- ]
+ # Request time slices for 2000, 2001, 2002 (only 2000 has files)
+ time_slice_lists_list = gg._get_time_slice_lists(1999, 2000)
# Should raise FileNotFoundError when second time slice has no files
with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"):
- gg._get_file_lists(self.temp_dir, time_slice_list, logger=None)
+ gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None)
if __name__ == "__main__":
diff --git a/python/ctsm/test/test_unit_generate_gdds_functions.py b/python/ctsm/test/test_unit_generate_gdds_functions.py
new file mode 100644
index 0000000000..e71c6b1c49
--- /dev/null
+++ b/python/ctsm/test/test_unit_generate_gdds_functions.py
@@ -0,0 +1,576 @@
+#!/usr/bin/env python3
+
+"""
+Unit tests for generate_gdds_functions.py
+"""
+
+import unittest
+import os
+import tempfile
+import shutil
+import logging
+
+import numpy as np
+import xarray as xr
+from cftime import DatetimeNoLeap, DatetimeAllLeap
+
+from ctsm import unit_testing
+from ctsm.crop_calendars import generate_gdds_functions as gf
+
+# Allow test names that pylint doesn't like; otherwise hard to make them
+# readable
+# pylint: disable=invalid-name
+
+# pylint: disable=protected-access
+
+## Too many instance attributes as part of the class (too many self. in the SetUp)
+# pylint: disable=too-many-instance-attributes
+
+
+class TestCheckGridMatch(unittest.TestCase):
+ """Tests check_grid_match()"""
+
+ def test_check_grid_match_true_npnp(self):
+ """Test check_grid_match() with two matching numpy arrays"""
+ np0 = np.array([0, 1, 2, np.pi])
+ match, max_abs_diff = gf.check_grid_match(np0, np0)
+ self.assertTrue(match)
+ self.assertEqual(max_abs_diff, 0.0)
+
+ def test_check_grid_match_true_dada(self):
+ """Test check_grid_match() with two matching DataArrays"""
+ np0 = np.array([0, 1, 2, np.pi])
+ da0 = xr.DataArray(data=np0)
+ match, max_abs_diff = gf.check_grid_match(da0, da0)
+ self.assertTrue(match)
+ self.assertEqual(max_abs_diff, 0.0)
+
+ def test_check_grid_match_false_npnp(self):
+ """Test check_grid_match() with two non-matching numpy arrays"""
+ np0 = np.array([0, 1, 2, np.pi])
+ np1 = np0.copy()
+ diff = 2 * gf.GRID_TOL_DEG
+ np1[0] = np0[0] + diff
+ match, max_abs_diff = gf.check_grid_match(np0, np1)
+ self.assertFalse(match)
+ self.assertEqual(max_abs_diff, diff)
+
+ def test_check_grid_match_false_dada(self):
+ """Test check_grid_match() with two non-matching DataArrays"""
+ np0 = np.array([0, 1, 2, np.pi])
+ np1 = np0.copy()
+ diff = 2 * gf.GRID_TOL_DEG
+ np1[0] = np0[0] + diff
+ da0 = xr.DataArray(data=np0)
+ da1 = xr.DataArray(data=np1)
+ match, max_abs_diff = gf.check_grid_match(da0, da1)
+ self.assertFalse(match)
+ self.assertEqual(max_abs_diff, diff)
+
+ def test_check_grid_match_falseneg_npnp(self):
+ """As test_check_grid_match_false_npnp, but with diff in negative direction"""
+ np0 = np.array([0, 1, 2, np.pi])
+ np1 = np0.copy()
+ diff = -2 * gf.GRID_TOL_DEG
+ np1[0] = np0[0] + diff
+ match, max_abs_diff = gf.check_grid_match(np0, np1)
+ self.assertFalse(match)
+ self.assertEqual(max_abs_diff, abs(diff))
+
+ def test_check_grid_match_matchnans_true_npnp(self):
+ """Test check_grid_match() with two numpy arrays that have nans and match"""
+ np0 = np.array([np.nan, 1, 2, np.pi])
+ with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"):
+ match, max_abs_diff = gf.check_grid_match(np0, np0)
+ self.assertTrue(match)
+ self.assertEqual(max_abs_diff, 0.0)
+
+ def test_check_grid_match_matchnans_true_dada(self):
+ """Test check_grid_match() with two DataArrays that have nans and match"""
+ np0 = np.array([np.nan, 1, 2, np.pi])
+ da0 = xr.DataArray(data=np0)
+ with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"):
+ match, max_abs_diff = gf.check_grid_match(da0, da0)
+ self.assertTrue(match)
+ self.assertEqual(max_abs_diff, 0.0)
+
+ def test_check_grid_match_matchnans_false_npnp(self):
+ """Test check_grid_match() with two numpy arrays with nans that DON'T match"""
+ np0 = np.array([np.nan, 1, 2, np.pi])
+ np1 = np.array([np.nan, 1, np.nan, np.pi])
+ with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid don't match"):
+ match, max_abs_diff = gf.check_grid_match(np0, np1)
+ self.assertFalse(match)
+ self.assertIsNone(max_abs_diff)
+
+ def test_check_grid_match_matchnans_falseshape_npnp(self):
+ """Test check_grid_match() with two numpy arrays that have different shapes"""
+ np0 = np.array([0, 1, 2, np.pi])
+ np1 = np.array([0, 1, 2, np.pi, 4])
+ match, max_abs_diff = gf.check_grid_match(np0, np1)
+ self.assertFalse(match)
+ self.assertIsNone(max_abs_diff)
+
+ def test_check_grid_match_matchnans_falseshape_dada(self):
+ """Test check_grid_match() with two DataArrays that have different shapes"""
+ np0 = np.array([0, 1, 2, np.pi])
+ np1 = np.array([0, 1, 2, np.pi, 4])
+ da0 = xr.DataArray(data=np0)
+ da1 = xr.DataArray(data=np1)
+ match, max_abs_diff = gf.check_grid_match(da0, da1)
+ self.assertFalse(match)
+ self.assertIsNone(max_abs_diff)
+
+
+class TestFindInstHistFiles(unittest.TestCase):
+ """Tests of find_inst_hist_files()"""
+
+ def setUp(self):
+ """
+ Set up and change to temporary directory
+ """
+ self.prev_dir = os.getcwd()
+ self.temp_dir = tempfile.mkdtemp()
+ os.chdir(self.temp_dir)
+
+ def tearDown(self):
+ """
+ Delete temporary directory
+ """
+ os.chdir(self.prev_dir)
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def _create_test_file(self, filename):
+ """Helper to create an empty test file"""
+ filepath = os.path.join(self.temp_dir, filename)
+ with open(filepath, "a", encoding="utf-8"):
+ pass
+ return filepath
+
+ def test_find_inst_hist_files(self):
+ """Test finding only h2 files when h1i files present too"""
+ # Create test files
+ file1 = self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc")
+ file2 = self._create_test_file("test.clm2.h2i.2001-01-02-00000.nc")
+ # Create h1 file that should not be found
+ self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+ result = gf.find_inst_hist_files(self.temp_dir, h=2)
+
+ # Should find only h2i files
+ self.assertEqual(len(result), 2)
+ self.assertIn(file1, result)
+ self.assertIn(file2, result)
+
+ def test_find_inst_hist_files_prefer_nc_over_base(self):
+ """Test that .nc files are preferred over .nc.base files"""
+ # Create both .nc and .nc.base files
+ file_nc = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+ file_nc_base = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base")
+
+ result = gf.find_inst_hist_files(self.temp_dir, h=1)
+
+ # Should find .nc files first (pattern order preference)
+ self.assertIn(file_nc, result)
+ self.assertNotIn(file_nc_base, result)
+ # Should have only 1 file (the .nc file, not the .nc.base)
+ self.assertEqual(len(result), 1)
+
+ def test_find_inst_hist_files_base_only(self):
+ """Test finding files when only .nc.base files exist"""
+ # Create only .nc.base files
+ file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base")
+ file2 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base")
+
+ result = gf.find_inst_hist_files(self.temp_dir, h=1)
+
+ # Should find .nc.base files when no .nc files exist
+ self.assertEqual(len(result), 2)
+ self.assertIn(file1, result)
+ self.assertIn(file2, result)
+
+ def test_find_inst_hist_files_multiple_months_same_year(self):
+ """Test finding multiple files from the same year"""
+ # Create multiple files from 2000
+ file1 = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc")
+ file2 = self._create_test_file("test.clm2.h2i.2000-01-15-00000.nc")
+ file3 = self._create_test_file("test.clm2.h2i.2000-01-31-00000.nc")
+ # Create file from different year
+ file4 = self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc")
+
+ result = gf.find_inst_hist_files(self.temp_dir, h=2)
+
+ # Should find all files
+ self.assertEqual(len(result), 4)
+ self.assertIn(file1, result)
+ self.assertIn(file2, result)
+ self.assertIn(file3, result)
+ self.assertIn(file4, result)
+
+ def test_find_inst_hist_files_no_files_found(self):
+ """Test error when no matching files are found"""
+ # Create a non-matching file
+ self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
+
+        # Should raise a FileNotFoundError
+ with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+ gf.find_inst_hist_files(self.temp_dir, h=1)
+
+ def test_find_inst_hist_files_different_case_names(self):
+ """Test that RuntimeError is raised when files from different case names are found"""
+ # Create files with different case names
+ self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc")
+ self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc")
+ self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc")
+
+ # Should raise RuntimeError due to multiple case names
+ with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
+ gf.find_inst_hist_files(self.temp_dir, h=1)
+
+ def test_find_inst_hist_files_different_case_names_with_logger(self):
+ """
+ Test that RuntimeError is raised when files from different case names are found, with logger
+ """
+ # Create a logger
+ logger = logging.getLogger("test_logger_case_names")
+ logger.setLevel(logging.DEBUG)
+
+ # Create files with different case names
+ self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc")
+ self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc")
+ self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc")
+
+ # Should raise RuntimeError due to multiple case names, even with logger
+ with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"):
+ gf.find_inst_hist_files(self.temp_dir, h=1, logger=logger)
+
+ def test_find_inst_hist_files_no_files_found_with_logger(self):
+ """Test error when no matching files are found, with logger"""
+ # Create a logger
+ logger = logging.getLogger("test_logger_no_files")
+ logger.setLevel(logging.DEBUG)
+
+ # Create a non-matching file
+ self._create_test_file("test.clm2.h0.2000-01-01-00000.nc")
+
+ # Should raise a FileNotFoundError even with logger
+ with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"):
+ gf.find_inst_hist_files(self.temp_dir, h=1, logger=logger)
+
+ def test_find_inst_hist_files_h_str_with_logger(self):
+ """Test that TypeError is raised when h is a string, with logger"""
+ # Create a logger
+ logger = logging.getLogger("test_logger_h_str")
+ logger.setLevel(logging.DEBUG)
+
+ self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+ with self.assertRaisesRegex(TypeError, "must be an integer, not"):
+ gf.find_inst_hist_files(self.temp_dir, h="1", logger=logger)
+
+ def test_find_inst_hist_files_h_float_with_logger(self):
+ """Test that TypeError is raised when h is a float, with logger"""
+ # Create a logger
+ logger = logging.getLogger("test_logger_h_float")
+ logger.setLevel(logging.DEBUG)
+
+ self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
+
+ with self.assertRaisesRegex(TypeError, "must be an integer, not"):
+ gf.find_inst_hist_files(self.temp_dir, h=1.0, logger=logger)
+
+
+class TestGenInstDailyYear(unittest.TestCase):
+ """Tests of gen_inst_daily_year()"""
+
+ def test_gen_inst_daily_year_noleap(self):
+ """Test gen_inst_daily_year with DatetimeNoLeap calendar"""
+ result = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+
+ # Should have 365 timesteps (Jan 2 through Jan 1 of next year, inclusive)
+ self.assertEqual(len(result), 365)
+
+ # Check first and last dates
+ self.assertEqual(result.values[0], DatetimeNoLeap(2000, 1, 2, has_year_zero=True))
+ self.assertEqual(result.values[-1], DatetimeNoLeap(2001, 1, 1, has_year_zero=True))
+
+ # Verify no Feb 29
+ dates_str = [str(d) for d in result.values]
+        self.assertNotIn("2000-02-29 00:00:00", dates_str)
+
+ # Check that all timestamps are at exactly midnight
+ for t in result.values:
+ assert t.hour == 0
+ assert t.minute == 0
+ assert t.second == 0
+ assert t.microsecond == 0
+
+ def test_gen_inst_daily_year_leap(self):
+ """Test gen_inst_daily_year with a leap year"""
+ cal_type = DatetimeAllLeap
+ year = 2004
+ result = gf.gen_inst_daily_year(year, cal_type)
+
+ # Should have 366 timesteps (Jan 2 through Jan 1 of next year, inclusive, with leap day)
+ self.assertEqual(len(result), 366)
+
+ # Check first and last dates
+ self.assertEqual(result.values[0], cal_type(year, 1, 2, has_year_zero=True))
+ self.assertEqual(result.values[-1], cal_type(year + 1, 1, 1, has_year_zero=True))
+
+ # Verify Feb 29 is there
+ dates_str = [str(d) for d in result.values]
+ self.assertIn(f"{year}-02-29 00:00:00", dates_str)
+
+ # Check that all timestamps are at exactly midnight
+ for t in result.values:
+ assert t.hour == 0
+ assert t.minute == 0
+ assert t.second == 0
+ assert t.microsecond == 0
+
+ def test_gen_inst_daily_year_consecutive_days(self):
+ """Test that gen_inst_daily_year produces consecutive days"""
+ result = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+
+ # Check that each day is exactly one day after the previous
+ for i in range(1, len(result)):
+ diff = result.values[i] - result.values[i - 1]
+ self.assertEqual(diff.days, 1)
+
+ def test_gen_inst_daily_year_different_year(self):
+ """Test gen_inst_daily_year with a different year"""
+ result = gf.gen_inst_daily_year(1987, DatetimeNoLeap)
+
+ # Should still have 365 timesteps
+ self.assertEqual(len(result), 365)
+
+ # Check first and last dates
+ self.assertEqual(result.values[0], DatetimeNoLeap(1987, 1, 2, has_year_zero=True))
+ self.assertEqual(result.values[-1], DatetimeNoLeap(1988, 1, 1, has_year_zero=True))
+
+
+class TestCheckTimeDa(unittest.TestCase):
+ """Tests of _check_time_da()"""
+
+ def test_check_time_da_annual_correct(self):
+ """Test _check_time_da with correct annual data"""
+ time_val = DatetimeNoLeap(2000, 1, 1, has_year_zero=True)
+ time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]})
+
+ # Should not raise an error
+ gf._check_time_da("annual", 2000, time_da, logger=None)
+
+ def test_check_time_da_annual_incorrect(self):
+ """Test _check_time_da with incorrect annual data"""
+ # Wrong date (Jan 2 instead of Jan 1)
+ time_val = DatetimeNoLeap(2000, 1, 2, has_year_zero=True)
+ time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]})
+
+ # Should raise AssertionError (via error())
+ with self.assertRaises(AssertionError):
+ gf._check_time_da("annual", 2000, time_da, logger=None)
+
+ def test_check_time_da_daily_correct(self):
+ """Test _check_time_da with correct daily data"""
+ time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+
+ # Should not raise an error
+ gf._check_time_da("daily", 2000, time_da, logger=None)
+
+ def test_check_time_da_daily_correct_leap(self):
+ """As test_check_time_da_daily_correct, but for a leap year"""
+ time_da = gf.gen_inst_daily_year(2000, DatetimeAllLeap)
+
+ # Should not raise an error
+ gf._check_time_da("daily", 2000, time_da, logger=None)
+
+ def test_check_time_da_daily_missing_day(self):
+ """Test _check_time_da with daily data missing a day"""
+ time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+ # Remove one day from the middle (day 100)
+ time_da_missing = xr.concat(
+ [time_da.isel(time=slice(0, 100)), time_da.isel(time=slice(101, None))],
+ dim="time",
+ )
+
+ # Should raise AssertionError (via error())
+ with self.assertRaises(AssertionError):
+ gf._check_time_da("daily", 2000, time_da_missing, logger=None)
+
+ def test_check_time_da_daily_extra_day(self):
+ """Test _check_time_da with daily data having an extra day"""
+ time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+ # Add an extra day
+ extra_day = DatetimeNoLeap(2001, 1, 3, has_year_zero=True)
+ time_da = xr.concat(
+ [time_da, xr.DataArray([extra_day], dims=["time"], coords={"time": [extra_day]})],
+ dim="time",
+ )
+
+ # Should raise AssertionError (via error())
+ with self.assertRaises(AssertionError):
+ gf._check_time_da("daily", 2000, time_da, logger=None)
+
+ def test_check_time_da_unknown_freq(self):
+ """Test _check_time_da with unknown frequency"""
+ time_val = DatetimeNoLeap(2000, 1, 1, has_year_zero=True)
+ time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]})
+
+ # Should raise NotImplementedError for unknown frequency
+ with self.assertRaises(NotImplementedError):
+ gf._check_time_da("monthly", 2000, time_da, logger=None)
+
+
+class TestCheckFileLists(unittest.TestCase):
+ """Tests of check_file_lists()"""
+
+ def setUp(self):
+ """Set up and change to temporary directory"""
+ self.prev_dir = os.getcwd()
+ self.temp_dir = tempfile.mkdtemp()
+ os.chdir(self.temp_dir)
+
+ def tearDown(self):
+ """Delete temporary directory"""
+ os.chdir(self.prev_dir)
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
+
+ def _create_test_file_with_time(self, filename, time_values):
+ """Helper to create a test file with specific time values"""
+ filepath = os.path.join(self.temp_dir, filename)
+ time = xr.DataArray(time_values, dims=["time"], name="time")
+ ds = xr.Dataset({"time": time})
+ ds.to_netcdf(filepath)
+ return filepath
+
+ def test_check_file_lists_annual_correct(self):
+ """Test check_file_lists with correct annual data"""
+ # Create files with correct annual timesteps
+ file1 = self._create_test_file_with_time(
+ "test.h1i.2000.nc", [DatetimeNoLeap(2000, 1, 1, has_year_zero=True)]
+ )
+ file2 = self._create_test_file_with_time(
+ "test.h1i.2001.nc", [DatetimeNoLeap(2001, 1, 1, has_year_zero=True)]
+ )
+
+ history_yr_range = range(2000, 2002)
+ h_file_lists = [[file1], [file2]]
+ time_slice_list = [
+ slice("2000-01-01", "2000-01-01"),
+ slice("2001-01-01", "2001-01-01"),
+ ]
+
+ # Should not raise an error
+ gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "annual", logger=None)
+
+ def test_check_file_lists_annual_correct_extrafile(self):
+ """Test check_file_lists with correct annual data but an extra file"""
+ # Create files with correct annual timesteps
+ file1 = self._create_test_file_with_time(
+ "test.h1i.2000.nc", [DatetimeNoLeap(2000, 1, 1, has_year_zero=True)]
+ )
+ file2 = self._create_test_file_with_time(
+ "test.h1i.2001.nc", [DatetimeNoLeap(2001, 1, 1, has_year_zero=True)]
+ )
+ file3 = self._create_test_file_with_time(
+ "test.h1i.2002.nc", [DatetimeNoLeap(2002, 1, 1, has_year_zero=True)]
+ )
+
+ history_yr_range = range(2000, 2002)
+ h_file_lists = [[file1], [file2], [file3]]
+ time_slice_list = [
+ slice("2000-01-01", "2000-01-01"),
+ slice("2001-01-01", "2001-01-01"),
+ ]
+
+ # Should not raise an error
+ gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "annual", logger=None)
+
+ def test_check_file_lists_annual_incorrect(self):
+ """Test check_file_lists with incorrect annual data (wrong day)"""
+ # Create file with wrong date (Jan 2 instead of Jan 1)
+ file1 = self._create_test_file_with_time(
+ "test.h1i.2000.nc",
+ [DatetimeNoLeap(2000, 1, 2, has_year_zero=True)], # Wrong day
+ )
+
+ history_yr_range = range(2000, 2001)
+ h_file_lists = [[file1]]
+ # Use a slice that will include Jan 2
+ time_slice_list = [slice("2000-01-01", "2000-01-02")]
+
+ # Should raise AssertionError (has Jan 2 instead of Jan 1)
+ with self.assertRaises(AssertionError):
+ gf.check_file_lists(
+ history_yr_range, h_file_lists, time_slice_list, "annual", logger=None
+ )
+
+ def test_check_file_lists_daily_correct(self):
+ """Test check_file_lists with correct daily data"""
+ # Create file with correct daily timesteps
+ time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+ file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da.values)
+
+ history_yr_range = range(2000, 2001)
+ h_file_lists = [[file1]]
+ time_slice_list = [slice("2000-01-02", "2001-01-01")]
+
+ # Should not raise an error
+ gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None)
+
+ def test_check_file_lists_daily_correct_leap(self):
+ """As test_check_file_lists_daily_correct but with a leap day"""
+ # Create file with correct daily timesteps
+ time_da = gf.gen_inst_daily_year(2000, DatetimeAllLeap)
+ file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da.values)
+
+ history_yr_range = range(2000, 2001)
+ h_file_lists = [[file1]]
+ time_slice_list = [slice("2000-01-02", "2001-01-01")]
+
+ # Should not raise an error
+ gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None)
+
+ def test_check_file_lists_daily_missing_day(self):
+ """Test check_file_lists with daily data missing a day"""
+ # Create file with incomplete daily timesteps
+ time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+ # Remove one day from the middle
+ time_da_incomplete = xr.concat(
+ [time_da.isel(time=slice(0, 100)), time_da.isel(time=slice(101, None))],
+ dim="time",
+ )
+ file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da_incomplete.values)
+
+ history_yr_range = range(2000, 2001)
+ h_file_lists = [[file1]]
+ time_slice_list = [slice("2000-01-02", "2001-01-01")]
+
+ # Should raise AssertionError
+ with self.assertRaises(AssertionError):
+ gf.check_file_lists(
+ history_yr_range, h_file_lists, time_slice_list, "daily", logger=None
+ )
+
+ def test_check_file_lists_multiple_files_per_year(self):
+ """Test check_file_lists with multiple files per year"""
+ # Create two files that together have all daily timesteps for 2000
+ time_da_full = gf.gen_inst_daily_year(2000, DatetimeNoLeap)
+ time_da_first_half = time_da_full.isel(time=slice(0, 182))
+ time_da_second_half = time_da_full.isel(time=slice(182, None))
+
+ file1 = self._create_test_file_with_time("test.h2i.2000a.nc", time_da_first_half.values)
+ file2 = self._create_test_file_with_time("test.h2i.2000b.nc", time_da_second_half.values)
+
+ history_yr_range = range(2000, 2001)
+ h_file_lists = [[file1, file2]]
+ time_slice_list = [slice("2000-01-02", "2001-01-01")]
+
+ # Should not raise an error
+ gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None)
+
+
+if __name__ == "__main__":
+ unit_testing.setup_for_tests()
+ unittest.main()