diff --git a/cime_config/SystemTests/rxcropmaturity.py b/cime_config/SystemTests/rxcropmaturity.py index 589fe38fab..9a73e2c825 100644 --- a/cime_config/SystemTests/rxcropmaturity.py +++ b/cime_config/SystemTests/rxcropmaturity.py @@ -492,15 +492,8 @@ def _run_interpolate_gdds(self): ) def _get_conda_env(self): - conda_setup_commands = stu.cmds_to_setup_conda(self._get_caseroot()) - - # If npl conda environment is available, use that (It has dask, which - # enables chunking, which makes reading daily 1-degree netCDF files - # much more efficient. - if "npl " in os.popen(conda_setup_commands + "conda env list").read(): - self._this_conda_env = "npl" - else: - self._this_conda_env = "ctsm_pylib" + stu.cmds_to_setup_conda(self._get_caseroot()) + self._this_conda_env = "ctsm_pylib" def _append_to_user_nl_clm(self, additions): caseroot = self._get_caseroot() diff --git a/cime_config/testdefs/ExpectedTestFails.xml b/cime_config/testdefs/ExpectedTestFails.xml index fed5e8da74..1e82513737 100644 --- a/cime_config/testdefs/ExpectedTestFails.xml +++ b/cime_config/testdefs/ExpectedTestFails.xml @@ -36,27 +36,6 @@ - - - FAIL - #3740 - - - - - - FAIL - #3740 - - - - - - FAIL - #3740 - - - FAIL diff --git a/cime_config/testdefs/testlist_clm.xml b/cime_config/testdefs/testlist_clm.xml index b2701a333f..27c52a3510 100644 --- a/cime_config/testdefs/testlist_clm.xml +++ b/cime_config/testdefs/testlist_clm.xml @@ -4510,22 +4510,14 @@ - - - - - - - - - + - + diff --git a/python/ctsm/crop_calendars/check_constant_vars.py b/python/ctsm/crop_calendars/check_constant_vars.py index 0e218c5bfa..7101b5877c 100644 --- a/python/ctsm/crop_calendars/check_constant_vars.py +++ b/python/ctsm/crop_calendars/check_constant_vars.py @@ -147,8 +147,8 @@ def ensure_all_patches_checked(this_ds, this_da, ra_sp, incl_patches): incl_patches = np.sort(incl_patches) if not np.array_equal(incl_patches, np.unique(incl_patches)): raise RuntimeError("Patch(es) checked but also all-NaN??") - if not np.array_equal(incl_patches, np.arange(this_ds.dims["patch"])): - for patch in np.arange(this_ds.dims["patch"]): + if not np.array_equal(incl_patches, np.arange(this_ds.sizes["patch"])): + for patch in np.arange(this_ds.sizes["patch"]): if patch not in incl_patches: raise RuntimeError( f"Not all patches checked! E.g., {patch}: {this_da.isel(patch=patch).values}" @@ -178,7 +178,7 @@ def check_one_constant_var_loop_through_timesteps( In check_one_constant_var(), loop through timesteps """ found_in_rx = None - for timestep in np.arange(time_1 + 1, this_ds.dims[time_coord]): + for timestep in np.arange(time_1 + 1, this_ds.sizes[time_coord]): t_yr = this_ds[time_coord].values[timestep] t_vals = np.squeeze(this_da.isel({time_coord: timestep, "patch": these_patches}).values) ok_p = t1_vals == t_vals @@ -267,7 +267,7 @@ def check_one_constant_var( bad_patches, ) = check_one_constant_var_setup(this_ds, case, var) - for time_1 in np.arange(this_ds.dims[time_coord] - 1): + for time_1 in np.arange(this_ds.sizes[time_coord] - 1): condn = ~np.isnan(ra_sp[time_1, ...]) if time_1 > 0: condn = np.bitwise_and(condn, np.all(np.isnan(ra_sp[:time_1, ...]), axis=0)) @@ -324,12 +324,12 @@ def check_one_constant_var( if not any_bad: if any_bad_before_checking_rx: print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + f"✅ CLM output {var} do not vary through {this_ds.sizes[time_coord]} growing " + "seasons of output (except for patch(es) with missing rx)." 
) else: print( - f"✅ CLM output {var} do not vary through {this_ds.dims[time_coord]} growing " + f"✅ CLM output {var} do not vary through {this_ds.sizes[time_coord]} growing " + "seasons of output." ) diff --git a/python/ctsm/crop_calendars/convert_axis_time2gs.py b/python/ctsm/crop_calendars/convert_axis_time2gs.py index 3fa357b685..c0a7089d03 100644 --- a/python/ctsm/crop_calendars/convert_axis_time2gs.py +++ b/python/ctsm/crop_calendars/convert_axis_time2gs.py @@ -48,13 +48,13 @@ def convert_axis_time2gs_setup(this_ds, verbose): Various setup steps for convert_axis_time2gs_setup() """ # How many non-NaN patch-seasons do we expect to have once we're done organizing things? - n_patch = this_ds.dims["patch"] + n_patch = this_ds.sizes["patch"] # Because some patches will be planted in the last year but not complete, we have to ignore any # finalyear-planted seasons that do complete. - n_gs = this_ds.dims["time"] - 1 + n_gs = this_ds.sizes["time"] - 1 expected_valid = n_patch * n_gs - mxharvests = this_ds.dims["mxharvests"] + mxharvests = this_ds.sizes["mxharvests"] if verbose: print( @@ -377,7 +377,7 @@ def ignore_harvests_planted_in_final_year( ) is_valid = ~np.isnan(hdates_pg2) is_fake = np.isneginf(hdates_pg2) - is_fake = np.reshape(is_fake[is_valid], (this_ds.dims["patch"], n_gs)) + is_fake = np.reshape(is_fake[is_valid], (this_ds.sizes["patch"], n_gs)) discrepancy = np.sum(is_valid) - expected_valid unique_n_seasons = np.unique(np.sum(is_valid, axis=1)) if verbose: @@ -435,8 +435,8 @@ def create_dataset( # Remove the nans and reshape to patches*growingseasons da_pyh = da_yhp.transpose("patch", "time", "mxharvests") - ar_pg = np.reshape(da_pyh.values, (this_ds.dims["patch"], -1)) - ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.dims["patch"], n_gs)) + ar_pg = np.reshape(da_pyh.values, (this_ds.sizes["patch"], -1)) + ar_valid_pg = np.reshape(ar_pg[is_valid], (this_ds.sizes["patch"], n_gs)) # Change -infs to nans ar_valid_pg[is_fake] = np.nan # Save as DataArray to new Dataset, stripping _PERHARV from variable name diff --git a/python/ctsm/crop_calendars/cropcal_module.py b/python/ctsm/crop_calendars/cropcal_module.py index 976d022e85..430badfb48 100644 --- a/python/ctsm/crop_calendars/cropcal_module.py +++ b/python/ctsm/crop_calendars/cropcal_module.py @@ -42,7 +42,7 @@ def check_and_trim_years(year_1, year_n, ds_in): # Make sure you have the expected number of timesteps (including extra year) n_years_expected = year_n - year_1 + 2 - if ds_in.dims["time"] != n_years_expected: + if ds_in.sizes["time"] != n_years_expected: raise RuntimeError( f"Expected {n_years_expected} timesteps in output but got {ds_in.dims['time']}" ) @@ -208,7 +208,7 @@ def import_max_gs_length(paramfile): Import maximum growing season length """ # Get parameter file - paramfile_ds = xr.open_dataset(paramfile) + paramfile_ds = xr.open_dataset(paramfile, decode_timedelta=True) # Import max growing season length (stored in netCDF as nanoseconds!) paramfile_mxmats = paramfile_ds["mxmat"].values / np.timedelta64(1, "D") diff --git a/python/ctsm/crop_calendars/generate_gdds.py b/python/ctsm/crop_calendars/generate_gdds.py index 196d7a96da..dd736add54 100644 --- a/python/ctsm/crop_calendars/generate_gdds.py +++ b/python/ctsm/crop_calendars/generate_gdds.py @@ -4,6 +4,7 @@ import os import sys + import pickle import datetime as dt import argparse @@ -29,6 +30,9 @@ # fixed. For now, we'll just disable the warning. 
# pylint: disable=too-many-positional-arguments

+# Mapping of history tape number to output frequency
+H_FREQ_DICT = {1: "annual", 2: "daily"}
+

 def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfile, cushion):
     """
@@ -45,19 +49,31 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
     return mxmats


-def _get_history_yr_range(first_season, last_season):
+def _get_history_yr_range(first_season, last_season, freq):
     """
-    Get a range object that can be used for looping over all years we need to process timestamps
-    from.
+    Get range objects that can be used for looping over all years we need to process timestamps
+    from. Different handling for annual vs. daily history files. The assumption is that all
+    history files are instantaneous.
     """
-    # Saving at the end of a year receive the timestamp of the END of the year's final timestep,
-    # which means it will actually be 00:00 of Jan. 1 of the next year.
-    first_history_yr = first_season + 1
-    # Same deal for the last history timestep, but we have to read an extra year in that case,
-    # because in some places the last growing season won't complete until the year after it was
-    # planted.
-    last_history_yr = last_season + 2
+    if freq == "annual":
+        # Saving at the end of a year gives the timestamp of the END of the year's final timestep,
+        # which means it will actually be 00:00 of Jan. 1 of the next year.
+        first_history_yr = first_season + 1
+        last_history_yr = last_season + 1
+    elif freq == "daily":
+        # Saving at the end of a day/beginning of the next day (i.e., 00:00:00) means that the year
+        # will be correct for all but the Dec. 31 save, which will receive a timestamp of Jan. 1
+        # 00:00:00. That will be handled in _get_time_slice_lists(), so we don't need to account for
+        # it here.
+        first_history_yr = first_season
+        last_history_yr = last_season
+    else:
+        raise NotImplementedError(f"Not sure how to handle freq '{freq}'")
+
+    # For the last season, we have to read an extra year, because in some places the last growing
+    # season won't complete until the year after it was planted.
+    last_history_yr += 1

     # last_history_yr + 1 because range() will iterate up to but not including the second value.
     history_yr_range = range(first_history_yr, last_history_yr + 1)
@@ -65,11 +81,10 @@ def _get_history_yr_range(first_season, last_season):
     return history_yr_range


-def _get_time_slice_list(first_season, last_season):
+def _get_time_slice_lists(first_season, last_season):
     """
-    Given the requested first and last seasons, get the list of time slices that the script should
-    look for. The assumptions here, as in import_and_process_1yr and as instructed in the docs, are
-    that the user (a) is saving instantaneous annual files and (b) started on Jan. 1.
+    Given the requested first and last seasons, get the list of instantaneous time slices that the
+    script should look for.
     """

     # Input checks
@@ -78,32 +93,56 @@ def _get_time_slice_list(first_season, last_season):
     if first_season > last_season:
         raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")

-    slice_list = []
-    for history_yr in _get_history_yr_range(first_season, last_season):
-        slice_start = f"{history_yr}-01-01"
-        # Stop could probably be the same as start, since there should just be one value saved per
-        # year and that should get the Jan. 1 timestamp.
- slice_stop = f"{history_yr}-12-31" - slice_list.append(slice(slice_start, slice_stop)) - - # We should be reading one more than the total number of years in [first_season, last_season]. - assert len(slice_list) == last_season - first_season + 2 - - return slice_list - - -def _get_file_lists(input_dir, time_slice_list, logger): + # Initialize list with None for each history file. Could avoid by starting with empty list and + # doing .append(), but pylint gets confused by that for some reason. + slice_lists_list = [None for x in range(len(H_FREQ_DICT))] + + # Get time slice for each required history year in each history file. + for i, h in enumerate(list(H_FREQ_DICT.keys())): + slice_list = [] + freq = H_FREQ_DICT[h] + for history_yr in _get_history_yr_range(first_season, last_season, freq): + if h == 1: + # Annual timesteps + slice_start = f"{history_yr}-01-01" + slice_stop = f"{history_yr}-01-01" + elif h == 2: + # Daily timesteps of instantaneous variables will go from Jan. 2 through Jan. 1 + # because they will get the time at the end of each timestep. + slice_start = f"{history_yr}-01-02" + slice_stop = f"{history_yr + 1}-01-01" + else: + raise NotImplementedError(f"What frequency are h{h}i files saved at?") + slice_list.append(slice(slice_start, slice_stop)) + + # We should be reading one more than the total number of years in + # [first_season, last_season]. + ns_exp = last_season - first_season + 2 + ns_actual = len(slice_list) + msg = f"Expected {ns_exp} time slices in list for h{h}; got {ns_actual}" + assert ns_exp == ns_actual, msg + + # Save + slice_lists_list[i] = slice_list + + return tuple(slice_lists_list) + + +def _get_file_lists(input_dir, time_slice_lists_list, logger): """ For each time slice in a list, find the file(s) that need to be read to get all history timesteps in the slice. Returns both h1i and h2i file lists. """ output_file_lists_list = [None, None] for i, h in enumerate([1, 2]): + time_slice_list = time_slice_lists_list[i] all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger) h_file_lists = [] for time_slice in time_slice_list: try: - h_file_lists.append(get_files_in_time_slice(all_h_files, time_slice, logger=logger)) + h_file_lists.append( + get_files_in_time_slice(all_h_files, time_slice, logger=logger, quiet=True) + ) except FileNotFoundError as e: raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e output_file_lists_list[i] = h_file_lists @@ -184,17 +223,6 @@ def main( ########################## if not only_make_figs: - # Keep 1 extra year to avoid incomplete final growing season for crops - # harvested after Dec. 31. - yr_1_import_str = f"{first_season+1}-01-01" - yr_n_import_str = f"{last_season+2}-01-01" - - log( - logger, - f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} " - + "(years are +1 because of CTSM output naming)", - ) - # This script uses pickle to save work in progress. In case of interruption, when the script # is resumed, it will look for a pickle file. It will resume from the year after # pickle_year, which is the last processed year in the pickle file. 
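[Editor's aside -- illustration, not part of the patch] The slice-building logic above condenses to a short standalone sketch. The helper name _sketch_time_slices is hypothetical; the assumptions (instantaneous h1 = annual, h2 = daily, per H_FREQ_DICT) come from the code and comments above:

    def _sketch_time_slices(first_season, last_season):
        # Hypothetical condensation of _get_time_slice_lists(), for illustration only.
        # h1 (annual, instantaneous): data for year Y is stamped (Y+1)-01-01, and we read one
        # extra year because some growing seasons finish after Dec. 31.
        h1 = [slice(f"{yr}-01-01", f"{yr}-01-01") for yr in range(first_season + 1, last_season + 3)]
        # h2 (daily, instantaneous): data for year Y runs Y-01-02 through (Y+1)-01-01.
        h2 = [slice(f"{yr}-01-02", f"{yr + 1}-01-01") for yr in range(first_season, last_season + 2)]
        return h1, h2

For example, _sketch_time_slices(1986, 1987) yields the same slices the new unit tests below expect: three single-timestep h1 slices (1987-01-01, 1988-01-01, 1989-01-01) and three year-long h2 slices running 1986-01-02 through 1989-01-01.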
@@ -205,24 +233,22 @@ def main( ( first_season, last_season, - pickle_year, + pickle_season, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, lastyear_active_patch_indices_list, - incorrectly_daily, save_figs, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, skip_crops, ) = pickle.load(file) - print(f"Will resume import at {pickle_year+1}") + log(logger, f"Will resume import at season {pickle_season+1}") h2_ds = None else: - incorrectly_daily = False skip_patches_for_isel_nan_lastyear = np.ndarray([]) - pickle_year = -np.inf + pickle_season = -np.inf gddaccum_yp_list = [] gddharv_yp_list = [] incl_vegtypes_str = None @@ -235,19 +261,38 @@ def main( ) # Get lists of history timesteps and files to read - time_slice_list = _get_time_slice_list(first_season, last_season) - h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger) + log(logger, "Getting lists of history timesteps and files to read") + h1_time_slices, h2_time_slices = _get_time_slice_lists(first_season, last_season) + h1_file_lists, h2_file_lists = _get_file_lists( + input_dir, (h1_time_slices, h2_time_slices), logger + ) + history_yr_range_h1 = _get_history_yr_range(first_season, last_season, H_FREQ_DICT[1]) + history_yr_range_h2 = _get_history_yr_range(first_season, last_season, H_FREQ_DICT[2]) + # Check + assert len(history_yr_range_h1) == len(history_yr_range_h2) + log(logger, "Checking h1 files") + gddfn.check_file_lists( + history_yr_range_h1, h1_file_lists, h1_time_slices, H_FREQ_DICT[1], logger + ) + log(logger, "Checking h2 files") + gddfn.check_file_lists( + history_yr_range_h2, h2_file_lists, h2_time_slices, H_FREQ_DICT[2], logger + ) + log(logger, "Done") - for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)): + for y, history_yr_h1 in enumerate(history_yr_range_h1): # If resuming from a pickled file, we continue until we reach a year that hasn't yet # been processed. 
- if this_yr <= pickle_year: + if history_yr_h1 <= pickle_season: continue - log(logger, f"netCDF year {this_yr}...") + log(logger, f"History year {history_yr_h1}...") - # Get h1 and h2 files to read for this year - h1_file_list = h1_file_lists[yr_index] # pylint: disable=unsubscriptable-object - h2_file_list = h2_file_lists[yr_index] # pylint: disable=unsubscriptable-object + # Get time slice and files to read for this year + h1_time_slice = h1_time_slices[y] # pylint: disable=unsubscriptable-object + h2_time_slice = h2_time_slices[y] # pylint: disable=unsubscriptable-object + h1_file_list = h1_file_lists[y] # pylint: disable=unsubscriptable-object + h2_file_list = h2_file_lists[y] # pylint: disable=unsubscriptable-object + history_yr_h2 = list(history_yr_range_h2)[y] ( h2_ds, @@ -257,21 +302,19 @@ def main( gddharv_yp_list, skip_patches_for_isel_nan_lastyear, lastyear_active_patch_indices_list, - incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, ) = gddfn.import_and_process_1yr( first_season, last_season, - yr_index, + y, sdates_rx, hdates_rx, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, lastyear_active_patch_indices_list, - incorrectly_daily, incl_vegtypes_str, h2_ds_file, mxmats, @@ -279,8 +322,12 @@ def main( skip_crops, outdir_figs, logger, + history_yr_h1, + history_yr_h2, h1_file_list, h2_file_list, + h1_time_slice, + h2_time_slice, ) log(logger, f" Saving pickle file ({pickle_file})...") @@ -289,12 +336,11 @@ def main( [ first_season, last_season, - this_yr, + history_yr_h1, gddaccum_yp_list, gddharv_yp_list, skip_patches_for_isel_nan_lastyear, lastyear_active_patch_indices_list, - incorrectly_daily, save_figs, incl_vegtypes_str, incl_patches1d_itype_veg, @@ -403,7 +449,7 @@ def save_gdds(sdates_file, hdates_file, outfile, gdd_maps_ds, sdates_rx): template_ds = xr.open_dataset(sdates_file, decode_times=True) for var in template_ds: if "sdate" in var: - template_ds = template_ds.drop(var) + template_ds = template_ds.drop_vars(var) template_ds.to_netcdf(path=outfile, format="NETCDF4_CLASSIC") template_ds.close() diff --git a/python/ctsm/crop_calendars/generate_gdds_functions.py b/python/ctsm/crop_calendars/generate_gdds_functions.py index ed271bdd8d..d2b47f66cf 100644 --- a/python/ctsm/crop_calendars/generate_gdds_functions.py +++ b/python/ctsm/crop_calendars/generate_gdds_functions.py @@ -5,8 +5,11 @@ # pylint: disable=too-many-lines,too-many-statements,abstract-class-instantiated import warnings import os +import sys import glob +from datetime import timedelta from importlib import util as importlib_util + import numpy as np import xarray as xr @@ -56,6 +59,72 @@ CAN_PLOT = False +def gen_inst_daily_year(year, cal_type): + """ + For a given history year, return the timesteps we expect to see for daily instantaneous files + """ + + # Generate list of datetimes + start_date = cal_type(year, 1, 2) # Jan. 2 of given year + end_date = cal_type(year + 1, 1, 1) # Jan. 1 of next year + date_list = [start_date] + d = start_date + while d < end_date: + d += timedelta(days=1) + date_list.append(d) + + # Convert to DataArray + date_da = xr.DataArray(data=date_list, dims=["time"], coords={"time": date_list}) + + return date_da + + +def check_file_lists(history_yr_range, h_file_lists, time_slice_list, freq, logger): + """ + For each history year, check that the corresponding list of files has the requested time steps. 
+ Could theoretically make this faster by starting with checks of the first and last history + years, but it's really fast already. + """ + for history_yr, h_file_list, time_slice in zip( + list(history_yr_range), h_file_lists, time_slice_list + ): + # Get time across all included files + time_da = None + for h_file in h_file_list: + ds = xr.open_dataset(h_file).sel(time=time_slice) + if time_da is None: + time_da = ds["time"] + else: + time_da = xr.concat([time_da, ds["time"]], dim="time") + + # Check whether the time variable exactly matches what we would expect based on frequency. + _check_time_da(freq, history_yr, time_da, logger) + + +def _check_time_da(freq, history_yr, time_da, logger): + """ + Check whether a given time variable exactly matches what we would expect based on frequency. + """ + # Create the expected DataArray + cal_type = type(np.atleast_1d(time_da.values)[0]) + if freq == "annual": + date_list = [cal_type(history_yr, 1, 1)] + time_da_exp = xr.DataArray(data=date_list, dims=["time"], coords={"time": date_list}) + elif freq == "daily": + time_da_exp = gen_inst_daily_year(history_yr, cal_type) + else: + msg = f"What should the time DataArray look like for freq '{freq}'?" + error(logger, msg, error_type=NotImplementedError) + sys.exit() # Because pylint doesn't know the script stops on error() + + # Compare actual vs. expected + if not time_da.equals(time_da_exp): + log(logger, f"expected: {time_da_exp}") + log(logger, f"actual: {time_da}") + msg = "Unexpected time data after slicing; see logs above." + error(logger, msg, error_type=AssertionError) + + def check_grid_match(grid0, grid1, tol=GRID_TOL_DEG): """ Check whether latitude or longitude values match @@ -295,7 +364,6 @@ def import_and_process_1yr( gddharv_yp_list, skip_patches_for_isel_nan_last_year, last_year_active_patch_indices_list, - incorrectly_daily, incl_vegtypes_str_in, h2_ds_file, mxmats, @@ -303,8 +371,12 @@ def import_and_process_1yr( skip_crops, outdir_figs, logger, + history_yr_h1, + history_yr_h2, h1_filelist, h2_filelist, + h1_time_slice, + h2_time_slice, ): """ Import one year of CLM output data for GDD generation @@ -330,21 +402,14 @@ def import_and_process_1yr( my_vegtypes=crops_to_read, chunks=chunks, logger=logger, + time_slice=h1_time_slice, ) - for timestep in dates_ds["time"].values: - print(timestep) - if dates_ds.dims["time"] > 1: - if dates_ds.dims["time"] == 365: - if not incorrectly_daily: - log( - logger, - " ℹ️ You saved SDATES and HDATES daily, but you only needed annual. Fixing.", - ) - incorrectly_daily = True - dates_ds = dates_ds.isel(time=-1) - else: - dates_ds = dates_ds.isel(time=0) + # Check included timesteps + _check_time_da("annual", history_yr_h1, dates_ds["time"], logger) + + # Should now just be one timestep, so select it to remove dimension. + dates_ds = dates_ds.isel(time=0) # Make sure NaN masks match sdates_all_nan = ( @@ -405,41 +470,23 @@ def import_and_process_1yr( if np.sum(~np.isnan(dates_incl_ds.SDATES.values)) == 0: error(logger, "All SDATES are NaN after ignoring those patches!") - # Some patches can have -1 sowing date?? Hopefully just an artifact of me incorrectly saving - # SDATES/HDATES daily. - mxsowings = dates_ds.dims["mxsowings"] - mxsowings_dim = dates_ds.SDATES.dims.index("mxsowings") + # Some patches can have -1 sowing date?? 
+ mxsowings = dates_ds.sizes["mxsowings"] skip_patches_for_isel_sdatelt1 = np.where(dates_incl_ds.SDATES.values < 1)[1] skipping_patches_for_isel_sdatelt1 = len(skip_patches_for_isel_sdatelt1) > 0 if skipping_patches_for_isel_sdatelt1: unique_hdates = np.unique( dates_incl_ds.HDATES.isel(mxharvests=0, patch=skip_patches_for_isel_sdatelt1).values ) - if incorrectly_daily and list(unique_hdates) == [364]: - log( - logger, - f" ❗ {len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1, but this" - + "might have just been because of incorrectly daily outputs. Setting them to 365.", - ) - new_sdates_ar = dates_incl_ds.SDATES.values - if mxsowings_dim != 0: - error(logger, "Code this up") - new_sdates_ar[0, skip_patches_for_isel_sdatelt1] = 365 - dates_incl_ds["SDATES"] = xr.DataArray( - data=new_sdates_ar, - coords=dates_incl_ds["SDATES"].coords, - attrs=dates_incl_ds["SDATES"].attrs, - ) - else: - error( - logger, - f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. " - + f"Unique affected hdates: {unique_hdates}", - ) + error( + logger, + f"{len(skip_patches_for_isel_sdatelt1)} patches have SDATE < 1. " + + f"Unique affected hdates: {unique_hdates}", + ) # Some patches can have -1 harvest date?? Hopefully just an artifact of me incorrectly saving # SDATES/HDATES daily. Can also happen if patch wasn't active last year - mxharvests = dates_ds.dims["mxharvests"] + mxharvests = dates_ds.sizes["mxharvests"] mxharvests_dim = dates_ds.HDATES.dims.index("mxharvests") # If a patch was inactive last year but was either (a) harvested the last time it was active or # (b) was never active, it will have -1 as its harvest date this year. Such instances are okay. @@ -481,30 +528,13 @@ def import_and_process_1yr( unique_sdates = np.unique( dates_incl_ds.SDATES.isel(patch=skip_patches_for_isel_hdatelt1).values ) - if incorrectly_daily and list(unique_sdates) == [1]: - log( - logger, - f" ❗ {len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1??? Seems like " - + "this might have just been because of incorrectly daily outputs; setting them to " - + "365.", - ) - new_hdates_ar = dates_incl_ds.HDATES.values - if mxharvests_dim != 0: - error(logger, "Code this up") - new_hdates_ar[0, skip_patches_for_isel_hdatelt1] = 365 - dates_incl_ds["HDATES"] = xr.DataArray( - data=new_hdates_ar, - coords=dates_incl_ds["HDATES"].coords, - attrs=dates_incl_ds["HDATES"].attrs, - ) - else: - error( - logger, - f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. Possible causes:\n" - + "* Not using constant crop areas (e.g., flanduse_timeseries from " - + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n" - + f"Unique affected sdates: {unique_sdates}", - ) + error( + logger, + f"{len(skip_patches_for_isel_hdatelt1)} patches have HDATE < 1. 
Possible causes:\n" + + "* Not using constant crop areas (e.g., flanduse_timeseries from " + + "make_lu_for_gddgen.py)\n * Not skipping the first 2 years of output\n" + + f"Unique affected sdates: {unique_sdates}", + ) # Make sure there was only one harvest per year n_extra_harv = np.sum( @@ -560,7 +590,7 @@ def import_and_process_1yr( # Limit growing season to CLM max growing season length, if needed if mxmats and (imported_sdates or imported_hdates): - print(" Limiting growing season length...") + log(logger, " Limiting growing season length...") hdates_rx = hdates_rx_orig.copy() for var in hdates_rx_orig: if var == "time_bounds": @@ -578,14 +608,14 @@ def import_and_process_1yr( mxmat = mxmats[vegtype_str] if np.isinf(mxmat): - print(f" Not limiting {vegtype_str}: No mxmat value") + log(logger, f" Not limiting {vegtype_str}: No mxmat value") continue # Get "prescribed" growing season length gs_len_rx_da = get_gs_len_da(hdates_rx_orig[var] - sdates_rx[var]) not_ok = gs_len_rx_da.values > mxmat if not np.any(not_ok): - print(f" Not limiting {vegtype_str}: No rx season > {mxmat} days") + log(logger, f" Not limiting {vegtype_str}: No rx season > {mxmat} days") continue hdates_limited = hdates_rx_orig[var].copy().values @@ -600,10 +630,11 @@ def import_and_process_1yr( coords=hdates_rx_orig[var].coords, attrs=hdates_rx_orig[var].attrs, ) - print( + log( + logger, f" Limited {vegtype_str} growing season length to {mxmat}. Longest was " + f"{int(np.max(gs_len_rx_da.values))}, now " - + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}." + + f"{int(np.max(get_gs_len_da(hdates_rx[var] - sdates_rx[var]).values))}.", ) else: hdates_rx = hdates_rx_orig @@ -617,8 +648,12 @@ def import_and_process_1yr( my_vegtypes=crops_to_read, chunks=chunks, logger=logger, + time_slice=h2_time_slice, ) + # Check included timesteps + _check_time_da("daily", history_yr_h2, h2_ds["time"], logger) + # Restrict to patches we're including if skipping_patches_for_isel_nan: if not np.array_equal(dates_ds.patch.values, h2_ds.patch.values): @@ -736,44 +771,20 @@ def import_and_process_1yr( this_year_active_patch_indices[x] for x in where_gs_lastyr ] if not np.array_equal(last_year_active_patch_indices, this_year_active_patch_indices): - if incorrectly_daily: - log( - logger, - " ❗ This year's active patch indices differ from last year's. " - + "Allowing because this might just be an artifact of incorrectly daily " - + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", - ) - else: - error(logger, "This year's active patch indices differ from last year's.") + error(logger, "This year's active patch indices differ from last year's.") # Make sure we're not about to overwrite any existing values. if np.any( ~np.isnan( gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] ) ): - if incorrectly_daily: - log( - logger, - " ❗ Unexpected non-NaN for last season's GDD accumulation. " - + "Allowing because this might just be an artifact of incorrectly daily " - + "outputs, BUT RESULTS MUST NOT BE TRUSTED.", - ) - else: - error(logger, "Unexpected non-NaN for last season's GDD accumulation") + error(logger, "Unexpected non-NaN for last season's GDD accumulation") if save_figs and np.any( ~np.isnan( gddharv_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] ) ): - if incorrectly_daily: - log( - logger, - " ❗ Unexpected non-NaN for last season's GDDHARV. 
Allowing " - + "because this might just be an artifact of incorrectly daily outputs, " - + "BUT RESULTS MUST NOT BE TRUSTED.", - ) - else: - error(logger, "Unexpected non-NaN for last season's GDDHARV") + error(logger, "Unexpected non-NaN for last season's GDDHARV") # Fill. gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] = ( gddaccum_atharv_p[where_gs_lastyr] @@ -788,15 +799,7 @@ def import_and_process_1yr( gddaccum_yp_list[var][year_index - 1, active_this_year_where_gs_lastyr_indices] ) ): - if incorrectly_daily: - log( - logger, - " ❗ Unexpected NaN for last season's GDD accumulation. Allowing " - + "because this might just be an artifact of incorrectly daily outputs, " - + "BUT RESULTS MUST NOT BE TRUSTED.", - ) - else: - error(logger, "Unexpected NaN for last season's GDD accumulation.") + error(logger, "Unexpected NaN for last season's GDD accumulation.") if ( save_figs and check_gddharv @@ -808,15 +811,7 @@ def import_and_process_1yr( ) ) ): - if incorrectly_daily: - log( - logger, - " ❗ Unexpected NaN for last season's GDDHARV. Allowing because " - + "this might just be an artifact of incorrectly daily outputs, BUT " - + "RESULTS MUST NOT BE TRUSTED.", - ) - else: - error(logger, "Unexpected NaN for last season's GDDHARV.") + error(logger, "Unexpected NaN for last season's GDDHARV.") gddaccum_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddaccum if save_figs: gddharv_yp_list[var][year_index, this_year_active_patch_indices] = tmp_gddharv @@ -831,19 +826,19 @@ def import_and_process_1yr( ) nanmask_output_gdds_lastyr = np.isnan(gddaccum_yp_list[var][year_index - 1, :]) if not np.array_equal(nanmask_output_gdds_lastyr, nanmask_output_sdates): - if incorrectly_daily: - log( - logger, - " ❗ NaN masks differ between this year's sdates and 'filled-out' " - + "GDDs from last year. Allowing because this might just be an artifact of " - + "incorrectly daily outputs, BUT RESULTS MUST NOT BE TRUSTED.", - ) - else: - error( - logger, - "NaN masks differ between this year's sdates and 'filled-out' GDDs from " - + "last year", - ) + n_lastyr_where_not_sdates = np.sum( + nanmask_output_gdds_lastyr & ~nanmask_output_sdates + ) + n_sdates_where_not_lastyr = np.sum( + ~nanmask_output_gdds_lastyr & nanmask_output_sdates + ) + msg = ( + "NaN masks differ between this year's sdates and 'filled-out' GDDs from last" + f" year. {n_lastyr_where_not_sdates} NaN last year after filling out where not" + f" NaN in sdates; {n_sdates_where_not_lastyr} vice versa. Out of size" + f" {n_lastyr_where_not_sdates.size}." + ) + error(logger, msg) last_year_active_patch_indices_list[var] = this_year_active_patch_indices skip_patches_for_isel_nan_last_year = skip_patches_for_isel_nan @@ -860,24 +855,19 @@ def import_and_process_1yr( gddharv_yp_list, skip_patches_for_isel_nan_last_year, last_year_active_patch_indices_list, - incorrectly_daily, incl_vegtypes_str, incl_patches1d_itype_veg, mxsowings, ) -def find_inst_hist_files(indir, *, h, this_year=None, logger=None): +def find_inst_hist_files(indir, *, h, logger=None): """ - Find all the instantaneous history files for a given tape number, optionally looking just for - one year in filename. + Find all the instantaneous history files for a given tape number. Args: indir: Directory to search for history files h: History tape number (must be an integer, e.g., 1 for h1, 2 for h2) - this_year: Optional year to filter files by. If provided, only files with dates starting - with "{this_year}-01" will be returned. 
If None, all files matching the - history tape number will be returned. logger: Optional logger for error messages. If None, errors are raised without logging. Returns: @@ -887,26 +877,19 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None): TypeError: If h is not an integer FileNotFoundError: If no files matching the patterns are found RuntimeError: If files from multiple case names are found (indicates mixed output from - different simulations, which is pathological) + different simulations) Notes: - Searches for files matching patterns: "*h{h}i.*.nc" or "*h{h}i.*.nc.base" - - When this_year is specified, searches for: "*h{h}i.{this_year}-01*.nc" or - "*h{h}i.{this_year}-01*.nc.base" - Prefers .nc files over .nc.base files (searches .nc pattern first) - All returned files must be from the same case name (extracted from filename before ".clm2.h#i.") """ - if this_year is None: - patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"] - else: - if not isinstance(h, int): - err_msg = f"h ({h}) must be an integer, not {type(h)}" - err_type = TypeError - if logger: - error(logger, err_msg, error_type=err_type) - raise err_type(err_msg) - patterns = [f"*h{h}i.{this_year}-01*.nc", f"*h{h}i.{this_year}-01*.nc.base"] + if not isinstance(h, int): + err_msg = f"h ({h}) must be an integer, not {type(h)}" + error(logger, err_msg, error_type=TypeError) + + patterns = [f"*h{h}i.*.nc", f"*h{h}i.*.nc.base"] for pat in patterns: pattern = os.path.join(indir, pat) file_list = glob.glob(pattern) @@ -914,10 +897,7 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None): break if not file_list: err_msg = f"No files found matching patterns: {patterns}" - err_type = FileNotFoundError - if logger: - error(logger, err_msg, error_type=err_type) - raise err_type(err_msg) + error(logger, err_msg, error_type=FileNotFoundError) # Error if files found from multiple cases case_names = set() @@ -930,10 +910,7 @@ def find_inst_hist_files(indir, *, h, this_year=None, logger=None): case_names.add(case_name) if len(case_names) > 1: err_msg = f"Found files from multiple case names: {sorted(case_names)}" - err_type = RuntimeError - if logger: - error(logger, err_msg, error_type=err_type) - raise err_type(err_msg) + error(logger, err_msg, error_type=RuntimeError) return file_list diff --git a/python/ctsm/crop_calendars/import_ds.py b/python/ctsm/crop_calendars/import_ds.py index 656d10985e..34d2100d82 100644 --- a/python/ctsm/crop_calendars/import_ds.py +++ b/python/ctsm/crop_calendars/import_ds.py @@ -6,13 +6,14 @@ vegetation types. 
""" +import os import re import warnings from importlib.util import find_spec import numpy as np import xarray as xr from ctsm.utils import is_instantaneous -from ctsm.ctsm_logging import log +from ctsm.ctsm_logging import log, error import ctsm.crop_calendars.cropcal_utils as utils from ctsm.crop_calendars.xr_flexsel import xr_flexsel @@ -32,7 +33,7 @@ def compute_derived_vars(ds_in, var, logger=None): hyears = ds_in["HDATES"].copy() hyears.values = np.tile( np.expand_dims(year_list, (1, 2)), - (1, ds_in.dims["mxharvests"], ds_in.dims["patch"]), + (1, ds_in.sizes["mxharvests"], ds_in.sizes["patch"]), ) with np.errstate(invalid="ignore"): is_le_zero = ~np.isnan(ds_in.HDATES.values) & (ds_in.HDATES.values <= 0) @@ -310,21 +311,23 @@ def import_ds( return this_ds -def get_files_in_time_slice(filelist, time_slice, logger=None): +def get_files_in_time_slice(filelist, time_slice, logger=None, quiet=False): """ For a given list of files, find the files that need to be read in order to get all history timesteps in the slice. """ new_filelist = [] for file in sorted(filelist): - if logger: - log(logger, f"Getting filetime from file: {file}") filetime = xr.open_dataset(file).time filetime_sel = utils.safer_timeslice(filetime, time_slice) include_this_file = filetime_sel.size if include_this_file: - if logger: - log(logger, f"Including filetime : {filetime_sel['time'].values}") + if not quiet: + f = os.path.basename(file) + first_str = str(filetime_sel["time"].isel(time=0).values) + last_str = str(filetime_sel["time"].isel(time=-1).values) + msg = f"From {f}, including {first_str} through {last_str}" + log(logger, msg) new_filelist.append(file) # If you found some matching files, but then you find one that doesn't, stop going @@ -332,5 +335,6 @@ def get_files_in_time_slice(filelist, time_slice, logger=None): elif new_filelist: break if not new_filelist: - raise FileNotFoundError(f"No files found in time_slice {time_slice}") + err_msg = f"No files found in time_slice {time_slice}" + error(logger, err_msg, error_type=FileNotFoundError) return new_filelist diff --git a/python/ctsm/test/test_unit_generate_gdds.py b/python/ctsm/test/test_unit_generate_gdds.py index 5c0219cd09..5efee497cc 100755 --- a/python/ctsm/test/test_unit_generate_gdds.py +++ b/python/ctsm/test/test_unit_generate_gdds.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ -Unit tests for generate_gdds.py and generate_gdds_functions.py +Unit tests for generate_gdds.py """ import unittest @@ -9,7 +9,6 @@ import argparse import tempfile import shutil -import logging import re import numpy as np @@ -18,7 +17,6 @@ from ctsm import unit_testing from ctsm.crop_calendars import generate_gdds as gg -from ctsm.crop_calendars import generate_gdds_functions as gf # Allow test names that pylint doesn't like; otherwise hard to make them # readable @@ -237,327 +235,138 @@ def test_generate_gdds_get_mxmats_cushionneg14(self): self.assertEqual(mxmats["miscanthus"], 210 - cushion) -class TestGetTimeSliceList(unittest.TestCase): - """Tests for _get_time_slice_list()""" +class TestGetTimeSliceLists(unittest.TestCase): + """Tests for _get_time_slice_lists()""" - def test_generate_gdds_get_time_slice_list(self): - """Test that _get_time_slice_list works with two different years""" - season_list = [1986, 1987] - result = gg._get_time_slice_list(season_list[0], season_list[-1]) - expected = [ - slice("1987-01-01", "1987-12-31"), - slice("1988-01-01", "1988-12-31"), - slice("1989-01-01", "1989-12-31"), + def test_generate_gdds_get_time_slice_lists(self): + """Test 
that _get_time_slice_lists works with two different years""" + h1_slices, h2_slices = gg._get_time_slice_lists(1986, 1987) + + # Check h1 slices (annual timesteps - single day) + expected_h1 = [ + slice("1987-01-01", "1987-01-01"), + slice("1988-01-01", "1988-01-01"), + slice("1989-01-01", "1989-01-01"), ] - assert result == expected - - def test_generate_gdds_get_time_slice_list_1yr(self): - """Test that _get_time_slice_list works with the same year""" - result = gg._get_time_slice_list(1987, 1987) - expected = [ - slice("1988-01-01", "1988-12-31"), - slice("1989-01-01", "1989-12-31"), + self.assertEqual(h1_slices, expected_h1) + + # Check h2 slices (daily timesteps - full year) + # For daily, starts at first_season (1986), not first_season + 1 + expected_h2 = [ + slice("1986-01-02", "1987-01-01"), + slice("1987-01-02", "1988-01-01"), + slice("1988-01-02", "1989-01-01"), ] - assert result == expected + self.assertEqual(h2_slices, expected_h2) + + def test_generate_gdds_get_time_slice_lists_1yr(self): + """Test that _get_time_slice_lists works with the same year""" + h1_slices, h2_slices = gg._get_time_slice_lists(1987, 1987) + + # Check h1 slices + expected_h1 = [ + slice("1988-01-01", "1988-01-01"), + slice("1989-01-01", "1989-01-01"), + ] + self.assertEqual(h1_slices, expected_h1) + + # Check h2 slices + # For daily, starts at first_season (1987), not first_season + 1 + expected_h2 = [ + slice("1987-01-02", "1988-01-01"), + slice("1988-01-02", "1989-01-01"), + ] + self.assertEqual(h2_slices, expected_h2) def test_generate_gdds_get_time_slice_list_valueerror(self): """Test that _get_time_slice_list raises ValueError if last < first""" with self.assertRaisesRegex(ValueError, "first_season.* > last_season"): - gg._get_time_slice_list(1987, 1986) + gg._get_time_slice_lists(1987, 1986) - def test_generate_gdds_get_time_slice_list_typeerror_first(self): - """Test that _get_time_slice_list raises TypeError if not given integer first season""" + def test_generate_gdds_get_time_slice_lists_typeerror_first(self): + """Test that _get_time_slice_lists raises TypeError if not given integer first season""" with self.assertRaisesRegex( TypeError, r"_get_time_slice_list\(\) arguments must be integers" ): - gg._get_time_slice_list(1986.3, 1987) + gg._get_time_slice_lists(1986.3, 1987) - def test_generate_gdds_get_time_slice_list_typeerror_last(self): - """Test that _get_time_slice_list raises TypeError if not given integer last season""" + def test_generate_gdds_get_time_slice_lists_typeerror_last(self): + """Test that _get_time_slice_lists raises TypeError if not given integer last season""" with self.assertRaisesRegex( TypeError, r"_get_time_slice_list\(\) arguments must be integers" ): - gg._get_time_slice_list(1986, None) - - -class TestCheckGridMatch(unittest.TestCase): - """Tests check_grid_match()""" - - def test_check_grid_match_true_npnp(self): - """Test check_grid_match() with two matching numpy arrays""" - np0 = np.array([0, 1, 2, np.pi]) - match, max_abs_diff = gf.check_grid_match(np0, np0) - self.assertTrue(match) - self.assertEqual(max_abs_diff, 0.0) - - def test_check_grid_match_true_dada(self): - """Test check_grid_match() with two matching DataArrays""" - np0 = np.array([0, 1, 2, np.pi]) - da0 = xr.DataArray(data=np0) - match, max_abs_diff = gf.check_grid_match(da0, da0) - self.assertTrue(match) - self.assertEqual(max_abs_diff, 0.0) - - def test_check_grid_match_false_npnp(self): - """Test check_grid_match() with two non-matching numpy arrays""" - np0 = np.array([0, 1, 2, np.pi]) - np1 
= np0.copy() - diff = 2 * gf.GRID_TOL_DEG - np1[0] = np0[0] + diff - match, max_abs_diff = gf.check_grid_match(np0, np1) - self.assertFalse(match) - self.assertEqual(max_abs_diff, diff) - - def test_check_grid_match_false_dada(self): - """Test check_grid_match() with two non-matching DataArrays""" - np0 = np.array([0, 1, 2, np.pi]) - np1 = np0.copy() - diff = 2 * gf.GRID_TOL_DEG - np1[0] = np0[0] + diff - da0 = xr.DataArray(data=np0) - da1 = xr.DataArray(data=np1) - match, max_abs_diff = gf.check_grid_match(da0, da1) - self.assertFalse(match) - self.assertEqual(max_abs_diff, diff) - - def test_check_grid_match_falseneg_npnp(self): - """As test_check_grid_match_false_npnp, but with diff in negative direction""" - np0 = np.array([0, 1, 2, np.pi]) - np1 = np0.copy() - diff = -2 * gf.GRID_TOL_DEG - np1[0] = np0[0] + diff - match, max_abs_diff = gf.check_grid_match(np0, np1) - self.assertFalse(match) - self.assertEqual(max_abs_diff, abs(diff)) - - def test_check_grid_match_matchnans_true_npnp(self): - """Test check_grid_match() with two numpy arrays that have nans and match""" - np0 = np.array([np.nan, 1, 2, np.pi]) - with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"): - match, max_abs_diff = gf.check_grid_match(np0, np0) - self.assertTrue(match) - self.assertEqual(max_abs_diff, 0.0) - - def test_check_grid_match_matchnans_true_dada(self): - """Test check_grid_match() with two DataArrays that have nans and match""" - np0 = np.array([np.nan, 1, 2, np.pi]) - da0 = xr.DataArray(data=np0) - with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"): - match, max_abs_diff = gf.check_grid_match(da0, da0) - self.assertTrue(match) - self.assertEqual(max_abs_diff, 0.0) - - def test_check_grid_match_matchnans_false_npnp(self): - """Test check_grid_match() with two numpy arrays with nans that DON'T match""" - np0 = np.array([np.nan, 1, 2, np.pi]) - np1 = np.array([np.nan, 1, np.nan, np.pi]) - with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid don't match"): - match, max_abs_diff = gf.check_grid_match(np0, np1) - self.assertFalse(match) - self.assertIsNone(max_abs_diff) - - def test_check_grid_match_matchnans_falseshape_npnp(self): - """Test check_grid_match() with two numpy arrays that have different shapes""" - np0 = np.array([0, 1, 2, np.pi]) - np1 = np.array([0, 1, 2, np.pi, 4]) - match, max_abs_diff = gf.check_grid_match(np0, np1) - self.assertFalse(match) - self.assertIsNone(max_abs_diff) - - def test_check_grid_match_matchnans_falseshape_dada(self): - """Test check_grid_match() with two DataArrays that have different shapes""" - np0 = np.array([0, 1, 2, np.pi]) - np1 = np.array([0, 1, 2, np.pi, 4]) - da0 = xr.DataArray(data=np0) - da1 = xr.DataArray(data=np1) - match, max_abs_diff = gf.check_grid_match(da0, da1) - self.assertFalse(match) - self.assertIsNone(max_abs_diff) - - -class TestFindInstHistFiles(unittest.TestCase): - """Tests of find_inst_hist_files()""" - - def setUp(self): - """ - Set up and change to temporary directory - """ - self.prev_dir = os.getcwd() - self.temp_dir = tempfile.mkdtemp() - os.chdir(self.temp_dir) - - def tearDown(self): - """ - Delete temporary directory - """ - os.chdir(self.prev_dir) - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def _create_test_file(self, filename): - """Helper to create an empty test file""" - filepath = os.path.join(self.temp_dir, filename) - with open(filepath, "a", encoding="utf-8"): - pass - return filepath - - def test_find_inst_hist_files_h1_no_year(self): - """Test finding h1 files without specifying 
year""" - # Create test files - file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - file2 = self._create_test_file("test.clm2.h1i.2000-02-01-00000.nc") - file3 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") - - result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None) - - # Should find all h1i files - self.assertEqual(len(result), 3) - self.assertIn(file1, result) - self.assertIn(file2, result) - self.assertIn(file3, result) - - def test_find_inst_hist_files_h2_no_year(self): - """Test finding h2 files without specifying year""" - # Create test files - file1 = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") - file2 = self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc") - # Create h1 file that should not be found - self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - - result = gf.find_inst_hist_files(self.temp_dir, h=2, this_year=None) - - # Should find only h2i files - self.assertEqual(len(result), 2) - self.assertIn(file1, result) - self.assertIn(file2, result) - - def test_find_inst_hist_files_with_year(self): - """Test finding files for a specific year""" - # Create test files - file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") - file_2002 = self._create_test_file("test.clm2.h1i.2002-01-01-00000.nc") - - result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2001) - - # Should find only 2001 file - self.assertEqual(len(result), 1) - self.assertIn(file_2001, result) - self.assertNotIn(file_2000, result) - self.assertNotIn(file_2002, result) - - def test_find_inst_hist_files_base_extension(self): - """Test finding files with .nc.base extension""" - # Create test files with .nc.base extension - file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base") - file2 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base") - - result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None) - - # Should find .nc.base files - self.assertEqual(len(result), 2) - self.assertIn(file1, result) - self.assertIn(file2, result) - - def test_find_inst_hist_files_prefer_nc_over_base(self): - """Test that .nc files are preferred over .nc.base files""" - # Create both .nc and .nc.base files - file_nc = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - file_nc_base = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base") - - result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None) - - # Should find .nc files first (pattern order preference) - self.assertIn(file_nc, result) - self.assertNotIn(file_nc_base, result) - - def test_find_inst_hist_files_multiple_months_same_year(self): - """Test finding multiple files from the same year""" - # Create multiple files from 2000 - file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - file2 = self._create_test_file("test.clm2.h1i.2000-01-15-00000.nc") - file3 = self._create_test_file("test.clm2.h1i.2000-01-31-00000.nc") - # Create file from different year - self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") - - result = gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000) - - # Should find all January 2000 files - self.assertEqual(len(result), 3) - self.assertIn(file1, result) - self.assertIn(file2, result) - self.assertIn(file3, result) - - def test_find_inst_hist_files_no_files_found(self): - """Test error when no matching files are found""" - # Create a non-matching file - 
self._create_test_file("test.clm2.h0.2000-01-01-00000.nc") - - # Should raise a FileNotFoundError error - with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): - gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None) - - def test_find_inst_hist_files_different_case_names(self): - """Test that RuntimeError is raised when files from different case names are found""" - # Create files with different case names - self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc") - - # Should raise RuntimeError due to multiple case names - with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"): - gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000) - - def test_find_inst_hist_files_different_case_names_with_logger(self): - """ - Test that RuntimeError is raised when files from different case names are found, with logger - """ - # Create a logger - logger = logging.getLogger("test_logger_case_names") - logger.setLevel(logging.DEBUG) - - # Create files with different case names - self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc") - - # Should raise RuntimeError due to multiple case names, even with logger - with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"): - gf.find_inst_hist_files(self.temp_dir, h=1, this_year=2000, logger=logger) - - def test_find_inst_hist_files_no_files_found_with_logger(self): - """Test error when no matching files are found, with logger""" - # Create a logger - logger = logging.getLogger("test_logger_no_files") - logger.setLevel(logging.DEBUG) - - # Create a non-matching file - self._create_test_file("test.clm2.h0.2000-01-01-00000.nc") - - # Should raise a FileNotFoundError even with logger - with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): - gf.find_inst_hist_files(self.temp_dir, h=1, this_year=None, logger=logger) - - def test_find_inst_hist_files_h_str_with_logger(self): - """Test that TypeError is raised when h is a string, with logger""" - # Create a logger - logger = logging.getLogger("test_logger_h_str") - logger.setLevel(logging.DEBUG) - - self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - - with self.assertRaisesRegex(TypeError, "must be an integer, not"): - gf.find_inst_hist_files(self.temp_dir, h="1", this_year=2000, logger=logger) - - def test_find_inst_hist_files_h_float_with_logger(self): - """Test that TypeError is raised when h is a float, with logger""" - # Create a logger - logger = logging.getLogger("test_logger_h_float") - logger.setLevel(logging.DEBUG) - - self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - - with self.assertRaisesRegex(TypeError, "must be an integer, not"): - gf.find_inst_hist_files(self.temp_dir, h=1.0, this_year=2000, logger=logger) + gg._get_time_slice_lists(1986, None) + + def test_generate_gdds_get_time_slice_lists_lengths_match(self): + """Test that h1 and h2 slice lists have the same length""" + h1_slices, h2_slices = gg._get_time_slice_lists(2000, 2005) + self.assertEqual(len(h1_slices), len(h2_slices)) + # Should be last_season - first_season + 2 + self.assertEqual(len(h1_slices), 2005 - 2000 + 2) + + def test_generate_gdds_get_time_slice_lists_h1_start_equals_stop(self): + """Test that h1 slices are 
single-day (start == stop)""" + h1_slices, _ = gg._get_time_slice_lists(2000, 2002) + for s in h1_slices: # pylint: disable=not-an-iterable + self.assertEqual(s.start, s.stop) + + def test_generate_gdds_get_time_slice_lists_h2_year_long(self): + """Test that h2 slices span one year""" + _, h2_slices = gg._get_time_slice_lists(2000, 2002) + for s in h2_slices: # pylint: disable=not-an-iterable + # Start should be Jan 2 of year Y + self.assertIn("-01-02", s.start) + # Stop should be Jan 1 of year Y+1 + self.assertIn("-01-01", s.stop) + # Extract years and verify they're consecutive + start_year = int(s.start[:4]) + stop_year = int(s.stop[:4]) + self.assertEqual(stop_year, start_year + 1) + + +class TestGetHistoryYrRange(unittest.TestCase): + """Tests for _get_history_yr_range()""" + + def test_get_history_yr_range_annual(self): + """Test _get_history_yr_range with annual frequency""" + result = gg._get_history_yr_range(1986, 1987, "annual") + # For annual: first_season + 1 through last_season + 2 + expected = range(1987, 1990) + self.assertEqual(result, expected) + + def test_get_history_yr_range_daily(self): + """Test _get_history_yr_range with daily frequency""" + result = gg._get_history_yr_range(1986, 1987, "daily") + # For daily: first_season through last_season + 1 + expected = range(1986, 1989) + self.assertEqual(result, expected) + + def test_get_history_yr_range_annual_single_year(self): + """Test _get_history_yr_range with annual frequency and single year""" + result = gg._get_history_yr_range(2000, 2000, "annual") + # Should give 2001, 2002 + expected = range(2001, 2003) + self.assertEqual(result, expected) + + def test_get_history_yr_range_daily_single_year(self): + """Test _get_history_yr_range with daily frequency and single year""" + result = gg._get_history_yr_range(2000, 2000, "daily") + # Should give 2000, 2001 + expected = range(2000, 2002) + self.assertEqual(result, expected) + + def test_get_history_yr_range_unknown_freq(self): + """Test _get_history_yr_range with unknown frequency""" + with self.assertRaises(NotImplementedError): + gg._get_history_yr_range(2000, 2001, "monthly") + + def test_get_history_yr_range_lengths_match(self): + """Test that annual and daily ranges have the same length""" + annual_range = gg._get_history_yr_range(2000, 2005, "annual") + daily_range = gg._get_history_yr_range(2000, 2005, "daily") + self.assertEqual(len(annual_range), len(daily_range)) + # Should be last_season - first_season + 2 + self.assertEqual(len(annual_range), 2005 - 2000 + 2) class TestGetFileLists(unittest.TestCase): @@ -599,47 +408,55 @@ def _create_test_file(self, filename): def test_get_file_lists_single_year(self): """Test _get_file_lists with a single year of data""" - # Create h1 and h2 files for 2000 - h1_file = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - h2_file = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") + # Create h1 and h2 files for 2000 and 2001 + # Also need h2 file for 1999 since daily starts at first_season + h2_file_1999 = self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc") + h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + h2_file_2000 = self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc") + h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") - time_slice_list = [slice("2000-01-01", "2000-12-31")] + # Get time slice lists for first_season=1999, last_season=1999 + # This will give us slices for 2000 and 2001 (h1), and 1999, 2000, 2001 (h2) + 
time_slice_lists_list = gg._get_time_slice_lists(1999, 1999) h1_file_lists, h2_file_lists = gg._get_file_lists( - self.temp_dir, time_slice_list, logger=None + self.temp_dir, time_slice_lists_list, logger=None ) - # Should have one list for each time slice - self.assertEqual(len(h1_file_lists), 1) - self.assertEqual(len(h2_file_lists), 1) + # Should have two lists (one for each year: 2000, 2001) + self.assertEqual(len(h1_file_lists), 2) + self.assertEqual(len(h2_file_lists), 2) # Check contents of file lists # pylint: disable=unsubscriptable-object self.assertEqual(len(h1_file_lists[0]), 1) + self.assertEqual(h1_file_lists[0], [h1_file_2000]) self.assertEqual(len(h2_file_lists[0]), 1) - self.assertEqual(h1_file_lists[0], [h1_file]) - self.assertEqual(h2_file_lists[0], [h2_file]) + self.assertEqual(h2_file_lists[0], [h2_file_1999]) + self.assertEqual(len(h1_file_lists[1]), 1) + self.assertEqual(h1_file_lists[1], [h1_file_2001]) + self.assertEqual(len(h2_file_lists[1]), 1) + self.assertEqual(h2_file_lists[1], [h2_file_2000]) def test_get_file_lists_multiple_years(self): """Test _get_file_lists with multiple years of data""" # Create h1 and h2 files for 2000-2002 + # Also need h2 file for 1999 since daily starts at first_season h1_files = [] - h2_files = [] + h2_files = [self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc")] for year in [2000, 2001, 2002]: h1_files.append(self._create_test_file(f"test.clm2.h1i.{year}-01-01-00000.nc")) - h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-01-00000.nc")) + h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-02-00000.nc")) - time_slice_list = [ - slice("2000-01-01", "2000-12-31"), - slice("2001-01-01", "2001-12-31"), - slice("2002-01-01", "2002-12-31"), - ] + # Get time slice lists for first_season=1999, last_season=2000 + # This will give us slices for 2000, 2001, 2002 (h1) and 1999, 2000, 2001 (h2) + time_slice_lists_list = gg._get_time_slice_lists(1999, 2000) h1_file_lists, h2_file_lists = gg._get_file_lists( - self.temp_dir, time_slice_list, logger=None + self.temp_dir, time_slice_lists_list, logger=None ) - # Should have one list for each time slice + # Should have three lists (one for each year: 2000, 2001, 2002) self.assertEqual(len(h1_file_lists), 3) self.assertEqual(len(h2_file_lists), 3) @@ -653,93 +470,151 @@ def test_get_file_lists_multiple_years(self): def test_get_file_lists_multiple_files_per_slice(self): """Test _get_file_lists when multiple files fall within a time slice""" - # Create multiple h1 and h2 files for 2000 - h1_files = [] - h2_files = [] + # Create h1 files for 2000 and 2001 (annual) + h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") + + # Create multiple h2 files for 1999 and 2000 (daily throughout the year) + h2_files_1999 = [] + for month in ["01", "06", "12"]: + h2_files_1999.append(self._create_test_file(f"test.clm2.h2i.1999-{month}-15-00000.nc")) + + h2_files_2000 = [] for month in ["01", "06", "12"]: - h1_files.append(self._create_test_file(f"test.clm2.h1i.2000-{month}-01-00000.nc")) - h2_files.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-01-00000.nc")) + h2_files_2000.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-15-00000.nc")) - time_slice_list = [slice("2000-01-01", "2000-12-31")] + # Get time slice lists for first_season=1999, last_season=1999 + time_slice_lists_list = gg._get_time_slice_lists(1999, 1999) h1_file_lists, h2_file_lists = 
gg._get_file_lists( - self.temp_dir, time_slice_list, logger=None + self.temp_dir, time_slice_lists_list, logger=None ) - # Should have one list for the time slice - self.assertEqual(len(h1_file_lists), 1) - self.assertEqual(len(h2_file_lists), 1) + # Should have two lists (for 2000 and 2001) + self.assertEqual(len(h1_file_lists), 2) + self.assertEqual(len(h2_file_lists), 2) - # Check contents of file lists (should be sorted) + # Check contents of file lists # pylint: disable=unsubscriptable-object - self.assertEqual(len(h1_file_lists[0]), 3) + self.assertEqual(len(h1_file_lists[0]), 1) + self.assertEqual(h1_file_lists[0], [h1_file_2000]) self.assertEqual(len(h2_file_lists[0]), 3) - self.assertEqual(h1_file_lists[0], sorted(h1_files)) - self.assertEqual(h2_file_lists[0], sorted(h2_files)) + self.assertEqual(h2_file_lists[0], sorted(h2_files_1999)) + + # Check second year (2001) + self.assertEqual(len(h1_file_lists[1]), 1) + self.assertEqual(h1_file_lists[1], [h1_file_2001]) + self.assertEqual(len(h2_file_lists[1]), 3) + self.assertEqual(h2_file_lists[1], sorted(h2_files_2000)) def test_get_file_lists_no_h1_files(self): """Test _get_file_lists when h1 files are missing""" - # Create only h2 files - self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") - time_slice_list = [slice("2000-01-01", "2000-12-31")] + time_slice_lists_list = gg._get_time_slice_lists(1999, 1999) # Should raise FileNotFoundError when h1 files are not found - with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): - gg._get_file_lists(self.temp_dir, time_slice_list, logger=None) + with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns.*h1.*"): + gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None) def test_get_file_lists_no_h2_files(self): """Test _get_file_lists when h2 files are missing""" - # Create only h1 files - self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + growing_season = 2000 + + # Create h1 files with data from growing_season year and the next one + self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc") + self._create_test_file(f"test.clm2.h1i.{growing_season + 2}-01-01-00000.nc") + # But don't create h2 files - time_slice_list = [slice("2000-01-01", "2000-12-31")] + time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season) # Should raise FileNotFoundError when h2 files are not found - with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): - gg._get_file_lists(self.temp_dir, time_slice_list, logger=None) + with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns.*h2.*"): + gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None) - def test_get_file_lists_h1_outside_time_slice(self): - """Test _get_file_lists when h1 files exist but have no timesteps in the slice""" - # Create h1 files for 2000 and h2 files for 2001 - self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc") + def test_get_file_lists_h1_missing_a_time_slice(self): + """ + Test _get_file_lists when h1 files exist but not for one of the needed time slices + + We will be simulating a need for processing the 2000 growing season only. This will require + data from 2000 and also, because seasons can extend into the next calendar year, 2001. 
+ + Because of how CESM timestamps annual output files, it will look for h1 timestamps + of 2001-01-01 (data from 2000) and 2002-01-01 (data from 2001). - # Request time slice for 2001 (h1 files exist but are outside the slice) - time_slice_list = [slice("2001-01-01", "2001-12-31")] + In this test, we create only one of the two required h1 files. We expect an error + about the missing h1 data to be raised before the available h2 time steps are + checked. + """ + growing_season = 2000 + + # Create h1 file with data from growing_season year BUT NOT the next one + self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc") + + # Get required time slice lists for h1 and h2 files + time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season) + + # Make sure the required time slice list is what we expect + assert time_slice_lists_list[0] == [ + slice(f"{growing_season+1}-01-01", f"{growing_season+1}-01-01", None), + slice(f"{growing_season+2}-01-01", f"{growing_season+2}-01-01", None), + ] # Should raise FileNotFoundError when h1 files have no timesteps in slice with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"): - gg._get_file_lists(self.temp_dir, time_slice_list, logger=None) + gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None) - def test_get_file_lists_h2_outside_time_slice(self): - """Test _get_file_lists when h2 files exist but have no timesteps in the slice""" - # Create h1 files for 2001 and h2 files for 2000 - self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc") - self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") + def test_get_file_lists_h2_missing_a_time_slice(self): + """ + Test _get_file_lists when h2 files exist but not for one of the needed time slices + + We will be simulating a need for processing the 2000 growing season only. This will require + data from 2000 and also, because seasons can extend into the next calendar year, 2001. - # Request time slice for 2001 (h2 files exist but are outside the slice) - time_slice_list = [slice("2001-01-01", "2001-12-31")] + Because of how CESM timestamps daily output files, it will look for h2 timestamps + starting 2000-01-02 (data from 2000-01-01) through 2002-01-01 (data from 2001-12-31). + + In this test, we will create all necessary files for h1 but only one of the two required for + h2. + + (As test_get_file_lists_h1_missing_a_time_slice but for h2 instead.)
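+ + Concretely, the two required h2 slices (asserted below) are 2000-01-02 through + 2001-01-01 and 2001-01-02 through 2002-01-01; here we create a file for only the + first of these.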
+ """ + growing_season = 2000 + + # Create h1 files with data from growing_season year and the next one + self._create_test_file(f"test.clm2.h1i.{growing_season + 1}-01-01-00000.nc") + self._create_test_file(f"test.clm2.h1i.{growing_season + 2}-01-01-00000.nc") + + # Create h2 file with data from growing_season year BUT NOT the next one + self._create_test_file(f"test.clm2.h2i.{growing_season+1}-01-02-00000.nc") + + # Get required time slice lists for h1 and h2 files + time_slice_lists_list = gg._get_time_slice_lists(growing_season, growing_season) + + # Make sure the required time slice lists are what we expect + assert time_slice_lists_list[0] == [ + slice(f"{growing_season+1}-01-01", f"{growing_season+1}-01-01", None), + slice(f"{growing_season+2}-01-01", f"{growing_season+2}-01-01", None), + ] + assert time_slice_lists_list[1] == [ + slice(f"{growing_season}-01-02", f"{growing_season+1}-01-01", None), + slice(f"{growing_season+1}-01-02", f"{growing_season+2}-01-01", None), + ] # Should raise FileNotFoundError when h2 files have no timesteps in slice with self.assertRaisesRegex(FileNotFoundError, "No h2 timesteps found in"): - gg._get_file_lists(self.temp_dir, time_slice_list, logger=None) + gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None) def test_get_file_lists_partial_overlap(self): """Test _get_file_lists when some time slices have files and others don't""" # Create h1 and h2 files for 2000 only self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") - self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") + self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc") - # Request time slices for 2000 and 2001 - time_slice_list = [ - slice("2000-01-01", "2000-12-31"), - slice("2001-01-01", "2001-12-31"), - ] + # Request time slices for 2000, 2001, 2002 (only 2000 has files) + time_slice_lists_list = gg._get_time_slice_lists(1999, 2000) # Should raise FileNotFoundError when second time slice has no files with self.assertRaisesRegex(FileNotFoundError, "No h1 timesteps found in"): - gg._get_file_lists(self.temp_dir, time_slice_list, logger=None) + gg._get_file_lists(self.temp_dir, time_slice_lists_list, logger=None) if __name__ == "__main__": diff --git a/python/ctsm/test/test_unit_generate_gdds_functions.py b/python/ctsm/test/test_unit_generate_gdds_functions.py new file mode 100644 index 0000000000..e71c6b1c49 --- /dev/null +++ b/python/ctsm/test/test_unit_generate_gdds_functions.py @@ -0,0 +1,576 @@ +#!/usr/bin/env python3 + +""" +Unit tests for generate_gdds_functions.py +""" + +import unittest +import os +import tempfile +import shutil +import logging + +import numpy as np +import xarray as xr +from cftime import DatetimeNoLeap, DatetimeAllLeap + +from ctsm import unit_testing +from ctsm.crop_calendars import generate_gdds_functions as gf + +# Allow test names that pylint doesn't like; otherwise hard to make them +# readable +# pylint: disable=invalid-name + +# pylint: disable=protected-access + +## Too many instant variables as part of the class (too many self. 
in the SetUp) +# pylint: disable=too-many-instance-attributes + + +class TestCheckGridMatch(unittest.TestCase): + """Tests check_grid_match()""" + + def test_check_grid_match_true_npnp(self): + """Test check_grid_match() with two matching numpy arrays""" + np0 = np.array([0, 1, 2, np.pi]) + match, max_abs_diff = gf.check_grid_match(np0, np0) + self.assertTrue(match) + self.assertEqual(max_abs_diff, 0.0) + + def test_check_grid_match_true_dada(self): + """Test check_grid_match() with two matching DataArrays""" + np0 = np.array([0, 1, 2, np.pi]) + da0 = xr.DataArray(data=np0) + match, max_abs_diff = gf.check_grid_match(da0, da0) + self.assertTrue(match) + self.assertEqual(max_abs_diff, 0.0) + + def test_check_grid_match_false_npnp(self): + """Test check_grid_match() with two non-matching numpy arrays""" + np0 = np.array([0, 1, 2, np.pi]) + np1 = np0.copy() + diff = 2 * gf.GRID_TOL_DEG + np1[0] = np0[0] + diff + match, max_abs_diff = gf.check_grid_match(np0, np1) + self.assertFalse(match) + self.assertEqual(max_abs_diff, diff) + + def test_check_grid_match_false_dada(self): + """Test check_grid_match() with two non-matching DataArrays""" + np0 = np.array([0, 1, 2, np.pi]) + np1 = np0.copy() + diff = 2 * gf.GRID_TOL_DEG + np1[0] = np0[0] + diff + da0 = xr.DataArray(data=np0) + da1 = xr.DataArray(data=np1) + match, max_abs_diff = gf.check_grid_match(da0, da1) + self.assertFalse(match) + self.assertEqual(max_abs_diff, diff) + + def test_check_grid_match_falseneg_npnp(self): + """As test_check_grid_match_false_npnp, but with diff in negative direction""" + np0 = np.array([0, 1, 2, np.pi]) + np1 = np0.copy() + diff = -2 * gf.GRID_TOL_DEG + np1[0] = np0[0] + diff + match, max_abs_diff = gf.check_grid_match(np0, np1) + self.assertFalse(match) + self.assertEqual(max_abs_diff, abs(diff)) + + def test_check_grid_match_matchnans_true_npnp(self): + """Test check_grid_match() with two numpy arrays that have nans and match""" + np0 = np.array([np.nan, 1, 2, np.pi]) + with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"): + match, max_abs_diff = gf.check_grid_match(np0, np0) + self.assertTrue(match) + self.assertEqual(max_abs_diff, 0.0) + + def test_check_grid_match_matchnans_true_dada(self): + """Test check_grid_match() with two DataArrays that have nans and match""" + np0 = np.array([np.nan, 1, 2, np.pi]) + da0 = xr.DataArray(data=np0) + with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid"): + match, max_abs_diff = gf.check_grid_match(da0, da0) + self.assertTrue(match) + self.assertEqual(max_abs_diff, 0.0) + + def test_check_grid_match_matchnans_false_npnp(self): + """Test check_grid_match() with two numpy arrays with nans that DON'T match""" + np0 = np.array([np.nan, 1, 2, np.pi]) + np1 = np.array([np.nan, 1, np.nan, np.pi]) + with self.assertWarnsRegex(RuntimeWarning, r"NaN\(s\) in grid don't match"): + match, max_abs_diff = gf.check_grid_match(np0, np1) + self.assertFalse(match) + self.assertIsNone(max_abs_diff) + + def test_check_grid_match_matchnans_falseshape_npnp(self): + """Test check_grid_match() with two numpy arrays that have different shapes""" + np0 = np.array([0, 1, 2, np.pi]) + np1 = np.array([0, 1, 2, np.pi, 4]) + match, max_abs_diff = gf.check_grid_match(np0, np1) + self.assertFalse(match) + self.assertIsNone(max_abs_diff) + + def test_check_grid_match_matchnans_falseshape_dada(self): + """Test check_grid_match() with two DataArrays that have different shapes""" + np0 = np.array([0, 1, 2, np.pi]) + np1 = np.array([0, 1, 2, np.pi, 4]) + da0 = xr.DataArray(data=np0) + 
da1 = xr.DataArray(data=np1) + match, max_abs_diff = gf.check_grid_match(da0, da1) + self.assertFalse(match) + self.assertIsNone(max_abs_diff) + + +class TestFindInstHistFiles(unittest.TestCase): + """Tests of find_inst_hist_files()""" + + def setUp(self): + """ + Set up and change to temporary directory + """ + self.prev_dir = os.getcwd() + self.temp_dir = tempfile.mkdtemp() + os.chdir(self.temp_dir) + + def tearDown(self): + """ + Delete temporary directory + """ + os.chdir(self.prev_dir) + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def _create_test_file(self, filename): + """Helper to create an empty test file""" + filepath = os.path.join(self.temp_dir, filename) + with open(filepath, "a", encoding="utf-8"): + pass + return filepath + + def test_find_inst_hist_files(self): + """Test finding only h2 files when h1i files are present too""" + # Create test files + file1 = self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc") + file2 = self._create_test_file("test.clm2.h2i.2001-01-02-00000.nc") + # Create h1 file that should not be found + self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + + result = gf.find_inst_hist_files(self.temp_dir, h=2) + + # Should find only h2i files + self.assertEqual(len(result), 2) + self.assertIn(file1, result) + self.assertIn(file2, result) + + def test_find_inst_hist_files_prefer_nc_over_base(self): + """Test that .nc files are preferred over .nc.base files""" + # Create both .nc and .nc.base files + file_nc = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + file_nc_base = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base") + + result = gf.find_inst_hist_files(self.temp_dir, h=1) + + # Should find .nc files first (pattern order preference) + self.assertIn(file_nc, result) + self.assertNotIn(file_nc_base, result) + # Should have only 1 file (the .nc file, not the .nc.base) + self.assertEqual(len(result), 1) + + def test_find_inst_hist_files_base_only(self): + """Test finding files when only .nc.base files exist""" + # Create only .nc.base files + file1 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc.base") + file2 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc.base") + + result = gf.find_inst_hist_files(self.temp_dir, h=1) + + # Should find .nc.base files when no .nc files exist + self.assertEqual(len(result), 2) + self.assertIn(file1, result) + self.assertIn(file2, result) + + def test_find_inst_hist_files_multiple_files_same_year(self): + """Test finding multiple files from the same year""" + # Create multiple files from 2000 + file1 = self._create_test_file("test.clm2.h2i.2000-01-01-00000.nc") + file2 = self._create_test_file("test.clm2.h2i.2000-01-15-00000.nc") + file3 = self._create_test_file("test.clm2.h2i.2000-01-31-00000.nc") + # Create file from different year + file4 = self._create_test_file("test.clm2.h2i.2001-01-01-00000.nc") + + result = gf.find_inst_hist_files(self.temp_dir, h=2) + + # Should find all files + self.assertEqual(len(result), 4) + self.assertIn(file1, result) + self.assertIn(file2, result) + self.assertIn(file3, result) + self.assertIn(file4, result) + + def test_find_inst_hist_files_no_files_found(self): + """Test error when no matching files are found""" + # Create a non-matching file + self._create_test_file("test.clm2.h0.2000-01-01-00000.nc") + + # Should raise FileNotFoundError + with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): + gf.find_inst_hist_files(self.temp_dir, h=1) + + def
test_find_inst_hist_files_different_case_names(self): + """Test that RuntimeError is raised when files from different case names are found""" + # Create files with different case names + self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc") + self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc") + self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc") + + # Should raise RuntimeError due to multiple case names + with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"): + gf.find_inst_hist_files(self.temp_dir, h=1) + + def test_find_inst_hist_files_different_case_names_with_logger(self): + """ + Test that RuntimeError is raised when files from different case names are found, with logger + """ + # Create a logger + logger = logging.getLogger("test_logger_case_names") + logger.setLevel(logging.DEBUG) + + # Create files with different case names + self._create_test_file("case1.clm2.h1i.2000-01-01-00000.nc") + self._create_test_file("case2.clm2.h1i.2000-01-01-00000.nc") + self._create_test_file("longcasename.clm2.h1i.2000-01-01-00000.nc") + + # Should raise RuntimeError due to multiple case names, even with logger + with self.assertRaisesRegex(RuntimeError, "Found files from multiple case names"): + gf.find_inst_hist_files(self.temp_dir, h=1, logger=logger) + + def test_find_inst_hist_files_no_files_found_with_logger(self): + """Test error when no matching files are found, with logger""" + # Create a logger + logger = logging.getLogger("test_logger_no_files") + logger.setLevel(logging.DEBUG) + + # Create a non-matching file + self._create_test_file("test.clm2.h0.2000-01-01-00000.nc") + + # Should raise FileNotFoundError even with logger + with self.assertRaisesRegex(FileNotFoundError, "No files found matching patterns"): + gf.find_inst_hist_files(self.temp_dir, h=1, logger=logger) + + def test_find_inst_hist_files_h_str_with_logger(self): + """Test that TypeError is raised when h is a string, with logger""" + # Create a logger + logger = logging.getLogger("test_logger_h_str") + logger.setLevel(logging.DEBUG) + + self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + + with self.assertRaisesRegex(TypeError, "must be an integer, not"): + gf.find_inst_hist_files(self.temp_dir, h="1", logger=logger) + + def test_find_inst_hist_files_h_float_with_logger(self): + """Test that TypeError is raised when h is a float, with logger""" + # Create a logger + logger = logging.getLogger("test_logger_h_float") + logger.setLevel(logging.DEBUG) + + self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc") + + with self.assertRaisesRegex(TypeError, "must be an integer, not"): + gf.find_inst_hist_files(self.temp_dir, h=1.0, logger=logger) + + +class TestGenInstDailyYear(unittest.TestCase): + """Tests of gen_inst_daily_year()""" + + def test_gen_inst_daily_year_noleap(self): + """Test gen_inst_daily_year with DatetimeNoLeap calendar""" + result = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + + # Should have 365 timesteps (Jan 2 through Jan 1 of next year, inclusive) + self.assertEqual(len(result), 365) + + # Check first and last dates + self.assertEqual(result.values[0], DatetimeNoLeap(2000, 1, 2, has_year_zero=True)) + self.assertEqual(result.values[-1], DatetimeNoLeap(2001, 1, 1, has_year_zero=True)) + + # Verify no Feb 29 (str() includes the time, so compare the full timestamp) + dates_str = [str(d) for d in result.values] + self.assertNotIn("2000-02-29 00:00:00", dates_str) + + # Check that all timestamps are at exactly midnight + for t in result.values: + assert t.hour == 0 + assert t.minute == 0 + assert t.second
== 0 + assert t.microsecond == 0 + + def test_gen_inst_daily_year_leap(self): + """Test gen_inst_daily_year with a leap year""" + cal_type = DatetimeAllLeap + year = 2004 + result = gf.gen_inst_daily_year(year, cal_type) + + # Should have 366 timesteps (Jan 2 through Jan 1 of next year, inclusive, with leap day) + self.assertEqual(len(result), 366) + + # Check first and last dates + self.assertEqual(result.values[0], cal_type(year, 1, 2, has_year_zero=True)) + self.assertEqual(result.values[-1], cal_type(year + 1, 1, 1, has_year_zero=True)) + + # Verify Feb 29 is there + dates_str = [str(d) for d in result.values] + self.assertIn(f"{year}-02-29 00:00:00", dates_str) + + # Check that all timestamps are at exactly midnight + for t in result.values: + assert t.hour == 0 + assert t.minute == 0 + assert t.second == 0 + assert t.microsecond == 0 + + def test_gen_inst_daily_year_consecutive_days(self): + """Test that gen_inst_daily_year produces consecutive days""" + result = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + + # Check that each day is exactly one day after the previous + for i in range(1, len(result)): + diff = result.values[i] - result.values[i - 1] + self.assertEqual(diff.days, 1) + + def test_gen_inst_daily_year_different_year(self): + """Test gen_inst_daily_year with a different year""" + result = gf.gen_inst_daily_year(1987, DatetimeNoLeap) + + # Should still have 365 timesteps + self.assertEqual(len(result), 365) + + # Check first and last dates + self.assertEqual(result.values[0], DatetimeNoLeap(1987, 1, 2, has_year_zero=True)) + self.assertEqual(result.values[-1], DatetimeNoLeap(1988, 1, 1, has_year_zero=True)) + + +class TestCheckTimeDa(unittest.TestCase): + """Tests of _check_time_da()""" + + def test_check_time_da_annual_correct(self): + """Test _check_time_da with correct annual data""" + time_val = DatetimeNoLeap(2000, 1, 1, has_year_zero=True) + time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]}) + + # Should not raise an error + gf._check_time_da("annual", 2000, time_da, logger=None) + + def test_check_time_da_annual_incorrect(self): + """Test _check_time_da with incorrect annual data""" + # Wrong date (Jan 2 instead of Jan 1) + time_val = DatetimeNoLeap(2000, 1, 2, has_year_zero=True) + time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]}) + + # Should raise AssertionError (via error()) + with self.assertRaises(AssertionError): + gf._check_time_da("annual", 2000, time_da, logger=None) + + def test_check_time_da_daily_correct(self): + """Test _check_time_da with correct daily data""" + time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + + # Should not raise an error + gf._check_time_da("daily", 2000, time_da, logger=None) + + def test_check_time_da_daily_correct_leap(self): + """As test_check_time_da_daily_correct, but for a leap year""" + time_da = gf.gen_inst_daily_year(2000, DatetimeAllLeap) + + # Should not raise an error + gf._check_time_da("daily", 2000, time_da, logger=None) + + def test_check_time_da_daily_missing_day(self): + """Test _check_time_da with daily data missing a day""" + time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + # Remove one day from the middle (day 100) + time_da_missing = xr.concat( + [time_da.isel(time=slice(0, 100)), time_da.isel(time=slice(101, None))], + dim="time", + ) + + # Should raise AssertionError (via error()) + with self.assertRaises(AssertionError): + gf._check_time_da("daily", 2000, time_da_missing, logger=None) + + def test_check_time_da_daily_extra_day(self): 
+ """Test _check_time_da with daily data having an extra day""" + time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + # Add an extra day + extra_day = DatetimeNoLeap(2001, 1, 3, has_year_zero=True) + time_da = xr.concat( + [time_da, xr.DataArray([extra_day], dims=["time"], coords={"time": [extra_day]})], + dim="time", + ) + + # Should raise AssertionError (via error()) + with self.assertRaises(AssertionError): + gf._check_time_da("daily", 2000, time_da, logger=None) + + def test_check_time_da_unknown_freq(self): + """Test _check_time_da with unknown frequency""" + time_val = DatetimeNoLeap(2000, 1, 1, has_year_zero=True) + time_da = xr.DataArray([time_val], dims=["time"], coords={"time": [time_val]}) + + # Should raise NotImplementedError for unknown frequency + with self.assertRaises(NotImplementedError): + gf._check_time_da("monthly", 2000, time_da, logger=None) + + +class TestCheckFileLists(unittest.TestCase): + """Tests of check_file_lists()""" + + def setUp(self): + """Set up and change to temporary directory""" + self.prev_dir = os.getcwd() + self.temp_dir = tempfile.mkdtemp() + os.chdir(self.temp_dir) + + def tearDown(self): + """Delete temporary directory""" + os.chdir(self.prev_dir) + shutil.rmtree(self.temp_dir, ignore_errors=True) + + def _create_test_file_with_time(self, filename, time_values): + """Helper to create a test file with specific time values""" + filepath = os.path.join(self.temp_dir, filename) + time = xr.DataArray(time_values, dims=["time"], name="time") + ds = xr.Dataset({"time": time}) + ds.to_netcdf(filepath) + return filepath + + def test_check_file_lists_annual_correct(self): + """Test check_file_lists with correct annual data""" + # Create files with correct annual timesteps + file1 = self._create_test_file_with_time( + "test.h1i.2000.nc", [DatetimeNoLeap(2000, 1, 1, has_year_zero=True)] + ) + file2 = self._create_test_file_with_time( + "test.h1i.2001.nc", [DatetimeNoLeap(2001, 1, 1, has_year_zero=True)] + ) + + history_yr_range = range(2000, 2002) + h_file_lists = [[file1], [file2]] + time_slice_list = [ + slice("2000-01-01", "2000-01-01"), + slice("2001-01-01", "2001-01-01"), + ] + + # Should not raise an error + gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "annual", logger=None) + + def test_check_file_lists_annual_correct_extrafile(self): + """Test check_file_lists with correct annual data but an extra file""" + # Create files with correct annual timesteps + file1 = self._create_test_file_with_time( + "test.h1i.2000.nc", [DatetimeNoLeap(2000, 1, 1, has_year_zero=True)] + ) + file2 = self._create_test_file_with_time( + "test.h1i.2001.nc", [DatetimeNoLeap(2001, 1, 1, has_year_zero=True)] + ) + file3 = self._create_test_file_with_time( + "test.h1i.2002.nc", [DatetimeNoLeap(2002, 1, 1, has_year_zero=True)] + ) + + history_yr_range = range(2000, 2002) + h_file_lists = [[file1], [file2], [file3]] + time_slice_list = [ + slice("2000-01-01", "2000-01-01"), + slice("2001-01-01", "2001-01-01"), + ] + + # Should not raise an error + gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "annual", logger=None) + + def test_check_file_lists_annual_incorrect(self): + """Test check_file_lists with incorrect annual data (wrong day)""" + # Create file with wrong date (Jan 2 instead of Jan 1) + file1 = self._create_test_file_with_time( + "test.h1i.2000.nc", + [DatetimeNoLeap(2000, 1, 2, has_year_zero=True)], # Wrong day + ) + + history_yr_range = range(2000, 2001) + h_file_lists = [[file1]] + # Use a slice that will include Jan 2 
+ time_slice_list = [slice("2000-01-01", "2000-01-02")] + + # Should raise AssertionError (has Jan 2 instead of Jan 1) + with self.assertRaises(AssertionError): + gf.check_file_lists( + history_yr_range, h_file_lists, time_slice_list, "annual", logger=None + ) + + def test_check_file_lists_daily_correct(self): + """Test check_file_lists with correct daily data""" + # Create file with correct daily timesteps + time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da.values) + + history_yr_range = range(2000, 2001) + h_file_lists = [[file1]] + time_slice_list = [slice("2000-01-02", "2001-01-01")] + + # Should not raise an error + gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None) + + def test_check_file_lists_daily_correct_leap(self): + """As test_check_file_lists_daily_correct but with a leap day""" + # Create file with correct daily timesteps + time_da = gf.gen_inst_daily_year(2000, DatetimeAllLeap) + file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da.values) + + history_yr_range = range(2000, 2001) + h_file_lists = [[file1]] + time_slice_list = [slice("2000-01-02", "2001-01-01")] + + # Should not raise an error + gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None) + + def test_check_file_lists_daily_missing_day(self): + """Test check_file_lists with daily data missing a day""" + # Create file with incomplete daily timesteps + time_da = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + # Remove one day from the middle + time_da_incomplete = xr.concat( + [time_da.isel(time=slice(0, 100)), time_da.isel(time=slice(101, None))], + dim="time", + ) + file1 = self._create_test_file_with_time("test.h2i.2000.nc", time_da_incomplete.values) + + history_yr_range = range(2000, 2001) + h_file_lists = [[file1]] + time_slice_list = [slice("2000-01-02", "2001-01-01")] + + # Should raise AssertionError + with self.assertRaises(AssertionError): + gf.check_file_lists( + history_yr_range, h_file_lists, time_slice_list, "daily", logger=None + ) + + def test_check_file_lists_multiple_files_per_year(self): + """Test check_file_lists with multiple files per year""" + # Create two files that together have all daily timesteps for 2000 + time_da_full = gf.gen_inst_daily_year(2000, DatetimeNoLeap) + time_da_first_half = time_da_full.isel(time=slice(0, 182)) + time_da_second_half = time_da_full.isel(time=slice(182, None)) + + file1 = self._create_test_file_with_time("test.h2i.2000a.nc", time_da_first_half.values) + file2 = self._create_test_file_with_time("test.h2i.2000b.nc", time_da_second_half.values) + + history_yr_range = range(2000, 2001) + h_file_lists = [[file1, file2]] + time_slice_list = [slice("2000-01-02", "2001-01-01")] + + # Should not raise an error + gf.check_file_lists(history_yr_range, h_file_lists, time_slice_list, "daily", logger=None) + + +if __name__ == "__main__": + unit_testing.setup_for_tests() + unittest.main()