Commits (21 total; this view shows changes from 7):
e297e38 (samsrabin, Feb 10, 2026): generate_gdds: Different time slices needed for h1 vs h2.
ce79750 (samsrabin, Nov 14, 2025): find_inst_hist_files(): Remove unused this_year option.
424a567 (samsrabin, Nov 14, 2025): generate_gdds: Don't allow "incorrectly daily" h1 files.
78c6cb6 (samsrabin, Nov 15, 2025): generate_gdds: Improve logging.
0a38056 (samsrabin, Nov 15, 2025): crop_calendars: Resolve some FutureWarnings.
b51af72 (samsrabin, Nov 15, 2025): generate_gdds: Check that all years' files have correct timesteps bef…
b33c42e (samsrabin, Nov 15, 2025): generate_gdds bugfix: Use correct h2 years.
01d7c85 (samsrabin, Feb 10, 2026): test_unit_generate_gdds: Fix expected names of h2 files in a test.
96674d3 (samsrabin, Feb 10, 2026): TestGenInstDailyYear: Check saves at exactly midnight.
8905786 (samsrabin, Feb 10, 2026): test_find_inst_hist_files_multiple_months_same_year: Use h2, not h1
dd28452 (samsrabin, Feb 10, 2026): Delete f09_g17 version of RXCROPMATURITY_ test.
0978fc8 (samsrabin, Feb 10, 2026): Add f09_t232 version of RXCROPMATURITY_ test to rxcropmaturity suite.
00fbe54 (samsrabin, Feb 10, 2026): Remove RXCROPMATURITY_ tests from expected failures.
31af968 (samsrabin, Feb 10, 2026): Split unit tests for generate_gdds vs generate_gdds functions.
d1a23de (samsrabin, Feb 11, 2026): Resolve Python FutureWarnings from RXCROPMATURITY tests.
9e471c3 (samsrabin, Feb 11, 2026): RXCROPMATURITY tests: Always use ctsm_pylib.
fff3a90 (samsrabin, Feb 12, 2026): crop_calendars Python: Don't gate error() with if logger:.
b3565ad (samsrabin, Feb 12, 2026): get_files_in_time_slice() Remove 'if logger' condition.
9d2e455 (samsrabin, Feb 12, 2026): Rename a test for clarity.
df16fa3 (samsrabin, Feb 12, 2026): generate_gdds: Be stricter about "tape" vs. "file".
b021196 (samsrabin, Feb 12, 2026): generate_gdds: Get freq from H_FREQ_DICT.
python/ctsm/crop_calendars/cropcal_module.py (2 changes: 1 addition & 1 deletion)

@@ -208,7 +208,7 @@ def import_max_gs_length(paramfile):
     Import maximum growing season length
     """
     # Get parameter file
-    paramfile_ds = xr.open_dataset(paramfile)
+    paramfile_ds = xr.open_dataset(paramfile, decode_timedelta=True)
 
     # Import max growing season length (stored in netCDF as nanoseconds!)
     paramfile_mxmats = paramfile_ds["mxmat"].values / np.timedelta64(1, "D")
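For context on this change: newer xarray releases emit a FutureWarning when a file contains timedelta-like variables and decode_timedelta is not set explicitly, so passing decode_timedelta=True silences the warning while keeping "mxmat" decoded as timedeltas. Below is a minimal sketch (not code from this PR) of the conversion the surrounding lines rely on; the toy dataset is an invented stand-in for the real CTSM parameter file:

import numpy as np
import xarray as xr

# Invented stand-in for the paramfile: "mxmat" holds maximum growing-season
# lengths as timedeltas (stored in netCDF as nanoseconds).
paramfile_ds = xr.Dataset(
    {"mxmat": ("ivt", np.array([120, 150, 300], dtype="timedelta64[D]"))}
)

# Dividing a timedelta64 array by a one-day timedelta yields plain float days:
paramfile_mxmats = paramfile_ds["mxmat"].values / np.timedelta64(1, "D")
print(paramfile_mxmats)  # [120. 150. 300.]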
python/ctsm/crop_calendars/generate_gdds.py (159 changes: 101 additions & 58 deletions)

@@ -4,6 +4,7 @@
 
 import os
 import sys
+
 import pickle
 import datetime as dt
 import argparse
@@ -29,6 +30,9 @@
 # fixed. For now, we'll just disable the warning.
 # pylint: disable=too-many-positional-arguments
 
+# Mapping of history tape number to output frequency
+H_FREQ_DICT = {1: "annual", 2: "daily"}
+
 
 def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfile, cushion):
     """
@@ -45,31 +49,43 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfile, cushion):
     return mxmats
 
 
-def _get_history_yr_range(first_season, last_season):
+def _get_history_yr_range(first_season, last_season, freq):
     """
-    Get a range object that can be used for looping over all years we need to process timestamps
-    from.
+    Get range objects that can be used for looping over all years we need to process timestamps
+    from. Different handling for annual vs. daily history files. Assumption is that all history
+    files are instantaneous.
     """
-    # Saving at the end of a year receive the timestamp of the END of the year's final timestep,
-    # which means it will actually be 00:00 of Jan. 1 of the next year.
-    first_history_yr = first_season + 1
-
-    # Same deal for the last history timestep, but we have to read an extra year in that case,
-    # because in some places the last growing season won't complete until the year after it was
-    # planted.
-    last_history_yr = last_season + 2
+    if freq == "annual":
+        # Saving at the end of a year gives the timestamp of the END of the year's final timestep,
+        # which means it will actually be 00:00 of Jan. 1 of the next year.
+        first_history_yr = first_season + 1
+        last_history_yr = last_season + 1
+    elif freq == "daily":
+        # Saving at the end of a day/beginning of the next day (i.e., 00:00:00) means that the year
+        # will be correct for all but the Dec. 31 save, which will receive a timestamp of Jan. 1
+        # 00:00:00. That will be handled in _get_time_slice_lists(), so we don't need to account for
+        # it here.
+        first_history_yr = first_season
+        last_history_yr = last_season
+    else:
+        raise NotImplementedError(f"Not sure how to handle freq '{freq}'")
+
+    # For the last season, we have to read an extra year, because in some places the last growing
+    # season won't complete until the year after it was planted.
+    last_history_yr += 1
 
     # last_history_yr + 1 because range() will iterate up to but not including the second value.
     history_yr_range = range(first_history_yr, last_history_yr + 1)
 
     return history_yr_range
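For intuition, here is what _get_history_yr_range() yields for a hypothetical two-season run; the inputs are invented for illustration and the numbers are worked out from the logic above:

# Invented inputs: first_season = 2000, last_season = 2001
#
# Annual (h1): end-of-year saves are stamped 00:00 Jan. 1 of the NEXT year,
# so first_history_yr = 2001 and last_history_yr = 2002, bumped to 2003 by
# the extra year; range(2001, 2004) -> history years 2001, 2002, 2003.
#
# Daily (h2): history years line up with the seasons themselves (the lone
# Jan. 1 00:00:00 spillover is handled by the time slices), so
# first_history_yr = 2000 and last_history_yr = 2001, bumped to 2002;
# range(2000, 2003) -> history years 2000, 2001, 2002.
#
# Both ranges have the same length, which main() later asserts.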


-def _get_time_slice_list(first_season, last_season):
+def _get_time_slice_lists(first_season, last_season):
     """
     Given the requested first and last seasons, get the list of time slices that the script should
-    look for. The assumptions here, as in import_and_process_1yr and as instructed in the docs, are
-    that the user (a) is saving instantaneous annual files and (b) started on Jan. 1.
+    look for. The assumption here, as in _get_file_lists() and as instructed in the docs, is
+    that the user is saving instantaneous tapes.
     """
 
     # Input checks
@@ -78,32 +94,56 @@ def _get_time_slice_list(first_season, last_season):
     if first_season > last_season:
         raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")
 
-    slice_list = []
-    for history_yr in _get_history_yr_range(first_season, last_season):
-        slice_start = f"{history_yr}-01-01"
-        # Stop could probably be the same as start, since there should just be one value saved per
-        # year and that should get the Jan. 1 timestamp.
-        slice_stop = f"{history_yr}-12-31"
-        slice_list.append(slice(slice_start, slice_stop))
-
-    # We should be reading one more than the total number of years in [first_season, last_season].
-    assert len(slice_list) == last_season - first_season + 2
-
-    return slice_list
-
-
-def _get_file_lists(input_dir, time_slice_list, logger):
+    # Initialize list with None for each history tape. Could avoid by starting with empty list and
+    # doing .append(), but pylint gets confused by that for some reason.
+    slice_lists_list = [None for x in range(len(H_FREQ_DICT))]
+
+    # Get time slice for each required history year in each history tape.
+    for i, h in enumerate(list(H_FREQ_DICT.keys())):
+        slice_list = []
+        freq = H_FREQ_DICT[h]
+        for history_yr in _get_history_yr_range(first_season, last_season, freq):
+            if h == 1:
+                # Annual timesteps
+                slice_start = f"{history_yr}-01-01"
+                slice_stop = f"{history_yr}-01-01"
+            elif h == 2:
+                # Daily timesteps of instantaneous variables will go from Jan. 2 through Jan. 1
+                # because they will get the time at the end of each timestep.
+                slice_start = f"{history_yr}-01-02"
+                slice_stop = f"{history_yr + 1}-01-01"
+            else:
+                raise NotImplementedError(f"What frequency are h{h}i files saved at?")
+            slice_list.append(slice(slice_start, slice_stop))
+
+        # We should be reading one more than the total number of years in
+        # [first_season, last_season].
+        ns_exp = last_season - first_season + 2
+        ns_actual = len(slice_list)
+        msg = f"Expected {ns_exp} time slices in list for h{h}; got {ns_actual}"
+        assert ns_exp == ns_actual, msg
+
+        # Save
+        slice_lists_list[i] = slice_list
+
+    return tuple(slice_lists_list)
+
+
+def _get_file_lists(input_dir, time_slice_lists_list, logger):
     """
     For each time slice in a list, find the file(s) that need to be read to get all history
     timesteps in the slice. Returns both h1i and h2i file lists.
     """
     output_file_lists_list = [None, None]
     for i, h in enumerate([1, 2]):
+        time_slice_list = time_slice_lists_list[i]
         all_h_files = gddfn.find_inst_hist_files(input_dir, h=h, logger=logger)
         h_file_lists = []
        for time_slice in time_slice_list:
             try:
-                h_file_lists.append(get_files_in_time_slice(all_h_files, time_slice, logger=logger))
+                h_file_lists.append(
+                    get_files_in_time_slice(all_h_files, time_slice, logger=logger, quiet=True)
+                )
             except FileNotFoundError as e:
                 raise FileNotFoundError(f"No h{h} timesteps found in {time_slice}") from e
         output_file_lists_list[i] = h_file_lists
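As a concrete illustration of the slices built above, for an invented history year 2002 (not a value from the PR):

# h1 (annual, instantaneous): the single end-of-year save is stamped
# 00:00 Jan. 1, so the slice starts and stops on the same day:
h1_slice = slice("2002-01-01", "2002-01-01")

# h2 (daily, instantaneous): end-of-timestep stamps run from Jan. 2 of the
# history year through Jan. 1 of the following year:
h2_slice = slice("2002-01-02", "2003-01-01")

String-bounded slices like these are what xarray accepts for label-based time selection, e.g. ds.sel(time=h2_slice).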
@@ -184,17 +224,6 @@ def main(
     ##########################
 
     if not only_make_figs:
-        # Keep 1 extra year to avoid incomplete final growing season for crops
-        # harvested after Dec. 31.
-        yr_1_import_str = f"{first_season+1}-01-01"
-        yr_n_import_str = f"{last_season+2}-01-01"
-
-        log(
-            logger,
-            f"Importing netCDF time steps {yr_1_import_str} through {yr_n_import_str} "
-            + "(years are +1 because of CTSM output naming)",
-        )
-
         # This script uses pickle to save work in progress. In case of interruption, when the script
         # is resumed, it will look for a pickle file. It will resume from the year after
         # pickle_year, which is the last processed year in the pickle file.
@@ -205,24 +234,22 @@
                 (
                     first_season,
                     last_season,
-                    pickle_year,
+                    pickle_season,
                     gddaccum_yp_list,
                     gddharv_yp_list,
                     skip_patches_for_isel_nan_lastyear,
                     lastyear_active_patch_indices_list,
-                    incorrectly_daily,
                     save_figs,
                     incl_vegtypes_str,
                     incl_patches1d_itype_veg,
                     mxsowings,
                     skip_crops,
                 ) = pickle.load(file)
-            print(f"Will resume import at {pickle_year+1}")
+            log(logger, f"Will resume import at season {pickle_season+1}")
             h2_ds = None
         else:
-            incorrectly_daily = False
             skip_patches_for_isel_nan_lastyear = np.ndarray([])
-            pickle_year = -np.inf
+            pickle_season = -np.inf
             gddaccum_yp_list = []
             gddharv_yp_list = []
             incl_vegtypes_str = None
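For readers new to this checkpoint-and-resume pattern, a generic minimal sketch; the file name, loop bounds, and state variables are invented and much smaller than the script's real state tuple:

import os
import pickle

state_file = "work_in_progress.pickle"  # invented path

# Resume from the last completed season if a checkpoint exists
if os.path.exists(state_file):
    with open(state_file, "rb") as file:
        pickle_season, results = pickle.load(file)
else:
    pickle_season, results = -1, []

for season in range(2000, 2005):
    if season <= pickle_season:
        continue  # already processed before the interruption
    results.append(f"processed {season}")  # stand-in for the real work
    pickle_season = season
    with open(state_file, "wb") as file:
        pickle.dump((pickle_season, results), file)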
@@ -235,19 +262,34 @@
         )
 
         # Get lists of history timesteps and files to read
-        time_slice_list = _get_time_slice_list(first_season, last_season)
-        h1_file_lists, h2_file_lists = _get_file_lists(input_dir, time_slice_list, logger)
+        log(logger, "Getting lists of history timesteps and files to read")
+        h1_time_slices, h2_time_slices = _get_time_slice_lists(first_season, last_season)
+        h1_file_lists, h2_file_lists = _get_file_lists(
+            input_dir, (h1_time_slices, h2_time_slices), logger
+        )
+        history_yr_range_h1 = _get_history_yr_range(first_season, last_season, "annual")
+        history_yr_range_h2 = _get_history_yr_range(first_season, last_season, "daily")
+        # Check
+        assert len(history_yr_range_h1) == len(history_yr_range_h2)
+        log(logger, "Checking h1 files")
+        gddfn.check_file_lists(history_yr_range_h1, h1_file_lists, h1_time_slices, "annual", logger)
+        log(logger, "Checking h2 files")
+        gddfn.check_file_lists(history_yr_range_h2, h2_file_lists, h2_time_slices, "daily", logger)
+        log(logger, "Done")
 
-        for yr_index, this_yr in enumerate(_get_history_yr_range(first_season, last_season)):
+        for y, history_yr_h1 in enumerate(history_yr_range_h1):
             # If resuming from a pickled file, we continue until we reach a year that hasn't yet
             # been processed.
-            if this_yr <= pickle_year:
+            if history_yr_h1 <= pickle_season:
                 continue
-            log(logger, f"netCDF year {this_yr}...")
+            log(logger, f"History year {history_yr_h1}...")
 
-            # Get h1 and h2 files to read for this year
-            h1_file_list = h1_file_lists[yr_index]  # pylint: disable=unsubscriptable-object
-            h2_file_list = h2_file_lists[yr_index]  # pylint: disable=unsubscriptable-object
+            # Get time slice and files to read for this year
+            h1_time_slice = h1_time_slices[y]  # pylint: disable=unsubscriptable-object
+            h2_time_slice = h2_time_slices[y]  # pylint: disable=unsubscriptable-object
+            h1_file_list = h1_file_lists[y]  # pylint: disable=unsubscriptable-object
+            h2_file_list = h2_file_lists[y]  # pylint: disable=unsubscriptable-object
+            history_yr_h2 = list(history_yr_range_h2)[y]
 
             (
                 h2_ds,
@@ -257,30 +299,32 @@
                 gddharv_yp_list,
                 skip_patches_for_isel_nan_lastyear,
                 lastyear_active_patch_indices_list,
-                incorrectly_daily,
                 incl_vegtypes_str,
                 incl_patches1d_itype_veg,
                 mxsowings,
             ) = gddfn.import_and_process_1yr(
                 first_season,
                 last_season,
-                yr_index,
+                y,
                 sdates_rx,
                 hdates_rx,
                 gddaccum_yp_list,
                 gddharv_yp_list,
                 skip_patches_for_isel_nan_lastyear,
                 lastyear_active_patch_indices_list,
-                incorrectly_daily,
                 incl_vegtypes_str,
                 h2_ds_file,
                 mxmats,
                 cc.get_gs_len_da,
                 skip_crops,
                 outdir_figs,
                 logger,
+                history_yr_h1,
+                history_yr_h2,
                 h1_file_list,
                 h2_file_list,
+                h1_time_slice,
+                h2_time_slice,
             )
 
             log(logger, f" Saving pickle file ({pickle_file})...")
@@ -289,12 +333,11 @@
                 [
                     first_season,
                     last_season,
-                    this_yr,
+                    history_yr_h1,
                     gddaccum_yp_list,
                     gddharv_yp_list,
                     skip_patches_for_isel_nan_lastyear,
                     lastyear_active_patch_indices_list,
-                    incorrectly_daily,
                     save_figs,
                     incl_vegtypes_str,
                     incl_patches1d_itype_veg,