Skip to content

Commit 22f74c2

Browse files
committed
generate_gdds bugfix: Use correct h2 years.
1 parent b51af72 commit 22f74c2

File tree

3 files changed

+120
-43
lines changed

3 files changed

+120
-43
lines changed

python/ctsm/crop_calendars/generate_gdds.py

Lines changed: 50 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@
3030
# fixed. For now, we'll just disable the warning.
3131
# pylint: disable=too-many-positional-arguments
3232

33+
# Mapping of history tape number to output frequency
34+
H_FREQ_DICT = {1: "annual", 2: "daily"}
35+
3336

3437
def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfile, cushion):
3538
"""
@@ -46,19 +49,31 @@ def _get_max_growing_season_lengths(max_season_length_from_hdates_file, paramfil
4649
return mxmats
4750

4851

49-
def _get_history_yr_range(first_season, last_season):
52+
def _get_history_yr_range(first_season, last_season, freq):
5053
"""
51-
Get a range object that can be used for looping over all years we need to process timestamps
52-
from.
54+
Get range objects that can be used for looping over all years we need to process timestamps
55+
from. Different handling for annual vs. daily history files. Assumption is that all history
56+
files are instantaneous.
5357
"""
54-
# Saving at the end of a year receive the timestamp of the END of the year's final timestep,
55-
# which means it will actually be 00:00 of Jan. 1 of the next year.
56-
first_history_yr = first_season + 1
5758

58-
# Same deal for the last history timestep, but we have to read an extra year in that case,
59-
# because in some places the last growing season won't complete until the year after it was
60-
# planted.
61-
last_history_yr = last_season + 2
59+
if freq == "annual":
60+
# Saving at the end of a year gives the timestamp of the END of the year's final timestep,
61+
# which means it will actually be 00:00 of Jan. 1 of the next year.
62+
first_history_yr = first_season + 1
63+
last_history_yr = last_season + 1
64+
elif freq == "daily":
65+
# Saving at the end of a day/beginning of the next day (i.e., 00:00:00) means that the year
66+
# will be correct for all but the Dec. 31 save, which will receive a timestamp of Jan. 1
67+
# 00:00:00. That will be handled in _get_time_slice_lists(), so we don't need to account for
68+
# it here.
69+
first_history_yr = first_season
70+
last_history_yr = last_season
71+
else:
72+
raise NotImplementedError(f"Not sure how to handle freq '{freq}'")
73+
74+
# For the last season, we have to read an extra year, because in some places the last growing
75+
# season won't complete until the year after it was planted.
76+
last_history_yr += 1
6277

6378
# last_history_yr + 1 because range() will iterate up to but not including the second value.
6479
history_yr_range = range(first_history_yr, last_history_yr + 1)
@@ -70,7 +85,7 @@ def _get_time_slice_lists(first_season, last_season):
7085
"""
7186
Given the requested first and last seasons, get the list of time slices that the script should
7287
look for. The assumption here, as in _get_file_lists() and as instructed in the docs, is
73-
that the user is saving instantaneous files.
88+
that the user is saving instantaneous tapes.
7489
"""
7590

7691
# Input checks
@@ -79,10 +94,15 @@ def _get_time_slice_lists(first_season, last_season):
7994
if first_season > last_season:
8095
raise ValueError(f"first_season ({first_season}) > last_season ({last_season})")
8196

82-
slice_lists_list = [None, None]
83-
for i, h in enumerate([1, 2]):
97+
# Initialize list with None for each history tape. Could avoid by starting with empty list and
98+
# doing .append(), but pylint gets confused by that for some reason.
99+
slice_lists_list = [None for x in range(len(H_FREQ_DICT))]
100+
101+
# Get time slice for each required history year in each history tape.
102+
for i, h in enumerate(list(H_FREQ_DICT.keys())):
84103
slice_list = []
85-
for history_yr in _get_history_yr_range(first_season, last_season):
104+
freq = H_FREQ_DICT[h]
105+
for history_yr in _get_history_yr_range(first_season, last_season, freq):
86106
if h == 1:
87107
# Annual timesteps
88108
slice_start = f"{history_yr}-01-01"
@@ -98,7 +118,10 @@ def _get_time_slice_lists(first_season, last_season):
98118

99119
# We should be reading one more than the total number of years in
100120
# [first_season, last_season].
101-
assert len(slice_list) == last_season - first_season + 2
121+
ns_exp = last_season - first_season + 2
122+
ns_actual = len(slice_list)
123+
msg = f"Expected {ns_exp} time slices in list for h{h}; got {ns_actual}"
124+
assert ns_exp == ns_actual, msg
102125

103126
# Save
104127
slice_lists_list[i] = slice_list
@@ -244,26 +267,29 @@ def main(
244267
h1_file_lists, h2_file_lists = _get_file_lists(
245268
input_dir, (h1_time_slices, h2_time_slices), logger
246269
)
247-
history_yr_range = _get_history_yr_range(first_season, last_season)
270+
history_yr_range_h1 = _get_history_yr_range(first_season, last_season, "annual")
271+
history_yr_range_h2 = _get_history_yr_range(first_season, last_season, "daily")
248272
# Check
273+
assert len(history_yr_range_h1) == len(history_yr_range_h2)
249274
log(logger, "Checking h1 files")
250-
gddfn.check_file_lists(history_yr_range, h1_file_lists, h1_time_slices, "annual", logger)
275+
gddfn.check_file_lists(history_yr_range_h1, h1_file_lists, h1_time_slices, "annual", logger)
251276
log(logger, "Checking h2 files")
252-
gddfn.check_file_lists(history_yr_range, h2_file_lists, h2_time_slices, "daily", logger)
277+
gddfn.check_file_lists(history_yr_range_h2, h2_file_lists, h2_time_slices, "daily", logger)
253278
log(logger, "Done")
254279

255-
for y, history_yr in enumerate(history_yr_range):
280+
for y, history_yr_h1 in enumerate(history_yr_range_h1):
256281
# If resuming from a pickled file, we continue until we reach a year that hasn't yet
257282
# been processed.
258-
if history_yr <= pickle_season:
283+
if history_yr_h1 <= pickle_season:
259284
continue
260-
log(logger, f"History year {history_yr}...")
285+
log(logger, f"History year {history_yr_h1}...")
261286

262287
# Get time slice and files to read for this year
263288
h1_time_slice = h1_time_slices[y] # pylint: disable=unsubscriptable-object
264289
h2_time_slice = h2_time_slices[y] # pylint: disable=unsubscriptable-object
265290
h1_file_list = h1_file_lists[y] # pylint: disable=unsubscriptable-object
266291
h2_file_list = h2_file_lists[y] # pylint: disable=unsubscriptable-object
292+
history_yr_h2 = list(history_yr_range_h2)[y]
267293

268294
(
269295
h2_ds,
@@ -293,7 +319,8 @@ def main(
293319
skip_crops,
294320
outdir_figs,
295321
logger,
296-
history_yr,
322+
history_yr_h1,
323+
history_yr_h2,
297324
h1_file_list,
298325
h2_file_list,
299326
h1_time_slice,
@@ -306,7 +333,7 @@ def main(
306333
[
307334
first_season,
308335
last_season,
309-
history_yr,
336+
history_yr_h1,
310337
gddaccum_yp_list,
311338
gddharv_yp_list,
312339
skip_patches_for_isel_nan_lastyear,

python/ctsm/crop_calendars/generate_gdds_functions.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,8 @@ def import_and_process_1yr(
371371
skip_crops,
372372
outdir_figs,
373373
logger,
374-
history_yr,
374+
history_yr_h1,
375+
history_yr_h2,
375376
h1_filelist,
376377
h2_filelist,
377378
h1_time_slice,
@@ -405,7 +406,7 @@ def import_and_process_1yr(
405406
)
406407

407408
# Check included timesteps
408-
_check_time_da("annual", history_yr, dates_ds["time"], logger)
409+
_check_time_da("annual", history_yr_h1, dates_ds["time"], logger)
409410

410411
# Should now just be one timestep, so select it to remove dimension.
411412
dates_ds = dates_ds.isel(time=0)
@@ -651,7 +652,7 @@ def import_and_process_1yr(
651652
)
652653

653654
# Check included timesteps
654-
_check_time_da("daily", history_yr, h2_ds["time"], logger)
655+
_check_time_da("daily", history_yr_h2, h2_ds["time"], logger)
655656

656657
# Restrict to patches we're including
657658
if skipping_patches_for_isel_nan:

python/ctsm/test/test_unit_generate_gdds.py

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,11 @@ def test_generate_gdds_get_time_slice_lists(self):
253253
self.assertEqual(h1_slices, expected_h1)
254254

255255
# Check h2 slices (daily timesteps - full year)
256+
# For daily, starts at first_season (1986), not first_season + 1
256257
expected_h2 = [
258+
slice("1986-01-02", "1987-01-01"),
257259
slice("1987-01-02", "1988-01-01"),
258260
slice("1988-01-02", "1989-01-01"),
259-
slice("1989-01-02", "1990-01-01"),
260261
]
261262
self.assertEqual(h2_slices, expected_h2)
262263

@@ -272,9 +273,10 @@ def test_generate_gdds_get_time_slice_lists_1yr(self):
272273
self.assertEqual(h1_slices, expected_h1)
273274

274275
# Check h2 slices
276+
# For daily, starts at first_season (1987), not first_season + 1
275277
expected_h2 = [
278+
slice("1987-01-02", "1988-01-01"),
276279
slice("1988-01-02", "1989-01-01"),
277-
slice("1989-01-02", "1990-01-01"),
278280
]
279281
self.assertEqual(h2_slices, expected_h2)
280282

@@ -324,6 +326,51 @@ def test_generate_gdds_get_time_slice_lists_h2_year_long(self):
324326
self.assertEqual(stop_year, start_year + 1)
325327

326328

329+
class TestGetHistoryYrRange(unittest.TestCase):
330+
"""Tests for _get_history_yr_range()"""
331+
332+
def test_get_history_yr_range_annual(self):
333+
"""Test _get_history_yr_range with annual frequency"""
334+
result = gg._get_history_yr_range(1986, 1987, "annual")
335+
# For annual: first_season + 1 through last_season + 2
336+
expected = range(1987, 1990)
337+
self.assertEqual(result, expected)
338+
339+
def test_get_history_yr_range_daily(self):
340+
"""Test _get_history_yr_range with daily frequency"""
341+
result = gg._get_history_yr_range(1986, 1987, "daily")
342+
# For daily: first_season through last_season + 1
343+
expected = range(1986, 1989)
344+
self.assertEqual(result, expected)
345+
346+
def test_get_history_yr_range_annual_single_year(self):
347+
"""Test _get_history_yr_range with annual frequency and single year"""
348+
result = gg._get_history_yr_range(2000, 2000, "annual")
349+
# Should give 2001, 2002
350+
expected = range(2001, 2003)
351+
self.assertEqual(result, expected)
352+
353+
def test_get_history_yr_range_daily_single_year(self):
354+
"""Test _get_history_yr_range with daily frequency and single year"""
355+
result = gg._get_history_yr_range(2000, 2000, "daily")
356+
# Should give 2000, 2001
357+
expected = range(2000, 2002)
358+
self.assertEqual(result, expected)
359+
360+
def test_get_history_yr_range_unknown_freq(self):
361+
"""Test _get_history_yr_range with unknown frequency"""
362+
with self.assertRaises(NotImplementedError):
363+
gg._get_history_yr_range(2000, 2001, "monthly")
364+
365+
def test_get_history_yr_range_lengths_match(self):
366+
"""Test that annual and daily ranges have the same length"""
367+
annual_range = gg._get_history_yr_range(2000, 2005, "annual")
368+
daily_range = gg._get_history_yr_range(2000, 2005, "daily")
369+
self.assertEqual(len(annual_range), len(daily_range))
370+
# Should be last_season - first_season + 2
371+
self.assertEqual(len(annual_range), 2005 - 2000 + 2)
372+
373+
327374
class TestCheckGridMatch(unittest.TestCase):
328375
"""Tests check_grid_match()"""
329376

@@ -617,14 +664,14 @@ def _create_test_file(self, filename):
617664
def test_get_file_lists_single_year(self):
618665
"""Test _get_file_lists with a single year of data"""
619666
# Create h1 files for 2000 and 2001, and an h2 file for 2000
620-
# (first_season=1999, last_season=1999 will request slices for both years)
667+
# Also need h2 file for 1999 since daily starts at first_season
668+
h2_file_1999 = self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc")
621669
h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
622670
h2_file_2000 = self._create_test_file("test.clm2.h2i.2000-01-02-00000.nc")
623671
h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
624-
h2_file_2001 = self._create_test_file("test.clm2.h2i.2001-01-02-00000.nc")
625672

626673
# Get time slice lists for first_season=1999, last_season=1999
627-
# This will give us slices for 2000 and 2001
674+
# This will give us slices for 2000 and 2001 (h1), and 1999 and 2000 (h2)
628675
time_slice_lists_list = gg._get_time_slice_lists(1999, 1999)
629676

630677
h1_file_lists, h2_file_lists = gg._get_file_lists(
@@ -640,23 +687,24 @@ def test_get_file_lists_single_year(self):
640687
self.assertEqual(len(h1_file_lists[0]), 1)
641688
self.assertEqual(h1_file_lists[0], [h1_file_2000])
642689
self.assertEqual(len(h2_file_lists[0]), 1)
643-
self.assertEqual(h2_file_lists[0], [h2_file_2000])
690+
self.assertEqual(h2_file_lists[0], [h2_file_1999])
644691
self.assertEqual(len(h1_file_lists[1]), 1)
645692
self.assertEqual(h1_file_lists[1], [h1_file_2001])
646693
self.assertEqual(len(h2_file_lists[1]), 1)
647-
self.assertEqual(h2_file_lists[1], [h2_file_2001])
694+
self.assertEqual(h2_file_lists[1], [h2_file_2000])
648695

649696
def test_get_file_lists_multiple_years(self):
650697
"""Test _get_file_lists with multiple years of data"""
651698
# Create h1 and h2 files for 2000-2002
699+
# Also need h2 file for 1999 since daily starts at first_season
652700
h1_files = []
653-
h2_files = []
701+
h2_files = [self._create_test_file("test.clm2.h2i.1999-01-02-00000.nc")]
654702
for year in [2000, 2001, 2002]:
655703
h1_files.append(self._create_test_file(f"test.clm2.h1i.{year}-01-01-00000.nc"))
656704
h2_files.append(self._create_test_file(f"test.clm2.h2i.{year}-01-02-00000.nc"))
657705

658706
# Get time slice lists for first_season=1999, last_season=2000
659-
# This will give us slices for 2000, 2001, 2002
707+
# This will give us slices for 2000, 2001, 2002 (h1) and 1999, 2000, 2001 (h2)
660708
time_slice_lists_list = gg._get_time_slice_lists(1999, 2000)
661709

662710
h1_file_lists, h2_file_lists = gg._get_file_lists(
@@ -681,14 +729,15 @@ def test_get_file_lists_multiple_files_per_slice(self):
681729
h1_file_2000 = self._create_test_file("test.clm2.h1i.2000-01-01-00000.nc")
682730
h1_file_2001 = self._create_test_file("test.clm2.h1i.2001-01-01-00000.nc")
683731

684-
# Create multiple h2 files for 2000 (daily throughout the year)
732+
# Create multiple h2 files for 1999 and 2000 (daily throughout the year)
733+
h2_files_1999 = []
734+
for month in ["01", "06", "12"]:
735+
h2_files_1999.append(self._create_test_file(f"test.clm2.h2i.1999-{month}-15-00000.nc"))
736+
685737
h2_files_2000 = []
686738
for month in ["01", "06", "12"]:
687739
h2_files_2000.append(self._create_test_file(f"test.clm2.h2i.2000-{month}-15-00000.nc"))
688740

689-
# Create h2 file for 2001
690-
h2_file_2001 = self._create_test_file("test.clm2.h2i.2001-01-02-00000.nc")
691-
692741
# Get time slice lists for first_season=1999, last_season=1999
693742
time_slice_lists_list = gg._get_time_slice_lists(1999, 1999)
694743

@@ -700,18 +749,18 @@ def test_get_file_lists_multiple_files_per_slice(self):
700749
self.assertEqual(len(h1_file_lists), 2)
701750
self.assertEqual(len(h2_file_lists), 2)
702751

703-
# Check contents of file lists for first year (2000)
752+
# Check contents of file lists
704753
# pylint: disable=unsubscriptable-object
705754
self.assertEqual(len(h1_file_lists[0]), 1)
706755
self.assertEqual(h1_file_lists[0], [h1_file_2000])
707756
self.assertEqual(len(h2_file_lists[0]), 3)
708-
self.assertEqual(h2_file_lists[0], sorted(h2_files_2000))
757+
self.assertEqual(h2_file_lists[0], sorted(h2_files_1999))
709758

710759
# Check second time slice (2001 for h1, 2000 for h2)
711760
self.assertEqual(len(h1_file_lists[1]), 1)
712761
self.assertEqual(h1_file_lists[1], [h1_file_2001])
713-
self.assertEqual(len(h2_file_lists[1]), 1)
714-
self.assertEqual(h2_file_lists[1], [h2_file_2001])
762+
self.assertEqual(len(h2_file_lists[1]), 3)
763+
self.assertEqual(h2_file_lists[1], sorted(h2_files_2000))
715764

716765
def test_get_file_lists_no_h1_files(self):
717766
"""Test _get_file_lists when h1 files are missing"""

0 commit comments

Comments
 (0)