test_bit_reproducibility.py
# Copyright 2024 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
# SPDX-License-Identifier: Apache-2.0

"""Tests for model reproducibility"""

import json
from pathlib import Path
from typing import Optional

import pytest

from model_config_tests.exp_test_helper import setup_exp
from model_config_tests.util import DAY_IN_SECONDS, HOUR_IN_SECONDS
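
# These tests are selected via the pytest markers applied below, e.g.
#   pytest -m checksum        # historical-checksum reproducibility test
#   pytest -m checksum_slow   # repeat-run and restart reproducibility tests
# (the exact invocation depends on how model-config-tests is driven; the
# markers themselves are registered in the project's pytest configuration)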


def set_checksum_output_dir(output_path: Path):
    """Create an output directory for checksums and remove any pre-existing
    historical checksums. Note: The checksums stored in this directory are
    used in Reproducibility CI workflows, and are copied up to Github"""
    output_dir = output_path / "checksum"
    output_dir.mkdir(parents=True, exist_ok=True)

    pre_existing_files = output_dir.glob("historical-*hr-checksum.json")
    for file in pre_existing_files:
        file.unlink()

    return output_dir


def read_historical_checksums(
    control_path: Path, checksum_filename: str, checksum_path: Optional[Path] = None
):
    """Read a historical checksum file"""
    if checksum_path is None:
        # Default to testing/checksum/historical-*hr-checksum.json stored
        # in the model configuration directory
        config_checksum_dir = control_path / "testing" / "checksum"
        checksum_path = config_checksum_dir / checksum_filename

    hist_checksums = None
    if checksum_path.exists():
        with open(checksum_path) as file:
            hist_checksums = json.load(file)

    return hist_checksums
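
# Note on the checksum file format: it is parsed as plain JSON and compared
# for exact equality in the tests below. The only key this module reads
# directly is "schema_version"; the rest of the layout (e.g. a mapping of
# output fields to checksum values) is determined by whatever
# exp.extract_checksums produces for the model being tested.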


class TestBitReproducibility:

    @pytest.mark.checksum
    def test_bit_repro_historical(
        self,
        output_path: Path,
        control_path: Path,
        checksum_path: Optional[Path],
        keep_archive: Optional[bool],
    ):
        """
        Test that a run reproduces historical checksums

        Parameters (these are fixtures defined in conftest.py)
        ----------
        output_path: Path
            Output directory for test output and where the control and
            lab directories are stored for the payu experiments. Default is
            set in conftest.py
        control_path: Path
            Path to the model configuration to test. This is copied to
            create the control directories in experiments. Default is set
            in conftest.py
        checksum_path: Optional[Path]
            Path to checksums to compare model output against. Default is
            the checksums saved on the model configuration (set in
            conftest.py)
        keep_archive: Optional[bool]
            This flag is used in testing so that test code can reuse a
            previous test archive, and to disable running the model with
            payu
        """
        # Setup checksum output directory
        checksum_output_dir = set_checksum_output_dir(output_path=output_path)

        # Setup experiment
        exp = setup_exp(
            control_path, output_path, "test_bit_repro_historical", keep_archive
        )

        # Set model runtime using the configured default
        exp.model.set_model_runtime()

        # Run the experiment using payu
        status, stdout, stderr, output_files = exp.setup_and_run()

        if status != 0 or not exp.model.output_exists():
            # Log the run information
            exp.print_run_logs(status, stdout, stderr, output_files)

        assert status == 0, (
            "There was an error running the experiment. "
            "See the logs for more information on the experiment run"
        )

        assert exp.model.output_exists(), (
            "Output file for the model does not exist. "
            "See the logs for more information on the experiment run"
        )

        # Set the checksum output filename using the model default runtime
        runtime_hours = exp.model.default_runtime_seconds // HOUR_IN_SECONDS
        checksum_filename = f"historical-{runtime_hours}hr-checksum.json"

        # Read the historical checksum file
        hist_checksums = read_historical_checksums(
            control_path, checksum_filename, checksum_path
        )

        # Use the schema version from the historical checksum file for parsing
        # checksums; fall back to the model default if the file does not exist
        schema_version = (
            hist_checksums["schema_version"]
            if hist_checksums
            else exp.model.default_schema_version
        )

        # Extract checksums
        checksums = exp.extract_checksums(schema_version=schema_version)

        # Write out checksums to output file
        checksum_output_file = checksum_output_dir / checksum_filename
        with open(checksum_output_file, "w") as file:
            json.dump(checksums, file, indent=2)

        assert (
            hist_checksums == checksums
        ), f"Checksums were not equal. The new checksums have been written to {checksum_output_file}."

    @pytest.mark.checksum_slow
    def test_bit_repro_repeat(self, output_path: Path, control_path: Path):
        """
        Test that a run has the same checksums when run twice
        """
        exp_bit_repo1 = setup_exp(control_path, output_path, "test_bit_repro_repeat_1")
        exp_bit_repo2 = setup_exp(control_path, output_path, "test_bit_repro_repeat_2")

        # Reconfigure to the default model runtime and run
        for exp in [exp_bit_repo1, exp_bit_repo2]:
            exp.model.set_model_runtime()
            exp.setup_and_run()

        # Compare expected to produced.
        assert exp_bit_repo1.model.output_exists()
        expected = exp_bit_repo1.extract_checksums()

        assert exp_bit_repo2.model.output_exists()
        produced = exp_bit_repo2.extract_checksums()

        assert produced == expected

    @pytest.mark.checksum_slow
    def test_restart_repro(self, output_path: Path, control_path: Path):
        """
        Test that a run reproduces across restarts.
        """
        # First do two short (1 day) runs.
        exp_2x1day = setup_exp(control_path, output_path, "test_restart_repro_2x1day")

        # Reconfigure to a 1 day run.
        exp_2x1day.model.set_model_runtime(seconds=DAY_IN_SECONDS)

        # Now run twice.
        exp_2x1day.setup_and_run()
        exp_2x1day.force_qsub_run()

        # Now do a single 2 day run
        exp_2day = setup_exp(control_path, output_path, "test_restart_repro_2day")

        # Reconfigure
        exp_2day.model.set_model_runtime(seconds=(2 * DAY_IN_SECONDS))

        # Run once.
        exp_2day.setup_and_run()

        # Now compare the output between our two short and one long run.
        checksums_1d_0 = exp_2x1day.extract_checksums()
        checksums_1d_1 = exp_2x1day.extract_checksums(exp_2x1day.model.output_1)

        checksums_2d = exp_2day.extract_checksums()
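
        # The idea behind the check: two 1-day runs with a restart in between
        # should be bit-reproducible against a single continuous 2-day run.
        # How the individual checksums are matched up across the restart
        # boundary is delegated to the model-specific method below.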

        # Use model-specific comparison method for checksums
        model = exp_2day.model
        matching_checksums = model.check_checksums_over_restarts(
            long_run_checksum=checksums_2d,
            short_run_checksum_0=checksums_1d_0,
            short_run_checksum_1=checksums_1d_1,
        )

        if not matching_checksums:
            # Write checksums out to file
            with open(output_path / "restart-1d-0-checksum.json", "w") as file:
                json.dump(checksums_1d_0, file, indent=2)
            with open(output_path / "restart-1d-1-checksum.json", "w") as file:
                json.dump(checksums_1d_1, file, indent=2)
            with open(output_path / "restart-2d-0-checksum.json", "w") as file:
                json.dump(checksums_2d, file, indent=2)

        assert matching_checksums