ci[cartesian]: Thread safe parallel stencil tests #1849
```diff
@@ -167,7 +167,7 @@ def get_globals_combinations(dtypes):
                 generation_strategy=composite_strategy_factory(
                     d, generation_strategy_factories
                 ),
-                implementations=[],
+                implementation=None,
                 test_id=len(cls_dict["tests"]),
                 definition=annotate_function(
                     function=cls_dict["definition"],
```
```diff
@@ -199,14 +199,19 @@ def hyp_wrapper(test_hyp, hypothesis_data):

         for test in cls_dict["tests"]:
             if test["suite"] == cls_name:
-                marks = test["marks"]
-                if gt4pyc.backend.from_name(test["backend"]).storage_info["device"] == "gpu":
-                    marks.append(pytest.mark.requires_gpu)
                 name = test["backend"]
                 name += "".join(f"_{key}_{value}" for key, value in test["constants"].items())
                 name += "".join(
                     "_{}_{}".format(key, value.name) for key, value in test["dtypes"].items()
                 )

+                marks = test["marks"].copy()
+                if gt4pyc.backend.from_name(test["backend"]).storage_info["device"] == "gpu":
+                    marks.append(pytest.mark.requires_gpu)
+                # Run generation and implementation tests in the same group to ensure
+                # (thread-) safe parallelization of stencil tests.
+                marks.append(pytest.mark.xdist_group(name=f"{cls_name}_{name}"))
+
                 param = pytest.param(test, marks=marks, id=name)
                 pytest_params.append(param)
```
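The switch to `test["marks"].copy()` is presumably needed because the same test record is visited by more than one collection loop; appending to the aliased list would leak marks (e.g. a duplicated `requires_gpu`) into the record itself. A minimal illustration of the aliasing pitfall (not code from this PR):

```python
# Aliasing pitfall illustrated (not gt4py code): without .copy(), appending
# to `marks` also mutates the list stored inside the test record.
test = {"marks": []}

marks = test["marks"]                     # alias of the stored list
marks.append("requires_gpu")
assert test["marks"] == ["requires_gpu"]  # the record was mutated

test = {"marks": []}
marks = test["marks"].copy()              # independent copy
marks.append("requires_gpu")
assert test["marks"] == []                # the record stays clean
```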
```diff
@@ -228,14 +233,19 @@ def hyp_wrapper(test_hyp, hypothesis_data):
         runtime_pytest_params = []
         for test in cls_dict["tests"]:
             if test["suite"] == cls_name:
-                marks = test["marks"]
-                if gt4pyc.backend.from_name(test["backend"]).storage_info["device"] == "gpu":
-                    marks.append(pytest.mark.requires_gpu)
                 name = test["backend"]
                 name += "".join(f"_{key}_{value}" for key, value in test["constants"].items())
                 name += "".join(
                     "_{}_{}".format(key, value.name) for key, value in test["dtypes"].items()
                 )

+                marks = test["marks"].copy()
+                if gt4pyc.backend.from_name(test["backend"]).storage_info["device"] == "gpu":
+                    marks.append(pytest.mark.requires_gpu)
+                # Run generation and implementation tests in the same group to ensure
+                # (thread-) safe parallelization of stencil tests.
+                marks.append(pytest.mark.xdist_group(name=f"{cls_name}_{name}"))
+
                 runtime_pytest_params.append(
                     pytest.param(
                         test,
```
```diff
@@ -434,8 +444,11 @@ class StencilTestSuite(metaclass=SuiteMeta):
     def _test_generation(cls, test, externals_dict):
         """Test source code generation for all *backends* and *stencil suites*.

-        The generated implementations are cached in a :class:`utils.ImplementationsDB`
-        instance, to avoid duplication of (potentially expensive) compilations.
+        The generated implementation is cached in the test context, to avoid duplication
+        of (potentially expensive) compilation.
+        Note: This caching introduces a dependency between tests, which is captured by an
+        `xdist_group` marker in combination with `--dist loadgroup` to ensure safe parallel
+        test execution.
         """
         backend_slug = gt_utils.slugify(test["backend"], valid_symbols="")
         implementation = gtscript.stencil(
```

**Review comment on lines -437 to +451:**

> This comment was out of date. There's no `utils.ImplementationsDB` anymore.
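For context, here is a minimal standalone sketch (not from this PR; the group name and cache are made up) of the mechanism the docstring refers to: tests that carry the same `xdist_group` name are scheduled onto the same `pytest-xdist` worker when the suite runs with `--dist loadgroup`, so per-process state written by the generation test is visible to the implementation test.

```python
# Minimal xdist_group sketch (hypothetical names, not gt4py code).
# Run with: pytest -n auto --dist loadgroup
import pytest

_cache = {}  # module-level state, shared only within one xdist worker process

@pytest.mark.xdist_group(name="StencilSuite_numpy")
def test_generation():
    _cache["impl"] = lambda x: x + 1  # stand-in for an expensive compilation

@pytest.mark.xdist_group(name="StencilSuite_numpy")
def test_implementation():
    # Same group name => same worker process => the cached entry is visible.
    impl = _cache.get("impl")
    assert impl is not None
    assert impl(1) == 2
```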
```diff
@@ -461,7 +474,8 @@ def _test_generation(cls, test, externals_dict):
                 or ax == "K"
                 or field_info.boundary[i] >= cls.global_boundaries[name][i]
             )
-        test["implementations"].append(implementation)
+        assert test["implementation"] is None
+        test["implementation"] = implementation

     @classmethod
     def _run_test_implementation(cls, parameters_dict, implementation):  # too complex
```

**Review comment on lines -464 to +478:**

> Assert our assumption that we only ever cache one implementation per test context.
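Stated in isolation, the invariant being asserted looks like this (illustrative, assumed intent, not gt4py code):

```python
# One implementation slot per test record: asserting the slot is empty
# before writing catches accidental double generation for the same context.
record = {"implementation": None}

def cache_implementation(record, implementation):
    assert record["implementation"] is None, "already generated for this test context"
    record["implementation"] = implementation

cache_implementation(record, object())  # first generation succeeds
# a second call would fail the assert instead of silently overwriting
```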
```diff
@@ -585,16 +599,16 @@ def _run_test_implementation(cls, parameters_dict, implementation):  # too complex
     def _test_implementation(cls, test, parameters_dict):
         """Test computed values for implementations generated for all *backends* and *stencil suites*.

-        The generated implementations are reused from previous tests by means of a
-        :class:`utils.ImplementationsDB` instance shared at module scope.
+        The generated implementation was cached in the test context, to avoid duplication
+        of (potentially expensive) compilation.
+        Note: This caching introduces a dependency between tests, which is captured by an
+        `xdist_group` marker in combination with `--dist loadgroup` to ensure safe parallel
+        test execution.
         """
-        implementation_list = test["implementations"]
-        if not implementation_list:
-            pytest.skip(
-                "Cannot perform validation tests, since there are no valid implementations."
-            )
-        for implementation in implementation_list:
-            if not isinstance(implementation, StencilObject):
-                raise RuntimeError("Wrong function got from implementations_db cache!")
+        implementation = test["implementation"]
+        assert (
+            implementation is not None
+        ), "Stencil not yet generated. Did you attempt to run stencil tests in parallel?"
+        assert isinstance(implementation, StencilObject)

-            cls._run_test_implementation(parameters_dict, implementation)
+        cls._run_test_implementation(parameters_dict, implementation)
```
**Review comment:**

> Simplified since we don't have an array of implementations (anymore). Assert that the stencil code has been generated; if not, fail instead of skipping. This surfaces more errors when code generation fails. IMO the best way to handle this is to get rid of the idea of having two tests per class (one for codegen and one for validation). We could achieve the same level of parallelization with less glue code if we had just one test per class, with codegen and validation inside the same test.
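A hypothetical sketch of that suggestion (all names below are placeholders, not gt4py API): with codegen and validation inside one test, nothing has to survive across test boundaries, so the cache and the `xdist_group` glue disappear.

```python
# One-test-per-class idea, sketched with placeholder stand-ins.
def _generate(test):
    # stand-in for gtscript.stencil(...) codegen
    return lambda field: field

def _validate(implementation, parameters_dict):
    # stand-in for cls._run_test_implementation(...)
    assert implementation(parameters_dict["field"]) == parameters_dict["field"]

def test_stencil_codegen_and_validation():
    test, parameters_dict = {}, {"field": 1.0}
    implementation = _generate(test)            # codegen step
    _validate(implementation, parameters_dict)  # validation step, same test
```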
**Review comment:**

> This might have been different in the past. The way we cache implementations now, there is only ever at most one implementation per test context.