Skip to content

Commit 0a2475c

Browse files
stxue1mr-c
andcommitted
caching: enhance consideration of EnvVarRequirements
Especially with regards to overrides and `--preserve{-entire,}-environement` Added more tests for environment variable caching Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent 3995bdf commit 0a2475c

7 files changed

+126
-13
lines changed

cwltool/command_line_tool.py

+8
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,14 @@ def remove_prefix(s: str, prefix: str) -> str:
876876
if cls in interesting and cls not in keydict:
877877
keydict[cls] = r
878878

879+
# If there are environmental variables to preserve, add it to the key
880+
env_var_requirement = cast(dict[str, str], keydict.get("EnvVarRequirement", {}))
881+
env_def = cast(CWLObjectType, self.get_requirement("EnvVarRequirement")[0] or {})
882+
if runtimeContext.preserve_environment is not None:
883+
env_def.update(JobBase.extract_environment(runtimeContext, env_var_requirement))
884+
885+
if env_def:
886+
keydict["EnvVarRequirement"] = env_def
879887
keydictstr = json_dumps(keydict, separators=(",", ":"), sort_keys=True)
880888
cachekey = hashlib.md5(keydictstr.encode("utf-8")).hexdigest() # nosec
881889

cwltool/job.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,29 @@ def _preserve_environment_on_containers_warning(
469469
# By default, don't do anything; ContainerCommandLineJob below
470470
# will issue a warning.
471471

472+
@staticmethod
473+
def extract_environment(
474+
runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
475+
) -> Mapping[str, str]:
476+
"""Extract environment variables that should be preserved."""
477+
# Start empty
478+
env: dict[str, str] = {}
479+
# Preserve any env vars
480+
if runtimeContext.preserve_entire_environment:
481+
env.update(os.environ)
482+
elif runtimeContext.preserve_environment:
483+
for key in runtimeContext.preserve_environment:
484+
try:
485+
env[key] = os.environ[key]
486+
except KeyError:
487+
_logger.warning(
488+
f"Attempting to preserve environment variable {key!r} which is not present"
489+
)
490+
# Apply EnvVarRequirement
491+
env.update(envVarReq)
492+
493+
return env
494+
472495
def prepare_environment(
473496
self, runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
474497
) -> None:
@@ -481,27 +504,16 @@ def prepare_environment(
481504
"""
482505
# Start empty
483506
env: dict[str, str] = {}
484-
485-
# Preserve any env vars
486507
if runtimeContext.preserve_entire_environment:
487508
self._preserve_environment_on_containers_warning()
488-
env.update(os.environ)
489509
elif runtimeContext.preserve_environment:
490510
self._preserve_environment_on_containers_warning(runtimeContext.preserve_environment)
491-
for key in runtimeContext.preserve_environment:
492-
try:
493-
env[key] = os.environ[key]
494-
except KeyError:
495-
_logger.warning(
496-
f"Attempting to preserve environment variable {key!r} which is not present"
497-
)
511+
512+
env.update(self.extract_environment(runtimeContext, envVarReq))
498513

499514
# Set required env vars
500515
env.update(self._required_env())
501516

502-
# Apply EnvVarRequirement
503-
env.update(envVarReq)
504-
505517
# Set on ourselves
506518
self.environment = env
507519

tests/cache_environment.yml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cwl:requirements:
2+
- class: EnvVarRequirement
3+
envDef:
4+
- envName: TEST_ENV
5+
envValue: value1

tests/cache_environment2.yml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
cwl:requirements:
2+
- class: EnvVarRequirement
3+
envDef:
4+
- envName: TEST_ENV
5+
envValue: value2

tests/cache_environment_tool.cwl

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
class: CommandLineTool
2+
cwlVersion: v1.2
3+
inputs:
4+
[]
5+
outputs:
6+
[]
7+
8+
baseCommand: [bash, "-c", "echo $TEST_ENV"]

tests/test_environment.py

+28
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,34 @@ def test_preserve_single(
242242
assert_env_matches(checks, env)
243243

244244

245+
@CRT_PARAMS
246+
def test_preserve_single_missing(
247+
crt_params: CheckHolder,
248+
tmp_path: Path,
249+
monkeypatch: pytest.MonkeyPatch,
250+
caplog: pytest.LogCaptureFixture,
251+
) -> None:
252+
"""Test that attempting to preserve an unset env var produces a warning."""
253+
tmp_prefix = str(tmp_path / "canary")
254+
args = crt_params.flags + [
255+
f"--tmpdir-prefix={tmp_prefix}",
256+
"--preserve-environment=RANDOMVAR",
257+
]
258+
env = get_tool_env(
259+
tmp_path,
260+
args,
261+
monkeypatch=monkeypatch,
262+
runtime_env_accepts_null=crt_params.env_accepts_null,
263+
)
264+
checks = crt_params.checks(tmp_prefix)
265+
assert "RANDOMVAR" not in checks
266+
assert (
267+
"cwltool:job.py:487 Attempting to preserve environment variable "
268+
"'RANDOMVAR' which is not present"
269+
) in caplog.text
270+
assert_env_matches(checks, env)
271+
272+
245273
@CRT_PARAMS
246274
def test_preserve_all(
247275
crt_params: CheckHolder, tmp_path: Path, monkeypatch: pytest.MonkeyPatch

tests/test_examples.py

+47
Original file line numberDiff line numberDiff line change
@@ -1233,6 +1233,53 @@ def test_secondary_files_v1_0(tmp_path: Path, factor: str) -> None:
12331233
assert error_code == 0
12341234

12351235

1236+
@pytest.mark.parametrize("factor", test_factors)
1237+
def test_cache_environment_variable(tmp_path: Path, factor: str) -> None:
1238+
"""Ensure that changing the environment variables will result in different cache keys"""
1239+
test_file = "cache_environment_tool.cwl"
1240+
test_job_file = "cache_environment.yml"
1241+
cache_dir = str(tmp_path / "cwltool_cache")
1242+
commands = factor.split()
1243+
commands.extend(
1244+
[
1245+
"--cachedir",
1246+
cache_dir,
1247+
"--outdir",
1248+
str(tmp_path / "outdir"),
1249+
get_data("tests/" + test_file),
1250+
get_data(f"tests/{test_job_file}"),
1251+
]
1252+
)
1253+
error_code, _, stderr = get_main_output(commands)
1254+
1255+
stderr = re.sub(r"\s\s+", " ", stderr)
1256+
assert "Output of job will be cached in" in stderr
1257+
assert error_code == 0, stderr
1258+
1259+
error_code, _, stderr = get_main_output(commands)
1260+
assert "Output of job will be cached in" not in stderr
1261+
assert error_code == 0, stderr
1262+
1263+
commands = factor.split()
1264+
test_job_file = "cache_environment2.yml"
1265+
commands.extend(
1266+
[
1267+
"--cachedir",
1268+
cache_dir,
1269+
"--outdir",
1270+
str(tmp_path / "outdir"),
1271+
get_data("tests/" + test_file),
1272+
get_data(f"tests/{test_job_file}"),
1273+
]
1274+
)
1275+
1276+
error_code, _, stderr = get_main_output(commands)
1277+
1278+
stderr = re.sub(r"\s\s+", " ", stderr)
1279+
assert "Output of job will be cached in" in stderr
1280+
assert error_code == 0, stderr
1281+
1282+
12361283
def test_write_summary(tmp_path: Path) -> None:
12371284
"""Test --write-summary."""
12381285
commands = [

0 commit comments

Comments
 (0)