Skip to content

Commit 809f4cc

Browse files
tetron and mr-c authored
Keep track of which files have already been parsed (#1786)
This is to avoid re-parsing when the workflow refers to multiple fragments in the same file.

* Depend on cwl-utils >= 0.22
* Add git to cwltool-docker
* Bump minimum schema-salad version to 8.4+

Co-authored-by: Michael R. Crusoe <[email protected]>
1 parent cb3160c commit 809f4cc

10 files changed

+97
-23
lines changed

.coveragerc

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ exclude_lines =
99
pragma: no cover
1010
raise NotImplementedError
1111
if __name__ == .__main__.:
12+
if TYPE_CHECKING:
1213
ignore_errors = True
1314
omit =
1415
tests/*

build-cwltool-docker.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ docker run -t -v /var/run/docker.sock:/var/run/docker.sock \
77
-v /tmp:/tmp \
88
-v "$PWD":/tmp/cwltool \
99
quay.io/commonwl/cwltool_module /bin/sh -c \
10-
"apk add gcc bash && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"
10+
"apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"

conformance-test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ fi
7373
venv cwl-conformance-venv
7474
pip install -U setuptools wheel pip
7575
pip uninstall -y cwltool
76-
pip install "${SCRIPT_DIRECTORY}"
76+
pip install "${SCRIPT_DIRECTORY}" -r"${SCRIPT_DIRECTORY}/requirements.txt"
7777
pip install cwltest>=2.3 pytest-cov pytest-xdist
7878

7979
# Set conformance test filename

cwltool.Dockerfile

+3-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ RUN apk add --no-cache git gcc python3-dev libxml2-dev libxslt-dev libc-dev linu
55
WORKDIR /cwltool
66
COPY . .
77

8-
RUN pip install toml -rmypy-requirements.txt
8+
RUN pip install toml -rmypy-requirements.txt -rrequirements.txt
99
RUN CWLTOOL_USE_MYPYC=1 MYPYPATH=mypy-stubs pip wheel --no-binary schema-salad --wheel-dir=/wheels .[deps]
1010
RUN rm /wheels/schema_salad*
1111
RUN pip install black
@@ -15,13 +15,13 @@ RUN pip install --force-reinstall --no-index --no-warn-script-location --root=/p
1515
# --force-reinstall to install our new mypyc compiled schema-salad package
1616

1717
FROM python:3.11-alpine as module
18-
LABEL maintainer peter.amstutz@curri.com
18+
LABEL maintainer peter.amstutz@curii.com
1919

2020
RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt
2121
COPY --from=builder /pythonroot/ /
2222

2323
FROM python:3.11-alpine
24-
LABEL maintainer peter.amstutz@curri.com
24+
LABEL maintainer peter.amstutz@curii.com
2525

2626
RUN apk add --no-cache docker nodejs graphviz libxml2 libxslt
2727
COPY --from=builder /pythonroot/ /

cwltool/load_tool.py

+30-14
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
ResolveType,
3333
json_dumps,
3434
)
35+
from schema_salad.fetcher import Fetcher
3536

3637
from ruamel.yaml.comments import CommentedMap, CommentedSeq
3738

@@ -318,8 +319,11 @@ def fast_parser(
318319
fileuri: Optional[str],
319320
uri: str,
320321
loadingContext: LoadingContext,
322+
fetcher: Fetcher,
321323
) -> Tuple[Union[CommentedMap, CommentedSeq], CommentedMap]:
322-
lopt = cwl_v1_2.LoadingOptions(idx=loadingContext.codegen_idx, fileuri=fileuri)
324+
lopt = cwl_v1_2.LoadingOptions(
325+
idx=loadingContext.codegen_idx, fileuri=fileuri, fetcher=fetcher
326+
)
323327

324328
if uri not in loadingContext.codegen_idx:
325329
cwl_v1_2.load_document_with_metadata(
@@ -359,18 +363,26 @@ def fast_parser(
359363
# Need to match the document loader's index with the fast parser index
360364
# Get the base URI (no fragments) for documents that use $graph
361365
nofrag = urllib.parse.urldefrag(uri)[0]
362-
objects, loadopt = loadingContext.codegen_idx[nofrag]
363-
fileobj = cmap(
364-
cast(
365-
Union[int, float, str, Dict[str, Any], List[Any], None],
366-
cwl_v1_2.save(objects, relative_uris=False),
366+
367+
flag = "fastparser-idx-from:" + nofrag
368+
if not loadingContext.loader.idx.get(flag):
369+
objects, loadopt = loadingContext.codegen_idx[nofrag]
370+
fileobj = cmap(
371+
cast(
372+
Union[int, float, str, Dict[str, Any], List[Any], None],
373+
cwl_v1_2.save(objects, relative_uris=False),
374+
)
367375
)
368-
)
369-
visit_class(
370-
fileobj,
371-
("CommandLineTool", "Workflow", "ExpressionTool"),
372-
partial(update_index, loadingContext.loader),
373-
)
376+
visit_class(
377+
fileobj,
378+
("CommandLineTool", "Workflow", "ExpressionTool"),
379+
partial(update_index, loadingContext.loader),
380+
)
381+
loadingContext.loader.idx[flag] = flag
382+
for u in lopt.imports:
383+
loadingContext.loader.idx["import:" + u] = "import:" + u
384+
for u in lopt.includes:
385+
loadingContext.loader.idx["include:" + u] = "include:" + u
374386

375387
return cast(
376388
Union[CommentedMap, CommentedSeq],
@@ -519,7 +531,9 @@ def resolve_and_validate_document(
519531
#
520532
processobj, metadata = document_loader.resolve_ref(uri)
521533
elif loadingContext.fast_parser:
522-
processobj, metadata = fast_parser(workflowobj, fileuri, uri, loadingContext)
534+
processobj, metadata = fast_parser(
535+
workflowobj, fileuri, uri, loadingContext, document_loader.fetcher
536+
)
523537
else:
524538
document_loader.resolve_all(workflowobj, fileuri)
525539
processobj, metadata = document_loader.resolve_ref(uri)
@@ -594,7 +608,9 @@ def make_tool(
594608
and isinstance(uri, str)
595609
and not loadingContext.skip_resolve_all
596610
):
597-
resolveduri, metadata = fast_parser(None, None, uri, loadingContext)
611+
resolveduri, metadata = fast_parser(
612+
None, None, uri, loadingContext, loadingContext.loader.fetcher
613+
)
598614
else:
599615
resolveduri, metadata = loadingContext.loader.resolve_ref(uri)
600616

requirements.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ ruamel.yaml>=0.15,<0.17.22
44
rdflib>=4.2.2,<6.3
55
rdflib>= 4.2.2, < 6.0.0;python_version<='3.6'
66
shellescape>=3.4.1,<3.9
7-
schema-salad>=8.2.20211104054942,<9
7+
schema-salad>=8.4,<9
88
prov==1.5.1
99
bagit==1.8.1
1010
mypy-extensions
@@ -15,4 +15,4 @@ pydot>=1.4.1
1515
argcomplete>=1.12.0
1616
pyparsing != 3.0.2 # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319
1717
pyparsing < 3;python_version<='3.6' # breaks --print-dot
18-
cwl-utils>=0.19
18+
cwl-utils>=0.22

setup.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@
110110
"rdflib >= 4.2.2, < 6.3.0",
111111
"rdflib >= 4.2.2, < 6.0.0;python_version<='3.6'",
112112
"shellescape >= 3.4.1, < 3.9",
113-
"schema-salad >= 8.2.20211104054942, < 9",
113+
"schema-salad >= 8.4, < 9",
114114
"mypy-extensions",
115115
"psutil >= 5.6.6",
116116
"prov == 1.5.1",
@@ -121,7 +121,7 @@
121121
"pyparsing != 3.0.2", # breaks --print-dot (pydot) https://github.com/pyparsing/pyparsing/issues/319
122122
"pyparsing < 3 ;python_version<='3.6'", # breaks --print-dot (pydot)
123123
"argcomplete",
124-
"cwl-utils >= 0.19",
124+
"cwl-utils >= 0.22",
125125
],
126126
extras_require={
127127
"deps": ["galaxy-tool-util >= 22.1.2, <23"],

tests/test_load_tool.py

+18
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,21 @@ def test_load_graph_fragment_from_packed() -> None:
130130

131131
finally:
132132
use_standard_schema("v1.0")
133+
134+
135+
def test_import_tracked() -> None:
136+
"""Test that $import and $include are tracked in the index."""
137+
138+
loadingContext = LoadingContext({"fast_parser": True})
139+
tool = load_tool(get_data("tests/wf/811-12.cwl"), loadingContext)
140+
path = "import:file://%s" % get_data("tests/wf/schemadef-type.yml")
141+
142+
assert tool.doc_loader is not None
143+
assert path in tool.doc_loader.idx
144+
145+
loadingContext = LoadingContext({"fast_parser": False})
146+
tool = load_tool(get_data("tests/wf/811.cwl"), loadingContext)
147+
path = "import:file://%s" % get_data("tests/wf/schemadef-type.yml")
148+
149+
assert tool.doc_loader is not None
150+
assert path in tool.doc_loader.idx

tests/wf/811-12.cwl

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
cwlVersion: v1.2
2+
class: Workflow
3+
4+
inputs:
5+
- id: hello
6+
type: Any
7+
outputs: []
8+
9+
steps:
10+
step:
11+
id: step
12+
run: schemadef-tool-12.cwl
13+
in:
14+
hello: hello
15+
out: []

tests/wf/schemadef-tool-12.cwl

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env cwl-runner
2+
class: CommandLineTool
3+
cwlVersion: v1.2
4+
hints:
5+
ResourceRequirement:
6+
ramMin: 8
7+
8+
requirements:
9+
- $import: schemadef-type.yml
10+
11+
inputs:
12+
- id: hello
13+
type: "schemadef-type.yml#HelloType"
14+
inputBinding:
15+
valueFrom: $(self.a)/$(self.b)
16+
17+
outputs:
18+
- id: output
19+
type: File
20+
outputBinding:
21+
glob: output.txt
22+
23+
stdout: output.txt
24+
baseCommand: echo

0 commit comments

Comments
 (0)