Skip to content

Commit 03af16c

Browse files
authored
Merge pull request #1194 from common-workflow-language/toil-prov
3rd party cwltprov support (toil-cwl-runner, will help others)
2 parents ba16b65 + 25cac6c commit 03af16c

File tree

5 files changed

+17
-12
lines changed

5 files changed

+17
-12
lines changed

cwltool/executors.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ def run_jobs(self,
159159
user_provenance=False,
160160
orcid=runtime_context.orcid,
161161
# single tool execution, so RO UUID = wf UUID = tool UUID
162-
run_uuid=runtime_context.research_obj.ro_uuid)
162+
run_uuid=runtime_context.research_obj.ro_uuid,
163+
fsaccess=runtime_context.make_fs_access(''))
163164
process.parent_wf = process.provenance_object
164165
jobiter = process.job(job_order_object, self.output_callback,
165166
runtime_context)
@@ -178,6 +179,7 @@ def run_jobs(self,
178179
prov_obj = job.prov_obj
179180
if prov_obj:
180181
runtime_context.prov_obj = prov_obj
182+
prov_obj.fsaccess = runtime_context.make_fs_access('')
181183
prov_obj.evaluate(
182184
process, job, job_order_object,
183185
runtime_context.research_obj)

cwltool/main.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ def main(argsl=None, # type: Optional[List[str]]
600600
_logger.error("--provenance incompatible with --no-compute-checksum")
601601
return 1
602602
ro = ResearchObject(
603+
getdefault(runtimeContext.make_fs_access, StdFsAccess),
603604
temp_prefix_ro=args.tmpdir_prefix, orcid=args.orcid,
604605
full_name=args.cwl_full_name)
605606
runtimeContext.research_obj = ro

cwltool/provenance.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,11 @@ def __init__(self,
305305
host_provenance, # type: bool
306306
user_provenance, # type: bool
307307
orcid, # type: str
308+
fsaccess, # type: StdFsAccess
308309
run_uuid=None # type: Optional[uuid.UUID]
309310
): # type: (...) -> None
310311
"""Initialize the provenance profile."""
312+
self.fsaccess = fsaccess
311313
self.orcid = orcid
312314
self.research_object = research_object
313315
self.folder = self.research_object.folder
@@ -504,8 +506,7 @@ def declare_file(self, value):
504506
if not entity and 'location' in value:
505507
location = str(value['location'])
506508
# If we made it here, we'll have to add it to the RO
507-
fsaccess = StdFsAccess("")
508-
with fsaccess.open(location, "rb") as fhandle:
509+
with self.fsaccess.open(location, "rb") as fhandle:
509510
relative_path = self.research_object.add_data_file(fhandle)
510511
# FIXME: This naively relies on add_data_file setting hash as filename
511512
checksum = PurePath(relative_path).name
@@ -597,8 +598,7 @@ def declare_directory(self, value): # type: (MutableMapping[Text, Any]) -> Prov
597598
is_empty = True
598599

599600
if "listing" not in value:
600-
fsaccess = StdFsAccess("")
601-
get_listing(fsaccess, value)
601+
get_listing(self.fsaccess, value)
602602
for entry in value.get("listing", []):
603603
is_empty = False
604604
# Declare child-artifacts
@@ -945,8 +945,8 @@ def finalize_prov_profile(self, name):
945945
class ResearchObject():
946946
"""CWLProv Research Object."""
947947

948-
def __init__(self, temp_prefix_ro="tmp", orcid='', full_name=''):
949-
# type: (str, Text, Text) -> None
948+
def __init__(self, fsaccess, temp_prefix_ro="tmp", orcid='', full_name=''):
949+
# type: (StdFsAccess, str, Text, Text) -> None
950950
"""Initialize the ResearchObject."""
951951
self.temp_prefix = temp_prefix_ro
952952
self.orcid = '' if not orcid else _valid_orcid(orcid)
@@ -962,7 +962,7 @@ def __init__(self, temp_prefix_ro="tmp", orcid='', full_name=''):
962962
self._external_aggregates = [] # type: List[Dict[Text, Text]]
963963
self.annotations = [] # type: List[Dict[Text, Any]]
964964
self._content_types = {} # type: Dict[Text,str]
965-
965+
self.fsaccess = fsaccess
966966
# These should be replaced by generate_prov_doc when workflow/run IDs are known:
967967
self.engine_uuid = "urn:uuid:%s" % uuid.uuid4()
968968
self.ro_uuid = uuid.uuid4()
@@ -1561,8 +1561,7 @@ def _relativise_files(self, structure):
15611561
# Register in RO; but why was this not picked
15621562
# up by used_artefacts?
15631563
_logger.info("[provenance] Adding to RO %s", structure["location"])
1564-
fsaccess = StdFsAccess("")
1565-
with fsaccess.open(structure["location"], "rb") as fp:
1564+
with self.fsaccess.open(structure["location"], "rb") as fp:
15661565
relative_path = self.add_data_file(fp)
15671566
checksum = PurePosixPath(relative_path).name
15681567
structure["checksum"] = "%s$%s" % (SHA1, checksum)

cwltool/workflow.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,8 @@ def __init__(self,
522522
host_provenance=loadingContext.host_provenance,
523523
user_provenance=loadingContext.user_provenance,
524524
orcid=loadingContext.orcid,
525-
run_uuid=run_uuid) # inherit RO UUID for master wf run
525+
run_uuid=run_uuid,
526+
fsaccess=loadingContext.research_obj.fsaccess) # inherit RO UUID for master wf run
526527
# TODO: Is Workflow(..) only called when we are the master workflow?
527528
self.parent_wf = self.provenance_object
528529

@@ -582,6 +583,7 @@ def job(self,
582583
if runtimeContext.research_obj is not None:
583584
if runtimeContext.toplevel:
584585
# Record primary-job.json
586+
runtimeContext.research_obj.fsaccess = runtimeContext.make_fs_access('')
585587
runtimeContext.research_obj.create_job(builder.job, self.job)
586588

587589
job = WorkflowJob(self, runtimeContext)

tests/test_provenance.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from cwltool.main import main
2525
from cwltool.resolver import Path
2626
from cwltool.context import RuntimeContext
27+
from cwltool.stdfsaccess import StdFsAccess
2728

2829
from .util import get_data, needs_docker, temp_dir, working_directory
2930

@@ -513,7 +514,7 @@ def check_prov(base_path, nested=False, single_tool=False, directory=False,
513514

514515
@pytest.fixture
515516
def research_object():
516-
re_ob = provenance.ResearchObject()
517+
re_ob = provenance.ResearchObject(StdFsAccess(''))
517518
yield re_ob
518519
re_ob.close()
519520

0 commit comments

Comments
 (0)