Skip to content

Commit 81b1e1e

Browse files
committed
fix requirements and pep8
1 parent 68c9581 commit 81b1e1e

File tree

5 files changed

+62
-93
lines changed

5 files changed

+62
-93
lines changed

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ jinja2
55
python-dateutil
66
click
77
prov
8-
schema_salad
8+
typing-extensions

rocrate/provenance_profile.py

+38-51
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
1-
import copy
2-
import pdb
31
import datetime
4-
import logging
52
import urllib
63
import uuid
74
import json
8-
from io import BytesIO
95
from pathlib import PurePath, PurePosixPath
10-
from socket import getfqdn
116
from typing import (
127
Any,
138
Dict,
14-
Iterable,
159
List,
16-
MutableMapping,
1710
MutableSequence,
1811
Optional,
1912
Tuple,
@@ -23,9 +16,7 @@
2316

2417
from prov.identifier import Identifier
2518
from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity
26-
from schema_salad.sourceline import SourceLine
27-
from typing_extensions import TYPE_CHECKING
28-
from tools.load_ga_export import load_ga_history_export, GalaxyJob, GalaxyDataset
19+
from tools.load_ga_export import load_ga_history_export, GalaxyJob
2920
from ast import literal_eval
3021
import os
3122

@@ -36,16 +27,12 @@
3627
from rocrate.provenance_constants import (
3728
ACCOUNT_UUID,
3829
CWLPROV,
39-
ENCODING,
40-
FOAF,
4130
METADATA,
4231
ORE,
4332
PROVENANCE,
4433
RO,
4534
SCHEMA,
4635
SHA1,
47-
SHA256,
48-
TEXT_PLAIN,
4936
UUID,
5037
WF4EVER,
5138
WFDESC,
@@ -59,15 +46,17 @@
5946
# from rocrate.provenance import ResearchObject
6047

6148
from pathlib import Path
62-
import rocrate.rocrate as roc
49+
6350

6451
def posix_path(local_path: str) -> str:
    """Return ``local_path`` rendered as a POSIX-style path string.

    The input is first interpreted as a path of the running platform and
    then re-expressed as a pure POSIX path.

    :param local_path: path in the local platform's notation.
    :return: the string form of the corresponding ``PurePosixPath``.
    """
    native = Path(local_path)
    converted = PurePosixPath(native)
    return str(converted)
6653

54+
6755
def remove_escapes(s):
    """Return ``s`` with ASCII control characters removed.

    Characters with code points 1 through 31 (tab, newline, carriage
    return, escape, ...) are deleted; every other character is kept
    unchanged.

    :param s: the string to clean.
    :return: a new string without those control characters.
    """
    # The third argument of str.maketrans lists the characters to delete.
    escapes = ''.join(chr(code) for code in range(1, 32))
    translator = str.maketrans('', '', escapes)
    # str.translate builds a new string (str is immutable), so the result
    # must be returned; it used to be discarded, which made the function
    # always return None.
    return s.translate(translator)
59+
7160

7261
def reassign(d):
7362
for k, v in d.items():
@@ -78,16 +67,17 @@ def reassign(d):
7867
except ValueError:
7968
pass
8069

70+
8171
class ProvenanceProfile:
82-
"""
72+
"""\
8373
Provenance profile.
8474
8575
Populated from a galaxy workflow export.
8676
"""
8777

8878
def __init__(
8979
self,
90-
ga_export: Dict,
80+
ga_export: Dict,
9181
full_name: str = None,
9282
orcid: str = None,
9383
# prov_name: str = None,
@@ -112,12 +102,11 @@ def __init__(
112102
self.base_uri = "arcp://uuid,%s/" % self.ro_uuid
113103
self.document = ProvDocument()
114104
# TODO extract engine_uuid from galaxy, type: str
115-
self.engine_uuid = "urn:uuid:%s" % uuid.uuid4() #type: str
105+
self.engine_uuid = "urn:uuid:%s" % uuid.uuid4() # type: str
116106
self.full_name = full_name
117107
self.workflow_run_uuid = run_uuid or uuid.uuid4()
118108
self.workflow_run_uri = self.workflow_run_uuid.urn # type: str
119-
120-
# move to separate function
109+
# move to separate function
121110
metadata_export = load_ga_history_export(ga_export)
122111
self.generate_prov_doc()
123112
self.jobs = []
@@ -143,7 +132,7 @@ def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
143132
# PROV_TYPE: FOAF["OnlineAccount"],
144133
# TODO: change how we register galaxy version, probably a declare_version func
145134
# self.galaxy_version = self.ga_export["jobs_attrs"][0]["galaxy_version"]
146-
# TODO: change notation to already imported namespaces?
135+
# TODO: change notation to already imported namespaces?
147136
self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
148137
# document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
149138
self.document.add_namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#")
@@ -166,7 +155,7 @@ def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
166155
"provenance", self.base_uri + posix_path(PROVENANCE) + "/"
167156
)
168157
# TODO: use appropriate refs for ga_export and related inputs
169-
ro_identifier_workflow = self.base_uri + "ga_export" + "/"
158+
ro_identifier_workflow = self.base_uri + "ga_export" + "/"
170159
self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
171160
ro_identifier_input = (
172161
self.base_uri + "ga_export/datasets#"
@@ -230,15 +219,15 @@ def declare_process(
230219
"""Record the start of each Process."""
231220
if process_run_id is None:
232221
process_run_id = uuid.uuid4().urn
233-
234-
cmd = ga_export_jobs_attrs["command_line"]
222+
223+
# cmd = ga_export_jobs_attrs["command_line"]
235224
process_name = ga_export_jobs_attrs["tool_id"]
236-
tool_version = ga_export_jobs_attrs["tool_version"]
225+
# tool_version = ga_export_jobs_attrs["tool_version"]
237226
prov_label = "Run of ga_export/jobs_attrs.txt#" + process_name
238227
start_time = ga_export_jobs_attrs["create_time"]
239228
end_time = ga_export_jobs_attrs["update_time"]
240229

241-
#TODO: Find out how to include commandline as a string
230+
# TODO: Find out how to include commandline as a string
242231
# cmd = self.document.entity(
243232
# uuid.uuid4().urn,
244233
# {PROV_TYPE: WFPROV["Artifact"], PROV_LABEL: ga_export_jobs_attrs["command_line"]}
@@ -249,9 +238,9 @@ def declare_process(
249238
start_time,
250239
end_time,
251240
{
252-
PROV_TYPE: WFPROV["ProcessRun"],
253-
PROV_LABEL: prov_label,
254-
#TODO: Find out how to include commandline as a string
241+
PROV_TYPE: WFPROV["ProcessRun"],
242+
PROV_LABEL: prov_label,
243+
# TODO: Find out how to include commandline as a string
255244
# PROV_LABEL: cmd
256245
},
257246
)
@@ -279,7 +268,7 @@ def used_artefacts(
279268
base += "/" + process_name
280269
tool_id = process_metadata["tool_id"]
281270
base += "/" + tool_id
282-
items = ["inputs","outputs","parameters"]
271+
items = ["inputs", "outputs", "parameters"]
283272
# print(process_metadata["params"])
284273
for item in items:
285274
# print(item)
@@ -293,8 +282,8 @@ def used_artefacts(
293282
value = json.loads(value)
294283
if isinstance(key, str):
295284
key = key.replace("|", "_")
296-
if isinstance(value, str):
297-
val = value.replace("|", "_")
285+
if isinstance(value, str):
286+
value = value.replace("|", "_")
298287

299288
prov_role = self.wf_ns[f"{base}/{key}"]
300289

@@ -307,7 +296,6 @@ def used_artefacts(
307296

308297
# for artefact in value:
309298
try:
310-
# pdb.set_trace()
311299
entity = self.declare_artefact(value)
312300
self.document.used(
313301
process_run_id,
@@ -346,7 +334,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
346334
# byte_s = BytesIO(value)
347335
# data_file = self.research_object.add_data_file(byte_s)
348336
# FIXME: Don't naively assume add_data_file uses hash in filename!
349-
data_id = "data:%s" % str(value) #PurePosixPath(data_file).stem
337+
data_id = "data:%s" % str(value) # PurePosixPath(data_file).stem
350338
return self.document.entity(
351339
data_id,
352340
{PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)},
@@ -383,7 +371,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
383371
)
384372

385373
if value.get("class"):
386-
#_logger.warning("Unknown data class %s.", value["class"])
374+
# _logger.warning("Unknown data class %s.", value["class"])
387375
# FIXME: The class might be "http://example.com/somethingelse"
388376
coll.add_asserted_type(CWLPROV[value["class"]])
389377

@@ -393,7 +381,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
393381
# clean up unwanted characters
394382
if isinstance(key, str):
395383
key = key.replace("|", "_")
396-
if isinstance(val, str):
384+
if isinstance(val, str):
397385
val = val.replace("|", "_")
398386

399387
v_ent = self.declare_artefact(val)
@@ -440,7 +428,7 @@ def declare_artefact(self, value: Any) -> ProvEntity:
440428
# FIXME: list value does not support adding "@id"
441429
return coll
442430
except TypeError:
443-
#_logger.warning("Unrecognized type %s of %r", type(value), value)
431+
# _logger.warning("Unrecognized type %s of %r", type(value), value)
444432
# Let's just fall back to Python repr()
445433
entity = self.document.entity(uuid.uuid4().urn, {PROV_LABEL: repr(value)})
446434
# self.research_object.add_uri(entity.identifier.uri)
@@ -455,7 +443,7 @@ def declare_file(self, value: Dict) -> Tuple[ProvEntity, ProvEntity, str]:
455443
if "checksum" in value:
456444
csum = cast(str, value["checksum"])
457445
(method, checksum) = csum.split("$", 1)
458-
if method == SHA1: # and self.research_object.has_data_file(checksum):
446+
if method == SHA1: # and self.research_object.has_data_file(checksum):
459447
entity = self.document.entity("data:" + checksum)
460448

461449
if not entity and "location" in value:
@@ -502,8 +490,8 @@ def declare_file(self, value: Dict) -> Tuple[ProvEntity, ProvEntity, str]:
502490

503491
# Check for secondaries
504492
for sec in cast(
505-
# MutableSequence[CWLObjectType],
506-
value.get("secondaryFiles", [])
493+
# MutableSequence[CWLObjectType],
494+
value.get("secondaryFiles", []) # noqa
507495
):
508496
# TODO: Record these in a specializationOf entity with UUID?
509497
if sec["class"] == "File":
@@ -524,8 +512,10 @@ def declare_file(self, value: Dict) -> Tuple[ProvEntity, ProvEntity, str]:
524512

525513
return file_entity, entity, checksum
526514

527-
def declare_directory(self
528-
# , value: CWLObjectType
515+
def declare_directory(
516+
self,
517+
# value: CWLObjectType
518+
value
529519
) -> ProvEntity:
530520
"""Register any nested files/directories."""
531521
# FIXME: Calculate a hash-like identifier for directory
@@ -636,12 +626,11 @@ def declare_string(self, value: str) -> Tuple[ProvEntity, str]:
636626
# checksum = PurePosixPath(data_file).name
637627
# FIXME: Don't naively assume add_data_file uses hash in filename!
638628
value = str(value).replace("|", "_")
639-
data_id = "data:%s" % str(value) #PurePosixPath(data_file).stem
629+
data_id = "data:%s" % str(value) # PurePosixPath(data_file).stem
640630
entity = self.document.entity(
641631
data_id, {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)}
642632
) # type: ProvEntity
643-
return entity #, checksum
644-
633+
return entity # , checksum
645634

646635
def generate_output_prov(
647636
self,
@@ -724,7 +713,7 @@ def activity_has_provenance(self, activity, prov_ids):
724713
self.document.activity(activity, other_attributes=attribs)
725714
# Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
726715
# as prov:mentionOf() is only for entities, not activities
727-
uris = [i.uri for i in prov_ids]
716+
# uris = [i.uri for i in prov_ids]
728717
# self.research_object.add_annotation(activity, uris, PROV["has_provenance"].uri)
729718

730719
def finalize_prov_profile(self, name=None, out_path=None):
@@ -759,7 +748,7 @@ def finalize_prov_profile(self, name=None, out_path=None):
759748

760749
# https://www.w3.org/TR/prov-xml/
761750
# serialized_prov_docs["xml"] = self.document.serialize(format="xml", indent=4)
762-
prov_ids.append(self.provenance_ns[filename + ".xml"])
751+
prov_ids.append(self.provenance_ns[filename + ".xml"])
763752
with open(basename + ".xml", "w") as provenance_file:
764753
self.document.serialize(provenance_file, format="xml", indent=4)
765754

@@ -768,7 +757,6 @@ def finalize_prov_profile(self, name=None, out_path=None):
768757
prov_ids.append(self.provenance_ns[filename + ".provn"])
769758
with open(basename + ".provn", "w") as provenance_file:
770759
self.document.serialize(provenance_file, format="provn", indent=2)
771-
772760

773761
# https://www.w3.org/Submission/prov-json/
774762
# serialized_prov_docs["json"] = self.document.serialize(format="json", indent=2)
@@ -799,7 +787,6 @@ def finalize_prov_profile(self, name=None, out_path=None):
799787
prov_ids.append(self.provenance_ns[filename + ".jsonld"])
800788
with open(basename + ".jsonld", "w") as provenance_file:
801789
self.document.serialize(provenance_file, format="rdf", rdf_format="json-ld")
802-
803790

804-
#_logger.debug("[provenance] added provenance: %s", prov_ids)
791+
# _logger.debug("[provenance] added provenance: %s", prov_ids)
805792
return (serialized_prov_docs, prov_ids)

rocrate/rocrate_api.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
# limitations under the License.
1919

2020
from pathlib import Path
21-
import os
2221

2322
import rocrate.rocrate as roc
2423
from rocrate.provenance_profile import ProvenanceProfile
@@ -79,6 +78,7 @@ def make_workflow_rocrate(workflow_path, wf_type, include_files=[],
7978

8079
return wf_crate
8180

81+
8282
# WIP
8383
def make_workflow_run_rocrate(workflow_path, wf_type, wfr_metadata_path, author=None, orcid=None,
8484
include_files=[], fetch_remote=False, prov_name=None, prov_path=None, cwl=None, diagram=None):
@@ -110,4 +110,4 @@ def make_workflow_run_rocrate(workflow_path, wf_type, wfr_metadata_path, author=
110110
for file_entry in include_files:
111111
wfr_crate.add_file(file_entry)
112112

113-
return wfr_crate
113+
return wfr_crate

0 commit comments

Comments
 (0)