-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprov-to-opm.sh
executable file
·92 lines (65 loc) · 2.66 KB
/
prov-to-opm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/bin/bash
# Announce the run on stdout, then start opm.xml from scratch with the
# <opmGraph> root element, the single "base" account and the opening
# <processes> tag.
printf '%s\n' 'Generating OPM for a single run'
rm -f opm.xml
# TODO make swift-opm-ns into a proper URI
{
  printf '%s\n' '<opmGraph xmlns="http://openprovenance.org/model/v1.01.a" xmlns:swift="tag:[email protected],2008:swift:opm:20090311">'
  printf '%s\n' '<accounts><account id="base" /></accounts>'
  printf '%s\n' '<processes>'
} > opm.xml
# Emit one OPM <process> element per execution record.
#
# Input (stdin): whitespace-separated records with the fields
#   time duration globalthread localthread endstate app scratch
# where scratch receives the remainder of the line.
# Output (stdout): one <process> element per record. The element id is the
# global thread name with every ':', ',', '@' and '.' replaced by '-'.
# NOTE: field values are inserted verbatim; they are not XML-escaped.
emit_opm_processes() {
  local time duration globalthread localthread endstate app scratch threadid
  # -r keeps backslashes in the data intact instead of treating them as
  # escape characters.
  while read -r time duration globalthread localthread endstate app scratch; do
    # A record without a thread name (e.g. a blank line) cannot be given an
    # id, so it is skipped rather than emitted as <process id="">.
    [[ -n "$globalthread" ]] || continue
    # Builtin substitution: no sed fork per record and no word splitting or
    # globbing of the thread name.
    threadid=${globalthread//[.,:@]/-}
    printf ' <process id="%s">\n' "$threadid"
    printf ' <account id="base" />\n'
    printf ' <swift:info starttime="%s" duration="%s" endstate="%s" app="%s" scratch="%s"/>\n' \
      "$time" "$duration" "$endstate" "$app" "$scratch"
    printf ' <swift:uri>%s</swift:uri>\n' "$globalthread"
    # TODO no value here - this is some URI into an ontology, and it is not
    # yet clear how that should be mapped from Swift
    printf ' </process>\n'
  done
}
emit_opm_processes < execute.global.event >> opm.xml
# Close the <processes> section and open the <artifacts> section with a
# single append to opm.xml.
# TODO artifacts
printf '%s\n' '</processes>' '<artifacts>' >> opm.xml
# We need a list of all artifacts here. For now, take everything we can
# find in the tie-data-invocs and containment tables, de-duplicated.
# This is probably the wrong thing to do.
#
# Print both dataset handles of every containment record, one per line.
#
# Input (stdin): whitespace-separated "outer inner" records; inner receives
# the remainder of the line.
# Output (stdout): the outer handle, then the inner handle, per record.
list_container_handles() {
  local outer inner
  # -r keeps backslashes in the data intact.
  while read -r outer inner; do
    # Emit the fields that were actually read. Empty fields (blank or short
    # lines) are dropped so they do not turn into empty handles.
    if [[ -n "$outer" ]]; then
      printf '%s\n' "$outer"
    fi
    if [[ -n "$inner" ]]; then
      printf '%s\n' "$inner"
    fi
  done
}
list_container_handles < tie-containers.txt > tmp-dshandles.txt
# Print the dataset handle (third whitespace-separated field) of every
# tie-data-invocs record, one per line.
#
# Input (stdin): records whose first three fields are read as
#   thread direction dataset
# with any further text ignored.
# Output (stdout): one dataset handle per record that has one.
list_invocation_handles() {
  local thread direction dataset rest
  # -r keeps backslashes in the data intact.
  while read -r thread direction dataset rest; do
    # Records with fewer than three fields carry no dataset; printing an
    # empty line for them would add an empty handle to the artifact list.
    if [[ -n "$dataset" ]]; then
      printf '%s\n' "$dataset"
    fi
  done
}
list_invocation_handles < tie-data-invocs.txt >> tmp-dshandles.txt
# Sort the collected handles and drop duplicates.
sort tmp-dshandles.txt | uniq > tmp-dshandles2.txt
# Emit one OPM <artifact> element per dataset handle.
#
# Input (stdin): one dataset handle per line.
# Output (stdout): one <artifact> element per non-empty handle. The element
# id is the handle with every ':', ',', '@' and '.' replaced by '-'.
# NOTE: handles are inserted verbatim; they are not XML-escaped.
emit_opm_artifacts() {
  local artifact artifactid
  # -r keeps backslashes in the handle intact.
  while read -r artifact; do
    # An empty line would otherwise become <artifact id="">.
    [[ -n "$artifact" ]] || continue
    # Builtin substitution: no sed fork per handle and no word splitting or
    # globbing of the handle.
    artifactid=${artifact//[.,:@]/-}
    printf ' <artifact id="%s">\n' "$artifactid"
    printf ' <account id="base" />\n'
    printf ' <swift:uri>%s</swift:uri>\n' "$artifact"
    printf ' </artifact>\n'
  done
}
emit_opm_artifacts < tmp-dshandles2.txt >> opm.xml
# Close the <artifacts> section and open the <causalDependencies> section.
{
  echo '</artifacts>'
  echo '<causalDependencies>'
} >> opm.xml
# Other sections can be written in any order, but here we probably need two
# passes, one for each relation, in order to satisfy the schema.
# For now it is done in a single pass.
#
# Emit one causal dependency element per tie-data-invocs record.
#
# Input (stdin): whitespace-separated records with the fields
#   thread direction dataset variable [rest...]
# Output (stdout): a <used> element when direction is "input" (the process
# is the effect, the dataset the cause); a <wasGeneratedBy> element for any
# other direction (the dataset is the effect, the process the cause).
# Ids are the thread/dataset names with ':', ',', '@' and '.' replaced by '-'.
# NOTE: field values are inserted verbatim; they are not XML-escaped.
emit_opm_dependencies() {
  local thread direction dataset variable rest datasetid threadid
  # -r keeps backslashes in the data intact.
  while read -r thread direction dataset variable rest; do
    # A blank line has no thread; skip it instead of emitting a
    # <wasGeneratedBy> element with empty ids.
    [[ -n "$thread" ]] || continue
    # Builtin substitution: no sed forks per record and no word splitting
    # or globbing of the names.
    datasetid=${dataset//[.,:@]/-}
    threadid=${thread//[.,:@]/-}
    if [[ "$direction" == "input" ]]; then
      printf ' <used>\n'
      printf ' <effect id="%s" />\n' "$threadid"
      printf ' <role value="%s" />\n' "$variable"
      printf ' <cause id="%s" />\n' "$datasetid"
      printf ' <account id="base" />\n'
      printf ' </used>\n'
    else
      printf ' <wasGeneratedBy>\n'
      printf ' <effect id="%s" />\n' "$datasetid"
      printf ' <role value="%s" />\n' "$variable"
      printf ' <cause id="%s" />\n' "$threadid"
      printf ' <account id="base" />\n'
      printf ' </wasGeneratedBy>\n'
    fi
  done
}
emit_opm_dependencies < tie-data-invocs.txt >> opm.xml
# Close the dependency section and the document root, then report
# completion on stdout.
printf '%s\n' '</causalDependencies>' '</opmGraph>' >> opm.xml
printf '%s\n' 'Finished generating OPM, in opm.xml'