Skip to content

Commit 32bfc99

Browse files
committed
Add fix variations of mf runner examples #598
Reuse all fix workflows from #654 and bring folders together as suggested by @blackwinter in #654 (comment)
1 parent 16a41b0 commit 32bfc99

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+1777
-14
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Creates a beacon file based on a pica+ dump of the DNB CBS data; "dump" has no default in this script and must be supplied by the caller.
2+
3+
default type = "ALL"; // presumably handed to extract.fix through the "*" argument below — confirm
4+
default out = dump + "-" + type + ".beacon"; // output name is built from the dump name and the type
5+
default header = FLUX_DIR + "header.txt";
6+
7+
8+
// read header: announce it on stdout, then feed the header file line by line into @Y
9+
"reading header " + header | write("stdout");
10+
header|open-file|as-lines|@Y;
11+
12+
// count references: announce the dump on stdout, then run the main pipeline, which also ends in @Y
13+
"counting references in " + dump | write("stdout");
14+
15+
dump
16+
| open-file
17+
| as-lines
18+
| catch-object-exception
19+
| decode-pica
20+
| batch-log(batchsize="100000")
21+
| fix(FLUX_DIR + "extract.fix", *)
22+
| stream-to-triples(redirect="true")
23+
| sort-triples(by="subject")
24+
| collect-triples
25+
| fix(FLUX_DIR + "output.fix")
26+
| batch-log("merged ${totalRecords}", batchsize="100000")
27+
| stream-to-triples
28+
| template("${s}") // emit only the subject of each triple
29+
| @Y;
30+
31+
@Y
32+
| wait-for-inputs("2") // two flows end in @Y above: the header lines and the main pipeline
33+
| write(out);
34+
35+
36+
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# 002@ is not repeatable; records whose 002@ $0 starts with "Tp" get an "ok" field.
2+
3+
if any_match("002@.0", "^Tp.*$")
4+
copy_field("002@.0","ok") # NOTE(review): source path restored from an obfuscated listing — confirm against the repository
5+
end
6+
7+
# <!-- DBSM: -->
8+
# <!-- (006U $0 “04p01*”) or (017A $a “yy”) -->
9+
if any_match("006U.0","04p01.*")
10+
add_field("@value","DBSM|ALL")
11+
elsif any_equal("017A.a","yy")
12+
add_field("@value","DBSM|ALL")
13+
14+
# <!-- DEA: -->
15+
# <!-- (001@ $a “2”) or (209A $f “Exilarchiv” or 209A $f “HB/EB”) -->
16+
elsif any_equal("001@.a","2")
17+
add_field("@value","DEA|ALL")
18+
elsif any_equal("209A.f","HB/EB") # NOTE(review): the rule above also lists “Exilarchiv”, which is not tested here — confirm
19+
add_field("@value","DEA|ALL")
20+
21+
# <!-- DMA: -->
22+
# <!-- (002@ $0 “G*” or 002@ $0 “M*”) or (006U $0 “10,P01*”) -->
23+
24+
elsif any_match("002@.0","^[GM].*") # the pattern is a regex, so any_match is needed; any_equal would compare it literally
25+
add_field("@value","DMA|ALL")
26+
27+
elsif any_match("006U.0","^10,P01.*") # the pattern is a regex, so any_match is needed; any_equal would compare it literally
28+
add_field("@value","DMA|ALL")
29+
else
30+
add_field("@value","ALL")
31+
end
32+
33+
# Test if the type variable fits: keep "@value" only when it contains $[type], otherwise remove it
34+
35+
if any_contain("@value","$[type]")
36+
add_field("@value","$[type]")
37+
else
38+
remove_field("@value")
39+
end
40+
41+
do list(path: "041A*|028A*|029B*|028C*|028Q*|028P*|028F*|028M*|028D*|028E*", "var":"$i") # for each listed field: use its subfield 9 as the reference
42+
trim("$i.9")
43+
to_var("$i.9","ref")
44+
if exists("$i.9")
45+
copy_field("@value","{to:$[ref]}refed") # field name carries the reference held in variable "ref"
46+
end
47+
end
48+
49+
retain("{to*","ok") # keep only the "{to…" fields and "ok"
50+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
unless exists("refed") # without a "refed" field the "ok" marker is dropped
2+
remove_field("ok","")
3+
end
4+
5+
unless exists("ok") # NOTE(review): removing "ok" when "ok" is absent looks like a no-op — confirm whether another field (e.g. "refed") was intended
6+
remove_field("ok","")
7+
end
8+
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
default fileName = FLUX_DIR + "gnd-sample.pica"; // input file used when the caller does not set fileName
2+
3+
fileName
4+
| open-file
5+
| as-lines
6+
| decode-pica
7+
| fix(FLUX_DIR + "gnd-type.fix")
8+
| stream-to-triples
9+
| count-triples(countBy="object") // counting is keyed on the object of each triple
10+
| template("${s}\t${o}") // one line per triple: subject, a tab, then object
11+
| write("stdout");
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
if any_match("002@.0","...*") # pattern needs at least two characters in 002@ $0; other records are rejected below
2+
replace_all("002@.0","^(..).*","$1") # only keep the first two letters
3+
retain("002@.0") # only keep the relevant element
4+
else
5+
reject()
6+
end
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
do list(path:"041A*|041A","var":"$i") # Until https://github.com/metafacture/metafacture-core/issues/651 is fixed one has to add "041A"
2+
copy_field("$i.9","relevantField.$append") # collect subfield 9 of every matched field in "relevantField"
3+
end
4+
5+
trim("relevantField.*")
6+
uniq("relevantField")
7+
8+
retain("relevantField") # keep only the collected values
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
default counts=FLUX_DIR + "counts.dat"; // file the counts are written to
3+
default catalogue = FLUX_DIR + "10.pica"; // input file read by the pipeline below
4+
5+
// count references: announce the input on stdout, then write the triple counts to "counts"
6+
"counting references in " + catalogue | write("stdout");
7+
8+
catalogue
9+
| open-file
10+
| as-lines
11+
| catch-object-exception
12+
| decode-pica
13+
| fix(FLUX_DIR + "references.fix")
14+
| stream-to-triples
15+
| count-triples(countBy="object") // counting is keyed on the object of each triple
16+
| write(counts);
17+
18+
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
if any_match("002@.0","^Tp.*") # only records whose 002@ $0 starts with "Tp" can pass
2+
if any_match("041R.a",".*[Aa][Rr][Zz][Tt].*") # 041R $a must contain "arzt" in any letter case
3+
nothing()
4+
else
5+
reject()
6+
end
7+
else
8+
reject()
9+
end
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// Opens file 'fileName', interprets the content as pica, filters the records with filter.fix and prints the result as verbose formeta on stdout.
2+
3+
default fileName = FLUX_DIR + "gnd-sample.pica";
4+
5+
fileName
6+
| open-file
7+
| as-lines
8+
| decode-pica
9+
| fix(FLUX_DIR + "filter.fix") // Fix does not use the filter function but has its own filter mechanism within fix.
10+
| encode-formeta(style="verbose")
11+
| write("stdout");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
default base = ""; // prefix prepended to the output file name
2+
default dump = FLUX_DIR + "10.pica"; // input file read by the pipeline below
3+
default out = base + "Ts1-Tg1-without-crisscross.txt";
4+
5+
"counting references in " + dump | write("stdout"); // message on stdout before the pipeline starts
6+
7+
dump
8+
| open-file
9+
| as-lines
10+
| catch-object-exception
11+
| decode-pica
12+
| batch-log(batchsize="100000")
13+
| fix(FLUX_DIR + "extract.fix")
14+
| stream-to-triples(redirect="true")
15+
| sort-triples(by="subject")
16+
| collect-triples
17+
| fix(FLUX_DIR + "output.fix")
18+
| batch-log(batchsize="100000")
19+
| encode-csv(noquotes="true",separator=";") // CSV with ";" as separator and quoting switched off
20+
| write(out);

0 commit comments

Comments
 (0)