Skip to content

Commit 06c1955

Browse files
committed
Keep single records for every id
Created objects under the path "records", one per id, each with a different id but the same rvk elements. Encoded them as JSON and re-read them with "records" as the record path, so that each object becomes its own record. This way a single record is created for each id.
1 parent 239469d commit 06c1955

File tree

3 files changed

+23
-13
lines changed

3 files changed

+23
-13
lines changed
+3-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
"HT013166356, HT018625006, TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5603","CI 5604","EC 2430","IH 34381"
1+
"HT013166356","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"
2+
"HT018625006","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"
3+
"TT000577460","CI 5310","CI 5603","CI 1100","CI 1125","CI 5604","EC 2430","IH 34381"

Concordance-RVK-Verbundbibliothek/culturegraph_to_Rvk-Verbundbibliothek_concordance_csv.flux

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
//
1212
// curl -XPOST --header 'Content-Type: application/x-ndjson' -d @bulk.ndjson 'http://localhost:9200/_bulk'
1313

14-
default outfile = FLUX_DIR + "bulk.json";
14+
default outfile = FLUX_DIR + "bulk.csv";
1515
default infile = FLUX_DIR + "aggregate_auslieferung_20191212.small.marcxml.gz";
1616
default fixfile = FLUX_DIR + "fix-cg-to-es.fix";
1717

@@ -21,7 +21,8 @@ infile
2121
| decode-xml
2222
| handle-marcxml
2323
| fix(fixfile)
24+
| encode-json
25+
| decode-json(recordPath="records")
2426
| encode-csv
25-
//encode-json
2627
| write(outfile)
2728
;
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,41 @@
1-
set_array("id")
2-
set_array("rvk[]")
1+
set_array("records[]")
2+
set_array("@id[]")
3+
set_array("@rvk[]")
34

45
do list(path: "084??", "var": "$i")
56
if any_match("$i.2", "rvk")
6-
copy_field("$i.a","rvk[].$append")
7+
copy_field("$i.a","@rvk[].$append")
78
end
89
end
910

11+
uniq("@rvk[]")
12+
1013

1114
do list(path: "035??", "var": "$i")
1215
if any_match("$i.a", "^\\(DE-605\\)(.*)")
13-
copy_field("$i.a","id.$append")
16+
copy_field("$i.a","@id[].$append")
1417
end
1518
end
16-
replace_all("id.*","^\\(DE-605\\)(.*)","$1")
17-
join_field("id",", ")
19+
replace_all("id[].*","^\\(DE-605\\)(.*)","$1")
20+
21+
do list(path: "@id[]", "var": "$i")
22+
copy_field("$i","records[].$append.id")
23+
copy_field("@rvk[]","records[].$last.rvk[]")
24+
end
25+
replace_all("records[].*.id","^\\(DE-605\\)(.*)","$1")
1826

19-
retain("rvk[]","id")
2027
vacuum()
2128

2229
# Filter records without RVK
23-
unless exists("rvk[]")
30+
unless exists("@rvk[]")
2431
reject()
2532
end
2633

2734
# Filter records without hbz ids
28-
unless exists("id")
35+
unless exists("@id[]")
2936
reject()
3037
end
3138

32-
39+
retain("records[]")
3340

3441

0 commit comments

Comments
 (0)