Skip to content

Commit f431bbc

Browse files
committed
Use the DOI field to build a missing url in the Zotero JSON data when building the bibliography.json file.
Make the generation of debugging files conditional (controlled by flags) in update_bibliography.sh.
1 parent f382c53 commit f431bbc

File tree

2 files changed

+169
-37
lines changed

2 files changed

+169
-37
lines changed

scripts/bib-fns.jq

+21-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ def unwrapDiv:
1212
# Move a non-blank .URL value into .url and delete .URL.
# Inputs for which nonBlankKey("URL") is false are passed through unchanged.
# (The earlier `select(...) | (...) // .` form parsed as
# `select(...) | ((...) // .)`, so such inputs produced no output at all and
# the `// .` fallback was never reached.)
# NOTE(review): nonBlankKey is defined elsewhere in this file; presumably it
# tests that the key exists with a non-empty value -- confirm.
def moveURL_to_url:
  if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end;
1414

15+
# Build a resolvable URL string from a DOI field value.
#   $doi - string taken from an item's DOI field.
# Output:
#   - $doi unchanged when it is already a URL (starts with "https:" or "http:");
#     a bare DOI never starts with a URL scheme, so prefixing such a value
#     would produce a broken "https://doi.org/http://..." link.
#   - otherwise "https://doi.org/" followed by the DOI with any leading
#     "doi:" / "DOI:" label and any single leading "/" removed.
def make_DOI_to_url($doi):
  if ($doi | (startswith("https:") or startswith("http:"))) then
    $doi
  else
    "https://doi.org/" + ($doi | ltrimstr("doi:") | ltrimstr("DOI:") | ltrimstr("/"))
  end ;
17+
1518
def cleanAbstracts:
1619
if blankKey("abstract") and nonBlankKey("abstractNote") then
1720
setpath(["abstract"]; .abstractNote) | del(.abstractNote)
@@ -69,4 +72,21 @@ def getNeededUrls: # assumes that array of all current items is the input
6972
.[0] as $k # get the key
7073
| select(($keys | all(. != $k))) # if this key isn't already in the $keys
7174
| .[1]?) # include this url in this collected array
72-
;
75+
;
76+
77+
# def intersection(x;y):
78+
# if (y|length) == 0 then
79+
# []
80+
# else (x|unique) as $x
81+
# | $x - ($x - y)
82+
# end;
83+
84+
# Flatten one Zotero item (assumes that only one item is the input):
# the item's own top-level fields come first (minus the "csljson" and "data"
# wrappers), followed by the fields of csljson merged with data (data wins on
# conflicts), minus the inner "key" and "version". Where a name appears in both
# groups the inner value replaces the top-level one. Each group is emitted in
# sorted key order.
def semiflatten:
  ((.csljson // {}) * .data) as $merged
  | [ (del(.csljson, .data) | to_entries | sort_by(.key) | .[]),
      ($merged | del(.key, .version) | to_entries | sort_by(.key) | .[]) ]
  | from_entries;
92+

scripts/update_bibliography.sh

+148-36
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,28 @@
22

33
set -e
44

5+
rawItemsFile=true
6+
debugFiles=false
7+
tagFiles=false
8+
typeFiles=false
9+
# The collection files will be created only if directly querying Zotero API.
10+
collectionFiles=false
11+
12+
dfn="00"
13+
function debugFileName() {
  # Print the next numbered debug file name, "NN-<name>.json", on stdout.
  #   $1 - the file NAME string (without the number prefix or extension)
  #   $2 - the previous instance of the generated debugFileName (or a bare
  #        two-digit count); the "count" is in its leading 2 characters.
  #        Defaults to "00" when omitted.
  local name=$1
  local cfn=${2:-00}
  local count=${cfn:0:2}
  # A prefix that is not exactly two digits restarts the numbering at 00
  # instead of producing a negative or invalid number.
  [[ $count =~ ^[0-9]{2}$ ]] || count="00"
  # Increment the count before using it. The 10# prefix forces base 10 so a
  # leading 0 (e.g. "08") is not parsed as octal. Arguments are quoted so a
  # name containing spaces or glob characters is emitted verbatim.
  printf '%02d-%s.json\n' "$(( 10#$count + 1 ))" "$name"
}
22+
523
GROUP_ID=2914042
624

725
items=""
26+
collections=""
827

928
function add_items_from_collection () {
1029
local collection_key="$1"
@@ -20,84 +39,177 @@ function add_items_from_collection () {
2039
[[ $(jq '. | length' <<< "$this_page") > 0 ]] || break
2140
done
2241

42+
local subcollections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.')
43+
collections=$(jq -s 'add' <<< "$subcollections$collections")
44+
2345
# Recurse into subcollections
2446
while read subcollection_key; do
2547
add_items_from_collection $subcollection_key
26-
done < <(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].key')
48+
done < <(jq -r '.[].key' <<< "$subcollections")
2749
}
2850

2951
root_collection="FSK5IX4F"
3052
if [[ $# -eq 0 ]] ; then
53+
# Initialize with the root collection.
54+
collections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/top" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.' | jq "map(select(.key==\"$root_collection\"))")
3155
add_items_from_collection $root_collection
32-
else
33-
items=$(<$1)
34-
fi
35-
36-
# echo "$items" > 00-rawItems-pre-needed.json
56+
if $collectionFiles ; then
57+
echo "$collections" > collections.json
58+
fi
59+
echo "$(jq '. | length' <<< "$collections") collections"
3760

38-
while read neededUrl; do
61+
while read neededUrl; do
3962
item=$(curl -s "$neededUrl?include=data,csljson&v=3")
4063
items=$(jq -s 'add' <<< "[$item]$items")
41-
done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")
42-
64+
done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")
65+
66+
grouped_collections=$(jq 'group_by(.parentCollection)' <<< "$collections")
67+
# This is not yet fully hierarchically nested.
68+
if $collectionFiles ; then
69+
echo "$grouped_collections" > grouped_collections.json
70+
fi
71+
# Only if we got the raw items from the Zotero API
72+
if $rawItemsFile ; then
73+
echo "$items" > 00-rawItems.json
74+
fi
75+
else
76+
items=$(<$1)
77+
fi
4378

4479
echo "Got $(jq '. | length' <<< "$items") items"
4580

4681
# Piece-wise processing for debugging:
47-
# echo "$items" > 0-rawItems.json
48-
49-
items=$(jq 'map(.csljson * .data)' <<< "$items")
50-
# echo "$items" > 1-mapped.json
82+
items=$(jq 'include "./bib-fns";map(semiflatten)' <<< "$items")
83+
if $debugFiles ; then
84+
dfn=$(debugFileName "semiflattened" $dfn)
85+
echo "$items" > "$dfn"
86+
fi
5187
items=$(jq 'include "./bib-fns";map(if has("note") then .note |= unwrapDiv else . end)' <<< "$items")
52-
# echo "$items" > 2-cleanDiv.json
88+
if $debugFiles ; then
89+
dfn=$(debugFileName "cleanDiv" $dfn)
90+
echo "$items" > "$dfn"
91+
fi
5392
#find and eliminate duplicate .key entries
5493
groupedByKey=$(jq 'group_by(.key)' <<< "$items")
55-
dupIDs=$(jq 'map(select(length>1) | .[0])' <<< "$groupedByKey")
56-
# echo "$dupIDs" > dupIDs.json
94+
if $debugFiles ; then
95+
dupIDs=$(jq 'map(select(length>1) | {key: .[0].key, items: .})' <<< "$groupedByKey")
96+
dfn=$(debugFileName "dupIDs" $dfn)
97+
echo "$dupIDs" > "$dfn"
98+
fi
99+
57100
#remove duplicates
58101
items=$(jq 'map(.[0]) | sort_by(.date)' <<< "$groupedByKey")
59-
# echo "$items" > 3-noDupSorted.json
102+
if $debugFiles ; then
103+
dfn=$(debugFileName "noDupSorted" $dfn)
104+
echo "$items" > "$dfn"
105+
fi
106+
if $typeFiles ; then
107+
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
108+
echo "$types" > titleKeyTypeInfo.json
109+
types=$(jq 'group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:.}))})' <<<"$items")
110+
echo "$types" > fullTypeInfo.json
111+
fi
112+
if $tagFiles ; then
113+
tags=$(jq 'map(.tags) | flatten | map(.tag) | unique' <<<"$items")
114+
echo "$tags" > tags.json
115+
fi
60116
# consolidate URL into url field
61117
items=$(jq 'include "./bib-fns";map(if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end)' <<< "$items")
62-
# echo "$items" > consolidated.json
118+
if $debugFiles ; then
119+
dfn=$(debugFileName "consolidated" $dfn)
120+
echo "$items" > "$dfn"
121+
fi
122+
123+
# Use DOI to create url where needed and available
124+
items=$(jq 'include "./bib-fns";map(if (blankKey("url") and nonBlankKey("DOI")) then setpath(["url"]; make_DOI_to_url(.DOI)) else . end)' <<< "$items")
125+
if $debugFiles ; then
126+
dfn=$(debugFileName "DOI-url" $dfn)
127+
echo "$items" > "$dfn"
128+
fi
63129

64130
items=$(jq 'include "./bib-fns";map(getTargetInfo)' <<< "$items")
65-
# echo "$items" > withTargetInfo.json
131+
if $debugFiles ; then
132+
dfn=$(debugFileName "withTargetInfo" $dfn)
133+
echo "$items" > "$dfn"
134+
fi
66135

67136
# now use children items to amend the info for the parentItem
68137
allFixupInfo=$(jq 'group_by(.target)|map(sort_by(.parentItem))' <<< "$items")
69-
# echo "$allFixupInfoGrouped" > allFixupInfoGrouped.json
138+
if $debugFiles ; then
139+
dfn=$(debugFileName "allFixupInfo" $dfn)
140+
echo "$allFixupInfo" > "$dfn"
141+
fi
70142
# embed children into their parentItem (children field). Then delete items that Zotero indicated as such.
71143
items=$(jq 'map(if has(1) then (first + {children: .[1:]}) else first end)|map(select(.deleted|not))' <<< "$allFixupInfo")
72-
# echo "$items" > itemsNestedChildren.json
144+
if $debugFiles ; then
145+
dfn=$(debugFileName "itemsNestedChildren" $dfn)
146+
echo "$items" > "$dfn"
147+
fi
73148

74-
# echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
149+
echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
75150
items=$(jq 'include "./bib-fns";map(applyChildrenAmendments)' <<< "$items")
76-
# echo "$items" > updatedItems.json
77-
78-
# absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
79-
# echo "$absNote" > absNoteMismatch.json
151+
if $debugFiles ; then
152+
dfn=$(debugFileName "updatedItems" $dfn)
153+
echo "$items" > "$dfn"
154+
fi
155+
absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
156+
if $debugFiles ; then
157+
dfn=$(debugFileName "absNoteMismatch" $dfn)
158+
echo "$absNote" > "$dfn"
159+
fi
80160
# remove redundant .abstractNote fields
81161
items=$(jq 'include "./bib-fns";map(cleanAbstracts)' <<< "$items")
82-
# echo "$items" > abstractsCleaned.json
162+
if $debugFiles ; then
163+
dfn=$(debugFileName "abstractsCleaned" $dfn)
164+
echo "$items" > "$dfn"
165+
fi
166+
if $typeFiles ; then
167+
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
168+
echo "$types" > finalTitleKeyTypeInfo.json
169+
fi
83170
noURL=$(jq 'include "./bib-fns";map(select(blankKey("url")))' <<< "$items")
84-
# echo "$noURL" > noURL.json
171+
if $debugFiles ; then
172+
dfn=$(debugFileName "noURL" $dfn)
173+
echo "$noURL" > "$dfn"
174+
fi
85175
finalCount=$(jq '. | length' <<< "$items")
86-
# Remove .children arrays, if any. Save space.
87-
items=$(jq 'map(del(.children))' <<< "$items")
176+
# # Remove .children arrays, if any. Save space.
177+
# items=$(jq 'map(del(.children))' <<< "$items")
178+
# if $debugFiles ; then
179+
# dfn=$(debugFileName "withoutChildrenArray")
180+
# echo "$items" > "$dfn"
181+
# fi
88182
# Group by year
89183
items=$(jq 'group_by(.issued."date-parts"[0][0])' <<< "$items")
90-
# echo "$items" > 4-grouped.json
184+
if $debugFiles ; then
185+
dfn=$(debugFileName "grouped" $dfn)
186+
echo "$items" > "$dfn"
187+
fi
91188
items=$(jq 'map({ (.[0].issued."date-parts"[0][0] // "Undated" | tostring): . })' <<< "$items")
92-
# echo "$items" > 5-Undated.json
189+
if $debugFiles ; then
190+
dfn=$(debugFileName "Undated" $dfn)
191+
echo "$items" > "$dfn"
192+
fi
93193
items=$(jq 'map(to_entries)' <<< "$items")
94-
# echo "$items" > 6-entries.json
194+
if $debugFiles ; then
195+
dfn=$(debugFileName "entries" $dfn)
196+
echo "$items" > "$dfn"
197+
fi
95198
items=$(jq 'flatten(1)' <<< "$items")
96-
# echo "$items" > 7-flattened.json
199+
if $debugFiles ; then
200+
dfn=$(debugFileName "flattened" $dfn)
201+
echo "$items" > "$dfn"
202+
fi
97203
items=$(jq 'group_by(.key)' <<< "$items")
98-
# echo "$items" > 8-groupedByKey.json
204+
if $debugFiles ; then
205+
dfn=$(debugFileName "groupedByKey" $dfn)
206+
echo "$items" > "$dfn"
207+
fi
99208
items=$(jq 'include "./bib-fns";map(reconstituteGroupedEntries) | from_entries' <<< "$items")
100-
# echo "$items" > 9-final.json
209+
if $debugFiles ; then
210+
dfn=$(debugFileName "final" $dfn)
211+
echo "$items" > "$dfn"
212+
fi
101213

102214

103215
echo "Outputting CSL JSON"

0 commit comments

Comments
 (0)