Skip to content

Commit d785162

Browse files
authored
Merge pull request #242 from Interlisp/mth2-use-DOI-for-url-bibliography
Use the DOI field to build missing URL for bibliography.json file.
2 parents 81e5519 + bab6f36 commit d785162

File tree

2 files changed

+169
-37
lines changed

2 files changed

+169
-37
lines changed

scripts/bib-fns.jq

+21-1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ def unwrapDiv:
1212
# Consolidate an item's "URL" value into the lowercase "url" key.
#   input : a single item object
#   output: when nonBlankKey("URL") holds, the item with .url set to the old
#           .URL value and the "URL" key removed; otherwise the item unchanged.
# The earlier form `select(...) | (...) // .` emitted nothing at all for items
# without a usable URL: `//` binds tighter than `|`, so the `// .` fallback
# sat downstream of the failed select and was never reached.
def moveURL_to_url:
  if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end;
1414

15+
# Build a resolvable URL from a DOI field value.
#   $doi  : string taken from an item's DOI field
#   output: $doi unchanged when it already is an http(s) URL; otherwise
#           "https://doi.org/" followed by the DOI, with a leading "doi:"
#           label and one leading "/" removed.
# Accepting "http:" as well as "https:" (case-insensitively) keeps values such
# as "http://dx.doi.org/10.1/x" from being wrapped in a second resolver prefix.
def make_DOI_to_url($doi):
  if ($doi | test("^https?:"; "i")) then
    $doi
  else
    "https://doi.org/" + ($doi | ltrimstr("doi:") | ltrimstr("/"))
  end;
17+
1518
def cleanAbstracts:
1619
if blankKey("abstract") and nonBlankKey("abstractNote") then
1720
setpath(["abstract"]; .abstractNote) | del(.abstractNote)
@@ -69,4 +72,21 @@ def getNeededUrls: # assumes that array of all current items is the input
6972
.[0] as $k # get the key
7073
| select(($keys | all(. != $k))) # if this key isn't already in the $keys
7174
| .[1]?) # include this url in this collected array
72-
;
75+
;
76+
77+
# def intersection(x;y):
78+
# if (y|length) == 0 then
79+
# []
80+
# else (x|unique) as $x
81+
# | $x - ($x - y)
82+
# end;
83+
84+
# Flatten one item by hoisting the contents of .csljson and .data to top level.
#   input : a single item object (not the array of items)
#   output: an object holding
#             - every top-level key of the item except "csljson" and "data", and
#             - every key of (.csljson deep-merged with .data, .data winning)
#               except "key" and "version", so the item's own top-level
#               "key"/"version" are the ones that survive.
#           Where a name occurs in both sets, the merged inner value wins
#           because its entry comes later in the list given to from_entries.
# Both .csljson and .data fall back to {} when absent or null; without the
# fallback on .data, `object * null` raises an error.
def semiflatten: # assumes that only one item is the input
  . as $item
  | ((.csljson // {}) * (.data // {})) as $inner
  | ( (($item | keys) - ["csljson", "data"] | map({key: ., value: ($item[.])}))
    + (($inner | keys) - ["key", "version"] | map({key: ., value: ($inner[.])})) )
  | from_entries;
92+

scripts/update_bibliography.sh

+148-36
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,28 @@
22

33
set -e
44

5+
rawItemsFile=true
6+
debugFiles=false
7+
tagFiles=false
8+
typeFiles=false
9+
# The collection files will be created only if directly querying Zotero API.
10+
collectionFiles=false
11+
12+
dfn="00"
13+
#######################################
# Produce the next numbered debug file name, "NN-<name>.json".
# Arguments:
#   $1 - file NAME string (without the number prefix or extension)
#   $2 - previous value returned by this function (or "00" to start);
#        only its leading 2 characters, the running count, are used.
#        Defaults to "00" when omitted or empty.
# Outputs:
#   Writes the new file name, with the count incremented by one and
#   zero-padded to two digits, to stdout.
#######################################
debugFileName() {
  local name=$1
  local previous=${2:-00}
  # 10# forces base-10 so a leading zero ("08", "09") is not parsed as octal.
  local count=$(( 10#${previous:0:2} + 1 ))
  # Quote the name so one containing whitespace stays a single printf argument.
  printf '%02d-%s.json\n' "$count" "$name"
}
22+
523
GROUP_ID=2914042
624

725
items=""
26+
collections=""
827

928
function add_items_from_collection () {
1029
local collection_key="$1"
@@ -20,84 +39,177 @@ function add_items_from_collection () {
2039
[[ $(jq '. | length' <<< "$this_page") > 0 ]] || break
2140
done
2241

42+
local subcollections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.')
43+
collections=$(jq -s 'add' <<< "$subcollections$collections")
44+
2345
# Recurse into subcollections
2446
while read subcollection_key; do
2547
add_items_from_collection $subcollection_key
26-
done < <(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/$collection_key/collections" | jq -r '.[].key')
48+
done < <(jq -r '.[].key' <<< "$subcollections")
2749
}
2850

2951
root_collection="FSK5IX4F"
3052
if [[ $# -eq 0 ]] ; then
53+
# Initialize with the root collection.
54+
collections=$(curl -s "https://api.zotero.org/groups/$GROUP_ID/collections/top" | jq -r '.[].data|{key, name, parentCollection}' | jq -s '.' | jq "map(select(.key==\"$root_collection\"))")
3155
add_items_from_collection $root_collection
32-
else
33-
items=$(<$1)
34-
fi
35-
36-
# echo "$items" > 00-rawItems-pre-needed.json
56+
if $collectionFiles ; then
57+
echo "$collections" > collections.json
58+
fi
59+
echo "$(jq '. | length' <<< "$collections") collections"
3760

38-
while read neededUrl; do
61+
while read neededUrl; do
3962
item=$(curl -s "$neededUrl?include=data,csljson&v=3")
4063
items=$(jq -s 'add' <<< "[$item]$items")
41-
done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")
42-
64+
done < <(jq -r 'include "./bib-fns";getNeededUrls|.[]' <<< "$items")
65+
66+
grouped_collections=$(jq 'group_by(.parentCollection)' <<< "$collections")
67+
# This is not yet fully hierarchically nested.
68+
if $collectionFiles ; then
69+
echo "$grouped_collections" > grouped_collections.json
70+
fi
71+
# Only if we got the raw items from the Zotero API
72+
if $rawItemsFile ; then
73+
echo "$items" > 00-rawItems.json
74+
fi
75+
else
76+
items=$(<$1)
77+
fi
4378

4479
echo "Got $(jq '. | length' <<< "$items") items"
4580

4681
# Piece-wise processing for debugging:
47-
# echo "$items" > 0-rawItems.json
48-
49-
items=$(jq 'map(.csljson * .data)' <<< "$items")
50-
# echo "$items" > 1-mapped.json
82+
items=$(jq 'include "./bib-fns";map(semiflatten)' <<< "$items")
83+
if $debugFiles ; then
84+
dfn=$(debugFileName "semiflattened" $dfn)
85+
echo "$items" > "$dfn"
86+
fi
5187
items=$(jq 'include "./bib-fns";map(if has("note") then .note |= unwrapDiv else . end)' <<< "$items")
52-
# echo "$items" > 2-cleanDiv.json
88+
if $debugFiles ; then
89+
dfn=$(debugFileName "cleanDiv" $dfn)
90+
echo "$items" > "$dfn"
91+
fi
5392
#find and eliminate duplicate .key entries
5493
groupedByKey=$(jq 'group_by(.key)' <<< "$items")
55-
dupIDs=$(jq 'map(select(length>1) | .[0])' <<< "$groupedByKey")
56-
# echo "$dupIDs" > dupIDs.json
94+
if $debugFiles ; then
95+
dupIDs=$(jq 'map(select(length>1) | {key: .[0].key, items: .})' <<< "$groupedByKey")
96+
dfn=$(debugFileName "dupIDs" $dfn)
97+
echo "$dupIDs" > "$dfn"
98+
fi
99+
57100
#remove duplicates
58101
items=$(jq 'map(.[0]) | sort_by(.date)' <<< "$groupedByKey")
59-
# echo "$items" > 3-noDupSorted.json
102+
if $debugFiles ; then
103+
dfn=$(debugFileName "noDupSorted" $dfn)
104+
echo "$items" > "$dfn"
105+
fi
106+
if $typeFiles ; then
107+
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
108+
echo "$types" > titleKeyTypeInfo.json
109+
types=$(jq 'group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:.}))})' <<<"$items")
110+
echo "$types" > fullTypeInfo.json
111+
fi
112+
if $tagFiles ; then
113+
tags=$(jq 'map(.tags) | flatten | map(.tag) | unique' <<<"$items")
114+
echo "$tags" > tags.json
115+
fi
60116
# consolidate URL into url field
61117
items=$(jq 'include "./bib-fns";map(if nonBlankKey("URL") then setpath(["url"]; .URL) | del(.URL) else . end)' <<< "$items")
62-
# echo "$items" > consolidated.json
118+
if $debugFiles ; then
119+
dfn=$(debugFileName "consolidated" $dfn)
120+
echo "$items" > "$dfn"
121+
fi
122+
123+
# Use DOI to create url where needed and available
124+
items=$(jq 'include "./bib-fns";map(if (blankKey("url") and nonBlankKey("DOI")) then setpath(["url"]; make_DOI_to_url(.DOI)) else . end)' <<< "$items")
125+
if $debugFiles ; then
126+
dfn=$(debugFileName "DOI-url" $dfn)
127+
echo "$items" > "$dfn"
128+
fi
63129

64130
items=$(jq 'include "./bib-fns";map(getTargetInfo)' <<< "$items")
65-
# echo "$items" > withTargetInfo.json
131+
if $debugFiles ; then
132+
dfn=$(debugFileName "withTargetInfo" $dfn)
133+
echo "$items" > "$dfn"
134+
fi
66135

67136
# now use children items to amend the info for the parentItem
68137
allFixupInfo=$(jq 'group_by(.target)|map(sort_by(.parentItem))' <<< "$items")
69-
# echo "$allFixupInfoGrouped" > allFixupInfoGrouped.json
138+
if $debugFiles ; then
139+
dfn=$(debugFileName "allFixupInfo" $dfn)
140+
echo "$allFixupInfo" > "$dfn"
141+
fi
70142
# embed children into their parentItem (children field). Then delete items that Zotero indicated as such.
71143
items=$(jq 'map(if has(1) then (first + {children: .[1:]}) else first end)|map(select(.deleted|not))' <<< "$allFixupInfo")
72-
# echo "$items" > itemsNestedChildren.json
144+
if $debugFiles ; then
145+
dfn=$(debugFileName "itemsNestedChildren" $dfn)
146+
echo "$items" > "$dfn"
147+
fi
73148

74-
# echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
149+
echo "Got $(jq '. | length' <<< "$items") clean, top-level items"
75150
items=$(jq 'include "./bib-fns";map(applyChildrenAmendments)' <<< "$items")
76-
# echo "$items" > updatedItems.json
77-
78-
# absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
79-
# echo "$absNote" > absNoteMismatch.json
151+
if $debugFiles ; then
152+
dfn=$(debugFileName "updatedItems" $dfn)
153+
echo "$items" > "$dfn"
154+
fi
155+
absNote=$(jq 'include "./bib-fns";map(select((nonBlankKey("abstract") or nonBlankKey("abstractNote")) and (.abstract != .abstractNote)) | {key: .key, title: .title, abstract: .abstract, abstractNote: .abstractNote})' <<< "$items")
156+
if $debugFiles ; then
157+
dfn=$(debugFileName "absNoteMismatch" $dfn)
158+
echo "$absNote" > "$dfn"
159+
fi
80160
# remove redundant .abstractNote fields
81161
items=$(jq 'include "./bib-fns";map(cleanAbstracts)' <<< "$items")
82-
# echo "$items" > abstractsCleaned.json
162+
if $debugFiles ; then
163+
dfn=$(debugFileName "abstractsCleaned" $dfn)
164+
echo "$items" > "$dfn"
165+
fi
166+
if $typeFiles ; then
167+
types=$(jq 'map({key, title, type, itemType}) | group_by(.type) | map({type:.[0].type, itemTypes:(group_by(.itemType)|map({itemType:.[0].itemType, items:map({title, key})}))})' <<<"$items")
168+
echo "$types" > finalTitleKeyTypeInfo.json
169+
fi
83170
noURL=$(jq 'include "./bib-fns";map(select(blankKey("url")))' <<< "$items")
84-
# echo "$noURL" > noURL.json
171+
if $debugFiles ; then
172+
dfn=$(debugFileName "noURL" $dfn)
173+
echo "$noURL" > "$dfn"
174+
fi
85175
finalCount=$(jq '. | length' <<< "$items")
86-
# Remove .children arrays, if any. Save space.
87-
items=$(jq 'map(del(.children))' <<< "$items")
176+
# # Remove .children arrays, if any. Save space.
177+
# items=$(jq 'map(del(.children))' <<< "$items")
178+
# if $debugFiles ; then
179+
# dfn=$(debugFileName "withoutChildrenArray" $dfn)
180+
# echo "$items" > "$dfn"
181+
# fi
88182
# Group by year
89183
items=$(jq 'group_by(.issued."date-parts"[0][0])' <<< "$items")
90-
# echo "$items" > 4-grouped.json
184+
if $debugFiles ; then
185+
dfn=$(debugFileName "grouped" $dfn)
186+
echo "$items" > "$dfn"
187+
fi
91188
items=$(jq 'map({ (.[0].issued."date-parts"[0][0] // "Undated" | tostring): . })' <<< "$items")
92-
# echo "$items" > 5-Undated.json
189+
if $debugFiles ; then
190+
dfn=$(debugFileName "Undated" $dfn)
191+
echo "$items" > "$dfn"
192+
fi
93193
items=$(jq 'map(to_entries)' <<< "$items")
94-
# echo "$items" > 6-entries.json
194+
if $debugFiles ; then
195+
dfn=$(debugFileName "entries" $dfn)
196+
echo "$items" > "$dfn"
197+
fi
95198
items=$(jq 'flatten(1)' <<< "$items")
96-
# echo "$items" > 7-flattened.json
199+
if $debugFiles ; then
200+
dfn=$(debugFileName "flattened" $dfn)
201+
echo "$items" > "$dfn"
202+
fi
97203
items=$(jq 'group_by(.key)' <<< "$items")
98-
# echo "$items" > 8-groupedByKey.json
204+
if $debugFiles ; then
205+
dfn=$(debugFileName "groupedByKey" $dfn)
206+
echo "$items" > "$dfn"
207+
fi
99208
items=$(jq 'include "./bib-fns";map(reconstituteGroupedEntries) | from_entries' <<< "$items")
100-
# echo "$items" > 9-final.json
209+
if $debugFiles ; then
210+
dfn=$(debugFileName "final" $dfn)
211+
echo "$items" > "$dfn"
212+
fi
101213

102214

103215
echo "Outputting CSL JSON"

0 commit comments

Comments
 (0)