-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubjects.xquery
83 lines (76 loc) · 4.04 KB
/
subjects.xquery
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import module namespace bod = "http://www.bodleian.ox.ac.uk/bdlss" at "https://raw.githubusercontent.com/bodleian/consolidated-tei-schema/master/msdesc2solr.xquery";
declare namespace tei="http://www.tei-c.org/ns/1.0";
declare option saxon:output "indent=yes";
(:
   Query body: emits a Solr-style <add> wrapper holding one <doc> for every
   item in ../authority/subjects.xml that carries an @xml:id AND is
   referenced by @key from at least one TEI document in ../collections.
   Subjects that no manuscript references are not output; they are passed
   to bod:logging (declared in the imported msdesc2solr module) instead.

   NOTE: XQuery comments are only recognised inside enclosed expressions,
   so every comment below sits inside { } or outside element content.
:)
<add>
{
    let $doc := doc("../authority/subjects.xml")
    let $collection := collection('../collections?select=*.xml;recurse=yes')
    let $subjects := $doc//tei:list/tei:item[@xml:id]
    (: Every key used on a place name anywhere in the collection; a subject
       whose id appears here is additionally typed as a 'Place'. :)
    let $placekeys := distinct-values($collection//(tei:placeName|tei:name[@type='place'])/@key)

    for $subject in $subjects
        let $id := $subject/@xml:id/string()
        (: Display name: the 'display' term, or a 'variant' term when it is the
           first term of the item. The [1] guarantees a single string even when
           both kinds are present (a leading variant plus a later display term);
           without it normalize-space() is handed a sequence and raises a type
           error. The same [1] is used when resolving related subjects below. :)
        let $name := normalize-space($subject/tei:term[@type = 'display' or (@type = 'variant' and not(preceding-sibling::tei:term))][1]/string())
        let $isplace := boolean($id = $placekeys)
        let $islcsh := starts-with($id, 'subject_s')
        let $islcn := starts-with($id, 'subject_n')
        let $variants := $subject/tei:term[@type="variant"]
        let $noteitems := $subject/tei:note[@type="links"]//tei:item
        (: TEI documents containing at least one term / place name keyed to this subject. :)
        let $mss := $collection//tei:TEI[.//(tei:term|tei:placeName|tei:name[@type='place'])[@key = $id]]
        (: Types = every whitespace-separated token of @role on the referencing
           elements, plus labels derived from the place check and the id prefix. :)
        let $types := distinct-values((
            $mss//(tei:term|tei:placeName|tei:name[@type='place'])[@key = $id]/@role/tokenize(normalize-space(.), ' '),
            if ($isplace) then 'Place' else (),
            if ($islcsh) then 'Library of Congress Subject Heading' else (),
            if ($islcn) then 'Library of Congress Name Authority' else ()
        ))
        return
            if (count($mss) > 0) then
                <doc>
                    <field name="type">subject</field>
                    <field name="pk">{ $id }</field>
                    <field name="id">{ $id }</field>
                    <field name="title">{ $name }</field>
                    <field name="alpha_title">{ bod:alphabetize($name) }</field>
                    <field name="sb_name_s">{ $name }</field>
                    {
                        (: One field per type in alphabetical order, or a single
                           placeholder value when no type could be determined. :)
                        if (count($types) > 0) then
                            for $type in $types
                            order by $type
                            return <field name="sb_type_sm">{ $type }</field>
                        else
                            <field name="sb_type_sm">Not Specified</field>
                    }
                    {
                        (: All variant terms, whitespace-normalized and sorted. :)
                        for $variant in $variants
                        let $vname := normalize-space($variant/string())
                        order by $vname
                        return <field name="sb_variant_sm">{ $vname }</field>
                    }
                    {
                        (: External links taken from the 'links' note, emitted as
                           "target|title". Sort keys are the first ref of each
                           item, then the link target. :)
                        for $item in $noteitems
                        let $refs := $item//tei:ref
                        order by $refs[1]
                        for $ref in $refs
                        let $linktarget := $ref/string(@target)
                        let $linktext := $ref/normalize-space(tei:title/string())
                        order by $linktarget
                        return <field name="link_external_smni">{ concat($linktarget, "|", $linktext)}</field>
                    }
                    {
                        (: One "url|classmark" link per referencing manuscript,
                           sorted by classmark. :)
                        for $ms in $mss
                        let $msid := $ms/@xml:id/string()
                        let $url := concat("/catalog/", $msid)
                        let $classmark := $ms//tei:msDesc/tei:msIdentifier/tei:idno[1]/text()
                        order by $classmark
                        return <field name="link_manuscripts_smni">{ concat($url, "|", $classmark) }</field>
                    }
                    {
                        (: Related subjects listed in @corresp / @sameAs as
                           space-separated '#id' pointers. The value is
                           whitespace-normalized before tokenizing so that
                           leading, trailing or repeated spaces do not yield
                           empty ids (which would emit a bogus "/catalog/|"
                           link). The link text is normalized the same way as
                           the subject's own title. :)
                        for $relatedid in distinct-values((
                            tokenize(normalize-space(translate($subject/@corresp, '#', '')), ' '),
                            tokenize(normalize-space(translate($subject/@sameAs, '#', '')), ' ')
                        ))
                        let $url := concat("/catalog/", $relatedid)
                        let $linktext := normalize-space($doc//tei:list/tei:item[@xml:id = $relatedid]/tei:term[@type = 'display' or (@type = 'variant' and not(preceding-sibling::tei:term))][1]/string())
                        return
                            <field name="link_related_smni">{ concat($url, "|", $linktext) }</field>
                    }
                </doc>
            else
                bod:logging('info', 'Skipping subject in authority file but not in any manuscript', ($id, $name))
}
</add>