Skip to content

Commit 32abbb5

Browse files
committed
Update LexFCS Spec v0.3
1 parent b025be2 commit 32abbb5

14 files changed

+1279
-140
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
3+
xmlns:h="http://www.w3.org/1999/xhtml"
4+
xmlns:vc="http://www.w3.org/2007/XMLSchema-versioning"
5+
xmlns:xml="http://www.w3.org/XML/1998/namespace"
6+
xmlns:lex="http://clarin.eu/fcs/dataview/lex"
7+
xml:lang="en" vc:minVersion="1.1"
8+
targetNamespace="http://clarin.eu/fcs/dataview/lex" elementFormDefault="qualified">
9+
<xs:import namespace="http://www.w3.org/XML/1998/namespace" schemaLocation="http://www.w3.org/2001/xml.xsd"/>
10+
11+
<xs:annotation>
12+
<xs:documentation>
13+
<h:p>
14+
This schema defines the structure of an
15+
<h:em>advanced lexical result</h:em> data view for lexical resources.
16+
</h:p>
17+
<h:p>
18+
The value <h:code>application/x-clarin-fcs-lex+xml</h:code>
19+
MUST be used to indicate an <h:em>advanced lexical result</h:em> data view.
20+
</h:p>
21+
</xs:documentation>
22+
</xs:annotation>
23+
24+
<xs:element name="Entry"> <!-- one lexical entry: a non-empty list of typed Field children, optionally language-tagged -->
25+
<xs:complexType>
26+
<xs:sequence>
27+
<xs:element name="Field" type="lex:FieldType" minOccurs="1" maxOccurs="unbounded"/>
28+
</xs:sequence>
29+
<xs:attribute ref="xml:lang" />
30+
<xs:attribute name="langUri" type="xs:anyURI" use="optional" />
31+
32+
<!-- langUri may only be given together with xml:lang on this element -->
33+
<xs:assert test="not(not(@xml:lang) and @langUri)"/>
34+
<!-- a lemma Field is mandatory; only Field/@type is tested because Value also carries a free-text type attribute that ".//@type" would match -->
35+
<xs:assert test="lex:Field/@type = 'lemma'"/>
36+
</xs:complexType>
37+
<!-- each Field type may occur at most once per Entry -->
38+
<xs:unique name="uniqueFieldTypesPerEntry">
39+
<xs:selector xpath="lex:Field" />
40+
<xs:field xpath="@type" />
41+
</xs:unique>
42+
</xs:element>
43+
44+
<xs:complexType name="FieldType"> <!-- a typed group of one or more Value elements -->
45+
<xs:sequence>
46+
<xs:element name="Value" type="lex:ValueType" minOccurs="1" maxOccurs="unbounded"/>
47+
</xs:sequence>
48+
<xs:attribute name="type" type="lex:fieldType" use="required"/>
49+
50+
<!-- a "pos" Field needs vocabValueRef or vocabRef on at least one Value. NOTE(review): one annotated Value satisfies this; confirm whether every Value should carry a reference -->
51+
<xs:assert test="not(@type='pos') or .//@vocabValueRef or .//@vocabRef"/>
52+
<!-- source, sourceRef, date and rel are allowed only below a "citation" Field. NOTE(review): rel is not declared on ValueType in this schema; confirm -->
53+
<xs:assert test="@type='citation' or not(.//@source or .//@sourceRef or .//@date or .//@rel)"/>
54+
</xs:complexType>
55+
56+
<xs:complexType name="ValueType"> <!-- string content plus optional identification, language, reference, vocabulary and citation attributes -->
57+
<xs:simpleContent>
58+
<xs:extension base="xs:string">
59+
<xs:attribute ref="xml:id"/>
60+
<xs:attribute ref="xml:lang"/>
61+
<xs:attribute name="langUri" type="xs:anyURI" use="optional"/>
62+
<xs:attribute name="preferred" type="xs:boolean" use="optional"/>
63+
<xs:attribute name="ref" type="xs:anyURI" use="optional"/>
64+
<xs:attribute name="idRefs" type="xs:IDREFS" use="optional"/>
65+
<xs:attribute name="vocabRef" type="xs:anyURI" use="optional"/>
66+
<xs:attribute name="vocabValueRef" type="xs:anyURI" use="optional"/>
67+
<xs:attribute name="type" type="xs:string" use="optional"/>
68+
<xs:attribute name="source" type="xs:string" use="optional"/>
69+
<xs:attribute name="sourceRef" type="xs:anyURI" use="optional"/>
70+
<xs:attribute name="date" type="xs:string" use="optional"/> <!-- open question: constrain to EDTF? -->
71+
72+
<!-- langUri is valid only when xml:lang is also present on the same Value -->
73+
<xs:assert test="@xml:lang or not(@langUri)"/>
74+
</xs:extension>
75+
</xs:simpleContent>
76+
</xs:complexType>
77+
<xs:simpleType name="fieldType"> <!-- closed list of values accepted by the type attribute of lex:FieldType -->
78+
<xs:annotation>
79+
<xs:documentation xml:lang="en">
80+
Typed information about this lexical entry. Allowed field types are:
81+
<h:ul>
82+
<h:li><h:b>antonym</h:b>: Values in accordance to http://hdl.handle.net/11459/CCR_C-83_fb60d00c-08cf-1d88-ed75-8a8267bee0fb</h:li>
83+
<h:li><h:b>definition</h:b>: Values in accordance to http://hdl.handle.net/11459/CCR_C-1972_e9eef6e1-7df2-0d6f-4834-210d1711387b</h:li>
84+
<h:li><h:b>pos</h:b>: Values in accordance to http://hdl.handle.net/11459/CCR_C-396_5a972b93-2294-ab5c-a541-7c344c5f26c3</h:li>
85+
<h:li><h:b>synonym</h:b>: Values in accordance to http://hdl.handle.net/11459/CCR_C-317_9cc31ff4-059d-3c39-7e68-706d63b2b221</h:li>
86+
<h:li>...</h:li>
87+
</h:ul>
88+
</xs:documentation>
89+
</xs:annotation>
90+
<xs:restriction base="xs:string">
91+
<xs:enumeration value="entryId"/>
92+
<!-- word forms -->
93+
<xs:enumeration value="lemma"/>
94+
<xs:enumeration value="phonetic"/>
95+
<xs:enumeration value="translation"/>
96+
<xs:enumeration value="transcription"/> <!-- open question: use multiple `lemma` entries instead? -->
97+
<!-- textual description and information -->
98+
<xs:enumeration value="definition"/>
99+
<xs:enumeration value="etymology"/>
100+
<!-- grammar and morphology -->
101+
<xs:enumeration value="case"/>
102+
<xs:enumeration value="number"/>
103+
<xs:enumeration value="gender"/>
104+
<xs:enumeration value="pos"/>
105+
<xs:enumeration value="baseform"/>
106+
<xs:enumeration value="segmentation"/>
107+
<!-- miscellaneous -->
108+
<xs:enumeration value="sentiment"/>
109+
<xs:enumeration value="frequency"/>
110+
<!-- (semantic) relations. NOTE(review): DataView-LexHits.xsd also lists lex-subordinate and lex-superordinate, which have no counterpart here; confirm which list is authoritative -->
111+
<xs:enumeration value="antonym"/>
112+
<xs:enumeration value="hyponym"/>
113+
<xs:enumeration value="hypernym"/>
114+
<xs:enumeration value="meronym"/>
115+
<xs:enumeration value="holonym"/>
116+
<xs:enumeration value="synonym"/>
117+
<xs:enumeration value="related"/>
118+
<!-- references -->
119+
<xs:enumeration value="ref"/> <!-- open question: restrict Values of this type to xs:anyURI? -->
120+
<xs:enumeration value="senseRef"/>
121+
<!-- citations -->
122+
<xs:enumeration value="citation"/>
123+
<!-- TODO: decide whether further field types are needed -->
124+
</xs:restriction>
125+
</xs:simpleType>
126+
</xs:schema>

lexfcs/attachments/DataView-LexHits.xsd

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,27 @@
99
<xs:annotation>
1010
<xs:documentation>
1111
<h:p>
12-
This schema defines the structure of a
13-
<h:em>generic result</h:em> data view. All CLARIN-FCS endpoints
14-
MUST support this data view.
12+
This schema defines the structure of a <h:em>generic result</h:em> data view.
13+
All CLARIN-FCS endpoints MUST support this data view.
1514
</h:p>
1615
<h:p>
17-
The value <h:code>application/x-clarin-fcs-hits+xml</h:code>
18-
MUST be used to indicate a <h:em>generic result</h:em> data view.
16+
The value <h:code>application/x-clarin-fcs-hits+xml</h:code> MUST be used to
17+
indicate a <h:em>generic result</h:em> data view.
1918
</h:p>
20-
</xs:documentation>
19+
</xs:documentation>
2120
</xs:annotation>
2221

2322
<xs:element name="Result">
2423
<xs:annotation>
2524
<xs:documentation>
2625
<h:p>
27-
A single result line with one or more marked hits.
28-
White-space is considered <h:em>non-signification</h:em>,
29-
except for delimiting tokens.
26+
A single result line with one or more marked hits. White-space is considered
27+
<h:em>non-significant</h:em>, except for delimiting tokens.
3028
</h:p>
3129
<h:p>
32-
CLARIN-FCS client MAY
33-
normalize white-space and strip leading and tailing
34-
white-space and collapse all white-space between
35-
tokens to a single #x20 character.
30+
A CLARIN-FCS client MAY normalize white-space and strip leading and trailing
31+
white-space and collapse all white-space between tokens to a single #x20
32+
character.
3633
</h:p>
3734
</xs:documentation>
3835
</xs:annotation>
@@ -45,9 +42,8 @@
4542
A hit highlight or a field type annotation. It SHALL NOT be empty.
4643
</h:p>
4744
<h:p>
48-
One <h:code>Result</h:code> element MUST
49-
one <h:code>Hit</h:code> element, but MAY
50-
contain more than one.
45+
One <h:code>Result</h:code> element MUST contain one <h:code>Hit</h:code>
46+
element, but MAY contain more than one.
5147
</h:p>
5248
</xs:documentation>
5349
</xs:annotation>
@@ -66,7 +62,8 @@
6662
<xs:annotation>
6763
<xs:documentation>
6864
<h:p>
69-
Field type identifier for this annotation. Is used in the &lt;Hit&gt; element to determine which function the annotated text has.
65+
Field type identifier for this annotation. Is used in the &lt;Hit&gt;
66+
element to determine which function the annotated text has.
7067
</h:p>
7168
</xs:documentation>
7269
</xs:annotation>
@@ -76,10 +73,34 @@
7673
<xs:simpleType name="fieldType">
7774
<xs:restriction base="xs:string">
7875
<!-- <xs:pattern value="[a-zA-Z][a-zA-Z0-9]*" /> -->
76+
<!-- copied from DataView-Lex.xsd#fieldType and prefixed -->
77+
<xs:enumeration value="lex-entryId"/>
7978
<xs:enumeration value="lex-lemma"/>
79+
<xs:enumeration value="lex-phonetic"/>
80+
<xs:enumeration value="lex-translation"/>
81+
<xs:enumeration value="lex-transcription"/>
82+
<xs:enumeration value="lex-definition"/>
83+
<xs:enumeration value="lex-etymology"/>
84+
<xs:enumeration value="lex-case"/>
85+
<xs:enumeration value="lex-number"/>
86+
<xs:enumeration value="lex-gender"/>
8087
<xs:enumeration value="lex-pos"/>
81-
<xs:enumeration value="lex-def"/>
82-
<!-- <xs:enumeration value="query"/> -->
88+
<xs:enumeration value="lex-baseform"/>
89+
<xs:enumeration value="lex-segmentation"/>
90+
<xs:enumeration value="lex-sentiment"/>
91+
<xs:enumeration value="lex-frequency"/>
92+
<xs:enumeration value="lex-antonym"/>
93+
<xs:enumeration value="lex-hyponym"/>
94+
<xs:enumeration value="lex-hypernym"/>
95+
<xs:enumeration value="lex-meronym"/>
96+
<xs:enumeration value="lex-holonym"/>
97+
<xs:enumeration value="lex-synonym"/>
98+
<xs:enumeration value="lex-subordinate"/>
99+
<xs:enumeration value="lex-superordinate"/>
100+
<xs:enumeration value="lex-related"/>
101+
<xs:enumeration value="lex-ref"/>
102+
<xs:enumeration value="lex-senseRef"/>
103+
<xs:enumeration value="lex-citation"/>
83104
</xs:restriction>
84105
</xs:simpleType>
85106
</xs:schema>

0 commit comments

Comments
 (0)