|
1 | 1 | // This grammar is derived from the XPath 3.1 grammar produced by Ken Domino, et al. (https://github.com/antlr/grammars-v4/blob/63359bd91593ece31a384acd507ae860d6cf7ff7/xpath/xpath31/XPath31Lexer.g4).
|
2 | 2 |
|
| 3 | +// This is a faithful implementation of the XPath version 3.1 grammar |
| 4 | +// from the spec at https://www.w3.org/TR/2017/REC-xpath-31-20170321/ |
| 5 | + |
| 6 | +// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false |
| 7 | +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine |
| 8 | +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true |
| 9 | + |
3 | 10 | lexer grammar Metapath10Lexer;
|
4 | 11 |
|
5 |
| -AT : '@' ; |
6 |
| -BANG : '!' ; |
7 |
| -CB : ']' ; |
8 |
| -CC : '}' ; |
9 |
| -CEQ : ':=' ; |
10 |
| -COLON : ':' ; |
11 |
| -COLONCOLON : '::' ; |
12 |
| -COMMA : ',' ; |
13 |
| -CP : ')' ; |
14 |
| -CS : ':*' ; |
15 |
| -D : '.' ; |
16 |
| -DD : '..' ; |
17 |
| -DOLLAR : '$' ; |
18 |
| -EG : '=>' ; |
19 |
| -EQ : '=' ; |
20 |
| -GE : '>=' ; |
21 |
| -GG : '>>' ; |
22 |
| -GT : '>' ; |
23 |
| -LE : '<=' ; |
24 |
| -LL : '<<' ; |
25 |
| -LT : '<' ; |
26 |
| -MINUS : '-' ; |
27 |
| -NE : '!=' ; |
28 |
| -OB : '[' ; |
29 |
| -OC : '{' ; |
30 |
| -OP : '(' ; |
31 |
| -P : '|' ; |
32 |
| -PLUS : '+' ; |
33 |
| -POUND : '#' ; |
34 |
| -PP : '||' ; |
35 |
| -QM : '?' ; |
36 |
| -SC : '*:' ; |
37 |
| -SLASH : '/' ; |
38 |
| -SS : '//' ; |
39 |
| -STAR : '*' ; |
| 12 | +AT : '@'; |
| 13 | +BANG : '!'; |
| 14 | +CB : ']'; |
| 15 | +CC : '}'; |
| 16 | +CEQ : ':='; |
| 17 | +COLON : ':'; |
| 18 | +COLONCOLON : '::'; |
| 19 | +COMMA : ','; |
| 20 | +CP : ')'; |
| 21 | +CS : ':*'; |
| 22 | +D : '.'; |
| 23 | +DD : '..'; |
| 24 | +DOLLAR : '$'; |
| 25 | +EG : '=>'; |
| 26 | +EQ : '='; |
| 27 | +GE : '>='; |
| 28 | +GG : '>>'; |
| 29 | +GT : '>'; |
| 30 | +LE : '<='; |
| 31 | +LL : '<<'; |
| 32 | +LT : '<'; |
| 33 | +MINUS : '-'; |
| 34 | +NE : '!='; |
| 35 | +OB : '['; |
| 36 | +OC : '{'; |
| 37 | +OP : '('; |
| 38 | +P : '|'; |
| 39 | +PLUS : '+'; |
| 40 | +POUND : '#'; |
| 41 | +PP : '||'; |
| 42 | +QM : '?'; |
| 43 | +SC : '*:'; |
| 44 | +SLASH : '/'; |
| 45 | +SS : '//'; |
| 46 | +STAR : '*'; |
40 | 47 |
|
41 | 48 | // KEYWORDS
|
42 | 49 |
|
43 |
| -KW_ANCESTOR : 'ancestor' ; |
44 |
| -KW_ANCESTOR_OR_SELF : 'ancestor-or-self' ; |
45 |
| -KW_AND : 'and' ; |
46 |
| -KW_ARRAY : 'array' ; |
47 |
| -KW_AS : 'as' ; |
48 |
| -KW_ATTRIBUTE : 'attribute' ; |
49 |
| -KW_CAST : 'cast' ; |
50 |
| -KW_CASTABLE : 'castable' ; |
51 |
| -KW_CHILD : 'child' ; |
52 |
| -KW_COMMENT : 'comment' ; |
53 |
| -KW_DESCENDANT : 'descendant' ; |
54 |
| -KW_DESCENDANT_OR_SELF : 'descendant-or-self' ; |
55 |
| -KW_DIV : 'div' ; |
56 |
| -KW_DOCUMENT_NODE : 'document-node' ; |
57 |
| -KW_ELEMENT : 'element' ; |
58 |
| -KW_ELSE : 'else' ; |
59 |
| -KW_EMPTY_SEQUENCE : 'empty-sequence' ; |
60 |
| -KW_EQ : 'eq' ; |
61 |
| -KW_EVERY : 'every' ; |
62 |
| -KW_EXCEPT : 'except' ; |
63 |
| -KW_FOLLOWING : 'following' ; |
64 |
| -KW_FOLLOWING_SIBLING : 'following-sibling' ; |
65 |
| -KW_FOR : 'for' ; |
66 |
| -KW_FUNCTION : 'function' ; |
67 |
| -KW_GE : 'ge' ; |
68 |
| -KW_GT : 'gt' ; |
69 |
| -KW_IDIV : 'idiv' ; |
70 |
| -KW_IF : 'if' ; |
71 |
| -KW_IN : 'in' ; |
72 |
| -KW_INSTANCE : 'instance' ; |
73 |
| -KW_INTERSECT : 'intersect' ; |
74 |
| -KW_IS : 'is' ; |
75 |
| -KW_ITEM : 'item' ; |
76 |
| -KW_LE : 'le' ; |
77 |
| -KW_LET : 'let' ; |
78 |
| -KW_LT : 'lt' ; |
79 |
| -KW_MAP : 'map' ; |
80 |
| -KW_MOD : 'mod' ; |
81 |
| -KW_NAMESPACE : 'namespace' ; |
82 |
| -KW_NAMESPACE_NODE : 'namespace-node' ; |
83 |
| -KW_NE : 'ne' ; |
84 |
| -KW_NODE : 'node' ; |
85 |
| -KW_OF : 'of' ; |
86 |
| -KW_OR : 'or' ; |
87 |
| -KW_PARENT : 'parent' ; |
88 |
| -KW_PRECEDING : 'preceding' ; |
89 |
| -KW_PRECEDING_SIBLING : 'preceding-sibling' ; |
90 |
| -KW_PROCESSING_INSTRUCTION : 'processing-instruction' ; |
91 |
| -KW_RETURN : 'return' ; |
92 |
| -KW_SATISFIES : 'satisfies' ; |
93 |
| -KW_SCHEMA_ATTRIBUTE : 'schema-attribute' ; |
94 |
| -KW_SCHEMA_ELEMENT : 'schema-element' ; |
95 |
| -KW_SELF : 'self' ; |
96 |
| -KW_SOME : 'some' ; |
97 |
| -KW_TEXT : 'text' ; |
98 |
| -KW_THEN : 'then' ; |
99 |
| -KW_TO : 'to' ; |
100 |
| -KW_TREAT : 'treat' ; |
101 |
| -KW_UNION : 'union' ; |
| 50 | +KW_ANCESTOR : 'ancestor'; |
| 51 | +KW_ANCESTOR_OR_SELF : 'ancestor-or-self'; |
| 52 | +KW_AND : 'and'; |
| 53 | +KW_ARRAY : 'array'; |
| 54 | +KW_AS : 'as'; |
| 55 | +KW_ATTRIBUTE : 'attribute'; |
| 56 | +KW_CAST : 'cast'; |
| 57 | +KW_CASTABLE : 'castable'; |
| 58 | +KW_CHILD : 'child'; |
| 59 | +KW_COMMENT : 'comment'; |
| 60 | +KW_DESCENDANT : 'descendant'; |
| 61 | +KW_DESCENDANT_OR_SELF : 'descendant-or-self'; |
| 62 | +KW_DIV : 'div'; |
| 63 | +KW_DOCUMENT_NODE : 'document-node'; |
| 64 | +KW_ELEMENT : 'element'; |
| 65 | +KW_ELSE : 'else'; |
| 66 | +KW_EMPTY_SEQUENCE : 'empty-sequence'; |
| 67 | +KW_EQ : 'eq'; |
| 68 | +KW_EVERY : 'every'; |
| 69 | +KW_EXCEPT : 'except'; |
| 70 | +KW_FOLLOWING : 'following'; |
| 71 | +KW_FOLLOWING_SIBLING : 'following-sibling'; |
| 72 | +KW_FOR : 'for'; |
| 73 | +KW_FUNCTION : 'function'; |
| 74 | +KW_GE : 'ge'; |
| 75 | +KW_GT : 'gt'; |
| 76 | +KW_IDIV : 'idiv'; |
| 77 | +KW_IF : 'if'; |
| 78 | +KW_IN : 'in'; |
| 79 | +KW_INSTANCE : 'instance'; |
| 80 | +KW_INTERSECT : 'intersect'; |
| 81 | +KW_IS : 'is'; |
| 82 | +KW_ITEM : 'item'; |
| 83 | +KW_LE : 'le'; |
| 84 | +KW_LET : 'let'; |
| 85 | +KW_LT : 'lt'; |
| 86 | +KW_MAP : 'map'; |
| 87 | +KW_MOD : 'mod'; |
| 88 | +KW_NAMESPACE : 'namespace'; |
| 89 | +KW_NAMESPACE_NODE : 'namespace-node'; |
| 90 | +KW_NE : 'ne'; |
| 91 | +KW_NODE : 'node'; |
| 92 | +KW_OF : 'of'; |
| 93 | +KW_OR : 'or'; |
| 94 | +KW_PARENT : 'parent'; |
| 95 | +KW_PRECEDING : 'preceding'; |
| 96 | +KW_PRECEDING_SIBLING : 'preceding-sibling'; |
| 97 | +KW_PROCESSING_INSTRUCTION : 'processing-instruction'; |
| 98 | +KW_RETURN : 'return'; |
| 99 | +KW_SATISFIES : 'satisfies'; |
| 100 | +KW_SCHEMA_ATTRIBUTE : 'schema-attribute'; |
| 101 | +KW_SCHEMA_ELEMENT : 'schema-element'; |
| 102 | +KW_SELF : 'self'; |
| 103 | +KW_SOME : 'some'; |
| 104 | +KW_TEXT : 'text'; |
| 105 | +KW_THEN : 'then'; |
| 106 | +KW_TO : 'to'; |
| 107 | +KW_TREAT : 'treat'; |
| 108 | +KW_UNION : 'union'; |
102 | 109 |
|
103 | 110 | // A.2.1. TERMINAL SYMBOLS
|
104 | 111 | // This isn't a complete list of tokens in the language.
|
105 | 112 | // Keywords and symbols are terminals.
|
106 | 113 |
|
107 |
| -IntegerLiteral : FragDigits ; |
108 |
| -DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]* ; |
109 |
| -DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits ; |
110 |
| -StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\'' ; |
111 |
| -URIQualifiedName : BracedURILiteral NCName ; |
112 |
| -BracedURILiteral : 'Q' '{' [^{}]* '}' ; |
| 114 | +IntegerLiteral : FragDigits; |
| 115 | +DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]*; |
| 116 | +DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits; |
| 117 | +StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\''; |
| 118 | +URIQualifiedName : BracedURILiteral NCName; |
| 119 | +BracedURILiteral : 'Q' '{' ~[{}]* '}'; // ANTLR negates a set with '~'; '[^{}]' would match only the characters '^', '{' and '}' |
113 | 120 | // Error in spec: EscapeQuot and EscapeApos are not terminals!
|
114 |
| -fragment FragEscapeQuot : '""' ; |
| 121 | +fragment FragEscapeQuot : '""'; |
115 | 122 | fragment FragEscapeApos : '\'\'';
|
116 | 123 | // Error in spec: Comment isn't really a terminal, but an off-channel object.
|
117 |
| -Comment : '(:' (Comment | CommentContents)*? ':)' -> skip ; |
118 |
| -QName : FragQName ; |
119 |
| -NCName : FragmentNCName ; |
| 124 | +Comment : '(:' (Comment | CommentContents)*? ':)' -> skip; |
| 125 | +NCName : FragmentNCName; |
| 126 | +QName : FragQName; |
120 | 127 | // Error in spec: Char is not a terminal!
|
121 |
| -fragment Char : FragChar ; |
122 |
| -fragment FragDigits : [0-9]+ ; |
123 |
| -fragment CommentContents : Char ; |
| 128 | +fragment Char : FragChar; |
| 129 | +fragment FragDigits : [0-9]+; |
| 130 | +fragment CommentContents : Char; |
124 | 131 | // https://www.w3.org/TR/REC-xml-names/#NT-QName
|
125 |
| -fragment FragQName : FragPrefixedName | FragUnprefixedName ; |
126 |
| -fragment FragPrefixedName : FragPrefix ':' FragLocalPart ; |
127 |
| -fragment FragUnprefixedName : FragLocalPart ; |
128 |
| -fragment FragPrefix : FragmentNCName ; |
129 |
| -fragment FragLocalPart : FragmentNCName ; |
130 |
| -fragment FragNCNameStartChar |
131 |
| - : 'A'..'Z' |
132 |
| - | '_' |
133 |
| - | 'a'..'z' |
134 |
| - | '\u00C0'..'\u00D6' |
135 |
| - | '\u00D8'..'\u00F6' |
136 |
| - | '\u00F8'..'\u02FF' |
137 |
| - | '\u0370'..'\u037D' |
138 |
| - | '\u037F'..'\u1FFF' |
139 |
| - | '\u200C'..'\u200D' |
140 |
| - | '\u2070'..'\u218F' |
141 |
| - | '\u2C00'..'\u2FEF' |
142 |
| - | '\u3001'..'\uD7FF' |
143 |
| - | '\uF900'..'\uFDCF' |
144 |
| - | '\uFDF0'..'\uFFFD' |
145 |
| - | '\u{10000}'..'\u{EFFFF}' |
146 |
| - ; |
147 |
| -fragment FragNCNameChar |
148 |
| - : FragNCNameStartChar | '-' | '.' | '0'..'9' |
149 |
| - | '\u00B7' | '\u0300'..'\u036F' |
150 |
| - | '\u203F'..'\u2040' |
151 |
| - ; |
152 |
| -fragment FragmentNCName : FragNCNameStartChar FragNCNameChar* ; |
| 132 | +fragment FragQName : FragPrefixedName | FragUnprefixedName; |
| 133 | +fragment FragPrefixedName : FragPrefix ':' FragLocalPart; |
| 134 | +fragment FragUnprefixedName : FragLocalPart; |
| 135 | +fragment FragPrefix : FragmentNCName; |
| 136 | +fragment FragLocalPart : FragmentNCName; |
| 137 | +fragment FragNCNameStartChar: |
| 138 | + 'A' ..'Z' |
| 139 | + | '_' |
| 140 | + | 'a' ..'z' |
| 141 | + | '\u00C0' ..'\u00D6' |
| 142 | + | '\u00D8' ..'\u00F6' |
| 143 | + | '\u00F8' ..'\u02FF' |
| 144 | + | '\u0370' ..'\u037D' |
| 145 | + | '\u037F' ..'\u1FFF' |
| 146 | + | '\u200C' ..'\u200D' |
| 147 | + | '\u2070' ..'\u218F' |
| 148 | + | '\u2C00' ..'\u2FEF' |
| 149 | + | '\u3001' ..'\uD7FF' |
| 150 | + | '\uF900' ..'\uFDCF' |
| 151 | + | '\uFDF0' ..'\uFFFD' |
| 152 | + | '\u{10000}' ..'\u{EFFFF}' |
| 153 | +; |
| 154 | +fragment FragNCNameChar: |
| 155 | + FragNCNameStartChar |
| 156 | + | '-' |
| 157 | + | '.' |
| 158 | + | '0' ..'9' |
| 159 | + | '\u00B7' |
| 160 | + | '\u0300' ..'\u036F' |
| 161 | + | '\u203F' ..'\u2040' |
| 162 | +; |
| 163 | +fragment FragmentNCName: FragNCNameStartChar FragNCNameChar*; |
153 | 164 |
|
154 | 165 | // https://www.w3.org/TR/REC-xml/#NT-Char
|
155 | 166 |
|
156 |
| -fragment FragChar : '\u0009' | '\u000a' | '\u000d' |
157 |
| - | '\u0020'..'\ud7ff' |
158 |
| - | '\ue000'..'\ufffd' |
159 |
| - | '\u{10000}'..'\u{10ffff}' |
160 |
| - ; |
| 167 | +fragment FragChar: |
| 168 | + '\u0009' |
| 169 | + | '\u000a' |
| 170 | + | '\u000d' |
| 171 | + | '\u0020' ..'\ud7ff' |
| 172 | + | '\ue000' ..'\ufffd' |
| 173 | + | '\u{10000}' ..'\u{10ffff}' |
| 174 | +; |
161 | 175 |
|
162 | 176 | // https://github.com/antlr/grammars-v4/blob/17d3db3fd6a8fc319a12176e0bb735b066ec0616/xpath/xpath31/XPath31.g4#L389
|
163 |
| -Whitespace : ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip ; |
| 177 | +Whitespace: ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip; |
164 | 178 |
|
165 | 179 | // Not per spec. Specified for testing.
|
166 |
| -SEMI : ';' ; |
| 180 | +SEMI: ';'; |
0 commit comments