forked from metaschema-framework/metaschema-java
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for Metapath maps and map lookups using function, unary, and postfix methods.
- Loading branch information
1 parent
f29d5d7
commit fd72d32
Showing
58 changed files
with
2,379 additions
and
387 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,166 +1,180 @@ | ||
// This grammar is derived from the XPath 3.1 grammar produced by Ken Domino, et al (https://github.com/antlr/grammars-v4/blob/63359bd91593ece31a384acd507ae860d6cf7ff7/xpath/xpath31/XPath31Lexer.g4). | ||
|
||
// This is a faithful implementation of the XPath version 3.1 grammar | ||
// from the spec at https://www.w3.org/TR/2017/REC-xpath-31-20170321/ | ||
|
||
// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false | ||
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine | ||
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true | ||
|
||
lexer grammar Metapath10Lexer; | ||
|
||
AT : '@' ; | ||
BANG : '!' ; | ||
CB : ']' ; | ||
CC : '}' ; | ||
CEQ : ':=' ; | ||
COLON : ':' ; | ||
COLONCOLON : '::' ; | ||
COMMA : ',' ; | ||
CP : ')' ; | ||
CS : ':*' ; | ||
D : '.' ; | ||
DD : '..' ; | ||
DOLLAR : '$' ; | ||
EG : '=>' ; | ||
EQ : '=' ; | ||
GE : '>=' ; | ||
GG : '>>' ; | ||
GT : '>' ; | ||
LE : '<=' ; | ||
LL : '<<' ; | ||
LT : '<' ; | ||
MINUS : '-' ; | ||
NE : '!=' ; | ||
OB : '[' ; | ||
OC : '{' ; | ||
OP : '(' ; | ||
P : '|' ; | ||
PLUS : '+' ; | ||
POUND : '#' ; | ||
PP : '||' ; | ||
QM : '?' ; | ||
SC : '*:' ; | ||
SLASH : '/' ; | ||
SS : '//' ; | ||
STAR : '*' ; | ||
AT : '@'; | ||
BANG : '!'; | ||
CB : ']'; | ||
CC : '}'; | ||
CEQ : ':='; | ||
COLON : ':'; | ||
COLONCOLON : '::'; | ||
COMMA : ','; | ||
CP : ')'; | ||
CS : ':*'; | ||
D : '.'; | ||
DD : '..'; | ||
DOLLAR : '$'; | ||
EG : '=>'; | ||
EQ : '='; | ||
GE : '>='; | ||
GG : '>>'; | ||
GT : '>'; | ||
LE : '<='; | ||
LL : '<<'; | ||
LT : '<'; | ||
MINUS : '-'; | ||
NE : '!='; | ||
OB : '['; | ||
OC : '{'; | ||
OP : '('; | ||
P : '|'; | ||
PLUS : '+'; | ||
POUND : '#'; | ||
PP : '||'; | ||
QM : '?'; | ||
SC : '*:'; | ||
SLASH : '/'; | ||
SS : '//'; | ||
STAR : '*'; | ||
|
||
// KEYWORDS | ||
|
||
KW_ANCESTOR : 'ancestor' ; | ||
KW_ANCESTOR_OR_SELF : 'ancestor-or-self' ; | ||
KW_AND : 'and' ; | ||
KW_ARRAY : 'array' ; | ||
KW_AS : 'as' ; | ||
KW_ATTRIBUTE : 'attribute' ; | ||
KW_CAST : 'cast' ; | ||
KW_CASTABLE : 'castable' ; | ||
KW_CHILD : 'child' ; | ||
KW_COMMENT : 'comment' ; | ||
KW_DESCENDANT : 'descendant' ; | ||
KW_DESCENDANT_OR_SELF : 'descendant-or-self' ; | ||
KW_DIV : 'div' ; | ||
KW_DOCUMENT_NODE : 'document-node' ; | ||
KW_ELEMENT : 'element' ; | ||
KW_ELSE : 'else' ; | ||
KW_EMPTY_SEQUENCE : 'empty-sequence' ; | ||
KW_EQ : 'eq' ; | ||
KW_EVERY : 'every' ; | ||
KW_EXCEPT : 'except' ; | ||
KW_FOLLOWING : 'following' ; | ||
KW_FOLLOWING_SIBLING : 'following-sibling' ; | ||
KW_FOR : 'for' ; | ||
KW_FUNCTION : 'function' ; | ||
KW_GE : 'ge' ; | ||
KW_GT : 'gt' ; | ||
KW_IDIV : 'idiv' ; | ||
KW_IF : 'if' ; | ||
KW_IN : 'in' ; | ||
KW_INSTANCE : 'instance' ; | ||
KW_INTERSECT : 'intersect' ; | ||
KW_IS : 'is' ; | ||
KW_ITEM : 'item' ; | ||
KW_LE : 'le' ; | ||
KW_LET : 'let' ; | ||
KW_LT : 'lt' ; | ||
KW_MAP : 'map' ; | ||
KW_MOD : 'mod' ; | ||
KW_NAMESPACE : 'namespace' ; | ||
KW_NAMESPACE_NODE : 'namespace-node' ; | ||
KW_NE : 'ne' ; | ||
KW_NODE : 'node' ; | ||
KW_OF : 'of' ; | ||
KW_OR : 'or' ; | ||
KW_PARENT : 'parent' ; | ||
KW_PRECEDING : 'preceding' ; | ||
KW_PRECEDING_SIBLING : 'preceding-sibling' ; | ||
KW_PROCESSING_INSTRUCTION : 'processing-instruction' ; | ||
KW_RETURN : 'return' ; | ||
KW_SATISFIES : 'satisfies' ; | ||
KW_SCHEMA_ATTRIBUTE : 'schema-attribute' ; | ||
KW_SCHEMA_ELEMENT : 'schema-element' ; | ||
KW_SELF : 'self' ; | ||
KW_SOME : 'some' ; | ||
KW_TEXT : 'text' ; | ||
KW_THEN : 'then' ; | ||
KW_TO : 'to' ; | ||
KW_TREAT : 'treat' ; | ||
KW_UNION : 'union' ; | ||
KW_ANCESTOR : 'ancestor'; | ||
KW_ANCESTOR_OR_SELF : 'ancestor-or-self'; | ||
KW_AND : 'and'; | ||
KW_ARRAY : 'array'; | ||
KW_AS : 'as'; | ||
KW_ATTRIBUTE : 'attribute'; | ||
KW_CAST : 'cast'; | ||
KW_CASTABLE : 'castable'; | ||
KW_CHILD : 'child'; | ||
KW_COMMENT : 'comment'; | ||
KW_DESCENDANT : 'descendant'; | ||
KW_DESCENDANT_OR_SELF : 'descendant-or-self'; | ||
KW_DIV : 'div'; | ||
KW_DOCUMENT_NODE : 'document-node'; | ||
KW_ELEMENT : 'element'; | ||
KW_ELSE : 'else'; | ||
KW_EMPTY_SEQUENCE : 'empty-sequence'; | ||
KW_EQ : 'eq'; | ||
KW_EVERY : 'every'; | ||
KW_EXCEPT : 'except'; | ||
KW_FOLLOWING : 'following'; | ||
KW_FOLLOWING_SIBLING : 'following-sibling'; | ||
KW_FOR : 'for'; | ||
KW_FUNCTION : 'function'; | ||
KW_GE : 'ge'; | ||
KW_GT : 'gt'; | ||
KW_IDIV : 'idiv'; | ||
KW_IF : 'if'; | ||
KW_IN : 'in'; | ||
KW_INSTANCE : 'instance'; | ||
KW_INTERSECT : 'intersect'; | ||
KW_IS : 'is'; | ||
KW_ITEM : 'item'; | ||
KW_LE : 'le'; | ||
KW_LET : 'let'; | ||
KW_LT : 'lt'; | ||
KW_MAP : 'map'; | ||
KW_MOD : 'mod'; | ||
KW_NAMESPACE : 'namespace'; | ||
KW_NAMESPACE_NODE : 'namespace-node'; | ||
KW_NE : 'ne'; | ||
KW_NODE : 'node'; | ||
KW_OF : 'of'; | ||
KW_OR : 'or'; | ||
KW_PARENT : 'parent'; | ||
KW_PRECEDING : 'preceding'; | ||
KW_PRECEDING_SIBLING : 'preceding-sibling'; | ||
KW_PROCESSING_INSTRUCTION : 'processing-instruction'; | ||
KW_RETURN : 'return'; | ||
KW_SATISFIES : 'satisfies'; | ||
KW_SCHEMA_ATTRIBUTE : 'schema-attribute'; | ||
KW_SCHEMA_ELEMENT : 'schema-element'; | ||
KW_SELF : 'self'; | ||
KW_SOME : 'some'; | ||
KW_TEXT : 'text'; | ||
KW_THEN : 'then'; | ||
KW_TO : 'to'; | ||
KW_TREAT : 'treat'; | ||
KW_UNION : 'union'; | ||
|
||
// A.2.1. TERMINAL SYMBOLS | ||
// This isn't a complete list of tokens in the language. | ||
// Keywords and symbols are terminals. | ||
|
||
IntegerLiteral : FragDigits ; | ||
DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]* ; | ||
DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits ; | ||
StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\'' ; | ||
URIQualifiedName : BracedURILiteral NCName ; | ||
BracedURILiteral : 'Q' '{' [^{}]* '}' ; | ||
IntegerLiteral : FragDigits; | ||
DecimalLiteral : '.' FragDigits | FragDigits '.' [0-9]*; | ||
DoubleLiteral : ('.' FragDigits | FragDigits ('.' [0-9]*)?) [eE] [+-]? FragDigits; | ||
StringLiteral : '"' (~["] | FragEscapeQuot)* '"' | '\'' (~['] | FragEscapeApos)* '\''; | ||
URIQualifiedName : BracedURILiteral NCName; | ||
// 'Q{' followed by any run of characters other than '{' or '}', then '}'. ANTLR negates a set with '~'; '^' inside [...] is a literal. | ||
BracedURILiteral : 'Q' '{' ~[{}]* '}'; | ||
// Error in spec: EscapeQuot and EscapeApos are not terminals! | ||
fragment FragEscapeQuot : '""' ; | ||
fragment FragEscapeQuot : '""'; | ||
fragment FragEscapeApos : '\'\''; | ||
// Error in spec: Comment isn't really a terminal, but an off-channel object. | ||
Comment : '(:' (Comment | CommentContents)*? ':)' -> skip ; | ||
QName : FragQName ; | ||
NCName : FragmentNCName ; | ||
Comment : '(:' (Comment | CommentContents)*? ':)' -> skip; | ||
NCName : FragmentNCName; | ||
QName : FragQName; | ||
// Error in spec: Char is not a terminal! | ||
fragment Char : FragChar ; | ||
fragment FragDigits : [0-9]+ ; | ||
fragment CommentContents : Char ; | ||
fragment Char : FragChar; | ||
fragment FragDigits : [0-9]+; | ||
fragment CommentContents : Char; | ||
// https://www.w3.org/TR/REC-xml-names/#NT-QName | ||
fragment FragQName : FragPrefixedName | FragUnprefixedName ; | ||
fragment FragPrefixedName : FragPrefix ':' FragLocalPart ; | ||
fragment FragUnprefixedName : FragLocalPart ; | ||
fragment FragPrefix : FragmentNCName ; | ||
fragment FragLocalPart : FragmentNCName ; | ||
fragment FragNCNameStartChar | ||
: 'A'..'Z' | ||
| '_' | ||
| 'a'..'z' | ||
| '\u00C0'..'\u00D6' | ||
| '\u00D8'..'\u00F6' | ||
| '\u00F8'..'\u02FF' | ||
| '\u0370'..'\u037D' | ||
| '\u037F'..'\u1FFF' | ||
| '\u200C'..'\u200D' | ||
| '\u2070'..'\u218F' | ||
| '\u2C00'..'\u2FEF' | ||
| '\u3001'..'\uD7FF' | ||
| '\uF900'..'\uFDCF' | ||
| '\uFDF0'..'\uFFFD' | ||
| '\u{10000}'..'\u{EFFFF}' | ||
; | ||
fragment FragNCNameChar | ||
: FragNCNameStartChar | '-' | '.' | '0'..'9' | ||
| '\u00B7' | '\u0300'..'\u036F' | ||
| '\u203F'..'\u2040' | ||
; | ||
fragment FragmentNCName : FragNCNameStartChar FragNCNameChar* ; | ||
fragment FragQName : FragPrefixedName | FragUnprefixedName; | ||
fragment FragPrefixedName : FragPrefix ':' FragLocalPart; | ||
fragment FragUnprefixedName : FragLocalPart; | ||
fragment FragPrefix : FragmentNCName; | ||
fragment FragLocalPart : FragmentNCName; | ||
fragment FragNCNameStartChar: | ||
'A' ..'Z' | ||
| '_' | ||
| 'a' ..'z' | ||
| '\u00C0' ..'\u00D6' | ||
| '\u00D8' ..'\u00F6' | ||
| '\u00F8' ..'\u02FF' | ||
| '\u0370' ..'\u037D' | ||
| '\u037F' ..'\u1FFF' | ||
| '\u200C' ..'\u200D' | ||
| '\u2070' ..'\u218F' | ||
| '\u2C00' ..'\u2FEF' | ||
| '\u3001' ..'\uD7FF' | ||
| '\uF900' ..'\uFDCF' | ||
| '\uFDF0' ..'\uFFFD' | ||
| '\u{10000}' ..'\u{EFFFF}' | ||
; | ||
fragment FragNCNameChar: | ||
FragNCNameStartChar | ||
| '-' | ||
| '.' | ||
| '0' ..'9' | ||
| '\u00B7' | ||
| '\u0300' ..'\u036F' | ||
| '\u203F' ..'\u2040' | ||
; | ||
fragment FragmentNCName: FragNCNameStartChar FragNCNameChar*; | ||
|
||
// https://www.w3.org/TR/REC-xml/#NT-Char | ||
|
||
fragment FragChar : '\u0009' | '\u000a' | '\u000d' | ||
| '\u0020'..'\ud7ff' | ||
| '\ue000'..'\ufffd' | ||
| '\u{10000}'..'\u{10ffff}' | ||
; | ||
fragment FragChar: | ||
'\u0009' | ||
| '\u000a' | ||
| '\u000d' | ||
| '\u0020' ..'\ud7ff' | ||
| '\ue000' ..'\ufffd' | ||
| '\u{10000}' ..'\u{10ffff}' | ||
; | ||
|
||
// https://github.com/antlr/grammars-v4/blob/17d3db3fd6a8fc319a12176e0bb735b066ec0616/xpath/xpath31/XPath31.g4#L389 | ||
Whitespace : ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip ; | ||
Whitespace: ('\u000d' | '\u000a' | '\u0020' | '\u0009')+ -> skip; | ||
|
||
// Not per spec. Specified for testing. | ||
SEMI : ';' ; | ||
SEMI: ';'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.