@@ -32,7 +32,7 @@ MaybeWhsp = {WhspChar}*
32
32
EOL = \r|\n|\r\n
33
33
Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
34
34
Sigils = ("$" | "@" | "%" | "&" | "*")
35
- WxSigils = [[\W]--[\$\@\%\&\*\"\'\`\#]]
35
+ WxSigils = [[\W]--[\$\@\%\&\*\"\'\`\#\r\n ]]
36
36
37
37
// Perl special identifiers (four of six from
38
38
// https://perldoc.perl.org/perldata.html#Identifier-parsing):
@@ -100,35 +100,38 @@ TRhash = "tr"\#
100
100
TRpunc = "tr" {MaybeWhsp} {Quo0xHash}
101
101
TRword = "tr" {WhiteSpace} \w
102
102
103
- HereContinuation = \,{MaybeWhsp} "<<"\~? {MaybeWhsp}
104
- MaybeHereMarkers = ([\"\'\`\\]?{Identifier} [^\n\r]* {HereContinuation})?
103
+ HereEOF1 = [\"][^\r\n\"]*[\"]
104
+ HereEOF2 = [\`][^\r\n\`]*[\`]
105
+ HereEOF3 = [\'][^\r\n\']*[\']
106
+ HereEOF4 = [\\]?{Identifier}
105
107
106
108
//
107
109
// Track some keywords that can be used to identify heuristically a possible
108
110
// beginning of the shortcut syntax, //, for m//. Also include any perlfunc
109
- // that takes /PATTERN/ -- which is just "split" . Heuristics using punctuation
110
- // are defined inline later in some rules.
111
+ // that takes /PATTERN/. Heuristics using punctuation are defined inline later
112
+ // in some rules.
111
113
//
112
114
Mwords_1 = ("eq" | "ne" | "le" | "ge" | "lt" | "gt" | "cmp")
113
115
Mwords_2 = ("if" | "unless" | "or" | "and" | "not")
114
- Mwords_3 = ("split")
116
+ Mwords_3 = ("split" | "grep" )
115
117
Mwords = ({Mwords_1} | {Mwords_2} | {Mwords_3})
116
118
117
119
Mpunc1YYIN = [\(\!]
118
- Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
120
+ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>"|"=>" )
119
121
120
122
//
121
123
// There are two dimensions to quoting: "link"-or-not and "interpolate"-or-not.
122
124
// Unfortunately, we cannot control the %state values, so we have to declare
123
125
// a cross-product of states. (Technically, state values are not guaranteed to
124
126
// be unique by jflex, but states that do not have identical rules will have
125
- // different values. The following four "QUO" states satisfy this difference
126
- // criterion. Likewise with the four "HERE" states.)
127
+ // different values. The four "QUO" states below satisfy this difference
128
+ // criterion, as do the four "HERE" states.)
127
129
//
128
130
// YYINITIAL : nothing yet parsed or just after a non-quoted [;{}]
129
131
// INTRA : saw content from YYINITIAL but not yet other state or [;{}]
130
132
// SCOMMENT : single-line comment
131
133
// POD : Perl Plain-Old-Documentation
134
+ // FMT : an output record format
132
135
// QUO : quote-like that is OK to match paths|files|URLs|e-mails
133
136
// QUOxN : "" but with no interpolation
134
137
// QUOxL : quote-like that is not OK to match paths|files|URLs|e-mails
@@ -139,20 +142,21 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
139
142
// HERExN : Here-docs with no interpolation
140
143
// HEREin : Indented Here-docs
141
144
// HEREinxN : Indented Here-docs with no interpolation
142
- // FMT : an output record format
143
145
//
144
- %state INTRA SCOMMENT POD FMT QUO QUOxN QUOxL QUOxLxN QM HERE HERExN HEREin HEREinxN
146
+ %state INTRA SCOMMENT POD FMT
147
+ %state QUO QUOxN QUOxL QUOxLxN QM
148
+ %state HERE HERExN HEREin HEREinxN
145
149
146
150
%%
147
151
<HERE, HERExN> {
148
152
^ {Identifier} / {MaybeWhsp}{EOL} {
149
- if ( h.maybeEndHere(yytext())) yyjump(YYINITIAL );
153
+ h.maybeEndHere(yytext());
150
154
}
151
155
}
152
156
153
157
<HEREin, HEREinxN> {
154
158
^ {MaybeWhsp} {Identifier} / {MaybeWhsp}{EOL} {
155
- if ( h.maybeEndHere(yytext())) yyjump(YYINITIAL );
159
+ h.maybeEndHere(yytext());
156
160
}
157
161
}
158
162
@@ -173,23 +177,8 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
173
177
takeNonword(yytext());
174
178
}
175
179
176
- // Following are rules for Here-documents. Stacked multiple here-docs are
177
- // recognized, but not fully supported, as only the interpolation setting
178
- // of the first marker will apply to all sections. (The final, second HERE
179
- // quoting character is not demanded, as it is superfluous for the needs of
180
- // xref lexing; and leaving it off simplifies parsing.)
181
-
182
- "<<" {MaybeWhsp} {MaybeHereMarkers} [\"\`]?{Identifier} {
183
- h.hop(yytext(), false/*nointerp*/, false/*indented*/);
184
- }
185
- "<<~" {MaybeWhsp} {MaybeHereMarkers} [\"\`]?{Identifier} {
186
- h.hop(yytext(), false/*nointerp*/, true/*indented*/);
187
- }
188
- "<<" {MaybeWhsp} {MaybeHereMarkers} [\'\\]{Identifier} {
189
- h.hop(yytext(), true/*nointerp*/, false/*indented*/);
190
- }
191
- "<<~" {MaybeWhsp} {MaybeHereMarkers} [\'\\]{Identifier} {
192
- h.hop(yytext(), true/*nointerp*/, true/*indented*/);
180
+ "<<"[~]? {MaybeWhsp} ({HereEOF1}|{HereEOF2}|{HereEOF3}|{HereEOF4}) {
181
+ h.hop(yytext());
193
182
}
194
183
195
184
{Identifier} {
@@ -293,7 +282,8 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
293
282
}
294
283
295
284
// FORMAT start
296
- ^ {MaybeWhsp} "format" ({WhiteSpace} {Identifier})? {MaybeWhsp} "=" {
285
+ ^ {MaybeWhsp} "format" ({WhiteSpace} {Identifier})? {MaybeWhsp} "=" /
286
+ {MaybeWhsp}{EOL} {
297
287
pushState(FMT);
298
288
if (takeAllContent()) {
299
289
// split off the " format" as `initial' for keyword processing
@@ -399,11 +389,13 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
399
389
}
400
390
}
401
391
402
- <QUO, QUOxN, QUOxL, QUOxLxN> {
392
+ <FMT, QUO, QUOxN, QUOxL, QUOxLxN, HERE, HEREin > {
403
393
\\ \S {
404
394
takeNonword(yytext());
405
395
}
396
+ }
406
397
398
+ <QUO, QUOxN, QUOxL, QUOxLxN> {
407
399
{Quo0} |
408
400
\w {
409
401
String capture = yytext();
@@ -449,6 +441,11 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
449
441
takeNonword(yytext());
450
442
take(Consts.ZS);
451
443
}
444
+
445
+ {WhiteSpace}{EOL} |
446
+ {EOL} {
447
+ doStartNewLine();
448
+ }
452
449
}
453
450
454
451
<FMT> {
@@ -475,24 +472,32 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
475
472
<SCOMMENT> {
476
473
{WhiteSpace}{EOL} |
477
474
{EOL} {
475
+ String capture = yytext();
476
+ yypushback(capture.length());
478
477
yypop();
479
478
take(Consts.ZS);
480
- doStartNewLine();
481
479
}
482
480
}
483
481
482
+ <YYINITIAL, INTRA> {
483
+ {WhiteSpace}{EOL} |
484
+ {EOL} {
485
+ String capture = yytext();
486
+ if (h.maybeStartHere()) {
487
+ yypushback(capture.length());
488
+ } else {
489
+ doStartNewLine();
490
+ }
491
+ }
492
+ }
493
+
484
494
<YYINITIAL, INTRA, SCOMMENT, POD, FMT, QUO, QUOxN, QUOxL, QUOxLxN,
485
495
HERE, HERExN, HEREin, HEREinxN> {
486
496
[&<>\"\'] {
487
497
maybeIntraState();
488
498
takeNonword(yytext());
489
499
}
490
500
491
- {WhiteSpace}{EOL} |
492
- {EOL} {
493
- doStartNewLine();
494
- }
495
-
496
501
// Only one whitespace char at a time or else {WxSigils} can be broken
497
502
{WhspChar} {
498
503
take(yytext());
0 commit comments