@@ -32,7 +32,7 @@ MaybeWhsp = {WhspChar}*
3232EOL = \r|\n|\r\n
3333Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
3434Sigils = ("$" | "@" | "%" | "&" | "*")
// Non-word characters other than the sigil characters, the quote characters
// and '#'. The revised definition additionally excludes CR, LF and the space
// character.
35- WxSigils = [[\W]--[\$\@\%\&\*\"\'\`\#]]
35+ WxSigils = [[\W]--[\$\@\%\&\*\"\'\`\#\r\n ]]
3636
3737// Perl special identifiers (four of six from
3838// https://perldoc.perl.org/perldata.html#Identifier-parsing):
@@ -100,35 +100,38 @@ TRhash = "tr"\#
100100TRpunc = "tr" {MaybeWhsp} {Quo0xHash}
101101TRword = "tr" {WhiteSpace} \w
102102
103- HereContinuation = \,{MaybeWhsp} "<<"\~? {MaybeWhsp}
104- MaybeHereMarkers = ([\"\'\`\\]?{Identifier} [^\n\r]* {HereContinuation})?
// Here-document terminator forms: text on a single line delimited by double
// quotes (1), by backticks (2) or by single quotes (3); or an identifier with
// an optional leading backslash (4).
103+ HereEOF1 = [\"][^\r\n\"]*[\"]
104+ HereEOF2 = [\`][^\r\n\`]*[\`]
105+ HereEOF3 = [\'][^\r\n\']*[\']
106+ HereEOF4 = [\\]?{Identifier}
105107
106108//
107109// Track some keywords that can be used to identify heuristically a possible
108110// beginning of the shortcut syntax, //, for m//. Also include any perlfunc
109- // that takes /PATTERN/ -- which is just "split" . Heuristics using punctuation
110- // are defined inline later in some rules.
111+ // that takes /PATTERN/. Heuristics using punctuation are defined inline later
112+ // in some rules.
111113//
112114Mwords_1 = ("eq" | "ne" | "le" | "ge" | "lt" | "gt" | "cmp")
113115Mwords_2 = ("if" | "unless" | "or" | "and" | "not")
114- Mwords_3 = ("split")
116+ Mwords_3 = ("split" | "grep" )
115117Mwords = ({Mwords_1} | {Mwords_2} | {Mwords_3})
116118
117119Mpunc1YYIN = [\(\!]
118- Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
120+ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>"|"=>" )
119121
120122//
121123// There are two dimensions to quoting: "link"-or-not and "interpolate"-or-not.
122124// Unfortunately, we cannot control the %state values, so we have to declare
123125// a cross-product of states. (Technically, state values are not guaranteed to
124126// be unique by jflex, but states that do not have identical rules will have
125- // different values. The following four "QUO" states satisfy this difference
126- // criterion. Likewise with the four "HERE" states.)
127+ // different values. The four "QUO" states below satisfy this difference
128+ // criterion, as do the four "HERE" states.)
127129//
128130// YYINITIAL : nothing yet parsed or just after a non-quoted [;{}]
129131// INTRA : saw content from YYINITIAL but not yet other state or [;{}]
130132// SCOMMENT : single-line comment
131133// POD : Perl Plain-Old-Documentation
134+ // FMT : an output record format
132135// QUO : quote-like that is OK to match paths|files|URLs|e-mails
133136// QUOxN : "" but with no interpolation
134137// QUOxL : quote-like that is not OK to match paths|files|URLs|e-mails
@@ -139,20 +142,21 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
139142// HERExN : Here-docs with no interpolation
140143// HEREin : Indented Here-docs
141144// HEREinxN : Indented Here-docs with no interpolation
142- // FMT : an output record format
143145//
144- %state INTRA SCOMMENT POD FMT QUO QUOxN QUOxL QUOxLxN QM HERE HERExN HEREin HEREinxN
146+ %state INTRA SCOMMENT POD FMT
147+ %state QUO QUOxN QUOxL QUOxLxN QM
148+ %state HERE HERExN HEREin HEREinxN
145149
146150%%
// Non-indented here-doc states: an identifier at the start of a line, followed
// only by optional whitespace and a line ending (lookahead, not consumed), is
// offered to the helper as a possible terminator. The revised action ignores
// the helper's return value and no longer jumps to YYINITIAL here.
// NOTE(review): presumably h.maybeEndHere() now performs any state change
// itself -- confirm against the helper.
147151<HERE, HERExN> {
148152 ^ {Identifier} / {MaybeWhsp}{EOL} {
149- if ( h.maybeEndHere(yytext())) yyjump(YYINITIAL );
153+ h.maybeEndHere(yytext());
150154 }
151155}
152156
// Indented here-doc states: as for the non-indented states, but the candidate
// terminator may be preceded by whitespace, which is included in the text
// passed to the helper. The revised action ignores the helper's return value
// and no longer jumps to YYINITIAL here.
// NOTE(review): presumably h.maybeEndHere() now performs any state change
// itself -- confirm against the helper.
153157<HEREin, HEREinxN> {
154158 ^ {MaybeWhsp} {Identifier} / {MaybeWhsp}{EOL} {
155- if ( h.maybeEndHere(yytext())) yyjump(YYINITIAL );
159+ h.maybeEndHere(yytext());
156160 }
157161}
158162
@@ -173,23 +177,8 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
173177 takeNonword(yytext());
174178 }
175179
176- // Following are rules for Here-documents. Stacked multiple here-docs are
177- // recognized, but not fully supported, as only the interpolation setting
178- // of the first marker will apply to all sections. (The final, second HERE
179- // quoting character is not demanded, as it is superfluous for the needs of
180- // xref lexing; and leaving it off simplifies parsing.)
181-
182- "<<" {MaybeWhsp} {MaybeHereMarkers} [\"\`]?{Identifier} {
183- h.hop(yytext(), false/*nointerp*/, false/*indented*/);
184- }
185- "<<~" {MaybeWhsp} {MaybeHereMarkers} [\"\`]?{Identifier} {
186- h.hop(yytext(), false/*nointerp*/, true/*indented*/);
187- }
188- "<<" {MaybeWhsp} {MaybeHereMarkers} [\'\\]{Identifier} {
189- h.hop(yytext(), true/*nointerp*/, false/*indented*/);
190- }
191- "<<~" {MaybeWhsp} {MaybeHereMarkers} [\'\\]{Identifier} {
192- h.hop(yytext(), true/*nointerp*/, true/*indented*/);
// Here-document start. Per perlop, whitespace between "<<" (or "<<~") and the
// terminator is only permitted when the terminator is explicitly quoted. With
// a bare or backslashed identifier, `<< FOO` is a left shift (e.g. `1 << FOO`)
// and must not be taken for a here-doc, so {MaybeWhsp} is accepted only ahead
// of the quoted forms. The whole match is handed to the helper.
180+ "<<"[~]? ({MaybeWhsp} ({HereEOF1}|{HereEOF2}|{HereEOF3}) | {HereEOF4}) {
181+ h.hop(yytext());
193182 }
194183
195184 {Identifier} {
@@ -293,7 +282,8 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
293282 }
294283
295284 // FORMAT start
296- ^ {MaybeWhsp} "format" ({WhiteSpace} {Identifier})? {MaybeWhsp} "=" {
285+ ^ {MaybeWhsp} "format" ({WhiteSpace} {Identifier})? {MaybeWhsp} "=" /
286+ {MaybeWhsp}{EOL} {
297287 pushState(FMT);
298288 if (takeAllContent()) {
299289 // split off the " format" as `initial' for keyword processing
@@ -399,11 +389,13 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
399389 }
400390}
401391
// A backslash followed by any non-whitespace character is passed as a unit to
// takeNonword(). The revised rule group applies this in FMT, HERE and HEREin
// in addition to the four QUO states (the xN here-doc states are not listed),
// and is closed off as a group of its own.
402- <QUO, QUOxN, QUOxL, QUOxLxN> {
392+ <FMT, QUO, QUOxN, QUOxL, QUOxLxN, HERE, HEREin > {
403393 \\ \S {
404394 takeNonword(yytext());
405395 }
396+ }
406397
398+ <QUO, QUOxN, QUOxL, QUOxLxN> {
407399 {Quo0} |
408400 \w {
409401 String capture = yytext();
@@ -449,6 +441,11 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
449441 takeNonword(yytext());
450442 take(Consts.ZS);
451443 }
444+
445+ {WhiteSpace}{EOL} |
446+ {EOL} {
447+ doStartNewLine();
448+ }
452449}
453450
454451<FMT> {
@@ -475,24 +472,32 @@ Mpunc2IN = ([!=]"~" | [\:\?\=\+\-\<\>] | "=="|"!="|"<="|">="|"<=>")
// End of a single-line comment. The matched line ending (with any preceding
// whitespace) is pushed back in full so that it is not consumed here, then
// yypop() is called and Consts.ZS is emitted. The revised action no longer
// calls doStartNewLine() itself.
// NOTE(review): presumably yypop() restores the state that was active before
// the comment, and that state's own EOL rule then re-matches the pushed-back
// line ending and starts the new line -- confirm that every state a comment
// can be entered from has such a rule.
475472<SCOMMENT> {
476473 {WhiteSpace}{EOL} |
477474 {EOL} {
475+ String capture = yytext();
476+ yypushback(capture.length());
478477 yypop();
479478 take(Consts.ZS);
480- doStartNewLine();
481479 }
482480}
483481
// Line endings in YYINITIAL and INTRA. The matched text is captured before the
// helper is consulted. If h.maybeStartHere() returns true, the whole match is
// pushed back unconsumed; otherwise a new line is started.
// NOTE(review): presumably a true result means a pending here-document begins
// now and the helper has already switched to a HERE state, whose rules then
// see the pushed-back line ending -- confirm against the helper.
482+ <YYINITIAL, INTRA> {
483+ {WhiteSpace}{EOL} |
484+ {EOL} {
485+ String capture = yytext();
486+ if (h.maybeStartHere()) {
487+ yypushback(capture.length());
488+ } else {
489+ doStartNewLine();
490+ }
491+ }
492+ }
493+
484494<YYINITIAL, INTRA, SCOMMENT, POD, FMT, QUO, QUOxN, QUOxL, QUOxLxN,
485495 HERE, HERExN, HEREin, HEREinxN> {
486496 [&<>\"\'] {
487497 maybeIntraState();
488498 takeNonword(yytext());
489499 }
490500
491- {WhiteSpace}{EOL} |
492- {EOL} {
493- doStartNewLine();
494- }
495-
496501 // Only one whitespace char at a time or else {WxSigils} can be broken
497502 {WhspChar} {
498503 take(yytext());
0 commit comments