Skip to content

Commit 335ba6d

Browse files
committed
[GR-37496] Only run MarkLookBehindEntriesVisitor when regexp can transform to DFA.
PullRequest: graal/11399
2 parents 4b926c2 + 53a722e commit 335ba6d

File tree

4 files changed: 59 additions (+59) and 55 deletions (-55).

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
*/
4141
package com.oracle.truffle.regex.tregex.test;
4242

43+
import com.oracle.truffle.regex.tregex.TRegexOptions;
4344
import org.junit.Test;
4445

4546
public class JsTests extends RegexTestBase {
@@ -158,4 +159,9 @@ public void gr21421() {
158159
test("(?=(\\3?)|([^\\W\uaa3bt-\ua4b9]){4294967296}|(?=[^]+[\\n-\u4568\\uD3D5\\u00ca-\\u00fF]*)*|(?:\\2|^)?.){33554431}(?:(?:\\S{1,}(?:\\b|\\w{1,}))(?:\\2?)+){4,}", "im",
159160
"\u4568\u4568\u4568\u4568________\\xee0000", 0, true, 0, 20, 0, 0, -1, -1);
160161
}
162+
163+
@Test
164+
public void gr37496() {
165+
test("(?:(?:" + "a".repeat(TRegexOptions.TRegexMaxParseTreeSizeForDFA) + ")?(?<=a))+", "", "", 0, false);
166+
}
161167
}

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/TRegexCompilationRequest.java

Lines changed: 3 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,6 @@
4040
*/
4141
package com.oracle.truffle.regex.tregex;
4242

43-
import java.util.StringJoiner;
4443
import java.util.logging.Level;
4544

4645
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
@@ -174,7 +173,7 @@ private RegexExecNode compileInternal() {
174173
Loggers.LOG_MATCHING_STRATEGY.fine(() -> "using literal matcher " + literal.getClass().getSimpleName());
175174
return literal;
176175
}
177-
if (canTransformToDFA(ast)) {
176+
if (ast.canTransformToDFA()) {
178177
try {
179178
createNFA();
180179
if (nfa.isDead()) {
@@ -187,7 +186,7 @@ private RegexExecNode compileInternal() {
187186
Loggers.LOG_MATCHING_STRATEGY.fine(() -> "NFA generator bailout: " + e.getReason() + ", using back-tracking matcher");
188187
}
189188
} else {
190-
Loggers.LOG_MATCHING_STRATEGY.fine(() -> "using back-tracking matcher, reason: " + canTransformToDFAFailureReason(ast));
189+
Loggers.LOG_MATCHING_STRATEGY.fine(() -> "using back-tracking matcher, reason: " + ast.canTransformToDFAFailureReason());
191190
}
192191
return new TRegexExecNode(ast, compileBacktrackingExecutor());
193192
}
@@ -260,63 +259,13 @@ TRegexExecNode.LazyCaptureGroupRegexSearchNode compileLazyDFAExecutor(TRegexExec
260259
TRegexDFAExecutorNode compileEagerDFAExecutor() {
261260
createAST();
262261
RegexProperties properties = ast.getProperties();
263-
assert canTransformToDFA(ast);
262+
assert ast.canTransformToDFA();
264263
assert properties.hasCaptureGroups() || properties.hasLookAroundAssertions();
265264
assert !ast.getRoot().isDead();
266265
createNFA();
267266
return createDFAExecutor(nfa, true, true, true, false, ast.getOptions().getFlavor().usesLastGroupResultField());
268267
}
269268

270-
private static boolean canTransformToDFA(RegexAST ast) throws UnsupportedRegexException {
271-
RegexProperties p = ast.getProperties();
272-
boolean couldCalculateLastGroup = !ast.getOptions().getFlavor().usesLastGroupResultField() || !p.hasCaptureGroupsInLookAroundAssertions();
273-
return ast.getNumberOfNodes() <= TRegexOptions.TRegexMaxParseTreeSizeForDFA &&
274-
ast.getNumberOfCaptureGroups() <= TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA &&
275-
!(ast.getRoot().hasBackReferences() ||
276-
p.hasLargeCountedRepetitions() ||
277-
p.hasNegativeLookAheadAssertions() ||
278-
p.hasNonLiteralLookBehindAssertions() ||
279-
p.hasNegativeLookBehindAssertions() ||
280-
ast.getRoot().hasQuantifiers() ||
281-
p.hasAtomicGroups()) &&
282-
couldCalculateLastGroup;
283-
}
284-
285-
@TruffleBoundary
286-
private static String canTransformToDFAFailureReason(RegexAST ast) throws UnsupportedRegexException {
287-
RegexProperties p = ast.getProperties();
288-
StringJoiner sb = new StringJoiner(", ");
289-
if (ast.getNumberOfNodes() > TRegexOptions.TRegexMaxParseTreeSizeForDFA) {
290-
sb.add(String.format("Parser tree has too many nodes: %d (threshold: %d)", ast.getNumberOfNodes(), TRegexOptions.TRegexMaxParseTreeSizeForDFA));
291-
}
292-
if (ast.getNumberOfCaptureGroups() > TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA) {
293-
sb.add(String.format("regex has too many capture groups: %d (threshold: %d)", ast.getNumberOfCaptureGroups(), TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA));
294-
}
295-
if (ast.getRoot().hasBackReferences()) {
296-
sb.add("regex has back-references");
297-
}
298-
if (p.hasLargeCountedRepetitions()) {
299-
sb.add(String.format("regex has large counted repetitions (threshold: %d for single CC, %d for groups)",
300-
TRegexOptions.TRegexQuantifierUnrollThresholdSingleCC, TRegexOptions.TRegexQuantifierUnrollThresholdGroup));
301-
}
302-
if (p.hasNegativeLookAheadAssertions()) {
303-
sb.add("regex has negative look-ahead assertions");
304-
}
305-
if (p.hasNegativeLookBehindAssertions()) {
306-
sb.add("regex has negative look-behind assertions");
307-
}
308-
if (p.hasNonLiteralLookBehindAssertions()) {
309-
sb.add("regex has non-literal look-behind assertions");
310-
}
311-
if (ast.getRoot().hasQuantifiers()) {
312-
sb.add("could not unroll all quantifiers");
313-
}
314-
if (p.hasAtomicGroups()) {
315-
sb.add("regex has atomic groups");
316-
}
317-
return sb.toString();
318-
}
319-
320269
private void createAST() {
321270
phaseStart("Parser");
322271
try {

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/RegexASTPostProcessor.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ public void prepareForDFA() {
9393
CalcASTPropsVisitor.run(ast);
9494
ast.createPrefix();
9595
InitIDVisitor.init(ast);
96-
if (!properties.hasNonLiteralLookBehindAssertions() && !ast.getRoot().hasBackReferences() && !properties.hasLargeCountedRepetitions() && !properties.hasAtomicGroups()) {
96+
if (ast.canTransformToDFA()) {
9797
new MarkLookBehindEntriesVisitor(ast).run();
9898
}
9999
checkInnerLiteral();

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/parser/ast/RegexAST.java

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import java.util.ArrayList;
4444
import java.util.Collection;
4545
import java.util.List;
46+
import java.util.StringJoiner;
4647
import java.util.stream.Stream;
4748

4849
import org.graalvm.collections.EconomicMap;
@@ -615,6 +616,54 @@ public InnerLiteral extractInnerLiteral() {
615616
return new InnerLiteral(literal.materialize(), hasMask ? mask.materialize() : null, root.getFirstAlternative().get(literalStart).getMaxPath() - 1);
616617
}
617618

619+
public boolean canTransformToDFA() {
620+
boolean couldCalculateLastGroup = !getOptions().getFlavor().usesLastGroupResultField() || !getProperties().hasCaptureGroupsInLookAroundAssertions();
621+
return getNumberOfNodes() <= TRegexOptions.TRegexMaxParseTreeSizeForDFA &&
622+
getNumberOfCaptureGroups() <= TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA &&
623+
!(getRoot().hasBackReferences() ||
624+
getProperties().hasLargeCountedRepetitions() ||
625+
getProperties().hasNegativeLookAheadAssertions() ||
626+
getProperties().hasNonLiteralLookBehindAssertions() ||
627+
getProperties().hasNegativeLookBehindAssertions() ||
628+
getRoot().hasQuantifiers() ||
629+
getProperties().hasAtomicGroups()) &&
630+
couldCalculateLastGroup;
631+
}
632+
633+
@TruffleBoundary
634+
public String canTransformToDFAFailureReason() {
635+
StringJoiner sb = new StringJoiner(", ");
636+
if (getNumberOfNodes() > TRegexOptions.TRegexMaxParseTreeSizeForDFA) {
637+
sb.add(String.format("Parser tree has too many nodes: %d (threshold: %d)", getNumberOfNodes(), TRegexOptions.TRegexMaxParseTreeSizeForDFA));
638+
}
639+
if (getNumberOfCaptureGroups() > TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA) {
640+
sb.add(String.format("regex has too many capture groups: %d (threshold: %d)", getNumberOfCaptureGroups(), TRegexOptions.TRegexMaxNumberOfCaptureGroupsForDFA));
641+
}
642+
if (getRoot().hasBackReferences()) {
643+
sb.add("regex has back-references");
644+
}
645+
if (getProperties().hasLargeCountedRepetitions()) {
646+
sb.add(String.format("regex has large counted repetitions (threshold: %d for single CC, %d for groups)",
647+
TRegexOptions.TRegexQuantifierUnrollThresholdSingleCC, TRegexOptions.TRegexQuantifierUnrollThresholdGroup));
648+
}
649+
if (getProperties().hasNegativeLookAheadAssertions()) {
650+
sb.add("regex has negative look-ahead assertions");
651+
}
652+
if (getProperties().hasNegativeLookBehindAssertions()) {
653+
sb.add("regex has negative look-behind assertions");
654+
}
655+
if (getProperties().hasNonLiteralLookBehindAssertions()) {
656+
sb.add("regex has non-literal look-behind assertions");
657+
}
658+
if (getRoot().hasQuantifiers()) {
659+
sb.add("could not unroll all quantifiers");
660+
}
661+
if (getProperties().hasAtomicGroups()) {
662+
sb.add("regex has atomic groups");
663+
}
664+
return sb.toString();
665+
}
666+
618667
@TruffleBoundary
619668
@Override
620669
public JsonValue toJson() {

0 commit comments

Comments
 (0)