Skip to content

Commit b7711d9

Browse files
committed
[GR-51523] [GR-51674] TRegex bug fixes.
PullRequest: graal/16783
2 parents 32d5155 + a0dd346 commit b7711d9

File tree

5 files changed

+220
-131
lines changed

5 files changed

+220
-131
lines changed

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/JsTests.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,4 +268,11 @@ public void gr48586() {
268268
public void gr50807() {
269269
test("(?<=%b{1,4}?)foo", "", "%bbbbfoo", 0, true, 5, 8);
270270
}
271+
272+
@Test
273+
public void gr51523() {
274+
test("(?:^|\\.?)([A-Z])", "g", "desktopBrowser", 0, true, 7, 8, 7, 8);
275+
test("(?:^|\\.?)([A-Z])", "g", "locationChanged", 0, true, 8, 9, 8, 9);
276+
test("(?:^|\\.?)([A-Z]|(?<=[a-z])\\d(?=\\d+))", "g", "helloWorld", 0, true, 5, 6, 5, 6);
277+
}
271278
}

regex/src/com.oracle.truffle.regex.test/src/com/oracle/truffle/regex/tregex/test/OracleDBTests.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,12 @@ public void testSpecialGroups() {
229229
}
230230
}
231231

232+
@Test
233+
public void testNestedQuantifier() {
234+
test("(a*|b)*", "", "aaaaaabaaaaaaaaaaaaaabb", 0, true, 0, 6, 6, 6);
235+
test("a((b?)*)*", "", "ab", 0, true, 0, 2, 2, 2, 2, 2);
236+
}
237+
232238
@Test
233239
public void generatedTests() {
234240
/* GENERATED CODE BEGIN - KEEP THIS MARKER FOR AUTOMATIC UPDATES */

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/ASTStepVisitor.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ public ASTStep step(NFAState expandState) {
144144

145145
@Override
146146
protected void visit(RegexASTNode target) {
147+
assert noPredicatesInGuards(getQuantifierGuardsOnPath());
147148
ASTSuccessor successor = new ASTSuccessor();
148149
ASTTransition transition = new ASTTransition(ast.getLanguage());
149150
transition.setGroupBoundaries(getGroupBoundaries());
@@ -183,6 +184,19 @@ protected void visit(RegexASTNode target) {
183184
stepCur.addSuccessor(successor);
184185
}
185186

187+
private static boolean noPredicatesInGuards(QuantifierGuard[] quantifierGuards) {
188+
// Normalization should remove any exitZeroWidth, escapeZeroWidth, checkGroupMatched and
189+
// checkGroupNotMatched guards. The effect of updateCG guards is implemented using
190+
// getGroupBoundaries, and enterZeroWidth guards have no effect when exitZeroWidth and
191+
// escapeZeroWidth are removed already. Other guards shouldn't be used when building a DFA.
192+
for (QuantifierGuard guard : quantifierGuards) {
193+
if (guard.getKind() != QuantifierGuard.Kind.updateCG && guard.getKind() != QuantifierGuard.Kind.enterZeroWidth) {
194+
return false;
195+
}
196+
}
197+
return true;
198+
}
199+
186200
@Override
187201
protected void enterLookAhead(LookAheadAssertion assertion) {
188202
TBitSet currentMatchedConditionGroups = getCurrentMatchedConditionGroups();

regex/src/com.oracle.truffle.regex/src/com/oracle/truffle/regex/tregex/nfa/QuantifierGuard.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
4545
import com.oracle.truffle.regex.tregex.parser.Token.Quantifier;
4646
import com.oracle.truffle.regex.tregex.parser.ast.ConditionalBackReferenceGroup;
47+
import com.oracle.truffle.regex.tregex.parser.flavors.RegexFlavor;
4748

4849
import java.util.Objects;
4950

@@ -85,15 +86,17 @@ public enum Kind {
8586
enterZeroWidth,
8687
/**
8788
* Transition is leaving a quantified expression that may match the empty string. Check if
88-
* the current index is greater than the saved index. In the case of Ruby, also check if any
89-
* capture groups were modified.
89+
* the current index is greater than the saved index. In the case of flavors in which
90+
* {@link RegexFlavor#emptyChecksMonitorCaptureGroups()} holds, also check if any capture
91+
* groups were modified.
9092
*/
9193
exitZeroWidth,
9294
/**
9395
* Transition is leaving a quantified expression that may match the empty string and it is
94-
* about to continue to what follows the loop. This is only possible in Ruby and only when
95-
* the last iteration of the quantiifed expression fails the empty check (the check for the
96-
* index and the state of capture groups tested by {@link #exitZeroWidth}).
96+
* about to continue to what follows the loop. This is possible in flavors in which
97+
* {@link RegexFlavor#failingEmptyChecksDontBacktrack()} holds, and only when the last
98+
* iteration of the quantified expression fails the empty check (the check for the index and
99+
* the state of capture groups tested by {@link #exitZeroWidth}).
97100
*/
98101
escapeZeroWidth,
99102
/**
@@ -111,8 +114,9 @@ public enum Kind {
111114
exitEmptyMatch,
112115
/**
113116
* Transition is passing a capture group boundary. We need this information in order to
114-
* implement the empty check test in {@link #exitZeroWidth}, which, in the case of Ruby,
115-
* also needs to monitor the state of capture groups in between {@link #enterZeroWidth} and
117+
* implement the empty check test in {@link #exitZeroWidth}, which, in the case of flavors
118+
* in which {@link RegexFlavor#emptyChecksMonitorCaptureGroups()} holds, also needs to
119+
* monitor the state of capture groups in between {@link #enterZeroWidth} and
116120
* {@link #exitZeroWidth}.
117121
*/
118122
updateCG,

0 commit comments

Comments
 (0)