github
diff --git a/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme
Lines changed: 1193 additions & 0 deletions b/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/old.dbscheme
Lines changed: 1193 additions & 0 deletions
diff --git a/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme
Lines changed: 1190 additions & 0 deletions b/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/semmlecode.javascript.dbscheme
Lines changed: 1190 additions & 0 deletions
diff --git a/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties
Lines changed: 2 additions & 0 deletions b/‎javascript/downgrades/5b5db607d20c7b449cef2d1c926b24d77c69bebb/upgrade.properties
Lines changed: 2 additions & 0 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java
Lines changed: 26 additions & 0 deletions b/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassIntersection.java
Lines changed: 26 additions & 0 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java
Lines changed: 28 additions & 0 deletions b/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassQuotedString.java
Lines changed: 28 additions & 0 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java
Lines changed: 26 additions & 0 deletions b/‎javascript/extractor/src/com/semmle/js/ast/regexp/CharacterClassSubtraction.java
Lines changed: 26 additions & 0 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java
Lines changed: 6 additions & 0 deletions b/‎javascript/extractor/src/com/semmle/js/ast/regexp/Visitor.java
Lines changed: 6 additions & 0 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java
Lines changed: 1 addition & 1 deletion b/‎javascript/extractor/src/com/semmle/js/extractor/ASTExtractor.java
Lines changed: 1 addition & 1 deletion
diff --git a/‎javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java
Lines changed: 34 additions & 2 deletions b/‎javascript/extractor/src/com/semmle/js/extractor/RegExpExtractor.java
Lines changed: 34 additions & 2 deletions
diff --git a/‎javascript/extractor/src/com/semmle/js/parser/RegExpParser.java
Lines changed: 95 additions & 2 deletions b/‎javascript/extractor/src/com/semmle/js/parser/RegExpParser.java
Lines changed: 95 additions & 2 deletions
@@ -0,0 +1,2 @@
+description: Add support for quoted string, intersection and subtraction
+compatibility: backwards
@@ -0,0 +1,26 @@
+package com.semmle.js.ast.regexp;
+
+import com.semmle.js.ast.SourceLocation;
+import java.util.List;
+
+/**
+ * An intersection of character class operands, written with the {@code &&} operator.
+ *
+ * <p>This syntax is only recognised in regular expressions that use the {@code v} flag. For
+ * example, {@code [[abc]&&[ab]&&[b]]} matches the character {@code b} only.
+ */
+public class CharacterClassIntersection extends RegExpTerm {
+  /** The operands of the intersection, in source order. */
+  private final List<RegExpTerm> elements;
+
+  /**
+   * @param loc the source location of this term
+   * @param elements the operands being intersected, in source order
+   */
+  public CharacterClassIntersection(SourceLocation loc, List<RegExpTerm> elements) {
+    super(loc, "CharacterClassIntersection");
+    this.elements = elements;
+  }
+
+  /** Returns the operands of this intersection. */
+  public List<RegExpTerm> getElements() {
+    return this.elements;
+  }
+
+  @Override
+  public void accept(Visitor v) {
+    v.visit(this);
+  }
+}
@@ -0,0 +1,28 @@
+package com.semmle.js.ast.regexp;
+
+import com.semmle.js.ast.SourceLocation;
+
+/**
+ * A quoted string {@code \q{...}} inside a character class of a regular expression.
+ *
+ * <p>This syntax belongs to the {@code v}-flag (unicode sets) mode of ECMAScript regular
+ * expressions and is only recognised when that flag is present.
+ *
+ * <p>Example: {@code [\q{abc|def}]} is a character class that matches either the string "abc" or
+ * the string "def". Inside the braces, {@code |} is the only operator: it separates the
+ * alternative strings.
+ */
+public class CharacterClassQuotedString extends RegExpTerm {
+  /** The contents between the braces. */
+  private final RegExpTerm term;
+
+  /**
+   * @param loc the source location of this term
+   * @param term the term representing the contents of the quoted string
+   */
+  public CharacterClassQuotedString(SourceLocation loc, RegExpTerm term) {
+    super(loc, "CharacterClassQuotedString");
+    this.term = term;
+  }
+
+  @Override
+  public void accept(Visitor v) {
+    v.visit(this);
+  }
+
+  /** Returns the term representing the contents of the quoted string. */
+  public RegExpTerm getTerm() {
+    return this.term;
+  }
+}
@@ -0,0 +1,26 @@
+package com.semmle.js.ast.regexp;
+
+import com.semmle.js.ast.SourceLocation;
+import java.util.List;
+
+/**
+ * A subtraction of character class operands, written with the {@code --} operator.
+ *
+ * <p>This syntax is only recognised in regular expressions that use the {@code v} flag. For
+ * example, {@code [[abc]--[a]--[b]]} matches the character {@code c} only.
+ */
+public class CharacterClassSubtraction extends RegExpTerm {
+  /** The operands of the subtraction, in source order. */
+  private final List<RegExpTerm> elements;
+
+  /**
+   * @param loc the source location of this term
+   * @param elements the operands of the subtraction, in source order
+   */
+  public CharacterClassSubtraction(SourceLocation loc, List<RegExpTerm> elements) {
+    super(loc, "CharacterClassSubtraction");
+    this.elements = elements;
+  }
+
+  /** Returns the operands of this subtraction. */
+  public List<RegExpTerm> getElements() {
+    return this.elements;
+  }
+
+  @Override
+  public void accept(Visitor v) {
+    v.visit(this);
+  }
+}
@@ -61,4 +61,10 @@ public interface Visitor {
   public void visit(ZeroWidthNegativeLookbehind nd);
 
   public void visit(UnicodePropertyEscape nd);
+
+  public void visit(CharacterClassQuotedString nd);
+
+  public void visit(CharacterClassIntersection nd);
+
+  public void visit(CharacterClassSubtraction nd);
 }
@@ -600,7 +600,7 @@ public Label visit(Literal nd, Context c) {
         SourceMap sourceMap =
             SourceMap.legacyWithStartPos(
                 SourceMap.fromString(nd.getRaw()).offsetBy(0, offsets), startPos);
-        regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false);
+        regexpExtractor.extract(source.substring(1, source.lastIndexOf('/')), sourceMap, nd, false, source.substring(source.lastIndexOf('/'), source.length()));
       } else if (nd.isStringLiteral()
           && !c.isInsideType()
           && nd.getRaw().length() < 1000
 
@@ -10,7 +10,9 @@
 import com.semmle.js.ast.regexp.Caret;
 import com.semmle.js.ast.regexp.CharacterClass;
 import com.semmle.js.ast.regexp.CharacterClassEscape;
+import com.semmle.js.ast.regexp.CharacterClassQuotedString;
 import com.semmle.js.ast.regexp.CharacterClassRange;
+import com.semmle.js.ast.regexp.CharacterClassSubtraction;
 import com.semmle.js.ast.regexp.Constant;
 import com.semmle.js.ast.regexp.ControlEscape;
 import com.semmle.js.ast.regexp.ControlLetter;
@@ -22,6 +24,7 @@
 import com.semmle.js.ast.regexp.Group;
 import com.semmle.js.ast.regexp.HexEscapeSequence;
 import com.semmle.js.ast.regexp.IdentityEscape;
+import com.semmle.js.ast.regexp.CharacterClassIntersection;
 import com.semmle.js.ast.regexp.Literal;
 import com.semmle.js.ast.regexp.NamedBackReference;
 import com.semmle.js.ast.regexp.NonWordBoundary;
@@ -92,6 +95,9 @@ public RegExpExtractor(TrapWriter trapwriter, LocationManager locationManager) {
     termkinds.put("ZeroWidthPositiveLookbehind", 25);
     termkinds.put("ZeroWidthNegativeLookbehind", 26);
     termkinds.put("UnicodePropertyEscape", 27);
+    termkinds.put("CharacterClassQuotedString", 28);
+    termkinds.put("CharacterClassIntersection", 29);
+    termkinds.put("CharacterClassSubtraction", 30);
   }
 
   private static final String[] errmsgs =
@@ -344,10 +350,32 @@ public void visit(CharacterClassRange nd) {
       visit(nd.getLeft(), lbl, 0);
       visit(nd.getRight(), lbl, 1);
     }
+
+    @Override
+    public void visit(CharacterClassQuotedString nd) {
+      // Emit the quoted string itself, then its contents as the only child (index 0).
+      Label quotedStringLabel = extractTerm(nd, parent, idx);
+      RegExpTerm contents = nd.getTerm();
+      visit(contents, quotedStringLabel, 0);
+    }
+
+    @Override
+    public void visit(CharacterClassIntersection nd) {
+      // Emit the intersection, then each operand as a child numbered in source order.
+      Label intersectionLabel = extractTerm(nd, parent, idx);
+      int childIndex = 0;
+      for (RegExpTerm operand : nd.getElements()) {
+        visit(operand, intersectionLabel, childIndex);
+        childIndex++;
+      }
+    }
+
+    @Override
+    public void visit(CharacterClassSubtraction nd) {
+      // Emit the subtraction, then each operand as a child numbered in source order.
+      Label subtractionLabel = extractTerm(nd, parent, idx);
+      int childIndex = 0;
+      for (RegExpTerm operand : nd.getElements()) {
+        visit(operand, subtractionLabel, childIndex);
+        childIndex++;
+      }
+    }
   }
 
-  public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) {
-    Result res = parser.parse(src);
+  public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing, String flags) {
+    Result res = parser.parse(src, flags);
     if (isSpeculativeParsing && res.getErrors().size() > 0) {
       return;
     }
@@ -364,4 +392,8 @@ public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpec
       this.emitLocation(err, lbl);
     }
   }
+
+  /**
+   * Extract a regular expression for which no flags are known.
+   *
+   * <p>Delegates to the five-argument overload with an empty flag string.
+   */
+  public void extract(String src, SourceMap sourceMap, Node parent, boolean isSpeculativeParsing) {
+    final String noFlags = "";
+    extract(src, sourceMap, parent, isSpeculativeParsing, noFlags);
+  }
 }
@@ -6,7 +6,9 @@
 import com.semmle.js.ast.regexp.Caret;
 import com.semmle.js.ast.regexp.CharacterClass;
 import com.semmle.js.ast.regexp.CharacterClassEscape;
+import com.semmle.js.ast.regexp.CharacterClassQuotedString;
 import com.semmle.js.ast.regexp.CharacterClassRange;
+import com.semmle.js.ast.regexp.CharacterClassSubtraction;
 import com.semmle.js.ast.regexp.Constant;
 import com.semmle.js.ast.regexp.ControlEscape;
 import com.semmle.js.ast.regexp.ControlLetter;
@@ -18,6 +20,7 @@
 import com.semmle.js.ast.regexp.Group;
 import com.semmle.js.ast.regexp.HexEscapeSequence;
 import com.semmle.js.ast.regexp.IdentityEscape;
+import com.semmle.js.ast.regexp.CharacterClassIntersection;
 import com.semmle.js.ast.regexp.NamedBackReference;
 import com.semmle.js.ast.regexp.NonWordBoundary;
 import com.semmle.js.ast.regexp.OctalEscape;
@@ -36,6 +39,7 @@
 import com.semmle.js.ast.regexp.ZeroWidthPositiveLookbehind;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 
 /** A parser for ECMAScript 2018 regular expressions. */
@@ -67,6 +71,8 @@ public List<Error> getErrors() {
   private List<Error> errors;
   private List<BackReference> backrefs;
   private int maxbackref;
+  private boolean vFlagEnabled = false;
+  private boolean uFlagEnabled = false;
 
   /** Parse the given string as a regular expression. */
   public Result parse(String src) {
@@ -82,6 +88,12 @@ public Result parse(String src) {
     return new Result(root, errors);
   }
 
+  /**
+   * Parse the given string as a regular expression, taking its flags into account.
+   *
+   * <p>Only the presence of the characters {@code v} and {@code u} in {@code flags} is inspected.
+   * The {@code v} flag enables quoted strings ({@code \q{...}}) and nested character classes;
+   * either {@code v} or {@code u} enables Unicode property escapes ({@code \p{...}} and
+   * {@code \P{...}}).
+   *
+   * <p>The flag state only applies to this call: it is cleared again before returning, so a
+   * subsequent {@link #parse(String)} on the same parser does not inherit flags from an earlier
+   * regular expression.
+   *
+   * @param src the source text of the regular expression
+   * @param flags the flags of the regular expression; {@code null} is treated as "no flags"
+   * @return the parsed term together with any errors encountered
+   */
+  public Result parse(String src, String flags) {
+    vFlagEnabled = flags != null && flags.contains("v");
+    uFlagEnabled = flags != null && flags.contains("u");
+    try {
+      return parse(src);
+    } finally {
+      // The flags live in instance fields; reset them so they cannot leak into later parses.
+      vFlagEnabled = false;
+      uFlagEnabled = false;
+    }
+  }
+
   private static String fromCodePoint(int codepoint) {
     if (Character.isValidCodePoint(codepoint)) return new String(Character.toChars(codepoint));
     // replacement character
@@ -277,6 +289,43 @@ private RegExpTerm parseTerm() {
     return this.finishTerm(this.parseQuantifierOpt(loc, this.parseAtom()));
   }
 
+  /**
+   * Parse the contents of a quoted string: one or more alternatives separated by {@code |}.
+   *
+   * @return the single alternative if there is only one, otherwise a {@link Disjunction} of all
+   *     alternatives in source order
+   */
+  private RegExpTerm parseDisjunctionInsideQuotedString() {
+    SourceLocation start = new SourceLocation(pos());
+    List<RegExpTerm> alternatives = new ArrayList<>();
+    // There is always at least one alternative; each `|` introduces another.
+    do {
+      alternatives.add(this.parseAlternativeInsideQuotedString());
+    } while (this.match("|"));
+
+    if (alternatives.size() > 1) {
+      return this.finishTerm(new Disjunction(start, alternatives));
+    }
+    return alternatives.get(0);
+  }
+
+  /**
+   * Parse a single alternative of a quoted string.
+   *
+   * <p>Consumes characters up to, but not including, the next unescaped {@code |} or {@code }}.
+   * A character that directly follows an unescaped backslash never ends the alternative.
+   * Reaching the end of the input first is reported as {@code Error.UNEXPECTED_EOS}.
+   *
+   * @return a {@link Constant} whose value is the consumed source text exactly as written,
+   *     backslashes included
+   */
+  private RegExpTerm parseAlternativeInsideQuotedString() {
+    SourceLocation loc = new SourceLocation(pos());
+    int begin = this.pos;
+    boolean afterBackslash = false;
+    for (;;) {
+      // Running out of input means the quoted string was never closed.
+      if (this.atEOS()) {
+        this.error(Error.UNEXPECTED_EOS);
+        break;
+      }
+      // An unescaped `|` starts the next alternative; an unescaped `}` closes the quoted string.
+      boolean atDelimiter = !afterBackslash && this.lookahead(null, "|", "}");
+      if (atDelimiter) {
+        break;
+      }
+      char current = this.nextChar();
+      // A backslash escapes the next character unless it was itself escaped.
+      afterBackslash = current == '\\' && !afterBackslash;
+    }
+    String text = src.substring(begin, pos);
+    loc.setEnd(pos());
+    loc.setSource(text);
+
+    return new Constant(loc, text);
+  }
+
   private RegExpTerm parseQuantifierOpt(SourceLocation loc, RegExpTerm atom) {
     if (this.match("*")) return this.finishTerm(new Star(loc, atom, !this.match("?")));
     if (this.match("+")) return this.finishTerm(new Plus(loc, atom, !this.match("?")));
@@ -421,7 +470,13 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) {
       return this.finishTerm(new NamedBackReference(loc, name, "\\k<" + name + ">"));
     }
 
-    if (this.match("p{", "P{")) {
+    if (vFlagEnabled && this.match("q{")) {
+      RegExpTerm term = parseDisjunctionInsideQuotedString();
+      this.expectRBrace();
+      return this.finishTerm(new CharacterClassQuotedString(loc, term));
+    }
+
+    if ((vFlagEnabled || uFlagEnabled) && this.match("p{", "P{")) {
       String name = this.readIdentifier();
       if (this.match("=")) {
         value = this.readIdentifier();
@@ -493,6 +548,7 @@ private RegExpTerm parseAtomEscape(SourceLocation loc, boolean inCharClass) {
   }
 
   private RegExpTerm parseCharacterClass() {
+    if (vFlagEnabled) return parseNestedCharacterClass();
     SourceLocation loc = new SourceLocation(pos());
     List<RegExpTerm> elements = new ArrayList<>();
 
@@ -508,6 +564,43 @@ private RegExpTerm parseCharacterClass() {
     return this.finishTerm(new CharacterClass(loc, elements, inverted));
   }
 
+  /** The kind of character class body, as determined by the set operator found inside it. */
+  private enum CharacterClassType {
+    /** No set operator was seen. */
+    STANDARD,
+    /** The {@code &&} operator was seen. */
+    INTERSECTION,
+    /** The {@code --} operator was seen. */
+    SUBTRACTION
+  }
+
+  /**
+   * Parse a character class in {@code v}-flag mode, where classes may be nested and combined with
+   * the set operators {@code &&} and {@code --}.
+   *
+   * <p>All operands are collected into one list. If a set operator occurs, the list is wrapped in
+   * a single {@link CharacterClassIntersection} or {@link CharacterClassSubtraction} that becomes
+   * the only element of the resulting {@link CharacterClass}.
+   *
+   * <p>NOTE: if both operators occur in the same class, the one seen last decides the kind.
+   *
+   * @return the parsed character class
+   */
+  private RegExpTerm parseNestedCharacterClass() {
+    SourceLocation loc = new SourceLocation(pos());
+    List<RegExpTerm> operands = new ArrayList<>();
+    CharacterClassType kind = CharacterClassType.STANDARD;
+
+    this.match("[");
+    boolean inverted = this.match("^");
+    while (!this.match("]")) {
+      if (this.atEOS()) {
+        this.error(Error.EXPECTED_RBRACKET);
+        break;
+      }
+      if (lookahead("[")) {
+        // A nested class is parsed recursively and becomes one operand.
+        operands.add(parseNestedCharacterClass());
+      } else if (this.match("&&")) {
+        kind = CharacterClassType.INTERSECTION;
+      } else if (this.match("--")) {
+        kind = CharacterClassType.SUBTRACTION;
+      } else {
+        operands.add(this.parseCharacterClassElement());
+      }
+    }
+
+    // With a set operator, the class holds one operator term; otherwise the plain operand list.
+    List<RegExpTerm> contents;
+    if (kind == CharacterClassType.INTERSECTION) {
+      contents = Collections.singletonList(new CharacterClassIntersection(loc, operands));
+    } else if (kind == CharacterClassType.SUBTRACTION) {
+      contents = Collections.singletonList(new CharacterClassSubtraction(loc, operands));
+    } else {
+      contents = operands;
+    }
+    return this.finishTerm(new CharacterClass(loc, contents, inverted));
+  }
+
   private static final List<String> escapeClasses = Arrays.asList("d", "D", "s", "S", "w", "W");
 
   private RegExpTerm parseCharacterClassElement() {
@@ -519,7 +612,7 @@ private RegExpTerm parseCharacterClassElement() {
           return atom;
       }
     }
-    if (!this.lookahead("-]") && this.match("-") && !(atom instanceof CharacterClassEscape))
+    if (!this.lookahead("-]") && !this.lookahead("--") && this.match("-") && !(atom instanceof CharacterClassEscape))
       return this.finishTerm(new CharacterClassRange(loc, atom, this.parseCharacterClassAtom()));
     return atom;
   }
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+description: Add support for quoted string, intersection and subtraction`
	`2`	`+compatibility: backwards`