Raise parser error on invalid unicode escape sequence

matetokodi · matetokodi · commit 025596a6555e · 2024-07-01T11:27:20.000+02:00
JerryScript-DCO-1.0-Signed-off-by: Máté Tokodi mate.tokodi@szteszoftver.hu
diff --git a/jerry-core/parser/js/js-lexer.c b/jerry-core/parser/js/js-lexer.c
@@ -147,7 +147,7 @@ lexer_hex_in_braces_to_code_point (const uint8_t *source_p, /**< current source
 /**
  * Parse hexadecimal character sequence
  *
- * @return character value
+ * @return character value (-1 if the escape sequence is invalid)
  */
 static lit_code_point_t
 lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] current source position */
@@ -174,13 +174,19 @@ lexer_unchecked_hex_to_character (const uint8_t **source_p) /**< [in, out] curre
     }
     else
     {
-      JERRY_ASSERT ((byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
-                    || (byte >= LIT_CHAR_UPPERCASE_A && byte <= LIT_CHAR_UPPERCASE_F));
+      if (!((byte >= LIT_CHAR_LOWERCASE_A && byte <= LIT_CHAR_LOWERCASE_F)
+            || (byte >= LIT_CHAR_UPPERCASE_A && byte <= LIT_CHAR_UPPERCASE_F)))
+      {
+        return (lit_code_point_t) -1;
+      }
 
       result += LEXER_TO_ASCII_LOWERCASE (byte) - (LIT_CHAR_LOWERCASE_A - 10);
     }
 
-    JERRY_ASSERT (result <= LIT_UNICODE_CODE_POINT_MAX);
+    if (result > LIT_UNICODE_CODE_POINT_MAX)
+    {
+      return (lit_code_point_t) -1;
+    }
 
     if (length == 0)
     {
@@ -2068,8 +2074,10 @@ lexer_scan_private_identifier (parser_context_t *context_p) /**< context */
 
 /**
  * Convert an ident with escapes to a utf8 string.
+ *
+ * @return false if source contains invalid unicode escape sequence, true otherwise
  */
-void
+bool
 lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
                               const uint8_t *source_p, /**< source string */
                               prop_length_t length) /**< length of destination string */
@@ -2083,7 +2091,12 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
     if (*source_p == LIT_CHAR_BACKSLASH)
     {
       source_p += 2;
-      destination_p += lit_code_point_to_cesu8_bytes (destination_p, lexer_unchecked_hex_to_character (&source_p));
+      lit_code_point_t code_point = lexer_unchecked_hex_to_character (&source_p);
+      if (code_point == (lit_code_point_t) -1)
+      {
+        return false;
+      }
+      destination_p += lit_code_point_to_cesu8_bytes (destination_p, code_point);
       continue;
     }
 
@@ -2098,6 +2111,7 @@ lexer_convert_ident_to_cesu8 (uint8_t *destination_p, /**< destination string */
 
     *destination_p++ = *source_p++;
   } while (destination_p < destination_end_p);
+  return true;
 } /* lexer_convert_ident_to_cesu8 */
 
 /**
@@ -2130,7 +2144,10 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
 
   if (literal_p->type == LEXER_IDENT_LITERAL)
   {
-    lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length);
+    if (!lexer_convert_ident_to_cesu8 (destination_start_p, literal_p->char_p, literal_p->length))
+    {
+      parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
+    }
     return destination_start_p;
   }
 
@@ -2229,7 +2246,12 @@ lexer_convert_literal_to_chars (parser_context_t *context_p, /**< context */
       if (*source_p == LIT_CHAR_LOWERCASE_X || *source_p == LIT_CHAR_LOWERCASE_U)
       {
         source_p++;
-        destination_p += lit_code_point_to_cesu8_bytes (destination_p, lexer_unchecked_hex_to_character (&source_p));
+        lit_code_point_t code_point = lexer_unchecked_hex_to_character (&source_p);
+        if (code_point == (lit_code_point_t) -1)
+        {
+          parser_raise_error (context_p, PARSER_ERR_INVALID_UNICODE_ESCAPE_SEQUENCE);
+        }
+        destination_p += lit_code_point_to_cesu8_bytes (destination_p, code_point);
         continue;
       }
 
@@ -3308,6 +3330,10 @@ lexer_compare_identifier_to_chars (const uint8_t *left_p, /**< left identifier *
     {
       left_p += 2;
       lit_code_point_t code_point = lexer_unchecked_hex_to_character (&left_p);
+      if (code_point == (lit_code_point_t) -1)
+      {
+        return false;
+      }
 
       escape_size = lit_code_point_to_cesu8_bytes (utf8_buf, code_point);
     }
diff --git a/jerry-core/parser/js/js-parser-internal.h b/jerry-core/parser/js/js-parser-internal.h
@@ -748,7 +748,7 @@ void lexer_parse_string (parser_context_t *context_p, lexer_string_options_t opt
 void lexer_expect_identifier (parser_context_t *context_p, uint8_t literal_type);
 bool lexer_scan_identifier (parser_context_t *context_p, lexer_parse_options_t opts);
 void lexer_check_property_modifier (parser_context_t *context_p);
-void lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length);
+bool lexer_convert_ident_to_cesu8 (uint8_t *destination_p, const uint8_t *source_p, prop_length_t length);
 
 const uint8_t *lexer_convert_literal_to_chars (parser_context_t *context_p,
                                                const lexer_lit_location_t *literal_p,
diff --git a/tests/jerry/fail/regression-test-issue-5134.js b/tests/jerry/fail/regression-test-issue-5134.js
@@ -0,0 +1,15 @@
+// Copyright JS Foundation and other contributors, http://js.foundation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import{a as "\{{12,34}"