projectfluent · alerque · May 5, 2024 · Dec 21, 2022 · Dec 21, 2022
diff --git a/fluent-syntax/src/unicode.rs b/fluent-syntax/src/unicode.rs
@@ -66,6 +66,16 @@ fn encode_unicode(s: Option<&str>) -> char {
 /// assert_eq!(s, "Foo 😊 Bar");
 /// ```
 pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
+where
+    W: fmt::Write,
+{
+    if unescape(w, input)? {
+        return Ok(());
+    }
+    w.write_str(input)
+}
+
+fn unescape<W>(w: &mut W, input: &str) -> Result<bool, std::fmt::Error>
 where
     W: fmt::Write,
 {
@@ -100,10 +110,15 @@ where
         w.write_char(new_char)?;
         start = ptr;
     }
+
+    if start == 0 {
+        return Ok(false);
+    }
+
     if start != ptr {
         w.write_str(&input[start..ptr])?;
     }
-    Ok(())
+    Ok(true)
 }
 
 /// Unescapes to a `Cow<str>` optionally allocating.
@@ -119,41 +134,11 @@ where
 /// );
 /// ```
 pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
-    let bytes = input.as_bytes();
-    let mut result = Cow::from(input);
-
-    let mut ptr = 0;
-
-    while let Some(b) = bytes.get(ptr) {
-        if b != &b'\\' {
-            if let Cow::Owned(ref mut s) = result {
-                s.push(*b as char);
-            }
-            ptr += 1;
-            continue;
-        }
-
-        if let Cow::Borrowed(_) = result {
-            result = Cow::from(&input[0..ptr]);
-        }
-
-        ptr += 1;
-
-        let new_char = match bytes.get(ptr) {
-            Some(b'\\') => '\\',
-            Some(b'"') => '"',
-            Some(u @ b'u') | Some(u @ b'U') => {
-                let start = ptr + 1;
-                let len = if u == &b'u' { 4 } else { 6 };
-                ptr += len;
-                input
-                    .get(start..(start + len))
-                    .map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice)))
-            }
-            _ => UNKNOWN_CHAR,
-        };
-        result.to_mut().push(new_char);
-        ptr += 1;
+    let mut result = String::new();
+    let owned = unescape(&mut result, input).expect("String write methods don't Err");
+    if owned {
+        Cow::Owned(result)
+    } else {
+        Cow::Borrowed(input)
     }
-    result
 }
diff --git a/fluent-syntax/tests/unicode.rs b/fluent-syntax/tests/unicode.rs
@@ -1,23 +1,33 @@
+use std::borrow::Cow;
+
 use fluent_syntax::unicode::{unescape_unicode, unescape_unicode_to_string};
 
-fn test_unescape_unicode(input: &str, output: &str) {
+/// Asserts that decoding the Unicode escape sequences in `input` yields `output`.
+/// The result of `unescape_unicode_to_string` must be `Cow::Borrowed` iff `borrowed` is true.
+fn test_unescape_unicode(input: &str, output: &str, borrowed: bool) {
     let mut s = String::new();
     unescape_unicode(&mut s, input).expect("Failed to write.");
-    assert_eq!(&s, output);
+    assert_eq!(s, output);
     let result = unescape_unicode_to_string(input);
-    assert_eq!(&result, output);
+    assert_eq!(result, output);
+
+    assert_eq!(matches!(result, Cow::Borrowed(_)), borrowed);
 }
 
 #[test]
 fn unescape_unicode_test() {
-    test_unescape_unicode("foo", "foo");
-    test_unescape_unicode("foo \\\\", "foo \\");
-    test_unescape_unicode("foo \\\"", "foo \"");
-    test_unescape_unicode("foo \\\\ faa", "foo \\ faa");
-    test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii");
-    test_unescape_unicode("foo \\\\\\\" faa \\\"\\\\ fii", "foo \\\" faa \"\\ fii");
-    test_unescape_unicode("\\u0041\\u004F", "AO");
-    test_unescape_unicode("\\uA", "�");
-    test_unescape_unicode("\\uA0Pl", "�");
-    test_unescape_unicode("\\d Foo", "� Foo");
+    test_unescape_unicode("foo", "foo", true);
+    test_unescape_unicode("foo \\\\", "foo \\", false);
+    test_unescape_unicode("foo \\\"", "foo \"", false);
+    test_unescape_unicode("foo \\\\ faa", "foo \\ faa", false);
+    test_unescape_unicode("foo \\\\ faa \\\\ fii", "foo \\ faa \\ fii", false);
+    test_unescape_unicode(
+        "foo \\\\\\\" faa \\\"\\\\ fii",
+        "foo \\\" faa \"\\ fii",
+        false,
+    );
+    test_unescape_unicode("\\u0041\\u004F", "AO", false);
+    test_unescape_unicode("\\uA", "�", false);
+    test_unescape_unicode("\\uA0Pl", "�", false);
+    test_unescape_unicode("\\d Foo", "� Foo", false);
 }