From f76042dab01bb43db7ebc95fb6c9184300748a0d Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Tue, 25 Jun 2024 22:29:37 -0700 Subject: [PATCH 1/7] test a variety of additions to wtf8buf --- library/std/src/sys_common/wtf8/tests.rs | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 6a1cc41a8fb04..b086a5ac26ff8 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -725,3 +725,28 @@ fn wtf8_utf8_boundary_between_surrogates() { string.push(CodePoint::from_u32(0xD800).unwrap()); check_utf8_boundary(&string, 3); } + +#[test] +fn wobbled_wtf8_plus_bytes_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wobbled_wtf8_plus_str_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.push_str("some utf-8"); + assert!(!string.is_known_utf8); +} + + +#[test] +fn unwobbly_wtf8_plus_utf8_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.push_str("some utf-8"); + assert!(string.is_known_utf8); +} From 96322f18802dffea3245cdbede43b5efa4683bf1 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 00:54:06 -0700 Subject: [PATCH 2/7] another test --- library/std/src/sys_common/wtf8/tests.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index b086a5ac26ff8..46f5be68028ea 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -750,3 +750,11 @@ fn unwobbly_wtf8_plus_utf8_is_utf8() { string.push_str("some utf-8"); assert!(string.is_known_utf8); } + +#[test] +fn unwobbly_wtf8_plus_wobbled_bytes_isnt_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.extend_from_slice(b"\xED\xa0\x80"); + assert!(!string.is_known_utf8); +} From ab2d4f736f525ac03227d0bd4e04da5153bc8f59 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 01:48:59 -0700 Subject: [PATCH 3/7] pass --- library/std/src/sys_common/wtf8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 708f62f476e73..117a3e23044ea 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -480,7 +480,7 @@ impl Wtf8Buf { #[inline] pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { self.bytes.extend_from_slice(other); - self.is_known_utf8 = self.is_known_utf8 || self.next_surrogate(0).is_none(); + self.is_known_utf8 = false; } } From 3ebe62b5ef61b221dd944226461fcc14d82b575b Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 01:50:50 -0700 Subject: [PATCH 4/7] lol --- library/std/src/sys_common/wtf8/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 46f5be68028ea..58b01a19c1059 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -744,7 +744,7 @@ fn wobbled_wtf8_plus_str_isnt_utf8() { #[test] -fn unwobbly_wtf8_plus_utf8_is_utf8() { +fn unwobbly_wtf8_plus_str_is_utf8() { let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); assert!(string.is_known_utf8); string.push_str("some utf-8"); From 712adca04b11e1e06d3b75eed9c8071fdef6ee19 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 01:51:08 -0700 Subject: [PATCH 5/7] quality of impl wtf8buf --- library/std/src/sys_common/wtf8/tests.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 58b01a19c1059..888bbdfb4ae23 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -758,3 +758,11 @@ fn unwobbly_wtf8_plus_wobbled_bytes_isnt_utf8() { string.extend_from_slice(b"\xED\xa0\x80"); assert!(!string.is_known_utf8); } + +#[test] +fn unwobbly_wtf8_plus_unwobbly_bytes_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(string.is_known_utf8); +} From dbfc619796fc2c8efcd7c0209b26a0474d7a4684 Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 01:54:10 -0700 Subject: [PATCH 6/7] quality of impl impl --- library/std/src/sys_common/wtf8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 117a3e23044ea..86bf70ccfedb0 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -480,7 +480,7 @@ impl Wtf8Buf { #[inline] pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { self.bytes.extend_from_slice(other); - self.is_known_utf8 = false; + self.is_known_utf8 = self.is_known_utf8 && str::from_utf8(other).is_ok(); } } From 26b27a9b9eda3a5cc0d364fa6d69bd4254a2787e Mon Sep 17 00:00:00 2001 From: Jubilee Young Date: Wed, 26 Jun 2024 02:10:42 -0700 Subject: [PATCH 7/7] fmt --- library/std/src/sys_common/wtf8/tests.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 888bbdfb4ae23..31080a6b4b880 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -742,7 +742,6 @@ fn wobbled_wtf8_plus_str_isnt_utf8() { assert!(!string.is_known_utf8); } - #[test] fn unwobbly_wtf8_plus_str_is_utf8() { let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world");