diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 708f62f476e73..86bf70ccfedb0 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -480,7 +480,7 @@ impl Wtf8Buf { #[inline] pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { self.bytes.extend_from_slice(other); - self.is_known_utf8 = self.is_known_utf8 || self.next_surrogate(0).is_none(); + self.is_known_utf8 = self.is_known_utf8 && str::from_utf8(other).is_ok(); } } diff --git a/library/std/src/sys_common/wtf8/tests.rs b/library/std/src/sys_common/wtf8/tests.rs index 6a1cc41a8fb04..31080a6b4b880 100644 --- a/library/std/src/sys_common/wtf8/tests.rs +++ b/library/std/src/sys_common/wtf8/tests.rs @@ -725,3 +725,43 @@ fn wtf8_utf8_boundary_between_surrogates() { string.push(CodePoint::from_u32(0xD800).unwrap()); check_utf8_boundary(&string, 3); } + +#[test] +fn wobbled_wtf8_plus_bytes_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wobbled_wtf8_plus_str_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.push_str("some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_str_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.push_str("some utf-8"); + assert!(string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_wobbled_bytes_isnt_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.extend_from_slice(b"\xED\xa0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_unwobbly_bytes_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(string.is_known_utf8); +}