diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs index 19728d33990ac..8acec6f949fc5 100644 --- a/library/std/src/sys/os_str/wtf8.rs +++ b/library/std/src/sys/os_str/wtf8.rs @@ -41,13 +41,13 @@ impl AsInner for Buf { impl fmt::Debug for Buf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), f) + fmt::Debug::fmt(&self.inner, f) } } impl fmt::Display for Buf { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), f) + fmt::Display::fmt(&self.inner, f) } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index 952c39132b056..f9ec112b19747 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -169,6 +169,18 @@ impl fmt::Debug for Wtf8Buf { } } +/// Formats the string with unpaired surrogates substituted with the replacement +/// character, U+FFFD. +impl fmt::Display for Wtf8Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(s) = self.as_known_utf8() { + fmt::Display::fmt(s, formatter) + } else { + fmt::Display::fmt(&**self, formatter) + } + } +} + impl Wtf8Buf { /// Creates a new, empty WTF-8 string. #[inline] @@ -262,6 +274,18 @@ impl Wtf8Buf { unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } } + /// Converts the string to UTF-8 without validation, if it was created from + /// valid UTF-8. + #[inline] + fn as_known_utf8(&self) -> Option<&str> { + if self.is_known_utf8 { + // SAFETY: The buffer is known to be valid UTF-8. + Some(unsafe { str::from_utf8_unchecked(self.as_bytes()) }) + } else { + None + } + } + /// Reserves capacity for at least `additional` more bytes to be inserted /// in the given `Wtf8Buf`. /// The collection may reserve more space to avoid frequent reallocations. @@ -364,7 +388,7 @@ impl Wtf8Buf { _ => { // If we'll be pushing a string containing a surrogate, we may // no longer have UTF-8. - if other.next_surrogate(0).is_some() { + if self.is_known_utf8 && other.next_surrogate(0).is_some() { self.is_known_utf8 = false; }