Skip to content

Commit 8247948

Browse files
committed
Handle formatter flags in WTF-8 OsStr Display
The Display implementation for `OsStr` and `Path` on Windows (the WTF-8 version) only handles formatter flags when the entire string is valid UTF-8. As most paths are valid UTF-8, the common case is formatted like `str`; however, flags are ignored when the string contains an unpaired surrogate. Implement its Display with the same logic as that of `str`. Fixes #136617 for Windows.
1 parent 5bc6231 commit 8247948

File tree

5 files changed

+120
-20
lines changed

5 files changed

+120
-20
lines changed

library/core/src/fmt/mod.rs

+10-3
Original file line numberDiff line numberDiff line change
@@ -1513,8 +1513,11 @@ unsafe fn getcount(args: &[rt::Argument<'_>], cnt: &rt::Count) -> Option<usize>
15131513
}
15141514

15151515
/// Padding after the end of something. Returned by `Formatter::padding`.
1516+
#[doc(hidden)]
15161517
#[must_use = "don't forget to write the post padding"]
1517-
pub(crate) struct PostPadding {
1518+
#[unstable(feature = "fmt_internals", reason = "internal to standard library", issue = "none")]
1519+
#[derive(Debug)]
1520+
pub struct PostPadding {
15181521
fill: char,
15191522
padding: usize,
15201523
}
@@ -1525,7 +1528,9 @@ impl PostPadding {
15251528
}
15261529

15271530
/// Writes this post padding.
1528-
pub(crate) fn write(self, f: &mut Formatter<'_>) -> Result {
1531+
#[doc(hidden)]
1532+
#[unstable(feature = "fmt_internals", reason = "internal to standard library", issue = "none")]
1533+
pub fn write(self, f: &mut Formatter<'_>) -> Result {
15291534
for _ in 0..self.padding {
15301535
f.buf.write_char(self.fill)?;
15311536
}
@@ -1743,7 +1748,9 @@ impl<'a> Formatter<'a> {
17431748
///
17441749
/// Callers are responsible for ensuring post-padding is written after the
17451750
/// thing that is being padded.
1746-
pub(crate) fn padding(
1751+
#[doc(hidden)]
1752+
#[unstable(feature = "fmt_internals", reason = "internal to standard library", issue = "none")]
1753+
pub fn padding(
17471754
&mut self,
17481755
padding: usize,
17491756
default: Alignment,

library/std/src/ffi/os_str/tests.rs

+16
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,22 @@ fn test_os_string_join() {
105105
assert_eq!("a b c", strings_abc.join(OsStr::new(" ")));
106106
}
107107

108+
#[test]
109+
fn display() {
110+
let os_string = OsString::from("bcd");
111+
assert_eq!(format!("a{:^10}e", os_string.display()), "a   bcd    e");
112+
}
113+
114+
#[cfg(windows)]
115+
#[test]
116+
fn display_invalid_wtf8_windows() {
117+
use crate::os::windows::ffi::OsStringExt;
118+
119+
let os_string = OsString::from_wide(&[b'b' as _, 0xD800, b'd' as _]);
120+
assert_eq!(format!("a{:^10}e", os_string.display()), "a   b�d    e");
121+
assert_eq!(format!("a{:^10}e", os_string.as_os_str().display()), "a   b�d    e");
122+
}
123+
108124
#[test]
109125
fn test_os_string_default() {
110126
let os_string: OsString = Default::default();

library/std/src/sys_common/wtf8.rs

+67-17
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ impl fmt::Debug for Wtf8Buf {
166166
}
167167
}
168168

169+
/// Formats the string with unpaired surrogates substituted with the replacement
170+
/// character, U+FFFD.
171+
impl fmt::Display for Wtf8Buf {
172+
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
173+
fmt::Display::fmt(&**self, formatter)
174+
}
175+
}
176+
169177
impl Wtf8Buf {
170178
/// Creates a new, empty WTF-8 string.
171179
#[inline]
@@ -562,24 +570,43 @@ impl fmt::Debug for Wtf8 {
562570
}
563571
}
564572

573+
/// Formats the string with unpaired surrogates substituted with the replacement
574+
/// character, U+FFFD.
565575
impl fmt::Display for Wtf8 {
566-
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
567-
let wtf8_bytes = &self.bytes;
568-
let mut pos = 0;
569-
loop {
570-
match self.next_surrogate(pos) {
571-
Some((surrogate_pos, _)) => {
572-
formatter.write_str(unsafe {
573-
str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos])
574-
})?;
575-
formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?;
576-
pos = surrogate_pos + 3;
577-
}
578-
None => {
579-
let s = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) };
580-
if pos == 0 { return s.fmt(formatter) } else { return formatter.write_str(s) }
581-
}
582-
}
576+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
577+
// Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
578+
579+
// Make sure there's a fast path up front.
580+
if f.options().get_width().is_none() && f.options().get_precision().is_none() {
581+
return self.write_lossy(f);
582+
}
583+
584+
// The `precision` field can be interpreted as a maximum width for the
585+
// string being formatted.
586+
let max_code_point_count = f.options().get_precision().unwrap_or(usize::MAX);
587+
let mut iter = self.code_points();
588+
let code_point_count = iter.by_ref().take(max_code_point_count).count();
589+
590+
// If our string is longer than the maximum width, truncate it and
591+
// handle other flags in terms of the truncated string.
592+
let byte_len = self.len() - iter.as_slice().len();
593+
// SAFETY: The index is derived from the offset of `.code_points()`,
594+
// which is guaranteed to be in-bounds and between character boundaries.
595+
let s = unsafe { Wtf8::from_bytes_unchecked(self.bytes.get_unchecked(..byte_len)) };
596+
597+
// The `width` field is more of a minimum width parameter at this point.
598+
if let Some(width) = f.options().get_width()
599+
&& code_point_count < width
600+
{
601+
// If we're under the minimum width, then fill up the minimum width
602+
// with the specified string + some alignment.
603+
let post_padding = f.padding(width - code_point_count, fmt::Alignment::Left)?;
604+
s.write_lossy(f)?;
605+
post_padding.write(f)
606+
} else {
607+
// If we're over the minimum width or there is no minimum width, we
608+
// can just emit the string.
609+
s.write_lossy(f)
583610
}
584611
}
585612
}
@@ -696,6 +723,19 @@ impl Wtf8 {
696723
}
697724
}
698725

726+
/// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
727+
/// It ignores formatter flags.
728+
fn write_lossy(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
729+
let wtf8_bytes = &self.bytes;
730+
let mut pos = 0;
731+
while let Some((surrogate_pos, _)) = self.next_surrogate(pos) {
732+
f.write_str(unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos]) })?;
733+
f.write_str(UTF8_REPLACEMENT_CHARACTER)?;
734+
pos = surrogate_pos + 3;
735+
}
736+
f.write_str(unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) })
737+
}
738+
699739
/// Converts the WTF-8 string to potentially ill-formed UTF-16
700740
/// and return an iterator of 16-bit code units.
701741
///
@@ -980,6 +1020,16 @@ impl<'a> Iterator for Wtf8CodePoints<'a> {
9801020
}
9811021
}
9821022

1023+
impl<'a> Wtf8CodePoints<'a> {
1024+
/// Views the underlying data as a subslice of the original data.
1025+
#[inline]
1026+
pub fn as_slice(&self) -> &Wtf8 {
1027+
// SAFETY: `Wtf8CodePoints` is only made from a `Wtf8`, which
1028+
// guarantees the iter is valid WTF-8.
1029+
unsafe { Wtf8::from_bytes_unchecked(self.bytes.as_slice()) }
1030+
}
1031+
}
1032+
9831033
/// Generates a wide character sequence for potentially ill-formed UTF-16.
9841034
#[stable(feature = "rust1", since = "1.0.0")]
9851035
#[derive(Clone)]

library/std/src/sys_common/wtf8/tests.rs

+15
Original file line numberDiff line numberDiff line change
@@ -749,3 +749,18 @@ fn unwobbly_wtf8_plus_utf8_is_utf8() {
749749
string.push_str("some utf-8");
750750
assert!(string.is_known_utf8);
751751
}
752+
753+
#[test]
754+
fn display_wtf8() {
755+
let string = Wtf8Buf::from_wide(&[b'b' as _, 0xD800, b'd' as _]);
756+
assert!(!string.is_known_utf8);
757+
assert_eq!(format!("a{:^10}e", string), "a   b�d    e");
758+
assert_eq!(format!("a{:^10}e", string.as_slice()), "a   b�d    e");
759+
760+
let mut string = Wtf8Buf::from_str("bcd");
761+
assert!(string.is_known_utf8);
762+
assert_eq!(format!("a{:^10}e", string), "a   bcd    e");
763+
assert_eq!(format!("a{:^10}e", string.as_slice()), "a   bcd    e");
764+
string.is_known_utf8 = false;
765+
assert_eq!(format!("a{:^10}e", string), "a   bcd    e");
766+
}

library/std/tests/path.rs

+12
Original file line numberDiff line numberDiff line change
@@ -1819,6 +1819,18 @@ fn test_clone_into() {
18191819
fn display_format_flags() {
18201820
assert_eq!(format!("a{:#<5}b", Path::new("").display()), "a#####b");
18211821
assert_eq!(format!("a{:#<5}b", Path::new("a").display()), "aa####b");
1822+
assert_eq!(format!("a{:^10}e", Path::new("bcd").display()), "a   bcd    e");
1823+
}
1824+
1825+
#[cfg(windows)]
1826+
#[test]
1827+
fn display_invalid_wtf8_windows() {
1828+
use std::ffi::OsString;
1829+
use std::os::windows::ffi::OsStringExt;
1830+
1831+
let path_buf = PathBuf::from(OsString::from_wide(&[b'b' as _, 0xD800, b'd' as _]));
1832+
assert_eq!(format!("a{:^10}e", path_buf.display()), "a   b�d    e");
1833+
assert_eq!(format!("a{:^10}e", path_buf.as_path().display()), "a   b�d    e");
18221834
}
18231835

18241836
#[test]

0 commit comments

Comments
 (0)