@@ -588,23 +588,40 @@ impl fmt::Debug for Wtf8 {
588
588
/// Formats the string with unpaired surrogates substituted with the replacement
589
589
/// character, U+FFFD.
590
590
impl fmt:: Display for Wtf8 {
591
- fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
592
- let wtf8_bytes = & self . bytes ;
593
- let mut pos = 0 ;
594
- loop {
595
- match self . next_surrogate ( pos) {
596
- Some ( ( surrogate_pos, _) ) => {
597
- formatter. write_str ( unsafe {
598
- str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] )
599
- } ) ?;
600
- formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
601
- pos = surrogate_pos + 3 ;
602
- }
603
- None => {
604
- let s = unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ;
605
- if pos == 0 { return s. fmt ( formatter) } else { return formatter. write_str ( s) }
606
- }
607
- }
591
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
592
+ // Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
593
+
594
+ // Fast path: with neither a width nor a precision requested there is nothing to pad or truncate, so write the lossy form directly.
595
+ if f. options ( ) . get_width ( ) . is_none ( ) && f. options ( ) . get_precision ( ) . is_none ( ) {
596
+ return self . write_lossy ( f) ;
597
+ }
598
+
599
+ // The `precision` field is interpreted as a maximum width for the
600
+ // string being formatted, counted in code points rather than bytes.
601
+ let max_code_point_count = f. options ( ) . get_precision ( ) . unwrap_or ( usize:: MAX ) ;
602
+ let mut iter = self . code_points ( ) ;
603
+ let code_point_count = iter. by_ref ( ) . take ( max_code_point_count) . count ( ) ;
604
+
605
+ // If our string is longer than the maximum width, truncate it and
606
+ // handle other flags in terms of the truncated string. Whatever `iter` has not consumed is the part that gets cut off.
607
+ let byte_len = self . len ( ) - iter. as_slice ( ) . len ( ) ;
608
+ // SAFETY: The index is derived from the offset of `.code_points()`,
609
+ // which is guaranteed to be in-bounds and between character boundaries.
610
+ let s = unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . get_unchecked ( ..byte_len) ) } ;
611
+
612
+ // The `width` field is more of a minimum width parameter at this point.
613
+ if let Some ( width) = f. options ( ) . get_width ( )
614
+ && code_point_count < width
615
+ {
616
+ // If we're under the minimum width, then fill up the minimum width
617
+ // with the string plus padding; `f.padding` hands back the trailing padding, which is written after the string.
618
+ let post_padding = f. padding ( width - code_point_count, fmt:: Alignment :: Left ) ?;
619
+ s. write_lossy ( f) ?;
620
+ post_padding. write ( f)
621
+ } else {
622
+ // If we're over the minimum width or there is no minimum width, we
623
+ // can just emit the string.
624
+ s. write_lossy ( f)
608
625
}
609
626
}
610
627
}
@@ -720,6 +737,19 @@ impl Wtf8 {
720
737
}
721
738
}
722
739
740
+ /// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
741
+ /// It ignores formatter flags: each surrogate reported by `next_surrogate` is written as `UTF8_REPLACEMENT_CHARACTER` and everything else goes straight to `write_str`.
742
+ fn write_lossy ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
743
+ let wtf8_bytes = & self . bytes ;
744
+ let mut pos = 0 ;
745
+ while let Some ( ( surrogate_pos, _) ) = self . next_surrogate ( pos) {
746
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] ) } ) ?; // SAFETY: NOTE(review) relies on the bytes between surrogates being valid UTF-8 — confirm against `next_surrogate`.
747
+ f. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
748
+ pos = surrogate_pos + 3 ; // resume just past the surrogate, which takes up 3 bytes here
749
+ }
750
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ) // tail after the last surrogate found (the whole string if there was none)
751
+ }
752
+
723
753
/// Converts the WTF-8 string to potentially ill-formed UTF-16
724
754
/// and return an iterator of 16-bit code units.
725
755
///
@@ -1004,6 +1034,16 @@ impl Iterator for Wtf8CodePoints<'_> {
1004
1034
}
1005
1035
}
1006
1036
1037
+ impl < ' a > Wtf8CodePoints < ' a > {
1038
+ /// Views the not-yet-consumed remainder of the underlying data as a subslice of the original data.
1039
+ #[ inline]
1040
+ pub fn as_slice ( & self ) -> & Wtf8 {
1041
+ // SAFETY: `Wtf8CodePoints` is only made from a `Wtf8`, which
1042
+ // guarantees the iter is valid WTF-8. NOTE(review): the result borrows from `&self`; returning `&'a Wtf8` may be possible — confirm against `bytes.as_slice()`.
1043
+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1044
+ }
1045
+ }
1046
+
1007
1047
/// Generates a wide character sequence for potentially ill-formed UTF-16.
1008
1048
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
1009
1049
#[ derive( Clone ) ]
0 commit comments