@@ -166,6 +166,14 @@ impl fmt::Debug for Wtf8Buf {
166
166
}
167
167
}
168
168
169
+ /// Formats the string with unpaired surrogates substituted with the replacement
170
+ /// character, U+FFFD.
171
+ impl fmt:: Display for Wtf8Buf {
172
+ fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
173
+ fmt:: Display :: fmt ( & * * self , formatter)
174
+ }
175
+ }
176
+
169
177
impl Wtf8Buf {
170
178
/// Creates a new, empty WTF-8 string.
171
179
#[ inline]
@@ -562,24 +570,43 @@ impl fmt::Debug for Wtf8 {
562
570
}
563
571
}
564
572
573
+ /// Formats the string with unpaired surrogates substituted with the replacement
574
+ /// character, U+FFFD.
565
575
impl fmt:: Display for Wtf8 {
566
- fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
567
- let wtf8_bytes = & self . bytes ;
568
- let mut pos = 0 ;
569
- loop {
570
- match self . next_surrogate ( pos) {
571
- Some ( ( surrogate_pos, _) ) => {
572
- formatter. write_str ( unsafe {
573
- str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] )
574
- } ) ?;
575
- formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
576
- pos = surrogate_pos + 3 ;
577
- }
578
- None => {
579
- let s = unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ;
580
- if pos == 0 { return s. fmt ( formatter) } else { return formatter. write_str ( s) }
581
- }
582
- }
576
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
577
+ // Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
578
+
579
+ // Make sure there's a fast path up front.
580
+ if f. options ( ) . get_width ( ) . is_none ( ) && f. options ( ) . get_precision ( ) . is_none ( ) {
581
+ return self . write_lossy ( f) ;
582
+ }
583
+
584
+ // The `precision` field can be interpreted as a maximum width for the
585
+ // string being formatted.
586
+ let max_code_point_count = f. options ( ) . get_precision ( ) . unwrap_or ( usize:: MAX ) ;
587
+ let mut iter = self . code_points ( ) ;
588
+ let code_point_count = iter. by_ref ( ) . take ( max_code_point_count) . count ( ) ;
589
+
590
+ // If our string is longer than the maximum width, truncate it and
591
+ // handle other flags in terms of the truncated string.
592
+ let byte_len = self . len ( ) - iter. as_slice ( ) . len ( ) ;
593
+ // SAFETY: The index is derived from the offset of `.code_points()`,
594
+ // which is guaranteed to be in-bounds and between character boundaries.
595
+ let s = unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . get_unchecked ( ..byte_len) ) } ;
596
+
597
+ // The `width` field is more of a minimum width parameter at this point.
598
+ if let Some ( width) = f. options ( ) . get_width ( )
599
+ && code_point_count < width
600
+ {
601
+ // If we're under the minimum width, then fill up the minimum width
602
+ // with the specified string + some alignment.
603
+ let post_padding = f. padding ( width - code_point_count, fmt:: Alignment :: Left ) ?;
604
+ s. write_lossy ( f) ?;
605
+ post_padding. write ( f)
606
+ } else {
607
+ // If we're over the minimum width or there is no minimum width, we
608
+ // can just emit the string.
609
+ s. write_lossy ( f)
583
610
}
584
611
}
585
612
}
@@ -696,6 +723,19 @@ impl Wtf8 {
696
723
}
697
724
}
698
725
726
+ /// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
727
+ /// It ignores formatter flags.
728
+ fn write_lossy ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
729
+ let wtf8_bytes = & self . bytes ;
730
+ let mut pos = 0 ;
731
+ while let Some ( ( surrogate_pos, _) ) = self . next_surrogate ( pos) {
732
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] ) } ) ?;
733
+ f. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
734
+ pos = surrogate_pos + 3 ;
735
+ }
736
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } )
737
+ }
738
+
699
739
/// Converts the WTF-8 string to potentially ill-formed UTF-16
700
740
/// and return an iterator of 16-bit code units.
701
741
///
@@ -980,6 +1020,16 @@ impl<'a> Iterator for Wtf8CodePoints<'a> {
980
1020
}
981
1021
}
982
1022
1023
+ impl < ' a > Wtf8CodePoints < ' a > {
1024
+ /// Views the underlying data as a subslice of the original data.
1025
+ #[ inline]
1026
+ pub fn as_slice ( & self ) -> & Wtf8 {
1027
+ // SAFETY: `Wtf8CodePoints` is only made from a `Wtf8Str`, which
1028
+ // guarantees the iter is valid WTF-8.
1029
+ unsafe { Wtf8 :: from_bytes_unchecked ( self . bytes . as_slice ( ) ) }
1030
+ }
1031
+ }
1032
+
983
1033
/// Generates a wide character sequence for potentially ill-formed UTF-16.
984
1034
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
985
1035
#[ derive( Clone ) ]
0 commit comments