@@ -588,23 +588,48 @@ impl fmt::Debug for Wtf8 {
588
588
/// Formats the string with unpaired surrogates substituted with the replacement
589
589
/// character, U+FFFD.
590
590
impl fmt:: Display for Wtf8 {
591
- fn fmt ( & self , formatter : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
592
- let wtf8_bytes = & self . bytes ;
593
- let mut pos = 0 ;
594
- loop {
595
- match self . next_surrogate ( pos) {
596
- Some ( ( surrogate_pos, _) ) => {
597
- formatter. write_str ( unsafe {
598
- str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] )
599
- } ) ?;
600
- formatter. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
601
- pos = surrogate_pos + 3 ;
602
- }
603
- None => {
604
- let s = unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } ;
605
- if pos == 0 { return s. fmt ( formatter) } else { return formatter. write_str ( s) }
606
- }
607
- }
591
+ fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
592
+ // Corresponds to `Formatter::pad`, but for `Wtf8` instead of `str`.
593
+
594
+ // Make sure there's a fast path up front.
595
+ if f. options ( ) . get_width ( ) . is_none ( ) && f. options ( ) . get_precision ( ) . is_none ( ) {
596
+ return self . write_lossy ( f) ;
597
+ }
598
+
599
+ // The `precision` field can be interpreted as a maximum width for the
600
+ // string being formatted.
601
+ let ( s, code_point_count) = if let Some ( max_code_point_count) = f. options ( ) . get_precision ( )
602
+ {
603
+ let mut iter = self . code_point_indices ( ) ;
604
+ let remaining = match iter. advance_by ( max_code_point_count as usize ) {
605
+ Ok ( ( ) ) => 0 ,
606
+ Err ( remaining) => remaining. get ( ) ,
607
+ } ;
608
+ // SAFETY: The offset of `.code_point_indices()` is guaranteed to be
609
+ // in-bounds and between code point boundaries.
610
+ let truncated = unsafe {
611
+ Wtf8 :: from_bytes_unchecked ( self . bytes . get_unchecked ( ..iter. front_offset ) )
612
+ } ;
613
+ ( truncated, max_code_point_count as usize - remaining)
614
+ } else {
615
+ // Use the optimized code point counting algorithm for the full
616
+ // string.
617
+ ( self , self . code_points ( ) . count ( ) )
618
+ } ;
619
+
620
+ // The `width` field is more of a minimum width parameter at this point.
621
+ if let Some ( width) = f. options ( ) . get_width ( )
622
+ && code_point_count < width as usize
623
+ {
624
+ // If we're under the minimum width, then fill up the minimum width
625
+ // with the specified string + some alignment.
626
+ let post_padding = f. padding ( width - code_point_count as u16 , fmt:: Alignment :: Left ) ?;
627
+ s. write_lossy ( f) ?;
628
+ post_padding. write ( f)
629
+ } else {
630
+ // If we're over the minimum width or there is no minimum width, we
631
+ // can just emit the string.
632
+ s. write_lossy ( f)
608
633
}
609
634
}
610
635
}
@@ -726,6 +751,19 @@ impl Wtf8 {
726
751
}
727
752
}
728
753
754
+ /// Writes the string as lossy UTF-8 like [`Wtf8::to_string_lossy`].
755
+ /// It ignores formatter flags.
756
+ fn write_lossy ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
757
+ let wtf8_bytes = & self . bytes ;
758
+ let mut pos = 0 ;
759
+ while let Some ( ( surrogate_pos, _) ) = self . next_surrogate ( pos) {
760
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..surrogate_pos] ) } ) ?;
761
+ f. write_str ( UTF8_REPLACEMENT_CHARACTER ) ?;
762
+ pos = surrogate_pos + 3 ;
763
+ }
764
+ f. write_str ( unsafe { str:: from_utf8_unchecked ( & wtf8_bytes[ pos..] ) } )
765
+ }
766
+
729
767
/// Converts the WTF-8 string to potentially ill-formed UTF-16
730
768
/// and returns an iterator of 16-bit code units.
731
769
///
0 commit comments