@@ -1555,18 +1555,13 @@ impl<'h> Match<'h> {
1555
1555
1556
1556
impl < ' h > core:: fmt:: Debug for Match < ' h > {
1557
1557
fn fmt ( & self , f : & mut core:: fmt:: Formatter ) -> core:: fmt:: Result {
1558
+ use regex_automata:: util:: escape:: DebugHaystack ;
1559
+
1558
1560
let mut fmt = f. debug_struct ( "Match" ) ;
1559
- fmt. field ( "start" , & self . start ) . field ( "end" , & self . end ) ;
1560
- if let Ok ( s) = core:: str:: from_utf8 ( self . as_bytes ( ) ) {
1561
- fmt. field ( "bytes" , & s) ;
1562
- } else {
1563
- // FIXME: It would be nice if this could be printed as a string
1564
- // with invalid UTF-8 replaced with hex escapes. A alloc would
1565
- // probably okay if that makes it easier, but regex-automata does
1566
- // (at time of writing) have internal routines that do this. So
1567
- // maybe we should expose them.
1568
- fmt. field ( "bytes" , & self . as_bytes ( ) ) ;
1569
- }
1561
+ fmt. field ( "start" , & self . start )
1562
+ . field ( "end" , & self . end )
1563
+ . field ( "bytes" , & DebugHaystack ( & self . as_bytes ( ) ) ) ;
1564
+
1570
1565
fmt. finish ( )
1571
1566
}
1572
1567
}
@@ -2620,3 +2615,88 @@ fn no_expansion<T: AsRef<[u8]>>(replacement: &T) -> Option<Cow<'_, [u8]>> {
2620
2615
None => Some ( Cow :: Borrowed ( replacement) ) ,
2621
2616
}
2622
2617
}
2618
+
2619
#[cfg(test)]
mod tests {
    //! Unit tests for `Match` accessors and its `Debug` output.

    use super::*;
    use alloc::format;

    /// A non-empty match reports its offsets, length and matched bytes.
    #[test]
    fn test_match_properties() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);

        assert_eq!(m.start(), 7);
        assert_eq!(m.end(), 12);
        assert!(!m.is_empty());
        assert_eq!(m.len(), 5);
        assert_eq!(m.as_bytes(), b"world");
    }

    /// A zero-width match on an empty haystack is empty with length 0.
    #[test]
    fn test_empty_match() {
        let haystack = b"";
        let m = Match::new(haystack, 0, 0);

        assert!(m.is_empty());
        assert_eq!(m.len(), 0);
    }

    /// Valid UTF-8 is shown as a plain quoted string.
    #[test]
    fn test_debug_output_valid_utf8() {
        let haystack = b"Hello, world!";
        let m = Match::new(haystack, 7, 12);
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 12, bytes: "world" }"#
        );
    }

    /// A byte that is not valid UTF-8 is shown as a `\x` hex escape.
    #[test]
    fn test_debug_output_invalid_utf8() {
        // Offset 7 is the lone 0xFF byte; 8..13 is "world".
        let haystack = b"Hello, \xFFworld!";
        let m = Match::new(haystack, 7, 13);
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 7, end: 13, bytes: "\xffworld" }"#
        );
    }

    /// Multi-byte scalar values (emoji, Hangul, Arabic) are printed as-is.
    #[test]
    fn test_debug_output_various_unicode() {
        let haystack =
            "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!".as_bytes();
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 62, bytes: "Hello, 😊 world! 안녕하세요? مرحبا بالعالم!" }"#
        );
    }

    /// ASCII control characters are escaped (`\t`, `\n`, `\u{1b}`).
    #[test]
    fn test_debug_output_ascii_escape() {
        // 38 bytes in total; no padding after the escape sequences.
        let haystack = b"Hello,\tworld!\nThis is a \x1b[31mtest\x1b[0m.";
        let m = Match::new(haystack, 0, haystack.len());
        let debug_str = format!("{:?}", m);

        assert_eq!(
            debug_str,
            r#"Match { start: 0, end: 38, bytes: "Hello,\tworld!\nThis is a \u{1b}[31mtest\u{1b}[0m." }"#
        );
    }

    /// Only the matched span is printed, not the surrounding haystack.
    #[test]
    fn test_debug_output_match_in_middle() {
        let haystack = b"The quick brown fox jumps over the lazy dog.";
        let m = Match::new(haystack, 16, 19);
        let debug_str = format!("{:?}", m);

        assert_eq!(debug_str, r#"Match { start: 16, end: 19, bytes: "fox" }"#);
    }
}
0 commit comments