@@ -2,6 +2,8 @@ use std::ffi::OsStr;
2
2
3
3
pub ( crate ) trait OsStrExt : private:: Sealed {
4
4
/// Converts to a string slice.
5
+ /// The returned `Utf8Error` is guaranteed to have a valid UTF-8 boundary
6
+ /// at its `valid_up_to()` index.
5
7
fn try_str ( & self ) -> Result < & str , std:: str:: Utf8Error > ;
6
8
/// Returns `true` if the given pattern matches a sub-slice of
7
9
/// this string slice.
@@ -35,7 +37,7 @@ pub(crate) trait OsStrExt: private::Sealed {
35
37
36
38
impl OsStrExt for OsStr {
37
39
fn try_str ( & self ) -> Result < & str , std:: str:: Utf8Error > {
38
- let bytes = to_bytes ( self ) ;
40
+ let bytes = self . as_encoded_bytes ( ) ;
39
41
std:: str:: from_utf8 ( bytes)
40
42
}
41
43
@@ -44,22 +46,22 @@ impl OsStrExt for OsStr {
44
46
}
45
47
46
48
fn find ( & self , needle : & str ) -> Option < usize > {
47
- let bytes = to_bytes ( self ) ;
49
+ let bytes = self . as_encoded_bytes ( ) ;
48
50
( 0 ..=self . len ( ) . checked_sub ( needle. len ( ) ) ?)
49
51
. find ( |& x| bytes[ x..] . starts_with ( needle. as_bytes ( ) ) )
50
52
}
51
53
52
54
fn strip_prefix ( & self , prefix : & str ) -> Option < & OsStr > {
53
- let bytes = to_bytes ( self ) ;
55
+ let bytes = self . as_encoded_bytes ( ) ;
54
56
bytes. strip_prefix ( prefix. as_bytes ( ) ) . map ( |s| {
55
57
// SAFETY:
56
- // - This came from `to_bytes `
57
- // - Since `prefix` is `&str`, any split will be along UTF-8 boundarie
58
- unsafe { to_os_str_unchecked ( s) }
58
+ // - This came from `as_encoded_bytes`
59
+ // - Since `prefix` is `&str`, any split will be along a UTF-8 boundary
60
+ unsafe { OsStr :: from_encoded_bytes_unchecked ( s) }
59
61
} )
60
62
}
61
63
fn starts_with ( & self , prefix : & str ) -> bool {
62
- let bytes = to_bytes ( self ) ;
64
+ let bytes = self . as_encoded_bytes ( ) ;
63
65
bytes. starts_with ( prefix. as_bytes ( ) )
64
66
}
65
67
@@ -74,13 +76,18 @@ impl OsStrExt for OsStr {
74
76
fn split_once ( & self , needle : & ' _ str ) -> Option < ( & OsStr , & OsStr ) > {
75
77
let start = self . find ( needle) ?;
76
78
let end = start + needle. len ( ) ;
77
- let haystack = to_bytes ( self ) ;
79
+ let haystack = self . as_encoded_bytes ( ) ;
78
80
let first = & haystack[ 0 ..start] ;
79
81
let second = & haystack[ end..] ;
80
82
// SAFETY:
81
- // - This came from `to_bytes`
82
- // - Since `needle` is `&str`, any split will be along UTF-8 boundarie
83
- unsafe { Some ( ( to_os_str_unchecked ( first) , to_os_str_unchecked ( second) ) ) }
83
+ // - This came from `as_encoded_bytes`
84
+ // - Since `needle` is `&str`, any split will be along a UTF-8 boundary
85
+ unsafe {
86
+ Some ( (
87
+ OsStr :: from_encoded_bytes_unchecked ( first) ,
88
+ OsStr :: from_encoded_bytes_unchecked ( second) ,
89
+ ) )
90
+ }
84
91
}
85
92
}
86
93
@@ -90,45 +97,6 @@ mod private {
90
97
impl Sealed for std:: ffi:: OsStr { }
91
98
}
92
99
93
- /// Allow access to raw bytes
94
- ///
95
- /// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
96
- /// 7-bit ASCII or `&str`
97
- ///
98
- /// # Compatibility
99
- ///
100
- /// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
101
- /// (since its dependent on rustc)
102
- fn to_bytes ( s : & OsStr ) -> & [ u8 ] {
103
- // SAFETY:
104
- // - Lifetimes are the same
105
- // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
106
- // - The primary contract is that the encoding for invalid surrogate code points is not
107
- // guaranteed which isn't a problem here
108
- //
109
- // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
110
- // but its in limbo
111
- unsafe { std:: mem:: transmute ( s) }
112
- }
113
-
114
- /// Restore raw bytes as `OsStr`
115
- ///
116
- /// # Safety
117
- ///
118
- /// - `&[u8]` must either by a `&str` or originated with `to_bytes` within the same binary
119
- /// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
120
- unsafe fn to_os_str_unchecked ( s : & [ u8 ] ) -> & OsStr {
121
- // SAFETY:
122
- // - Lifetimes are the same
123
- // - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
124
- // - The primary contract is that the encoding for invalid surrogate code points is not
125
- // guaranteed which isn't a problem here
126
- //
127
- // There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
128
- // but its in limbo
129
- std:: mem:: transmute ( s)
130
- }
131
-
132
100
pub struct Split < ' s , ' n > {
133
101
haystack : Option < & ' s OsStr > ,
134
102
needle : & ' n str ,
@@ -161,7 +129,10 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
161
129
///
162
130
/// `index` must be at a valid UTF-8 boundary
163
131
pub ( crate ) unsafe fn split_at ( os : & OsStr , index : usize ) -> ( & OsStr , & OsStr ) {
164
- let bytes = to_bytes ( os ) ;
132
+ let bytes = os . as_encoded_bytes ( ) ;
165
133
let ( first, second) = bytes. split_at ( index) ;
166
- ( to_os_str_unchecked ( first) , to_os_str_unchecked ( second) )
134
+ (
135
+ OsStr :: from_encoded_bytes_unchecked ( first) ,
136
+ OsStr :: from_encoded_bytes_unchecked ( second) ,
137
+ )
167
138
}
0 commit comments