Skip to content

Commit f0b2ddb

Browse files
authored
Merge pull request #76 from epage/osstr
fix(lexarg): Use official encoded_bytes API
2 parents 055306d + 23d54d9 commit f0b2ddb

File tree

1 file changed

+23
-52
lines changed

1 file changed

+23
-52
lines changed

crates/lexarg/src/ext.rs

Lines changed: 23 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use std::ffi::OsStr;
22

33
pub(crate) trait OsStrExt: private::Sealed {
44
/// Converts to a string slice.
5+
/// The `Utf8Error` is guaranteed to have a valid UTF-8 boundary
6+
/// in its `valid_up_to()`
57
fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
68
/// Returns `true` if the given pattern matches a sub-slice of
79
/// this string slice.
@@ -35,7 +37,7 @@ pub(crate) trait OsStrExt: private::Sealed {
3537

3638
impl OsStrExt for OsStr {
3739
fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
38-
let bytes = to_bytes(self);
40+
let bytes = self.as_encoded_bytes();
3941
std::str::from_utf8(bytes)
4042
}
4143

@@ -44,22 +46,22 @@ impl OsStrExt for OsStr {
4446
}
4547

4648
fn find(&self, needle: &str) -> Option<usize> {
47-
let bytes = to_bytes(self);
49+
let bytes = self.as_encoded_bytes();
4850
(0..=self.len().checked_sub(needle.len())?)
4951
.find(|&x| bytes[x..].starts_with(needle.as_bytes()))
5052
}
5153

5254
fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
53-
let bytes = to_bytes(self);
55+
let bytes = self.as_encoded_bytes();
5456
bytes.strip_prefix(prefix.as_bytes()).map(|s| {
5557
// SAFETY:
56-
// - This came from `to_bytes`
57-
// - Since `prefix` is `&str`, any split will be along UTF-8 boundaries
58-
unsafe { to_os_str_unchecked(s) }
58+
// - This came from `as_encoded_bytes`
59+
// - Since `prefix` is `&str`, any split will be along a UTF-8 boundary
60+
unsafe { OsStr::from_encoded_bytes_unchecked(s) }
5961
})
6062
}
6163
fn starts_with(&self, prefix: &str) -> bool {
62-
let bytes = to_bytes(self);
64+
let bytes = self.as_encoded_bytes();
6365
bytes.starts_with(prefix.as_bytes())
6466
}
6567

@@ -74,13 +76,18 @@ impl OsStrExt for OsStr {
7476
fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
7577
let start = self.find(needle)?;
7678
let end = start + needle.len();
77-
let haystack = to_bytes(self);
79+
let haystack = self.as_encoded_bytes();
7880
let first = &haystack[0..start];
7981
let second = &haystack[end..];
8082
// SAFETY:
81-
// - This came from `to_bytes`
82-
// - Since `needle` is `&str`, any split will be along UTF-8 boundaries
83-
unsafe { Some((to_os_str_unchecked(first), to_os_str_unchecked(second))) }
83+
// - This came from `as_encoded_bytes`
84+
// - Since `needle` is `&str`, any split will be along a UTF-8 boundary
85+
unsafe {
86+
Some((
87+
OsStr::from_encoded_bytes_unchecked(first),
88+
OsStr::from_encoded_bytes_unchecked(second),
89+
))
90+
}
8491
}
8592
}
8693

@@ -90,45 +97,6 @@ mod private {
9097
impl Sealed for std::ffi::OsStr {}
9198
}
9299

93-
/// Allow access to raw bytes
94-
///
95-
/// As the non-UTF8 encoding is not defined, the bytes only make sense when compared with
96-
/// 7-bit ASCII or `&str`
97-
///
98-
/// # Compatibility
99-
///
100-
/// There is no guarantee how non-UTF8 bytes will be encoded, even within versions of this crate
101-
/// (since it's dependent on rustc)
102-
fn to_bytes(s: &OsStr) -> &[u8] {
103-
// SAFETY:
104-
// - Lifetimes are the same
105-
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
106-
// - The primary contract is that the encoding for invalid surrogate code points is not
107-
// guaranteed which isn't a problem here
108-
//
109-
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
110-
// but it's in limbo
111-
unsafe { std::mem::transmute(s) }
112-
}
113-
114-
/// Restore raw bytes as `OsStr`
115-
///
116-
/// # Safety
117-
///
118-
/// - `&[u8]` must either be a `&str` or have originated with `to_bytes` within the same binary
119-
/// - Any splits of the original `&[u8]` must be done along UTF-8 boundaries
120-
unsafe fn to_os_str_unchecked(s: &[u8]) -> &OsStr {
121-
// SAFETY:
122-
// - Lifetimes are the same
123-
// - Types are compatible (`OsStr` is effectively a transparent wrapper for `[u8]`)
124-
// - The primary contract is that the encoding for invalid surrogate code points is not
125-
// guaranteed which isn't a problem here
126-
//
127-
// There is a proposal to support this natively (https://github.com/rust-lang/rust/pull/95290)
128-
// but it's in limbo
129-
std::mem::transmute(s)
130-
}
131-
132100
pub struct Split<'s, 'n> {
133101
haystack: Option<&'s OsStr>,
134102
needle: &'n str,
@@ -161,7 +129,10 @@ impl<'s, 'n> Iterator for Split<'s, 'n> {
161129
///
162130
/// `index` must be at a valid UTF-8 boundary
163131
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
164-
let bytes = to_bytes(os);
132+
let bytes = os.as_encoded_bytes();
165133
let (first, second) = bytes.split_at(index);
166-
(to_os_str_unchecked(first), to_os_str_unchecked(second))
134+
(
135+
OsStr::from_encoded_bytes_unchecked(first),
136+
OsStr::from_encoded_bytes_unchecked(second),
137+
)
167138
}

0 commit comments

Comments
 (0)