Skip to content

Commit 0877d11

Browse files
authored
Rollup merge of #113442 - epage:osstring, r=cuviper
Allow limited access to `OsString` bytes. This extends #109698 to allow no-cost conversion between `Vec<u8>` and `OsString` as suggested in feedback from `os_str_bytes` crate in #111544.
2 parents 58a4be1 + ee604fc commit 0877d11

File tree

4 files changed

+100
-0
lines changed

4 files changed

+100
-0
lines changed

library/std/src/ffi/os_str.rs

+65
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,51 @@ impl OsString {
141141
OsString { inner: Buf::from_string(String::new()) }
142142
}
143143

144+
/// Converts bytes to an `OsString` without checking that the bytes contain
145+
/// valid [`OsStr`]-encoded data.
146+
///
147+
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
148+
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
149+
/// ASCII.
150+
///
151+
/// See the [module's toplevel documentation about conversions][conversions] for safe,
152+
/// cross-platform [conversions] from/to native representations.
153+
///
154+
/// # Safety
155+
///
156+
/// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of
157+
/// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same Rust version
158+
/// built for the same target platform. For example, reconstructing an `OsString` from bytes sent
159+
/// over the network or stored in a file will likely violate these safety rules.
160+
///
161+
/// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be
162+
/// split either immediately before or immediately after any valid non-empty UTF-8 substring.
163+
///
164+
/// # Example
165+
///
166+
/// ```
167+
/// #![feature(os_str_bytes)]
168+
///
169+
/// use std::ffi::OsStr;
170+
///
171+
/// let os_str = OsStr::new("Mary had a little lamb");
172+
/// let bytes = os_str.as_os_str_bytes();
173+
/// let words = bytes.split(|b| *b == b' ');
174+
/// let words: Vec<&OsStr> = words.map(|word| {
175+
/// // SAFETY:
176+
/// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes`
177+
/// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring
178+
/// unsafe { OsStr::from_os_str_bytes_unchecked(word) }
179+
/// }).collect();
180+
/// ```
181+
///
182+
/// [conversions]: super#conversions
183+
#[inline]
184+
#[unstable(feature = "os_str_bytes", issue = "111544")]
185+
pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec<u8>) -> Self {
186+
OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) }
187+
}
188+
144189
/// Converts to an [`OsStr`] slice.
145190
///
146191
/// # Examples
@@ -159,6 +204,26 @@ impl OsString {
159204
self
160205
}
161206

207+
/// Converts the `OsString` into a byte vector. To convert the byte vector back into an
208+
/// `OsString`, use the [`OsString::from_os_str_bytes_unchecked`] function.
209+
///
210+
/// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8.
211+
/// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit
212+
/// ASCII.
213+
///
214+
/// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should
215+
/// be treated as opaque and only comparable within the same Rust version built for the same
216+
/// target platform. For example, sending the bytes over the network or storing them in a file
217+
/// will likely result in incompatible data. See [`OsString`] for more encoding details
218+
/// and [`std::ffi`] for platform-specific, specified conversions.
219+
///
220+
/// [`std::ffi`]: crate::ffi
221+
#[inline]
222+
#[unstable(feature = "os_str_bytes", issue = "111544")]
223+
pub fn into_os_str_bytes(self) -> Vec<u8> {
224+
self.inner.into_os_str_bytes()
225+
}
226+
162227
/// Converts the `OsString` into a [`String`] if it contains valid Unicode data.
163228
///
164229
/// On failure, ownership of the original `OsString` is returned.

library/std/src/sys/unix/os_str.rs

+10
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,16 @@ impl AsInner<[u8]> for Buf {
9696
}
9797

9898
impl Buf {
99+
#[inline]
100+
pub fn into_os_str_bytes(self) -> Vec<u8> {
101+
self.inner
102+
}
103+
104+
#[inline]
105+
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
106+
Self { inner: s }
107+
}
108+
99109
pub fn from_string(s: String) -> Buf {
100110
Buf { inner: s.into_bytes() }
101111
}

library/std/src/sys/windows/os_str.rs

+10
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,16 @@ impl fmt::Display for Slice {
6363
}
6464

6565
impl Buf {
66+
#[inline]
67+
pub fn into_os_str_bytes(self) -> Vec<u8> {
68+
self.inner.into_bytes()
69+
}
70+
71+
#[inline]
72+
pub unsafe fn from_os_str_bytes_unchecked(s: Vec<u8>) -> Self {
73+
Self { inner: Wtf8Buf::from_bytes_unchecked(s) }
74+
}
75+
6676
pub fn with_capacity(capacity: usize) -> Buf {
6777
Buf { inner: Wtf8Buf::with_capacity(capacity) }
6878
}

library/std/src/sys_common/wtf8.rs

+15
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,15 @@ impl Wtf8Buf {
182182
Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true }
183183
}
184184

185+
/// Creates a WTF-8 string from a WTF-8 byte vec.
186+
///
187+
/// Since the byte vec is not checked for valid WTF-8, this function is
188+
/// marked unsafe.
189+
#[inline]
190+
pub unsafe fn from_bytes_unchecked(value: Vec<u8>) -> Wtf8Buf {
191+
Wtf8Buf { bytes: value, is_known_utf8: false }
192+
}
193+
185194
/// Creates a WTF-8 string from a UTF-8 `String`.
186195
///
187196
/// This takes ownership of the `String` and does not copy.
@@ -402,6 +411,12 @@ impl Wtf8Buf {
402411
self.bytes.truncate(new_len)
403412
}
404413

414+
/// Consumes the WTF-8 string and converts it to a vec of bytes.
415+
#[inline]
416+
pub fn into_bytes(self) -> Vec<u8> {
417+
self.bytes
418+
}
419+
405420
/// Consumes the WTF-8 string and tries to convert it to UTF-8.
406421
///
407422
/// This does not copy the data.

0 commit comments

Comments
 (0)