Skip to content

Commit 2b80b80

Browse files
committed
Document that OsString and OsStr are bytes; provide conversions to bytes
This commit consists almost entirely of documentation, updating the module documentation in `std::ffi` and the type documentation on `OsString`. This only adds two methods: `OsStr::as_bytes` and `OsString::into_vec`, and plumbs those methods down to the existing implementations for both UNIX and Windows.
1 parent 8d8135f commit 2b80b80

File tree

9 files changed

+112
-41
lines changed

9 files changed

+112
-41
lines changed

compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ fn osstr_as_utf8_bytes(path: &OsStr) -> &[u8] {
3939
#[cfg(unix)]
4040
{
4141
use std::os::unix::ffi::OsStrExt;
42-
path.as_bytes()
42+
OsStrExt::as_bytes(path)
4343
}
4444
#[cfg(not(unix))]
4545
{

compiler/rustc_fs_util/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ pub fn path_to_c_string(p: &Path) -> CString {
8282
use std::ffi::OsStr;
8383
use std::os::unix::ffi::OsStrExt;
8484
let p: &OsStr = p.as_ref();
85-
CString::new(p.as_bytes()).unwrap()
85+
CString::new(OsStrExt::as_bytes(p)).unwrap()
8686
}
8787
#[cfg(windows)]
8888
pub fn path_to_c_string(p: &Path) -> CString {

library/std/src/ffi/mod.rs

+20-10
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,8 @@
9090
//! exists you will get a <code>[Some]\(os_string)</code>, which you can
9191
//! *then* try to convert to a Rust string. This yields a [`Result`], so that
9292
//! your code can detect errors in case the environment variable did
93-
//! not in fact contain valid Unicode data.
93+
//! not in fact contain valid Unicode data. You can also process the `OsString` directly, such as
94+
//! by using it as a filename.
9495
//!
9596
//! * [`OsStr`] losslessly represents a borrowed reference to a platform string.
9697
//! However, this representation is not necessarily in a form native to the platform.
@@ -99,17 +100,28 @@
99100
//!
100101
//! # Conversions
101102
//!
103+
//! ## On all platforms
104+
//!
105+
//! On all platforms, `OsStr` and `OsString` consist of a sequence of bytes; see [`OsString`] for
106+
//! more details on its encoding on different platforms.
107+
//!
108+
//! `OsStr` provides the method [`OsStr::as_bytes`], which provides a zero-cost conversion to a
109+
//! byte slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
110+
//! `Deref`.)
111+
//!
112+
//! `OsString` provides the method [`OsString::into_vec`], which provides a zero-cost conversion to
113+
//! `Vec<u8>`.
114+
//!
102115
//! ## On Unix
103116
//!
104117
//! On Unix, [`OsStr`] implements the
105118
//! <code>std::os::unix::ffi::[OsStrExt][unix.OsStrExt]</code> trait, which
106-
//! augments it with two methods, [`from_bytes`] and [`as_bytes`].
107-
//! These do inexpensive conversions from and to UTF-8 byte slices.
119+
//! augments it with an additional method [`from_bytes`], providing a zero-cost conversion from a
120+
//! byte slice.
108121
//!
109122
//! Additionally, on Unix [`OsString`] implements the
110-
//! <code>std::os::unix::ffi::[OsStringExt][unix.OsStringExt]</code> trait,
111-
//! which provides [`from_vec`] and [`into_vec`] methods that consume
112-
//! their arguments, and take or produce vectors of [`u8`].
123+
//! <code>std::os::unix::ffi::[OsStringExt][unix.OsStringExt]</code> trait, which provides the
124+
//! [`from_vec`] method that consumes a `Vec<u8>` and produces an `OsString`.
113125
//!
114126
//! ## On Windows
115127
//!
@@ -119,8 +131,8 @@
119131
//! On Windows, [`OsStr`] implements the
120132
//! <code>std::os::windows::ffi::[OsStrExt][windows.OsStrExt]</code> trait,
121133
//! which provides an [`encode_wide`] method. This provides an
122-
//! iterator that can be [`collect`]ed into a vector of [`u16`]. After a nul
123-
//! characters is appended, this is the same as a native Windows string.
134+
//! iterator that can be [`collect`]ed into a vector of [`u16`]. After a 16-bit nul
135+
//! character is appended, this is the same as a native Windows string.
124136
//!
125137
//! Additionally, on Windows [`OsString`] implements the
126138
//! <code>std::os::windows::ffi::[OsStringExt][windows.OsStringExt]</code>
@@ -133,10 +145,8 @@
133145
//! [`env::var_os()`]: crate::env::var_os "env::var_os"
134146
//! [unix.OsStringExt]: crate::os::unix::ffi::OsStringExt "os::unix::ffi::OsStringExt"
135147
//! [`from_vec`]: crate::os::unix::ffi::OsStringExt::from_vec "os::unix::ffi::OsStringExt::from_vec"
136-
//! [`into_vec`]: crate::os::unix::ffi::OsStringExt::into_vec "os::unix::ffi::OsStringExt::into_vec"
137148
//! [unix.OsStrExt]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt"
138149
//! [`from_bytes`]: crate::os::unix::ffi::OsStrExt::from_bytes "os::unix::ffi::OsStrExt::from_bytes"
139-
//! [`as_bytes`]: crate::os::unix::ffi::OsStrExt::as_bytes "os::unix::ffi::OsStrExt::as_bytes"
140150
//! [`OsStrExt`]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt"
141151
//! [windows.OsStrExt]: crate::os::windows::ffi::OsStrExt "os::windows::ffi::OsStrExt"
142152
//! [`encode_wide`]: crate::os::windows::ffi::OsStrExt::encode_wide "os::windows::ffi::OsStrExt::encode_wide"

library/std/src/ffi/os_str.rs

+63-13
Original file line numberDiff line numberDiff line change
@@ -30,20 +30,27 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
3030
///
3131
/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust
3232
/// and platform-native string values, and in particular allowing a Rust string
33-
/// to be converted into an "OS" string with no cost if possible. A consequence
34-
/// of this is that `OsString` instances are *not* `NUL` terminated; in order
35-
/// to pass to e.g., Unix system call, you should create a [`CStr`].
33+
/// to be converted into an "OS" string with no cost. A consequence of this is
34+
/// that `OsString` instances are *not* `NUL` terminated; in order to pass to
35+
/// e.g., a Unix system call, you should create a [`CStr`].
3636
///
37-
/// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: the former
38-
/// in each pair are owned strings; the latter are borrowed
39-
/// references.
37+
/// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: `OsString` is
38+
/// an owned string like `String`, while `&OsStr` is a borrowed reference like `&str`.
4039
///
41-
/// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in
42-
/// the form native to the platform; While on Unix, strings are stored as a
43-
/// sequence of 8-bit values, on Windows, where strings are 16-bit value based
44-
/// as just discussed, strings are also actually stored as a sequence of 8-bit
45-
/// values, encoded in a less-strict variant of UTF-8. This is useful to
46-
/// understand when handling capacity and length values.
40+
/// Note that `OsString` and [`OsStr`] internally do not necessarily hold strings in the form
41+
/// native to the platform. On all platforms, `OsString` and `OsStr` consist of a sequence of
42+
/// bytes, in a superset of UTF-8; any valid UTF-8 sequence is a valid `OsString` or `OsStr`.
43+
/// * On Unix, these bytes can contain any values, in an arbitrary encoding (not necessarily
44+
/// UTF-8, and not necessarily the same encoding for different OS strings).
45+
/// * On Windows, where the native OS uses a sequence of 16-bit values, `OsString` and `OsStr`
46+
/// still consist of a sequence of 8-bit values, encoded in a superset of UTF-8 called
47+
/// ["WTF-8"](https://simonsapin.github.io/wtf-8/) ("Wobbly Translation Format 8-bit"). The
48+
/// WTF-8 format allows encoding arbitrary 16-bit values, including unpaired UTF-16 surrogates
49+
/// that do not constitute valid Unicode, since Windows accepts sequences of arbitrary 16-bit
50+
/// values. (In practice, Windows filenames and similar are almost always valid UTF-16.)
51+
///
52+
/// Capacity and length values are always in terms of the sequence of bytes, not characters or
53+
/// 16-bit values.
4754
///
4855
/// # Creating an `OsString`
4956
///
@@ -65,8 +72,16 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
6572
///
6673
/// # Conversions
6774
///
75+
/// `OsStr` provides the method [`OsStr::as_bytes`], which provides a zero-cost conversion to a
76+
/// byte slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
77+
/// `Deref`.)
78+
///
79+
/// `OsString` provides the method [`OsString::into_vec`], which provides a zero-cost conversion to
80+
/// `Vec<u8>`.
81+
///
6882
/// See the [module's toplevel documentation about conversions][conversions] for a discussion on
69-
/// the traits which `OsString` implements for [conversions] from/to native representations.
83+
/// OS-specific traits which `OsString` and `OsStr` implement for [conversions] from/to native
84+
/// representations.
7085
///
7186
/// [`CStr`]: crate::ffi::CStr
7287
/// [conversions]: super#conversions
@@ -163,6 +178,24 @@ impl OsString {
163178
self.inner.into_string().map_err(|buf| OsString { inner: buf })
164179
}
165180

181+
/// Converts the `OsString` into a `Vec<u8>`.
182+
///
183+
/// # Examples
184+
///
185+
/// ```
186+
/// #![feature(osstr_bytes)]
187+
/// use std::ffi::OsString;
188+
///
189+
/// let os_string = OsString::from("foo");
190+
/// let v = os_string.into_vec();
191+
/// assert_eq!(v, b"foo");
192+
/// ```
193+
#[unstable(feature = "osstr_bytes", issue = "none")]
194+
#[inline]
195+
pub fn into_vec(self) -> Vec<u8> {
196+
self.inner.into_vec()
197+
}
198+
166199
/// Extends the string with the given <code>&[OsStr]</code> slice.
167200
///
168201
/// # Examples
@@ -667,6 +700,23 @@ impl OsStr {
667700
self.inner.to_str()
668701
}
669702

703+
/// Converts the `OsStr` into a `&[u8]`.
704+
///
705+
/// # Examples
706+
///
707+
/// ```
708+
/// #![feature(osstr_bytes)]
709+
/// use std::ffi::OsStr;
710+
///
711+
/// let os_str = OsStr::new("foo");
712+
/// assert_eq!(os_str.as_bytes(), b"foo");
713+
/// ```
714+
#[unstable(feature = "osstr_bytes", issue = "none")]
715+
#[inline]
716+
pub fn as_bytes(&self) -> &[u8] {
717+
self.inner.as_u8_slice()
718+
}
719+
670720
/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
671721
///
672722
/// Any non-Unicode sequences are replaced with

library/std/src/os/unix/ffi/mod.rs

-8
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,6 @@
1111
//! // OsStringExt::from_vec
1212
//! let os_string = OsString::from_vec(bytes);
1313
//! assert_eq!(os_string.to_str(), Some("foo"));
14-
//!
15-
//! // OsStringExt::into_vec
16-
//! let bytes = os_string.into_vec();
17-
//! assert_eq!(bytes, b"foo");
1814
//! ```
1915
//!
2016
//! ```
@@ -26,10 +22,6 @@
2622
//! // OsStrExt::from_bytes
2723
//! let os_str = OsStr::from_bytes(bytes);
2824
//! assert_eq!(os_str.to_str(), Some("foo"));
29-
//!
30-
//! // OsStrExt::as_bytes
31-
//! let bytes = os_str.as_bytes();
32-
//! assert_eq!(bytes, b"foo");
3325
//! ```
3426
//!
3527
//! [`std::ffi`]: crate::ffi

library/std/src/sys/unix/os_str.rs

+10
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,11 @@ impl Buf {
182182
pub fn into_rc(&self) -> Rc<Slice> {
183183
self.as_slice().into_rc()
184184
}
185+
186+
#[inline]
187+
pub fn into_vec(self) -> Vec<u8> {
188+
self.inner
189+
}
185190
}
186191

187192
impl Slice {
@@ -190,6 +195,11 @@ impl Slice {
190195
unsafe { mem::transmute(s) }
191196
}
192197

198+
#[inline]
199+
pub fn as_u8_slice(&self) -> &[u8] {
200+
unsafe { mem::transmute(self) }
201+
}
202+
193203
#[inline]
194204
pub fn from_str(s: &str) -> &Slice {
195205
Slice::from_u8_slice(s.as_bytes())

library/std/src/sys/windows/os_str.rs

+10
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ impl Buf {
146146
pub fn into_rc(&self) -> Rc<Slice> {
147147
self.as_slice().into_rc()
148148
}
149+
150+
#[inline]
151+
pub fn into_vec(self) -> Vec<u8> {
152+
self.inner.into_vec()
153+
}
149154
}
150155

151156
impl Slice {
@@ -193,6 +198,11 @@ impl Slice {
193198
unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
194199
}
195200

201+
#[inline]
202+
pub fn as_u8_slice(&self) -> &[u8] {
203+
self.inner.as_inner()
204+
}
205+
196206
#[inline]
197207
pub fn make_ascii_lowercase(&mut self) {
198208
self.inner.make_ascii_lowercase()

library/std/src/sys_common/wtf8.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,6 @@
33
//! This library uses Rust’s type system to maintain
44
//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
55
//! like the `String` and `&str` types do for UTF-8.
6-
//!
7-
//! Since [WTF-8 must not be used
8-
//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
9-
//! this library deliberately does not provide access to the underlying bytes
10-
//! of WTF-8 strings,
11-
//! nor can it decode WTF-8 from arbitrary bytes.
12-
//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
136
147
// this module is imported from @SimonSapin's repo and has tons of dead code on
158
// unix (it's mostly used on windows), so don't worry about dead code here.
@@ -399,6 +392,12 @@ impl Wtf8Buf {
399392
let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
400393
Wtf8Buf { bytes: bytes.into_vec() }
401394
}
395+
396+
/// Converts this `Wtf8Buf` into a `Vec<u8>`.
397+
#[inline]
398+
pub fn into_vec(self) -> Vec<u8> {
399+
self.bytes
400+
}
402401
}
403402

404403
/// Creates a new WTF-8 string from an iterator of code points.

src/test/ui/env-funky-keys.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,14 @@
99
// no-prefer-dynamic
1010

1111
#![feature(rustc_private)]
12+
#![feature(osstr_bytes)]
1213

1314
extern crate libc;
1415

1516
use libc::c_char;
1617
use libc::execve;
1718
use std::env;
1819
use std::ffi::CString;
19-
use std::os::unix::prelude::*;
2020
use std::ptr;
2121

2222
fn main() {

0 commit comments

Comments
 (0)