diff --git a/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs
index 476d6a54e1256..cc558352bcf3b 100644
--- a/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs
+++ b/compiler/rustc_codegen_cranelift/src/debuginfo/line_info.rs
@@ -39,7 +39,7 @@ fn osstr_as_utf8_bytes(path: &OsStr) -> &[u8] {
#[cfg(unix)]
{
use std::os::unix::ffi::OsStrExt;
- path.as_bytes()
+ OsStrExt::as_bytes(path)
}
#[cfg(not(unix))]
{
diff --git a/compiler/rustc_fs_util/src/lib.rs b/compiler/rustc_fs_util/src/lib.rs
index 87e97c746ef56..abc188059680d 100644
--- a/compiler/rustc_fs_util/src/lib.rs
+++ b/compiler/rustc_fs_util/src/lib.rs
@@ -82,7 +82,7 @@ pub fn path_to_c_string(p: &Path) -> CString {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let p: &OsStr = p.as_ref();
- CString::new(p.as_bytes()).unwrap()
+ CString::new(OsStrExt::as_bytes(p)).unwrap()
}
#[cfg(windows)]
pub fn path_to_c_string(p: &Path) -> CString {
diff --git a/library/std/src/ffi/mod.rs b/library/std/src/ffi/mod.rs
index 13e3dacc30d63..7fda7497e616c 100644
--- a/library/std/src/ffi/mod.rs
+++ b/library/std/src/ffi/mod.rs
@@ -90,7 +90,8 @@
//! exists you will get a <code>[Some]\(os_string)</code>, which you can
//! *then* try to convert to a Rust string. This yields a [`Result`], so that
//! your code can detect errors in case the environment variable did
-//! not in fact contain valid Unicode data.
+//! not in fact contain valid Unicode data. You can also process the `OsString` directly, such as
+//! by using it as a filename.
//!
//! * [`OsStr`] losslessly represents a borrowed reference to a platform string.
//! However, this representation is not necessarily in a form native to the platform.
@@ -99,17 +100,28 @@
//!
//! # Conversions
//!
+//! ## On all platforms
+//!
+//! On all platforms, `OsStr` and `OsString` consist of a sequence of bytes; see [`OsString`] for
+//! more details on its encoding on different platforms.
+//!
+//! `OsStr` provides the method `OsStr::as_bytes`, which provides a zero-cost conversion to a byte
+//! slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
+//! `Deref`.)
+//!
+//! `OsString` provides the method `OsString::into_vec`, which provides a zero-cost conversion to
+//! `Vec<u8>`.
+//!
//! ## On Unix
//!
//! On Unix, [`OsStr`] implements the
//! <code>std::os::unix::ffi::[OsStrExt][unix.OsStrExt]</code> trait, which
-//! augments it with two methods, [`from_bytes`] and [`as_bytes`].
-//! These do inexpensive conversions from and to UTF-8 byte slices.
+//! augments it with an additional method [`from_bytes`], providing a zero-cost conversion from a
+//! byte slice.
//!
//! Additionally, on Unix [`OsString`] implements the
-//! <code>std::os::unix::ffi::[OsStringExt][unix.OsStringExt]</code> trait,
-//! which provides [`from_vec`] and [`into_vec`] methods that consume
-//! their arguments, and take or produce vectors of [`u8`].
+//! <code>std::os::unix::ffi::[OsStringExt][unix.OsStringExt]</code> trait, which provides the
+//! [`from_vec`] method that consumes a `Vec<u8>` and produces an `OsString`.
//!
//! ## On Windows
//!
@@ -119,8 +131,8 @@
//! On Windows, [`OsStr`] implements the
//! <code>std::os::windows::ffi::[OsStrExt][windows.OsStrExt]</code> trait,
//! which provides an [`encode_wide`] method. This provides an
-//! iterator that can be [`collect`]ed into a vector of [`u16`]. After a nul
-//! characters is appended, this is the same as a native Windows string.
+//! iterator that can be [`collect`]ed into a vector of [`u16`]. After a 16-bit nul
+//! character is appended, this is the same as a native Windows string.
//!
//! Additionally, on Windows [`OsString`] implements the
//! <code>std::os::windows:ffi::[OsStringExt][windows.OsStringExt]</code>
@@ -133,10 +145,8 @@
//! [`env::var_os()`]: crate::env::var_os "env::var_os"
//! [unix.OsStringExt]: crate::os::unix::ffi::OsStringExt "os::unix::ffi::OsStringExt"
//! [`from_vec`]: crate::os::unix::ffi::OsStringExt::from_vec "os::unix::ffi::OsStringExt::from_vec"
-//! [`into_vec`]: crate::os::unix::ffi::OsStringExt::into_vec "os::unix::ffi::OsStringExt::into_vec"
//! [unix.OsStrExt]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt"
//! [`from_bytes`]: crate::os::unix::ffi::OsStrExt::from_bytes "os::unix::ffi::OsStrExt::from_bytes"
-//! [`as_bytes`]: crate::os::unix::ffi::OsStrExt::as_bytes "os::unix::ffi::OsStrExt::as_bytes"
//! [`OsStrExt`]: crate::os::unix::ffi::OsStrExt "os::unix::ffi::OsStrExt"
//! [windows.OsStrExt]: crate::os::windows::ffi::OsStrExt "os::windows::ffi::OsStrExt"
//! [`encode_wide`]: crate::os::windows::ffi::OsStrExt::encode_wide "os::windows::ffi::OsStrExt::encode_wide"
diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs
index 9b5e5d6c0cc4b..b287c4053d93d 100644
--- a/library/std/src/ffi/os_str.rs
+++ b/library/std/src/ffi/os_str.rs
@@ -30,20 +30,27 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
///
/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust
/// and platform-native string values, and in particular allowing a Rust string
-/// to be converted into an "OS" string with no cost if possible. A consequence
-/// of this is that `OsString` instances are *not* `NUL` terminated; in order
-/// to pass to e.g., Unix system call, you should create a [`CStr`].
+/// to be converted into an "OS" string with no cost. A consequence of this is
+/// that `OsString` instances are *not* `NUL` terminated; in order to pass to
+/// e.g., a Unix system call, you should create a [`CStr`].
///
-/// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: the former
-/// in each pair are owned strings; the latter are borrowed
-/// references.
+/// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: `OsString` is
+/// an owned string like `String`, while `&OsStr` is a borrowed reference like `&str`.
///
-/// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in
-/// the form native to the platform; While on Unix, strings are stored as a
-/// sequence of 8-bit values, on Windows, where strings are 16-bit value based
-/// as just discussed, strings are also actually stored as a sequence of 8-bit
-/// values, encoded in a less-strict variant of UTF-8. This is useful to
-/// understand when handling capacity and length values.
+/// Note that `OsString` and [`OsStr`] internally do not necessarily hold strings in the form
+/// native to the platform. On all platforms, `OsString` and `OsStr` consist of a sequence of
+/// bytes, in a superset of UTF-8; any valid UTF-8 sequence is a valid `OsString` or `OsStr`.
+/// * On Unix, these bytes can contain any values, in an arbitrary encoding (not necessarily
+/// UTF-8, and not necessarily the same encoding for different OS strings).
+/// * On Windows, where the native OS uses a sequence of 16-bit values, `OsString` and `OsStr`
+/// still consist of a sequence of 8-bit values, encoded in a superset of UTF-8 called
+/// ["WTF-8"](https://simonsapin.github.io/wtf-8/) ("Wobbly Translation Format 8-bit"). The
+/// WTF-8 format allows encoding arbitrary 16-bit values, including unpaired UTF-16 surrogates
+/// that do not constitute valid Unicode, since Windows accepts sequences of arbitrary 16-bit
+/// values. (In practice, Windows filenames and similar are almost always valid UTF-16.)
+///
+/// Capacity and length values are always in terms of the sequence of bytes, not characters or
+/// 16-bit values.
///
/// # Creating an `OsString`
///
@@ -65,8 +72,16 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
///
/// # Conversions
///
+/// `OsStr` provides the method `OsStr::as_bytes`, which provides a zero-cost conversion to a byte
+/// slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
+/// `Deref`.)
+///
+/// `OsString` provides the method `OsString::into_vec`, which provides a zero-cost conversion to
+/// `Vec<u8>`.
+///
/// See the [module's toplevel documentation about conversions][conversions] for a discussion on
-/// the traits which `OsString` implements for [conversions] from/to native representations.
+/// OS-specific traits which `OsString` and `OsStr` implement for [conversions] from/to native
+/// representations.
///
/// [`CStr`]: crate::ffi::CStr
/// [conversions]: super#conversions
@@ -163,6 +178,24 @@ impl OsString {
self.inner.into_string().map_err(|buf| OsString { inner: buf })
}
+ /// Converts the `OsString` into a `Vec<u8>`, consuming it.
+ ///
+ /// The returned vector holds the string's internal byte sequence, handed over by the
+ /// platform-specific buffer; see the type-level documentation for how those bytes are
+ /// encoded on each platform.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(osstr_bytes)]
+ /// use std::ffi::OsString;
+ ///
+ /// let os_string = OsString::from("foo");
+ /// let v = os_string.into_vec();
+ /// assert_eq!(v, b"foo");
+ /// ```
+ #[unstable(feature = "osstr_bytes", issue = "none")]
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.inner.into_vec()
+ }
+
/// Extends the string with the given <code>&[OsStr]</code> slice.
///
/// # Examples
@@ -667,6 +700,23 @@ impl OsStr {
self.inner.to_str()
}
+ /// Converts the `OsStr` into a `&[u8]`.
+ ///
+ /// This borrows the bytes of the platform-specific inner representation; see the
+ /// [`OsString`] documentation for how those bytes are encoded on each platform.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(osstr_bytes)]
+ /// use std::ffi::OsStr;
+ ///
+ /// let os_str = OsStr::new("foo");
+ /// assert_eq!(os_str.as_bytes(), b"foo");
+ /// ```
+ #[unstable(feature = "osstr_bytes", issue = "none")]
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+ self.inner.as_u8_slice()
+ }
+
/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
///
/// Any non-Unicode sequences are replaced with
diff --git a/library/std/src/os/unix/ffi/mod.rs b/library/std/src/os/unix/ffi/mod.rs
index 5b49f50763d74..357a89248ce7a 100644
--- a/library/std/src/os/unix/ffi/mod.rs
+++ b/library/std/src/os/unix/ffi/mod.rs
@@ -11,10 +11,6 @@
//! // OsStringExt::from_vec
//! let os_string = OsString::from_vec(bytes);
//! assert_eq!(os_string.to_str(), Some("foo"));
-//!
-//! // OsStringExt::into_vec
-//! let bytes = os_string.into_vec();
-//! assert_eq!(bytes, b"foo");
//! ```
//!
//! ```
@@ -26,10 +22,6 @@
//! // OsStrExt::from_bytes
//! let os_str = OsStr::from_bytes(bytes);
//! assert_eq!(os_str.to_str(), Some("foo"));
-//!
-//! // OsStrExt::as_bytes
-//! let bytes = os_str.as_bytes();
-//! assert_eq!(bytes, b"foo");
//! ```
//!
//! [`std::ffi`]: crate::ffi
diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs
index ccbc182240cf3..09c2557086ea3 100644
--- a/library/std/src/sys/unix/os_str.rs
+++ b/library/std/src/sys/unix/os_str.rs
@@ -182,6 +182,11 @@ impl Buf {
pub fn into_rc(&self) -> Rc {
self.as_slice().into_rc()
}
+
+ /// Consumes the buffer and returns its underlying byte vector.
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.inner
+ }
}
impl Slice {
@@ -190,6 +195,11 @@ impl Slice {
unsafe { mem::transmute(s) }
}
+ /// Returns this `Slice` reinterpreted as a plain byte slice.
+ #[inline]
+ pub fn as_u8_slice(&self) -> &[u8] {
+ // SAFETY (assumed, TODO confirm against the `Slice` definition): this relies on `Slice`
+ // having the same layout as `[u8]`; presumably the inverse of the transmute used to
+ // build a `&Slice` from bytes elsewhere in this impl.
+ unsafe { mem::transmute(self) }
+ }
+
#[inline]
pub fn from_str(s: &str) -> &Slice {
Slice::from_u8_slice(s.as_bytes())
diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs
index 78e92a3331a1c..c11d8730b6c72 100644
--- a/library/std/src/sys/windows/os_str.rs
+++ b/library/std/src/sys/windows/os_str.rs
@@ -146,6 +146,11 @@ impl Buf {
pub fn into_rc(&self) -> Rc {
self.as_slice().into_rc()
}
+
+ /// Consumes the buffer and returns the byte vector produced by the inner value's `into_vec()`.
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.inner.into_vec()
+ }
}
impl Slice {
@@ -193,6 +198,11 @@ impl Slice {
unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) }
}
+ /// Returns the bytes of this `Slice`, as exposed by the inner value's `as_inner()`.
+ #[inline]
+ pub fn as_u8_slice(&self) -> &[u8] {
+ self.inner.as_inner()
+ }
+
#[inline]
pub fn make_ascii_lowercase(&mut self) {
self.inner.make_ascii_lowercase()
diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs
index 10ef6662115c1..af8c1e3349437 100644
--- a/library/std/src/sys_common/wtf8.rs
+++ b/library/std/src/sys_common/wtf8.rs
@@ -3,13 +3,6 @@
//! This library uses Rust’s type system to maintain
//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
//! like the `String` and `&str` types do for UTF-8.
-//!
-//! Since [WTF-8 must not be used
-//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
-//! this library deliberately does not provide access to the underlying bytes
-//! of WTF-8 strings,
-//! nor can it decode WTF-8 from arbitrary bytes.
-//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
// this module is imported from @SimonSapin's repo and has tons of dead code on
// unix (it's mostly used on windows), so don't worry about dead code here.
@@ -399,6 +392,12 @@ impl Wtf8Buf {
let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) };
Wtf8Buf { bytes: bytes.into_vec() }
}
+
+ /// Converts this `Wtf8Buf` into a `Vec<u8>`, moving out its underlying bytes.
+ #[inline]
+ pub fn into_vec(self) -> Vec<u8> {
+ self.bytes
+ }
}
/// Creates a new WTF-8 string from an iterator of code points.
diff --git a/src/test/ui/env-funky-keys.rs b/src/test/ui/env-funky-keys.rs
index 4548d3339472d..04e3e45122c56 100644
--- a/src/test/ui/env-funky-keys.rs
+++ b/src/test/ui/env-funky-keys.rs
@@ -9,6 +9,7 @@
// no-prefer-dynamic
#![feature(rustc_private)]
+#![feature(osstr_bytes)]
extern crate libc;
@@ -16,7 +17,6 @@ use libc::c_char;
use libc::execve;
use std::env;
use std::ffi::CString;
-use std::os::unix::prelude::*;
use std::ptr;
fn main() {