@@ -30,20 +30,27 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
30
30
///
31
31
/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust
32
32
/// and platform-native string values, and in particular allowing a Rust string
33
- /// to be converted into an "OS" string with no cost if possible . A consequence
34
- /// of this is that `OsString` instances are *not* `NUL` terminated; in order
35
- /// to pass to e.g., Unix system call, you should create a [`CStr`].
33
+ /// to be converted into an "OS" string with no cost. A consequence of this is
34
+ /// that `OsString` instances are *not* `NUL` terminated; in order to pass to
35
+ /// e.g., a Unix system call, you should create a [`CStr`].
36
36
///
37
- /// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: the former
38
- /// in each pair are owned strings; the latter are borrowed
39
- /// references.
37
+ /// `OsString` is to <code>&[OsStr]</code> as [`String`] is to <code>&[str]</code>: `OsString` is
38
+ /// an owned string like `String`, while `&OsStr` is a borrowed reference like `&str`.
40
39
///
41
- /// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in
42
- /// the form native to the platform; While on Unix, strings are stored as a
43
- /// sequence of 8-bit values, on Windows, where strings are 16-bit value based
44
- /// as just discussed, strings are also actually stored as a sequence of 8-bit
45
- /// values, encoded in a less-strict variant of UTF-8. This is useful to
46
- /// understand when handling capacity and length values.
40
+ /// Note that `OsString` and [`OsStr`] internally do not necessarily hold strings in the form
41
+ /// native to the platform. On all platforms, `OsString` and `OsStr` consist of a sequence of
42
+ /// bytes, in a superset of UTF-8; any valid UTF-8 sequence is a valid `OsString` or `OsStr`.
43
+ /// * On Unix, these bytes can contain any values, in an arbitrary encoding (not necessarily
44
+ /// UTF-8, and not necessarily the same encoding for different OS strings).
45
+ /// * On Windows, where the native OS uses a sequence of 16-bit values, `OsString` and `OsStr`
46
+ /// still consist of a sequence of 8-bit values, encoded in a superset of UTF-8 called
47
+ /// ["WTF-8"](https://simonsapin.github.io/wtf-8/) ("Wobbly Translation Format 8-bit"). The
48
+ /// WTF-8 format allows encoding arbitrary 16-bit values, including unpaired UTF-16 surrogates
49
+ /// that do not constitute valid Unicode, since Windows accepts sequences of arbitrary 16-bit
50
+ /// values. (In practice, Windows filenames and similar are almost always valid UTF-16.)
51
+ ///
52
+ /// Capacity and length values are always in terms of the sequence of bytes, not characters or
53
+ /// 16-bit values.
47
54
///
48
55
/// # Creating an `OsString`
49
56
///
@@ -65,8 +72,16 @@ use crate::sys_common::{AsInner, FromInner, IntoInner};
65
72
///
66
73
/// # Conversions
67
74
///
75
+ /// `OsStr` provides the method [`OsStr::as_bytes`], which provides a zero-cost conversion to a
76
+ /// byte slice. (`OsString` provides this method as well, along with all other `OsStr` methods, via
77
+ /// `Deref`.)
78
+ ///
79
+ /// `OsString` provides the method [`OsString::into_vec`], which provides a zero-cost conversion to
80
+ /// `Vec<u8>`.
81
+ ///
68
82
/// See the [module's toplevel documentation about conversions][conversions] for a discussion on
69
- /// the traits which `OsString` implements for [conversions] from/to native representations.
83
+ /// OS-specific traits which `OsString` and `OsStr` implement for [conversions] from/to native
84
+ /// representations.
70
85
///
71
86
/// [`CStr`]: crate::ffi::CStr
72
87
/// [conversions]: super#conversions
@@ -163,6 +178,24 @@ impl OsString {
163
178
self . inner . into_string ( ) . map_err ( |buf| OsString { inner : buf } )
164
179
}
165
180
181
+ /// Consumes the `OsString` and returns its contents as a `Vec<u8>` of bytes.
182
+ ///
183
+ /// # Examples
184
+ ///
185
+ /// ```
186
+ /// #![feature(osstr_bytes)]
187
+ /// use std::ffi::OsString;
188
+ ///
189
+ /// let os_string = OsString::from("foo");
190
+ /// let v = os_string.into_vec();
191
+ /// assert_eq!(v, b"foo");
192
+ /// ```
193
+ #[ unstable( feature = "osstr_bytes" , issue = "none" ) ]
194
+ #[ inline]
195
+ pub fn into_vec ( self ) -> Vec < u8 > {
196
+ self . inner . into_vec ( )
197
+ }
198
+
166
199
/// Extends the string with the given <code>&[OsStr]</code> slice.
167
200
///
168
201
/// # Examples
@@ -667,6 +700,23 @@ impl OsStr {
667
700
self . inner . to_str ( )
668
701
}
669
702
703
+ /// Borrows the contents of the `OsStr` as a byte slice (`&[u8]`).
704
+ ///
705
+ /// # Examples
706
+ ///
707
+ /// ```
708
+ /// #![feature(osstr_bytes)]
709
+ /// use std::ffi::OsStr;
710
+ ///
711
+ /// let os_str = OsStr::new("foo");
712
+ /// assert_eq!(os_str.as_bytes(), b"foo");
713
+ /// ```
714
+ #[ unstable( feature = "osstr_bytes" , issue = "none" ) ]
715
+ #[ inline]
716
+ pub fn as_bytes ( & self ) -> & [ u8 ] {
717
+ self . inner . as_u8_slice ( )
718
+ }
719
+
670
720
/// Converts an `OsStr` to a <code>[Cow]<[str]></code>.
671
721
///
672
722
/// Any non-Unicode sequences are replaced with
0 commit comments