From b607a8a4af89c54d6cfcd633d9aabcc33e9c3502 Mon Sep 17 00:00:00 2001 From: Alex Macleod Date: Sat, 10 Jun 2023 13:07:04 +0100 Subject: [PATCH 1/7] Remove `#[cfg(all())]` workarounds from `c_char` --- library/core/src/ffi/mod.rs | 5 ----- library/std/src/os/raw/mod.rs | 5 ----- 2 files changed, 10 deletions(-) diff --git a/library/core/src/ffi/mod.rs b/library/core/src/ffi/mod.rs index 86ea154a8868e..d11f24c198cda 100644 --- a/library/core/src/ffi/mod.rs +++ b/library/core/src/ffi/mod.rs @@ -52,11 +52,6 @@ macro_rules! type_alias { } type_alias! { "c_char.md", c_char = c_char_definition::c_char, NonZero_c_char = c_char_definition::NonZero_c_char; -// Make this type alias appear cfg-dependent so that Clippy does not suggest -// replacing `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be removed -// after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093 -// is fixed. -#[cfg(all())] #[doc(cfg(all()))] } type_alias! { "c_schar.md", c_schar = i8, NonZero_c_schar = NonZeroI8; } diff --git a/library/std/src/os/raw/mod.rs b/library/std/src/os/raw/mod.rs index 19d0ffb2e39cb..5b302e3c2c824 100644 --- a/library/std/src/os/raw/mod.rs +++ b/library/std/src/os/raw/mod.rs @@ -9,11 +9,6 @@ macro_rules! alias_core_ffi { ($($t:ident)*) => {$( #[stable(feature = "raw_os", since = "1.1.0")] #[doc = include_str!(concat!("../../../../core/src/ffi/", stringify!($t), ".md"))] - // Make this type alias appear cfg-dependent so that Clippy does not suggest - // replacing expressions like `0 as c_char` with `0_i8`/`0_u8`. This #[cfg(all())] can be - // removed after the false positive in https://github.com/rust-lang/rust-clippy/issues/8093 - // is fixed. - #[cfg(all())] #[doc(cfg(all()))] pub type $t = core::ffi::$t; )*} From 0699345e7afb036a61bf52a3703ec12307a97de3 Mon Sep 17 00:00:00 2001 From: Zachary S Date: Sat, 1 Jul 2023 02:12:14 -0500 Subject: [PATCH 2/7] Remove lifetime bound for A for `impl Extend<&'a T> for Vec`. --- library/alloc/src/vec/mod.rs | 2 +- library/alloc/src/vec/spec_extend.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/library/alloc/src/vec/mod.rs b/library/alloc/src/vec/mod.rs index 3736a6e0b0ecb..ff8fed33f0838 100644 --- a/library/alloc/src/vec/mod.rs +++ b/library/alloc/src/vec/mod.rs @@ -2971,7 +2971,7 @@ impl Vec { /// [`copy_from_slice`]: slice::copy_from_slice #[cfg(not(no_global_oom_handling))] #[stable(feature = "extend_ref", since = "1.2.0")] -impl<'a, T: Copy + 'a, A: Allocator + 'a> Extend<&'a T> for Vec { +impl<'a, T: Copy + 'a, A: Allocator> Extend<&'a T> for Vec { fn extend>(&mut self, iter: I) { self.spec_extend(iter.into_iter()) } diff --git a/library/alloc/src/vec/spec_extend.rs b/library/alloc/src/vec/spec_extend.rs index 56065ce565bfc..e2f865d0f7167 100644 --- a/library/alloc/src/vec/spec_extend.rs +++ b/library/alloc/src/vec/spec_extend.rs @@ -36,7 +36,7 @@ impl SpecExtend> for Vec { } } -impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec +impl<'a, T: 'a, I, A: Allocator> SpecExtend<&'a T, I> for Vec where I: Iterator, T: Clone, @@ -46,7 +46,7 @@ where } } -impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec +impl<'a, T: 'a, A: Allocator> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec where T: Copy, { From 5ed429f3ab0aaa69b98f599f0ec8fb4104204510 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 1 Jul 2023 19:52:11 -0400 Subject: [PATCH 3/7] Update the tracking issue for `const_cstr_from_ptr` Tracking issue #101719 was for `const_cstr_methods`, #113219 is a new issue specific for `const_cstr_from_ptr`. --- library/core/src/ffi/c_str.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/ffi/c_str.rs b/library/core/src/ffi/c_str.rs index 3de9188baf6d4..1a9cbc98c6a42 100644 --- a/library/core/src/ffi/c_str.rs +++ b/library/core/src/ffi/c_str.rs @@ -256,7 +256,7 @@ impl CStr { #[inline] #[must_use] #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "101719")] + #[rustc_const_unstable(feature = "const_cstr_from_ptr", issue = "113219")] pub const unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { // SAFETY: The caller has provided a pointer that points to a valid C // string with a NUL terminator of size less than `isize::MAX`, whose From ee604fccd9d91b8f4cf9b4cad5bafdf698dd0335 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Fri, 7 Jul 2023 09:46:48 -0500 Subject: [PATCH 4/7] Allow limited access to `OsString` bytes This extends #109698 to allow no-cost conversion between `Vec` and `OsString` as suggested in feedback from `os_str_bytes` crate in #111544. --- library/std/src/ffi/os_str.rs | 65 +++++++++++++++++++++++++++ library/std/src/sys/unix/os_str.rs | 10 +++++ library/std/src/sys/windows/os_str.rs | 10 +++++ library/std/src/sys_common/wtf8.rs | 15 +++++++ 4 files changed, 100 insertions(+) diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index fbdf7f5ecacc1..c0f2dfa4e0b4b 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -141,6 +141,51 @@ impl OsString { OsString { inner: Buf::from_string(String::new()) } } + /// Converts bytes to an `OsString` without checking that the bytes contains + /// valid [`OsStr`]-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_os_str_bytes`] from within the same rust version + /// built for the same target platform. For example, reconstructing an `OsString` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_os_str_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// #![feature(os_str_bytes)] + /// + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_os_str_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_os_str_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_os_str_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub unsafe fn from_os_str_bytes_unchecked(bytes: Vec) -> Self { + OsString { inner: Buf::from_os_str_bytes_unchecked(bytes) } + } + /// Converts to an [`OsStr`] slice. /// /// # Examples @@ -159,6 +204,26 @@ impl OsString { self } + /// Converts the `OsString` into a byte slice. To convert the byte slice back into an + /// `OsString`, use the [`OsStr::from_os_str_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same rust version built for the same + /// target platform. For example, sending the bytes over the network or storing it in a file + /// will likely result in incompatible data. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. + /// + /// [`std::ffi`]: crate::ffi + #[inline] + #[unstable(feature = "os_str_bytes", issue = "111544")] + pub fn into_os_str_bytes(self) -> Vec { + self.inner.into_os_str_bytes() + } + /// Converts the `OsString` into a [`String`] if it contains valid Unicode data. /// /// On failure, ownership of the original `OsString` is returned. diff --git a/library/std/src/sys/unix/os_str.rs b/library/std/src/sys/unix/os_str.rs index f7333fd5a1fed..463b0a275157a 100644 --- a/library/std/src/sys/unix/os_str.rs +++ b/library/std/src/sys/unix/os_str.rs @@ -96,6 +96,16 @@ impl AsInner<[u8]> for Buf { } impl Buf { + #[inline] + pub fn into_os_str_bytes(self) -> Vec { + self.inner + } + + #[inline] + pub unsafe fn from_os_str_bytes_unchecked(s: Vec) -> Self { + Self { inner: s } + } + pub fn from_string(s: String) -> Buf { Buf { inner: s.into_bytes() } } diff --git a/library/std/src/sys/windows/os_str.rs b/library/std/src/sys/windows/os_str.rs index 16c4f55c6879a..4708657a907f5 100644 --- a/library/std/src/sys/windows/os_str.rs +++ b/library/std/src/sys/windows/os_str.rs @@ -63,6 +63,16 @@ impl fmt::Display for Slice { } impl Buf { + #[inline] + pub fn into_os_str_bytes(self) -> Vec { + self.inner.into_bytes() + } + + #[inline] + pub unsafe fn from_os_str_bytes_unchecked(s: Vec) -> Self { + Self { inner: Wtf8Buf::from_bytes_unchecked(s) } + } + pub fn with_capacity(capacity: usize) -> Buf { Buf { inner: Wtf8Buf::with_capacity(capacity) } } diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs index c9d3e13cf0c56..195d175cc9bde 100644 --- a/library/std/src/sys_common/wtf8.rs +++ b/library/std/src/sys_common/wtf8.rs @@ -182,6 +182,15 @@ impl Wtf8Buf { Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } } + /// Creates a WTF-8 string from a WTF-8 byte vec. + /// + /// Since the byte vec is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + pub unsafe fn from_bytes_unchecked(value: Vec) -> Wtf8Buf { + Wtf8Buf { bytes: value, is_known_utf8: false } + } + /// Creates a WTF-8 string from a UTF-8 `String`. /// /// This takes ownership of the `String` and does not copy. @@ -402,6 +411,12 @@ impl Wtf8Buf { self.bytes.truncate(new_len) } + /// Consumes the WTF-8 string and tries to convert it to a vec of bytes. + #[inline] + pub fn into_bytes(self) -> Vec { + self.bytes + } + /// Consumes the WTF-8 string and tries to convert it to UTF-8. /// /// This does not copy the data. From e7dc177442e7c91ea0c691a992a634ac879db16f Mon Sep 17 00:00:00 2001 From: darklyspaced Date: Thu, 20 Jul 2023 11:27:13 +0800 Subject: [PATCH 5/7] fix docs & example for FileExt::write_at --- library/std/src/os/unix/fs.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/library/std/src/os/unix/fs.rs b/library/std/src/os/unix/fs.rs index 1e1c369310547..358ab33fffdfb 100644 --- a/library/std/src/os/unix/fs.rs +++ b/library/std/src/os/unix/fs.rs @@ -149,7 +149,14 @@ pub trait FileExt { /// Note that similar to [`File::write`], it is not an error to return a /// short write. /// + /// # Bug + /// On some systems, due to a [bug] with [`pwrite64`] (the underlying + /// syscall), files opened with the `O_APPEND` flag fail to respect the + /// offset parameter, always appending to the end of the file instead. + /// /// [`File::write`]: fs::File::write + /// [`pwrite64`]: https://man7.org/linux/man-pages/man2/pwrite.2.html + /// [bug]: https://man7.org/linux/man-pages/man2/pwrite.2.html#BUGS /// /// # Examples /// @@ -159,7 +166,7 @@ pub trait FileExt { /// use std::os::unix::prelude::FileExt; /// /// fn main() -> io::Result<()> { - /// let file = File::open("foo.txt")?; + /// let file = File::create("foo.txt")?; /// /// // We now write at the offset 10. /// file.write_at(b"sushi", 10)?; From 7d17a263d1a6e46700f65d378a9ae7280a2b3371 Mon Sep 17 00:00:00 2001 From: darklyspaced Date: Thu, 20 Jul 2023 14:25:50 +0800 Subject: [PATCH 6/7] added a problematic example --- library/std/src/os/unix/fs.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/library/std/src/os/unix/fs.rs b/library/std/src/os/unix/fs.rs index 358ab33fffdfb..19d9e89a63154 100644 --- a/library/std/src/os/unix/fs.rs +++ b/library/std/src/os/unix/fs.rs @@ -150,9 +150,31 @@ pub trait FileExt { /// short write. /// /// # Bug - /// On some systems, due to a [bug] with [`pwrite64`] (the underlying - /// syscall), files opened with the `O_APPEND` flag fail to respect the - /// offset parameter, always appending to the end of the file instead. + /// On some systems, `write_at` utilises [`pwrite64`] to write to files. + /// However, this syscall has a [bug] where files opened with the `O_APPEND` + /// flag fail to respect the offset parameter, always appending to the end + /// of the file instead. + /// + /// It is possible to inadvertantly set this flag, like in the example below. + /// Therefore, it is important to be vigilant while changing options to mitigate + /// unexpected behaviour. + /// + /// ```no_run + /// use std::fs::File; + /// use std::io; + /// use std::os::unix::prelude::FileExt; + /// + /// fn main() -> io::Result<()> { + /// // Open a file with the append option (sets the `O_APPEND` flag) + /// let file = File::options().append(true).open("foo.txt")?; + /// + /// // We attempt to write at offset 10; instead appended to EOF + /// file.write_at(b"sushi", 10)?; + /// + /// // foo.txt is 5 bytes long instead of 15 + /// Ok(()) + /// } + /// ``` /// /// [`File::write`]: fs::File::write /// [`pwrite64`]: https://man7.org/linux/man-pages/man2/pwrite.2.html From 2c145982a519f78f81930fc16e7fef46f485d266 Mon Sep 17 00:00:00 2001 From: Andrew Tribick Date: Fri, 21 Jul 2023 23:40:55 +0200 Subject: [PATCH 7/7] Demonstrate multibyte character removal in String::pop and String::remove doctests --- library/alloc/src/string.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/library/alloc/src/string.rs b/library/alloc/src/string.rs index ad7b77f549740..0019f51b3d707 100644 --- a/library/alloc/src/string.rs +++ b/library/alloc/src/string.rs @@ -1290,11 +1290,11 @@ impl String { /// Basic usage: /// /// ``` - /// let mut s = String::from("foo"); + /// let mut s = String::from("abč"); /// - /// assert_eq!(s.pop(), Some('o')); - /// assert_eq!(s.pop(), Some('o')); - /// assert_eq!(s.pop(), Some('f')); + /// assert_eq!(s.pop(), Some('č')); + /// assert_eq!(s.pop(), Some('b')); + /// assert_eq!(s.pop(), Some('a')); /// /// assert_eq!(s.pop(), None); /// ``` @@ -1324,11 +1324,11 @@ impl String { /// Basic usage: /// /// ``` - /// let mut s = String::from("foo"); + /// let mut s = String::from("abç"); /// - /// assert_eq!(s.remove(0), 'f'); - /// assert_eq!(s.remove(1), 'o'); - /// assert_eq!(s.remove(0), 'o'); + /// assert_eq!(s.remove(0), 'a'); + /// assert_eq!(s.remove(1), 'ç'); + /// assert_eq!(s.remove(0), 'b'); /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")]