From d2e78d86a505771a1b367ac8668898ce340d097a Mon Sep 17 00:00:00 2001 From: Waffle Date: Sat, 29 May 2021 17:26:58 +0300 Subject: [PATCH 1/4] Add `ptr::{str_from_raw_parts, str_from_raw_parts_mut}` functions The functions are under feature gate `str_from_raw_parts` and are similar to `slice_from_raw_parts`, `slice_from_raw_parts_mut`. --- library/core/src/ptr/mod.rs | 63 +++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 014170604ecaa..8e11e1bdf7748 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -290,6 +290,69 @@ pub const fn slice_from_raw_parts_mut(data: *mut T, len: usize) -> *mut [T] { from_raw_parts_mut(data.cast(), len) } +/// Forms a raw string slice from a pointer and a length. +/// +/// The `len` argument is the number of **bytes**, not the number of characters. +/// +/// This function is safe, but actually using the return value is unsafe. +/// See the documentation of [`slice::from_raw_parts`] for slice safety requirements and [`str::from_utf8`] for string safety requirements. 
+/// +/// [`slice::from_raw_parts`]: crate::slice::from_raw_parts +/// [`str::from_utf8`]: crate::str::from_utf8 +/// +/// # Examples +/// +/// ```rust +/// #![feature(str_from_raw_parts)] +/// use std::ptr; +/// +/// // create a string slice pointer when starting out with a pointer to the first byte +/// let x = "abc"; +/// let raw_pointer = x.as_ptr(); +/// let str = ptr::str_from_raw_parts(raw_pointer, 3); +/// assert_eq!(unsafe { &*str }, x); +/// ``` +#[inline] +#[unstable(feature = "str_from_raw_parts", issue = "none")] +#[rustc_const_unstable(feature = "const_str_from_raw_parts", issue = "none")] +pub const fn str_from_raw_parts(data: *const u8, len: usize) -> *const str { + from_raw_parts(data.cast(), len) +} + +/// Performs the same functionality as [`str_from_raw_parts`], except that a +/// raw mutable string slice is returned, as opposed to a raw immutable string slice. +/// +/// See the documentation of [`str_from_raw_parts`] for more details. +/// +/// This function is safe, but actually using the return value is unsafe. +/// See the documentation of [`slice::from_raw_parts_mut`] for slice safety requirements and [`str::from_utf8_mut`] for string safety requirements. 
+/// +/// [`slice::from_raw_parts_mut`]: crate::slice::from_raw_parts_mut +/// [`str::from_utf8_mut`]: crate::str::from_utf8_mut +/// +/// # Examples +/// +/// ```rust +/// #![feature(str_from_raw_parts)] +/// use std::ptr; +/// +/// let mut x = [b'a', b'b', b'c']; +/// let raw_pointer = x.as_mut_ptr(); +/// let str = ptr::str_from_raw_parts_mut(raw_pointer, 3); +/// +/// unsafe { +/// (*(str as *mut [u8]))[2] = b'z'; // assign a value at an index in the string slice +/// }; +/// +/// assert_eq!(unsafe { &*str }, "abz"); +/// ``` +#[inline] +#[unstable(feature = "str_from_raw_parts", issue = "none")] +#[rustc_const_unstable(feature = "const_str_from_raw_parts", issue = "none")] +pub const fn str_from_raw_parts_mut(data: *mut u8, len: usize) -> *mut str { + from_raw_parts_mut(data.cast(), len) +} + /// Swaps the values at two mutable locations of the same type, without /// deinitializing either. /// From ab968360363ff7ac50fad8080e9fcfe91d103dc8 Mon Sep 17 00:00:00 2001 From: Waffle Date: Sat, 29 May 2021 17:33:06 +0300 Subject: [PATCH 2/4] Add `const_str_ptr` and `mut_str_ptr` lang items These items make it possible to write inherent impls for `*const str` and `*mut str`. --- compiler/rustc_hir/src/lang_items.rs | 2 ++ compiler/rustc_span/src/symbol.rs | 2 ++ .../rustc_typeck/src/check/method/probe.rs | 19 +++++++++----- .../src/coherence/inherent_impls.rs | 26 +++++++++++++++++++ 4 files changed, 42 insertions(+), 7 deletions(-) diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index b85ed0cb4bbe5..96188d7c81884 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -184,7 +184,9 @@ language_item_table! 
{ ConstPtr, sym::const_ptr, const_ptr_impl, Target::Impl, GenericRequirement::None; MutPtr, sym::mut_ptr, mut_ptr_impl, Target::Impl, GenericRequirement::None; ConstSlicePtr, sym::const_slice_ptr, const_slice_ptr_impl, Target::Impl, GenericRequirement::None; + ConstStrPtr, sym::const_str_ptr, const_str_ptr_impl, Target::Impl, GenericRequirement::None; MutSlicePtr, sym::mut_slice_ptr, mut_slice_ptr_impl, Target::Impl, GenericRequirement::None; + MutStrPtr, sym::mut_str_ptr, mut_str_ptr_impl, Target::Impl, GenericRequirement::None; I8, sym::i8, i8_impl, Target::Impl, GenericRequirement::None; I16, sym::i16, i16_impl, Target::Impl, GenericRequirement::None; I32, sym::i32, i32_impl, Target::Impl, GenericRequirement::None; diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 24023163cc30e..d383fb09b90cf 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -458,6 +458,7 @@ symbols! { const_raw_ptr_to_usize_cast, const_refs_to_cell, const_slice_ptr, + const_str_ptr, const_trait_bound_opt_out, const_trait_impl, const_transmute, @@ -839,6 +840,7 @@ symbols! 
{ must_use, mut_ptr, mut_slice_ptr, + mut_str_ptr, naked, naked_functions, name, diff --git a/compiler/rustc_typeck/src/check/method/probe.rs b/compiler/rustc_typeck/src/check/method/probe.rs index 1c7d68a3d57d4..d17506fa4e21d 100644 --- a/compiler/rustc_typeck/src/check/method/probe.rs +++ b/compiler/rustc_typeck/src/check/method/probe.rs @@ -680,16 +680,21 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> { self.assemble_inherent_impl_for_primitive(lang_def_id); } ty::RawPtr(ty::TypeAndMut { ty: _, mutbl }) => { - let (lang_def_id1, lang_def_id2) = match mutbl { - hir::Mutability::Not => { - (lang_items.const_ptr_impl(), lang_items.const_slice_ptr_impl()) - } - hir::Mutability::Mut => { - (lang_items.mut_ptr_impl(), lang_items.mut_slice_ptr_impl()) - } + let (lang_def_id1, lang_def_id2, lang_def_id3) = match mutbl { + hir::Mutability::Not => ( + lang_items.const_ptr_impl(), + lang_items.const_slice_ptr_impl(), + lang_items.const_str_ptr_impl(), + ), + hir::Mutability::Mut => ( + lang_items.mut_ptr_impl(), + lang_items.mut_slice_ptr_impl(), + lang_items.mut_str_ptr_impl(), + ), }; self.assemble_inherent_impl_for_primitive(lang_def_id1); self.assemble_inherent_impl_for_primitive(lang_def_id2); + self.assemble_inherent_impl_for_primitive(lang_def_id3); } ty::Int(i) => { let lang_def_id = match i { diff --git a/compiler/rustc_typeck/src/coherence/inherent_impls.rs b/compiler/rustc_typeck/src/coherence/inherent_impls.rs index c7be9e2123512..86f71b52f7004 100644 --- a/compiler/rustc_typeck/src/coherence/inherent_impls.rs +++ b/compiler/rustc_typeck/src/coherence/inherent_impls.rs @@ -150,6 +150,19 @@ impl ItemLikeVisitor<'v> for InherentCollect<'tcx> { assoc_items, ); } + ty::RawPtr(ty::TypeAndMut { ty: inner, mutbl: hir::Mutability::Not }) + if matches!(inner.kind(), ty::Str) => + { + self.check_primitive_impl( + item.def_id, + lang_items.const_str_ptr_impl(), + None, + "const_str_ptr", + "*const str", + item.span, + assoc_items, + ); + } ty::RawPtr(ty::TypeAndMut { ty: inner, 
mutbl: hir::Mutability::Mut }) if matches!(inner.kind(), ty::Slice(_)) => { @@ -163,6 +176,19 @@ impl ItemLikeVisitor<'v> for InherentCollect<'tcx> { assoc_items, ); } + ty::RawPtr(ty::TypeAndMut { ty: inner, mutbl: hir::Mutability::Mut }) + if matches!(inner.kind(), ty::Str) => + { + self.check_primitive_impl( + item.def_id, + lang_items.mut_str_ptr_impl(), + None, + "mut_str_ptr", + "*mut str", + item.span, + assoc_items, + ); + } ty::RawPtr(ty::TypeAndMut { ty: _, mutbl: hir::Mutability::Not }) => { self.check_primitive_impl( item.def_id, From c482f970ebb6a704517ec2850c5c1278dfd75e6c Mon Sep 17 00:00:00 2001 From: Waffle Date: Sat, 29 May 2021 17:38:42 +0300 Subject: [PATCH 3/4] Add raw str methods This patch adds the following methods to `*const str` and `*mut str`: - `len` - `as_ptr` (`as_mut_ptr`) - `get_unchecked` (`get_unchecked_mut`) Similar methods have already existed for raw slices. --- library/core/src/ptr/const_ptr.rs | 79 ++++++++++++++++++++++++++++++ library/core/src/ptr/mut_ptr.rs | 81 +++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 95e86a688beb9..4afa12cef8919 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -1015,6 +1015,85 @@ impl *const [T] { } } +#[cfg(not(bootstrap))] +#[lang = "const_str_ptr"] +impl *const str { + /// Returns the length of a raw string slice. + /// + /// The returned value is the number of **bytes**, not the number of characters. + /// + /// This function is safe, even when the raw string slice cannot be cast to a slice + /// reference because the pointer is null or unaligned. 
+ /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_len)] + /// #![feature(str_from_raw_parts)] + /// + /// use std::ptr; + /// + /// let str: *const str = ptr::str_from_raw_parts(ptr::null(), 3); + /// assert_eq!(str.len(), 3); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_len", issue = "none")] + #[rustc_const_unstable(feature = "const_str_ptr_len", issue = "none")] + pub const fn len(self) -> usize { + metadata(self) + } + + /// Returns a raw pointer to the string slice's buffer. + /// + /// This is equivalent to casting `self` to `*const u8`, but more type-safe. + /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_as_ptr)] + /// #![feature(str_from_raw_parts)] + /// use std::ptr; + /// + /// let str: *const str = ptr::str_from_raw_parts(ptr::null(), 3); + /// assert_eq!(str.as_ptr(), 0 as *const u8); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_as_ptr", issue = "none")] + #[rustc_const_unstable(feature = "str_ptr_as_ptr", issue = "none")] + pub const fn as_ptr(self) -> *const u8 { + self as *const u8 + } + + /// Returns a raw pointer to a substring, without doing bounds + /// checking. + /// + /// Calling this method with an out-of-bounds index, with an index that does not lie on UTF-8 sequence boundaries, or when `self` is not dereferenceable + /// is *[undefined behavior]* even if the resulting pointer is not used. + /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// #![feature(str_ptr_get)] + /// + /// let x = "abc" as *const str; + /// + /// unsafe { + /// assert_eq!(&*x.get_unchecked(1..), "bc"); + /// } + /// ``` + #[unstable(feature = "str_ptr_get", issue = "none")] + #[inline] + pub unsafe fn get_unchecked<I>(self, index: I) -> *const I::Output + where + I: SliceIndex<str>, + { + // SAFETY: the caller ensures that `self` is dereferenceable and that `index` is in-bounds and lies on UTF-8 sequence boundaries. 
+ unsafe { index.get_unchecked(self) } + } +} + // Equality for pointers #[stable(feature = "rust1", since = "1.0.0")] impl PartialEq for *const T { diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index adc64cb2bd39a..e10773c646c68 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -1330,6 +1330,87 @@ impl *mut [T] { } } +#[cfg(not(bootstrap))] +#[lang = "mut_str_ptr"] +impl *mut str { + /// Returns the length of a raw string slice. + /// + /// The returned value is the number of **bytes**, not the number of characters. + /// + /// This function is safe, even when the raw string slice cannot be cast to a slice + /// reference because the pointer is null or unaligned. + /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_len)] + /// #![feature(str_from_raw_parts)] + /// + /// use std::ptr; + /// + /// let str: *mut str = ptr::str_from_raw_parts_mut(ptr::null_mut(), 3); + /// assert_eq!(str.len(), 3); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_len", issue = "none")] + #[rustc_const_unstable(feature = "const_str_ptr_len", issue = "none")] + pub const fn len(self) -> usize { + metadata(self) + } + + /// Returns a raw pointer to the string slice's buffer. + /// + /// This is equivalent to casting `self` to `*mut u8`, but more type-safe. + /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_as_ptr)] + /// #![feature(str_from_raw_parts)] + /// use std::ptr; + /// + /// let str: *mut str = ptr::str_from_raw_parts_mut(ptr::null_mut(), 3); + /// assert_eq!(str.as_mut_ptr(), 0 as *mut u8); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_as_ptr", issue = "none")] + #[rustc_const_unstable(feature = "str_ptr_as_ptr", issue = "none")] + pub const fn as_mut_ptr(self) -> *mut u8 { + self as *mut u8 + } + + /// Returns a raw pointer to a substring, without doing bounds + /// checking. 
+ /// + /// Calling this method with an out-of-bounds index, with an index that does not lie on UTF-8 sequence boundaries, or when `self` is not dereferenceable + /// is *[undefined behavior]* even if the resulting pointer is not used. + /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// #![feature(str_ptr_get)] + /// + /// let mut x = [b'a', b'b', b'c']; + /// let x: &mut str = std::str::from_utf8_mut(&mut x).unwrap(); + /// let x: *mut str = x as *mut str; + /// + /// unsafe { + /// assert_eq!(&*x.get_unchecked_mut(1..), "bc"); + /// } + /// ``` + #[unstable(feature = "str_ptr_get", issue = "none")] + #[inline] + pub unsafe fn get_unchecked_mut<I>(self, index: I) -> *mut I::Output + where + I: SliceIndex<str>, + { + // SAFETY: the caller ensures that `self` is dereferenceable and that `index` is in-bounds and lies on UTF-8 sequence boundaries. + unsafe { index.get_unchecked_mut(self) } + } +} + // Equality for pointers #[stable(feature = "rust1", since = "1.0.0")] impl PartialEq for *mut T { From 3ebb712c12b7ada62852e6e309fa67dec1ee8ec4 Mon Sep 17 00:00:00 2001 From: Waffle Date: Sun, 15 Aug 2021 20:09:39 +0300 Subject: [PATCH 4/4] Add nonnull raw str methods This patch adds the following methods to `NonNull<str>`: - `str_from_raw_parts` - `len` - `as_non_null_ptr` - `as_mut_ptr` - `get_unchecked_mut` Similar methods already exist for raw slices, raw string slices and nonnull raw slices. 
--- library/core/src/lib.rs | 5 ++ library/core/src/ptr/non_null.rs | 128 +++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index d667fff4b81ee..9bf2304cc9547 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -106,6 +106,8 @@ #![feature(const_type_name)] #![feature(const_unreachable_unchecked)] #![feature(const_default_impls)] +#![feature(const_str_from_raw_parts)] +#![cfg_attr(not(bootstrap), feature(const_str_ptr_len))] #![feature(duration_consts_2)] #![feature(ptr_metadata)] #![feature(slice_ptr_get)] @@ -156,6 +158,9 @@ #![feature(simd_ffi)] #![feature(staged_api)] #![feature(stmt_expr_attributes)] +#![cfg_attr(not(bootstrap), feature(str_ptr_len))] +#![cfg_attr(not(bootstrap), feature(str_ptr_as_ptr))] +#![cfg_attr(not(bootstrap), feature(str_ptr_get))] #![feature(trait_alias)] #![feature(transparent_unions)] #![feature(try_blocks)] diff --git a/library/core/src/ptr/non_null.rs b/library/core/src/ptr/non_null.rs index 87c8674af0dc5..e78ee0763f60e 100644 --- a/library/core/src/ptr/non_null.rs +++ b/library/core/src/ptr/non_null.rs @@ -618,6 +618,134 @@ impl NonNull<[T]> { } } +#[cfg(not(bootstrap))] +impl NonNull<str> { + /// Creates a non-null raw string slice from a thin pointer and a length. + /// + /// The `len` argument is the number of **bytes**, not the number of characters. + /// + /// This function is safe, but dereferencing the return value is unsafe. + /// See the documentation of [`slice::from_raw_parts`] for slice safety requirements and [`str::from_utf8`] for string safety requirements. 
+ /// + /// [`str::from_utf8`]: crate::str::from_utf8 + /// + /// # Examples + /// + /// ```rust + /// #![feature(nonnull_str_from_raw_parts)] + /// + /// use std::ptr::NonNull; + /// + /// // create a string slice pointer when starting out with a pointer to the first byte + /// let mut x = [b'a', b'b', b'c']; + /// let nonnull_pointer = NonNull::new(x.as_mut_ptr()).unwrap(); + /// let str = NonNull::str_from_raw_parts(nonnull_pointer, 3); + /// assert_eq!(unsafe { str.as_ref() }, "abc"); + /// ``` + /// + /// (Note that this example artificially demonstrates a use of this method, + /// but `let str = NonNull::from(str::from_utf8_unchecked(&x[..]));` would be a better way to write code like this.) + #[unstable(feature = "nonnull_str_from_raw_parts", issue = "none")] + #[rustc_const_unstable(feature = "const_nonnull_str_from_raw_parts", issue = "none")] + #[inline] + pub const fn str_from_raw_parts(data: NonNull<u8>, len: usize) -> Self { + // SAFETY: `data` is a `NonNull` pointer which is necessarily non-null + unsafe { Self::new_unchecked(super::str_from_raw_parts_mut(data.as_ptr(), len)) } + } + + /// Returns the length of a non-null raw string slice. + /// + /// The returned value is the number of **bytes**, not the number of characters. + /// + /// This function is safe, even when the non-null raw string slice cannot be dereferenced to a string slice + /// because the pointer does not have a valid address. + /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_len, nonnull_str_from_raw_parts)] + /// use std::ptr::NonNull; + /// + /// let str: NonNull<str> = NonNull::str_from_raw_parts(NonNull::dangling(), 3); + /// assert_eq!(str.len(), 3); + /// ``` + #[unstable(feature = "str_ptr_len", issue = "none")] + #[rustc_const_unstable(feature = "const_str_ptr_len", issue = "none")] + #[inline] + pub const fn len(self) -> usize { + self.as_ptr().len() + } + + /// Returns a non-null pointer to the string slice's buffer. 
+ /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_as_ptr, nonnull_str_from_raw_parts)] + /// use std::ptr::NonNull; + /// + /// let str: NonNull<str> = NonNull::str_from_raw_parts(NonNull::dangling(), 3); + /// assert_eq!(str.as_non_null_ptr(), NonNull::new(1 as *mut u8).unwrap()); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_as_ptr", issue = "none")] + #[rustc_const_unstable(feature = "str_ptr_as_ptr", issue = "none")] + pub const fn as_non_null_ptr(self) -> NonNull<u8> { + // SAFETY: We know `self` is non-null. + unsafe { NonNull::new_unchecked(self.as_ptr().as_mut_ptr()) } + } + + /// Returns a raw pointer to the string slice's buffer. + /// + /// # Examples + /// + /// ```rust + /// #![feature(str_ptr_as_ptr, nonnull_str_from_raw_parts)] + /// use std::ptr::NonNull; + /// + /// let str: NonNull<str> = NonNull::str_from_raw_parts(NonNull::dangling(), 3); + /// assert_eq!(str.as_mut_ptr(), 1 as *mut u8); + /// ``` + #[inline] + #[unstable(feature = "str_ptr_as_ptr", issue = "none")] + #[rustc_const_unstable(feature = "str_ptr_as_ptr", issue = "none")] + pub const fn as_mut_ptr(self) -> *mut u8 { + self.as_non_null_ptr().as_ptr() + } + + /// Returns a raw pointer to a substring, without doing bounds + /// checking. + /// + /// Calling this method with an out-of-bounds index, with an index that does not lie on UTF-8 sequence boundaries, or when `self` is not dereferenceable + /// is *[undefined behavior]* even if the resulting pointer is not used. 
+ /// + /// [undefined behavior]: https://doc.rust-lang.org/reference/behavior-considered-undefined.html + /// + /// # Examples + /// + /// ``` + /// #![feature(str_ptr_get, str_ptr_as_ptr, nonnull_str_from_raw_parts)] + /// use std::ptr::NonNull; + /// + /// let x = &mut [b'a', b'b', b'c']; + /// let x = NonNull::str_from_raw_parts(NonNull::new(x.as_mut_ptr()).unwrap(), x.len()); + /// + /// unsafe { + /// assert_eq!(x.get_unchecked_mut(1..).as_mut_ptr(), x.as_non_null_ptr().as_ptr().add(1)); + /// } + /// ``` + #[unstable(feature = "str_ptr_get", issue = "none")] + #[inline] + pub unsafe fn get_unchecked_mut<I>(self, index: I) -> NonNull<I::Output> + where + I: SliceIndex<str>, + { + // SAFETY: the caller ensures that `self` is dereferenceable and `index` is in-bounds. + // As a consequence, the resulting pointer cannot be null. + unsafe { NonNull::new_unchecked(self.as_ptr().get_unchecked_mut(index)) } + } +} + #[stable(feature = "nonnull", since = "1.25.0")] impl Clone for NonNull { #[inline]