From f8141a8704b030e34c46cfcd427d3605179c14bf Mon Sep 17 00:00:00 2001
From: kadmin
Date: Sat, 15 Aug 2020 19:31:42 +0000
Subject: [PATCH 1/3] Add in-place optimization for array map

---
 library/core/src/array/mod.rs | 67 +++++++++++++++++++++++++++++++++--
 library/core/tests/array.rs   | 57 +++++++++++++++++++++--------
 library/core/tests/lib.rs     |  1 +
 3 files changed, 107 insertions(+), 18 deletions(-)

diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index a7cb1023229bb..b92ddd20a77a5 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -429,7 +429,68 @@ impl<T, const N: usize> [T; N] {
     where
         F: FnMut(T) -> U,
     {
-        use crate::mem::MaybeUninit;
+        use crate::mem::{align_of, forget, size_of, transmute_copy, ManuallyDrop, MaybeUninit};
+
+        if align_of::<T>() == align_of::<U>() && size_of::<T>() == size_of::<U>() {
+            // this branch allows reuse of the original array as a backing store
+            // kind of. As written with no compiler optimizations, transmute copy will
+            // still require 2 copies of the original array, but when it can be converted to
+            // transmute, this will require 0 copies.
+            union Translated<T, U> {
+                src: MaybeUninit<T>,
+                dst: ManuallyDrop<U>,
+            };
+            struct Guard<T, U, const N: usize> {
+                data: *mut [Translated<T, U>; N],
+                initialized: usize,
+            }
+            impl<T, U, const N: usize> Drop for Guard<T, U, N> {
+                fn drop(&mut self) {
+                    debug_assert!(self.initialized < N);
+                    let initialized_part =
+                        crate::ptr::slice_from_raw_parts_mut(self.data as *mut U, self.initialized);
+                    // SAFETY:
+                    // since we read from the element at initialized then panicked,
+                    // we have to skip over it to not double drop.
+                    let todo_ptr = unsafe { self.data.add(self.initialized + 1) as *mut T };
+                    let todo_part =
+                        crate::ptr::slice_from_raw_parts_mut(todo_ptr, N - self.initialized - 1);
+                    // SAFETY:
+                    // Have to remove both the initialized and not yet reached items.
+                    unsafe {
+                        crate::ptr::drop_in_place(initialized_part);
+                        crate::ptr::drop_in_place(todo_part);
+                    }
+                }
+            }
+            // SAFETY:
+            // Since we know that T & U have the same size and alignment we can safely transmute
+            // between them here
+            let mut src_dst = unsafe { transmute_copy::<_, [Translated<T, U>; N]>(&self) };
+
+            let mut guard: Guard<T, U, N> = Guard { data: &mut src_dst, initialized: 0 };
+            // Need to forget self now because the guard is responsible for dropping the items
+            forget(self);
+            for i in 0..N {
+                // SAFETY:
+                // All items prior to `i` are the `dst` variant.
+                // In order to convert `i` from src to dst, we take it from `MaybeUninit`,
+                // leaving uninitialized in its place, and set the destination as
+                // ManuallyDrop::new(..), and implicitly know that it will be a `dst` variant
+                // from where
+                unsafe {
+                    let v = f(src_dst[i].src.read());
+                    src_dst[i].dst = ManuallyDrop::new(v);
+                }
+                guard.initialized += 1;
+            }
+            forget(guard);
+            // SAFETY:
+            // At this point all the items have been initialized and are in `dst` discriminant.
+            // We can switch them over to being of type `U`.
+            return unsafe { transmute_copy::<_, [U; N]>(&src_dst) };
+        }
+
         struct Guard<T, const N: usize> {
             dst: *mut T,
             initialized: usize,
         }
@@ -457,10 +518,10 @@ impl<T, const N: usize> [T; N] {
         }
         // FIXME: Convert to crate::mem::transmute once it works with generics.
         // unsafe { crate::mem::transmute::<[MaybeUninit<U>; N], [U; N]>(dst) }
-        crate::mem::forget(guard);
+        forget(guard);
         // SAFETY: At this point we've properly initialized the whole array
         // and we just need to cast it to the correct type.
-        unsafe { crate::mem::transmute_copy::<_, [U; N]>(&dst) }
+        unsafe { transmute_copy::<_, [U; N]>(&dst) }
     }
 
     /// Returns a slice containing the entire array. Equivalent to `&s[..]`.
diff --git a/library/core/tests/array.rs b/library/core/tests/array.rs
index 89c2a969c28bb..497d734f9880b 100644
--- a/library/core/tests/array.rs
+++ b/library/core/tests/array.rs
@@ -323,26 +323,53 @@ fn array_map() {
 fn array_map_drop_safety() {
     use core::sync::atomic::AtomicUsize;
     use core::sync::atomic::Ordering;
-    static DROPPED: AtomicUsize = AtomicUsize::new(0);
-    struct DropCounter;
-    impl Drop for DropCounter {
+    static DROPPED: [AtomicUsize; 3] =
+        [AtomicUsize::new(0), AtomicUsize::new(0), AtomicUsize::new(0)];
+    struct DropCounter<const N: usize>;
+    impl<const N: usize> Drop for DropCounter<N> {
         fn drop(&mut self) {
-            DROPPED.fetch_add(1, Ordering::SeqCst);
+            DROPPED[N].fetch_add(1, Ordering::SeqCst);
         }
     }
-    let num_to_create = 5;
-    let success = std::panic::catch_unwind(|| {
-        let items = [0; 10];
-        let mut nth = 0;
-        items.map(|_| {
-            assert!(nth < num_to_create);
-            nth += 1;
-            DropCounter
+    {
+        let num_to_create = 5;
+        let success = std::panic::catch_unwind(|| {
+            let items = [0; 10];
+            let mut nth = 0;
+            items.map(|_| {
+                assert!(nth < num_to_create);
+                nth += 1;
+                DropCounter::<0>
+            });
+        });
+        assert!(success.is_err());
+        assert_eq!(DROPPED[0].load(Ordering::SeqCst), num_to_create);
+    }
+
+    {
+        assert_eq!(DROPPED[1].load(Ordering::SeqCst), 0);
+        let num_to_create = 3;
+        const TOTAL: usize = 5;
+        let success = std::panic::catch_unwind(|| {
+            let items: [DropCounter<1>; TOTAL] = [
+                DropCounter::<1>,
+                DropCounter::<1>,
+                DropCounter::<1>,
+                DropCounter::<1>,
+                DropCounter::<1>,
+            ];
+            let mut nth = 0;
+            items.map(|_| {
+                assert!(nth < num_to_create);
+                nth += 1;
+                DropCounter::<2>
+            });
         });
-    });
-    assert!(success.is_err());
-    assert_eq!(DROPPED.load(Ordering::SeqCst), num_to_create);
+        assert!(success.is_err());
+        assert_eq!(DROPPED[2].load(Ordering::SeqCst), num_to_create);
+        assert_eq!(DROPPED[1].load(Ordering::SeqCst), TOTAL);
+    }
     panic!("test succeeded")
 }
diff --git a/library/core/tests/lib.rs b/library/core/tests/lib.rs
index 106c9fe5da3e6..cfdd7f810376a 100644
--- a/library/core/tests/lib.rs
+++ b/library/core/tests/lib.rs
@@ -32,6 +32,7 @@
 #![feature(raw)]
 #![feature(sort_internals)]
 #![feature(slice_partition_at_index)]
+#![feature(min_const_generics)]
 #![feature(min_specialization)]
 #![feature(step_trait)]
 #![feature(step_trait_ext)]

From c0559d4b612d031c1cd73ac1d2d628c42410322d Mon Sep 17 00:00:00 2001
From: kadmin
Date: Mon, 24 Aug 2020 23:18:51 +0000
Subject: [PATCH 2/3] Add unmarked test

This adds a codegen test which, once annotated, can check that the
optimization is applied in the expected case. I'm not sure how to annotate
it, though, so I've left it unmarked.
---
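Note (not part of the patch): a quick stand-alone way to poke at the two shapes the
codegen test below distinguishes is the sketch that follows. It is illustrative only —
the program, its values, and its names are made up here, and it assumes a nightly
toolchain of this period, since `array_map` is still feature-gated.

#![feature(array_map)] // nightly-only while `[T; N]::map` is unstable

fn main() {
    let bytes = [1u8, 2, 3, 4];

    // u8 -> u8: same size and alignment, so the in-place branch added in patch 1
    // can reuse the input array's storage (the codegen test expects no `alloca`).
    let incremented: [u8; 4] = bytes.map(|v| v + 1);
    assert_eq!(incremented, [2, 3, 4, 5]);

    // u8 -> u16: the sizes differ, so `map` falls back to the separate MaybeUninit
    // buffer (the codegen test expects an `alloca` for this case).
    let widened: [u16; 4] = bytes.map(|v| u16::from(v) + 1);
    assert_eq!(widened, [2, 3, 4, 5]);
}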
 library/core/src/array/mod.rs |  2 +-
 src/test/codegen/array-map.rs | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 src/test/codegen/array-map.rs

diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index b92ddd20a77a5..7afb16560b0e9 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -479,7 +479,7 @@ impl<T, const N: usize> [T; N] {
                 // ManuallyDrop::new(..), and implicitly know that it will be a `dst` variant
                 // from where
                 unsafe {
-                    let v = f(src_dst[i].src.read());
+                    let v = f(src_dst[i].src.assume_init_read());
                     src_dst[i].dst = ManuallyDrop::new(v);
                 }
                 guard.initialized += 1;
diff --git a/src/test/codegen/array-map.rs b/src/test/codegen/array-map.rs
new file mode 100644
index 0000000000000..defa130e95176
--- /dev/null
+++ b/src/test/codegen/array-map.rs
@@ -0,0 +1,18 @@
+// compile-flags: -O
+
+#![crate_type = "lib"]
+#![feature(array_map)]
+
+// CHECK-LABEL: @array_inc
+// CHECK-NOT: alloca
+#[no_mangle]
+pub fn array_inc(x: [u8; 1000]) -> [u8; 1000] {
+    x.map(|v| v + 1)
+}
+
+// CHECK-LABEL: @array_inc_cast
+// CHECK: alloca
+#[no_mangle]
+pub fn array_inc_cast(x: [u8; 1000]) -> [u16; 1000] {
+    x.map(|v| v as u16 + 1)
+}

From fd4ab51b64e54621820362ef23c455d78dffffd8 Mon Sep 17 00:00:00 2001
From: kadmin
Date: Wed, 7 Oct 2020 08:39:45 +0000
Subject: [PATCH 3/3] Update w/ suggestion

This instead changes the code to use one union type for the constructed
array, which reduces the number of moves that LLVM has to do and better
mimics C-like pointers, allowing LLVM to optimize it further. I also
updated the test to check for NRVO-style optimization.
---
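Note (not part of the patch): the union-plus-guard pattern introduced below can also be
tried as a stand-alone program. The sketch that follows uses a hypothetical free function
`map_array` as a stand-in for `[T; N]::map` and public stable APIs instead of the
libcore-internal ones, so its details are assumptions rather than the exact in-tree code.
It shows the three views of one buffer (`none`, `partial`, `complete`), how the guard drops
only the initialized prefix if the closure panics, and how the finished array is taken out
through `complete`.

use std::mem::{forget, ManuallyDrop, MaybeUninit};
use std::ptr;

// One buffer, three views: untouched, partially initialized, fully initialized.
union MaybeUninitArray<T, const N: usize> {
    none: (),
    partial: ManuallyDrop<[MaybeUninit<T>; N]>,
    complete: ManuallyDrop<[T; N]>,
}

// Records how many leading slots of `arr.partial` hold live values and drops
// exactly that prefix if it is dropped before being forgotten (i.e. on panic).
struct MapGuard<'a, T, const N: usize> {
    arr: &'a mut MaybeUninitArray<T, N>,
    len: usize,
}

impl<'a, T, const N: usize> MapGuard<'a, T, N> {
    fn push(&mut self, value: T) {
        // SAFETY: the caller pushes at most N values, and `partial` is a valid
        // view of the buffer at any level of initialization.
        unsafe {
            self.arr.partial[self.len].write(value);
            self.len += 1;
        }
    }
}

impl<'a, T, const N: usize> Drop for MapGuard<'a, T, N> {
    fn drop(&mut self) {
        // SAFETY: exactly the first `len` slots were initialized by `push`.
        unsafe {
            let first: *mut MaybeUninit<T> = self.arr.partial.as_mut_ptr();
            let initialized: *mut [T] = ptr::slice_from_raw_parts_mut(first.cast(), self.len);
            ptr::drop_in_place(initialized);
        }
    }
}

// Hypothetical stand-in for `[T; N]::map`, built on the same pattern.
fn map_array<T, U, const N: usize>(input: [T; N], mut f: impl FnMut(T) -> U) -> [U; N] {
    let mut buffer = MaybeUninitArray::<U, N> { none: () };
    {
        let mut guard = MapGuard { arr: &mut buffer, len: 0 };
        for value in input {
            guard.push(f(value));
        }
        // All N slots are initialized, so the guard's cleanup must not run.
        forget(guard);
    }
    // SAFETY: every slot was written, so the `complete` view is fully valid.
    unsafe { ManuallyDrop::into_inner(buffer.complete) }
}

fn main() {
    let squares = map_array([1u32, 2, 3, 4], |v| u64::from(v) * u64::from(v));
    assert_eq!(squares, [1, 4, 9, 16]);
}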
 library/core/src/array/mod.rs | 127 ++++++++++++----------------------
 src/test/codegen/array-map.rs |  27 +++++++---
 2 files changed, 60 insertions(+), 94 deletions(-)

diff --git a/library/core/src/array/mod.rs b/library/core/src/array/mod.rs
index 7afb16560b0e9..d0bb59df428fe 100644
--- a/library/core/src/array/mod.rs
+++ b/library/core/src/array/mod.rs
@@ -425,103 +425,62 @@ impl<T, const N: usize> [T; N] {
     /// assert_eq!(y, [6, 9, 3, 3]);
     /// ```
     #[unstable(feature = "array_map", issue = "75243")]
-    pub fn map<F, U>(self, mut f: F) -> [U; N]
+    pub fn map<F, U>(self, f: F) -> [U; N]
     where
         F: FnMut(T) -> U,
     {
-        use crate::mem::{align_of, forget, size_of, transmute_copy, ManuallyDrop, MaybeUninit};
-
-        if align_of::<T>() == align_of::<U>() && size_of::<T>() == size_of::<U>() {
-            // this branch allows reuse of the original array as a backing store
-            // kind of. As written with no compiler optimizations, transmute copy will
-            // still require 2 copies of the original array, but when it can be converted to
-            // transmute, this will require 0 copies.
-            union Translated<T, U> {
-                src: MaybeUninit<T>,
-                dst: ManuallyDrop<U>,
-            };
-            struct Guard<T, U, const N: usize> {
-                data: *mut [Translated<T, U>; N],
-                initialized: usize,
-            }
-            impl<T, U, const N: usize> Drop for Guard<T, U, N> {
-                fn drop(&mut self) {
-                    debug_assert!(self.initialized < N);
-                    let initialized_part =
-                        crate::ptr::slice_from_raw_parts_mut(self.data as *mut U, self.initialized);
-                    // SAFETY:
-                    // since we read from the element at initialized then panicked,
-                    // we have to skip over it to not double drop.
-                    let todo_ptr = unsafe { self.data.add(self.initialized + 1) as *mut T };
-                    let todo_part =
-                        crate::ptr::slice_from_raw_parts_mut(todo_ptr, N - self.initialized - 1);
-                    // SAFETY:
-                    // Have to remove both the initialized and not yet reached items.
-                    unsafe {
-                        crate::ptr::drop_in_place(initialized_part);
-                        crate::ptr::drop_in_place(todo_part);
-                    }
-                }
-            }
-            // SAFETY:
-            // Since we know that T & U have the same size and alignment we can safely transmute
-            // between them here
-            let mut src_dst = unsafe { transmute_copy::<_, [Translated<T, U>; N]>(&self) };
-
-            let mut guard: Guard<T, U, N> = Guard { data: &mut src_dst, initialized: 0 };
-            // Need to forget self now because the guard is responsible for dropping the items
-            forget(self);
-            for i in 0..N {
-                // SAFETY:
-                // All items prior to `i` are the `dst` variant.
-                // In order to convert `i` from src to dst, we take it from `MaybeUninit`,
-                // leaving uninitialized in its place, and set the destination as
-                // ManuallyDrop::new(..), and implicitly know that it will be a `dst` variant
-                // from where
+        use crate::mem::{forget, ManuallyDrop, MaybeUninit};
+        use crate::ptr;
+
+        union MaybeUninitArray<T, const N: usize> {
+            none: (),
+            partial: ManuallyDrop<[MaybeUninit<T>; N]>,
+            complete: ManuallyDrop<[T; N]>,
+        }
+        struct MapGuard<'a, T, const N: usize> {
+            arr: &'a mut MaybeUninitArray<T, N>,
+            len: usize,
+        }
+        impl<'a, T, const N: usize> MapGuard<'a, T, N> {
+            fn push(&mut self, value: T) {
+                // SAFETY: Since we know the input size is N, and the output is N,
+                // this will never exceed the capacity, and MaybeUninit is always in the
+                // structure of an array.
                 unsafe {
-                    let v = f(src_dst[i].src.assume_init_read());
-                    src_dst[i].dst = ManuallyDrop::new(v);
+                    self.arr.partial[self.len].write(value);
+                    self.len += 1;
                 }
-                guard.initialized += 1;
             }
-            forget(guard);
-            // SAFETY:
-            // At this point all the items have been initialized and are in `dst` discriminant.
-            // We can switch them over to being of type `U`.
-            return unsafe { transmute_copy::<_, [U; N]>(&src_dst) };
-        }
-
-        struct Guard<T, const N: usize> {
-            dst: *mut T,
-            initialized: usize,
-        }
-
-        impl<T, const N: usize> Drop for Guard<T, N> {
+        }
+        impl<'a, T, const N: usize> Drop for MapGuard<'a, T, N> {
             fn drop(&mut self) {
-                debug_assert!(self.initialized <= N);
-
-                let initialized_part =
-                    crate::ptr::slice_from_raw_parts_mut(self.dst, self.initialized);
-                // SAFETY: this raw slice will contain only initialized objects
-                // that's why, it is allowed to drop it.
+                //debug_assert!(self.len <= N);
+                // SAFETY: already pushed `len` elements, but need to drop them now that
+                // `f` panicked.
                 unsafe {
-                    crate::ptr::drop_in_place(initialized_part);
+                    let p: *mut MaybeUninit<T> = self.arr.partial.as_mut_ptr();
+                    let slice: *mut [T] = ptr::slice_from_raw_parts_mut(p.cast(), self.len);
+                    ptr::drop_in_place(slice);
                 }
             }
         }
-        let mut dst = MaybeUninit::uninit_array::<N>();
-        let mut guard: Guard<U, N> =
-            Guard { dst: MaybeUninit::slice_as_mut_ptr(&mut dst), initialized: 0 };
-        for (src, dst) in IntoIter::new(self).zip(&mut dst) {
-            dst.write(f(src));
-            guard.initialized += 1;
+
+        fn map_guard_fn<T, const N: usize>(
+            buffer: &mut MaybeUninitArray<T, N>,
+            iter: impl Iterator<Item = T>,
+        ) {
+            let mut guard = MapGuard { arr: buffer, len: 0 };
+            for v in iter {
+                guard.push(v);
+            }
+            forget(guard);
         }
-        // FIXME: Convert to crate::mem::transmute once it works with generics.
-        // unsafe { crate::mem::transmute::<[MaybeUninit<U>; N], [U; N]>(dst) }
-        forget(guard);
-        // SAFETY: At this point we've properly initialized the whole array
-        // and we just need to cast it to the correct type.
-        unsafe { transmute_copy::<_, [U; N]>(&dst) }
+
+        let mut buffer = MaybeUninitArray::<U, N> { none: () };
+        map_guard_fn(&mut buffer, IntoIter::new(self).map(f));
+        // SAFETY: all elements have successfully initialized, don't run guard's drop code
+        // and take completed buffer out of MaybeUninitArray.
+        unsafe { ManuallyDrop::into_inner(buffer.complete) }
     }
 
     /// Returns a slice containing the entire array. Equivalent to `&s[..]`.
diff --git a/src/test/codegen/array-map.rs b/src/test/codegen/array-map.rs
index defa130e95176..f4409cb5d9ca3 100644
--- a/src/test/codegen/array-map.rs
+++ b/src/test/codegen/array-map.rs
@@ -1,18 +1,25 @@
-// compile-flags: -O
-
+// compile-flags: -C opt-level=3 -Zmir-opt-level=3
 #![crate_type = "lib"]
 #![feature(array_map)]
 
-// CHECK-LABEL: @array_inc
-// CHECK-NOT: alloca
+const SIZE: usize = 4;
+
+// CHECK-LABEL: @array_cast_to_float
 #[no_mangle]
-pub fn array_inc(x: [u8; 1000]) -> [u8; 1000] {
-    x.map(|v| v + 1)
+pub fn array_cast_to_float(x: [u32; SIZE]) -> [f32; SIZE] {
+    // CHECK: cast
+    // CHECK: @llvm.memcpy
+    // CHECK: ret
+    // CHECK-EMPTY
+    x.map(|v| v as f32)
 }
 
-// CHECK-LABEL: @array_inc_cast
-// CHECK: alloca
+// CHECK-LABEL: @array_cast_to_u64
 #[no_mangle]
-pub fn array_inc_cast(x: [u8; 1000]) -> [u16; 1000] {
-    x.map(|v| v as u16 + 1)
+pub fn array_cast_to_u64(x: [u32; SIZE]) -> [u64; SIZE] {
+    // CHECK: cast
+    // CHECK: @llvm.memcpy
+    // CHECK: ret
+    // CHECK-EMPTY
+    x.map(|v| v as u64)
 }