From 708e9bfeeec15ab6898b77f437e8c5e5d189d28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Sun, 11 Oct 2020 18:56:49 +0300 Subject: [PATCH 1/3] Implement a specialized version std::iter::Enumerate for TrustedLen This allows to hint at the compiler via `intrinsics::assume()` that the returned index is smaller than the length of the underlying iterator and allows the compiler to optimize code better based on that. Fixes https://github.com/rust-lang/rust/issues/75935 --- library/core/src/iter/adapters/enumerate.rs | 162 ++++++++++++++++++-- 1 file changed, 146 insertions(+), 16 deletions(-) diff --git a/library/core/src/iter/adapters/enumerate.rs b/library/core/src/iter/adapters/enumerate.rs index 5978c2da98c35..c6c60c21a1276 100644 --- a/library/core/src/iter/adapters/enumerate.rs +++ b/library/core/src/iter/adapters/enumerate.rs @@ -1,3 +1,4 @@ +use crate::intrinsics; use crate::iter::adapters::{zip::try_get_unchecked, SourceIter, TrustedRandomAccess}; use crate::iter::{FusedIterator, InPlaceIterable, TrustedLen}; use crate::ops::{Add, AddAssign, Try}; @@ -15,10 +16,149 @@ use crate::ops::{Add, AddAssign, Try}; pub struct Enumerate { iter: I, count: usize, + len: usize, } -impl Enumerate { +impl Enumerate { pub(in crate::iter) fn new(iter: I) -> Enumerate { - Enumerate { iter, count: 0 } + EnumerateImpl::new(iter) + } +} + +/// Enumerate specialization trait +/// +/// This exists to work around https://bugs.llvm.org/show_bug.cgi?id=48965. It can be removed again +/// once this is solved in LLVM and the implementation of the trait functions can be folded again +/// into the corresponding functions on `Enumerate` based on the default implementation. +/// +/// The trait is implemented via specialization on any iterator that implements `TrustedRandomAccess` +/// to provide the information about the maximum value this iterator can return to the optimizer. +/// Specifically, for slices this allows the optimizer to know that the returned values are never +/// bigger than the size of the slice. +/// +/// The only difference between the default and specialized implementation is the use of +/// `intrinsics::assume()` on the to be returned values, and both implementations must be kept in +/// sync. +#[doc(hidden)] +trait EnumerateImpl { + type Item; + fn new(iter: I) -> Self; + fn next(&mut self) -> Option; + unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item + where + Self: TrustedRandomAccess; + fn next_back(&mut self) -> Option + where + I: ExactSizeIterator + DoubleEndedIterator; +} + +impl EnumerateImpl for Enumerate +where + I: Iterator, +{ + type Item = (usize, I::Item); + + default fn new(iter: I) -> Self { + Enumerate { + iter, + count: 0, + len: 0, // unused + } + } + + #[inline] + default fn next(&mut self) -> Option { + let a = self.iter.next()?; + let i = self.count; + // Possible undefined overflow. + AddAssign::add_assign(&mut self.count, 1); + Some((i, a)) + } + + #[inline] + default unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item + where + Self: TrustedRandomAccess, + { + // SAFETY: the caller must uphold the contract for + // `Iterator::__iterator_get_unchecked`. + let value = unsafe { try_get_unchecked(&mut self.iter, idx) }; + (Add::add(self.count, idx), value) + } + + #[inline] + default fn next_back(&mut self) -> Option + where + I: ExactSizeIterator + DoubleEndedIterator, + { + let a = self.iter.next_back()?; + let len = self.iter.len(); + // Can safely add, `ExactSizeIterator` promises that the number of + // elements fits into a `usize`. + Some((self.count + len, a)) + } +} + +// This is the same code as above but using `intrinsics::assume()` to hint at the compiler +// that the returned index is smaller than the length of the underlying iterator. +// +// This could be bound to `TrustedLen + ExactSizeIterator` or `TrustedRandomAccess` to guarantee +// that the number of elements fits into an `usize` and that the returned length is actually the +// real length. `TrustedRandomAccess` was selected because specialization on `ExactSizeIterator` is +// not possible (yet?). +impl EnumerateImpl for Enumerate +where + I: TrustedRandomAccess + Iterator, +{ + fn new(iter: I) -> Self { + let len = iter.size(); + + Enumerate { iter, count: 0, len } + } + + #[inline] + fn next(&mut self) -> Option { + let a = self.iter.next()?; + // SAFETY: There must be fewer than `self.len` items because of `TrustedLen`'s API contract + unsafe { + intrinsics::assume(self.count < self.len); + } + let i = self.count; + // Possible undefined overflow. + AddAssign::add_assign(&mut self.count, 1); + Some((i, a)) + } + + #[inline] + unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item + where + Self: TrustedRandomAccess, + { + // SAFETY: the caller must uphold the contract for + // `Iterator::__iterator_get_unchecked`. + let value = unsafe { try_get_unchecked(&mut self.iter, idx) }; + let idx = Add::add(self.count, idx); + // SAFETY: There must be fewer than `self.len` items because of `TrustedLen`'s API contract + unsafe { + intrinsics::assume(idx < self.len); + } + (idx, value) + } + + #[inline] + fn next_back(&mut self) -> Option + where + I: ExactSizeIterator + DoubleEndedIterator, + { + let a = self.iter.next_back()?; + let len = self.iter.len(); + // Can safely add, `ExactSizeIterator` promises that the number of + // elements fits into a `usize`. + let idx = self.count + len; + // SAFETY: There must be fewer than `self.len` items because of `TrustedLen`'s API contract + unsafe { + intrinsics::assume(idx < self.len); + } + Some((idx, a)) } } @@ -40,11 +180,7 @@ where /// Might panic if the index of the element overflows a `usize`. #[inline] fn next(&mut self) -> Option<(usize, ::Item)> { - let a = self.iter.next()?; - let i = self.count; - // Possible undefined overflow. - AddAssign::add_assign(&mut self.count, 1); - Some((i, a)) + EnumerateImpl::next(self) } #[inline] @@ -114,10 +250,8 @@ where where Self: TrustedRandomAccess, { - // SAFETY: the caller must uphold the contract for - // `Iterator::__iterator_get_unchecked`. - let value = unsafe { try_get_unchecked(&mut self.iter, idx) }; - (Add::add(self.count, idx), value) + // SAFETY: Just forwarding to the actual implementation. + unsafe { EnumerateImpl::__iterator_get_unchecked(self, idx) } } } @@ -128,11 +262,7 @@ where { #[inline] fn next_back(&mut self) -> Option<(usize, ::Item)> { - let a = self.iter.next_back()?; - let len = self.iter.len(); - // Can safely add, `ExactSizeIterator` promises that the number of - // elements fits into a `usize`. - Some((self.count + len, a)) + EnumerateImpl::next_back(self) } #[inline] From 44c5554dacf9597cd2006fe1e57b3b38adc125ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Sat, 30 Jan 2021 18:03:19 +0200 Subject: [PATCH 2/3] Add explanation why Add::add() is called directly instead of + in the Enumerate implementation --- library/core/src/iter/adapters/enumerate.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/library/core/src/iter/adapters/enumerate.rs b/library/core/src/iter/adapters/enumerate.rs index c6c60c21a1276..24ac4e4539751 100644 --- a/library/core/src/iter/adapters/enumerate.rs +++ b/library/core/src/iter/adapters/enumerate.rs @@ -69,7 +69,10 @@ where default fn next(&mut self) -> Option { let a = self.iter.next()?; let i = self.count; - // Possible undefined overflow. + // Possible undefined overflow. By directly calling the trait method instead of using the + // `+=` operator the decision about overflow checking is delayed to the crate that does code + // generation, even if overflow checks are disabled for the current crate. This is + // especially useful because overflow checks are usually disabled for the standard library. AddAssign::add_assign(&mut self.count, 1); Some((i, a)) } @@ -82,6 +85,7 @@ where // SAFETY: the caller must uphold the contract for // `Iterator::__iterator_get_unchecked`. let value = unsafe { try_get_unchecked(&mut self.iter, idx) }; + // See comment in `next()` for the reason why `Add::add()` is used here instead of `+`. (Add::add(self.count, idx), value) } @@ -123,7 +127,8 @@ where intrinsics::assume(self.count < self.len); } let i = self.count; - // Possible undefined overflow. + // See comment in `next()` of the default implementation for the reason why + // `AddAssign::add_assign()` is used here instead of `+=`. AddAssign::add_assign(&mut self.count, 1); Some((i, a)) } @@ -136,6 +141,7 @@ where // SAFETY: the caller must uphold the contract for // `Iterator::__iterator_get_unchecked`. let value = unsafe { try_get_unchecked(&mut self.iter, idx) }; + // See comment in `next()` for the reason why `Add::add()` is used here instead of `+`. let idx = Add::add(self.count, idx); // SAFETY: There must be fewer than `self.len` items because of `TrustedLen`'s API contract unsafe { From 49b8f872726036f1b967e45f921362f68fffea22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= Date: Sat, 30 Jan 2021 18:38:25 +0200 Subject: [PATCH 3/3] Document that TrustedRandomAccess also requires that the number of items fits into an usize --- library/core/src/iter/adapters/zip.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs index 98b8dca961407..75b770ea4be0a 100644 --- a/library/core/src/iter/adapters/zip.rs +++ b/library/core/src/iter/adapters/zip.rs @@ -384,7 +384,8 @@ impl ZipFmt