Skip to content

Commit 5850f36

Browse files
committed
handcode the loops so LLVM has to chew less IR
1 parent 2f78bce commit 5850f36

File tree

1 file changed

+42
-39
lines changed

1 file changed

+42
-39
lines changed

library/core/src/slice/cmp.rs

+42-39
Original file line numberDiff line numberDiff line change
@@ -55,54 +55,57 @@ impl<A, B> SlicePartialEq<B> for [A]
5555
where
5656
A: PartialEq<B>,
5757
{
58-
#[inline]
5958
default fn equal(&self, other: &[B]) -> bool {
6059
if self.len() != other.len() {
6160
return false;
6261
}
6362

64-
// at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
65-
if self.len() < 8 {
66-
return eq_small(self, other);
63+
if self.len() == 0 {
64+
return true;
6765
}
6866

69-
eq_unroll(self, other)
70-
}
71-
}
67+
// ZSTs have no identity and slices don't guarantee which addresses-to-ZSTs they produce
68+
// so we only need to compare them once to determine the behavior of the PartialEq impl
69+
if const { mem::size_of::<A>() == 0 && mem::size_of::<B>() == 0 } {
70+
// zero-length slices are always equal
71+
// SAFETY: A and B are ZSTs so it's ok to conjure them out of thin air
72+
return unsafe { mem::zeroed::<A>() == mem::zeroed::<B>() };
73+
}
7274

73-
#[inline]
74-
fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
75-
where
76-
A: PartialEq<B>,
77-
{
78-
a.iter().zip(b).all(|(a, b)| a == b)
79-
}
75+
const UNROLL: usize = 4;
76+
let mut i = 0;
77+
let mut is_eq = true;
78+
79+
let a = self.as_ptr();
80+
let b = other.as_ptr();
81+
let len = self.len();
82+
83+
// compare items 1 by 1 in case comparisons are expensive. at least one item, then
84+
// until the remainder is a multiple of UNROLL
85+
loop {
86+
// SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
87+
unsafe {
88+
is_eq = is_eq & PartialEq::eq(&*a.add(i), &*b.add(i));
89+
i = i.unchecked_add(1);
90+
}
8091

81-
fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
82-
where
83-
A: PartialEq<B>,
84-
{
85-
let (mut chunks_a, residual_a) = a.as_chunks::<4>();
86-
let (mut chunks_b, residual_b) = b.as_chunks::<4>();
87-
let peeled_a = chunks_a.take_first().unwrap();
88-
let peeled_b = chunks_b.take_first().unwrap();
89-
90-
// peel the first chunk and do a short-circuiting comparison to bail early on mismatches
91-
// in case comparisons are expensive
92-
let mut result = eq_small(peeled_a, peeled_b);
93-
94-
// then check the residual, another chance to bail early
95-
result = result && eq_small(residual_a, residual_b);
96-
97-
// iter.all short-circuits which means the backend can't unroll the loop due to early exits.
98-
// So we unroll it manually.
99-
result = result
100-
&& chunks_a
101-
.iter()
102-
.zip(chunks_b)
103-
.all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
104-
105-
result
92+
if !is_eq || i == len || (len - i) % UNROLL == 0 {
93+
break;
94+
}
95+
}
96+
while is_eq && i + UNROLL <= len {
97+
// SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
98+
unsafe {
99+
is_eq = is_eq & PartialEq::eq(&*a.add(i), &*b.add(i));
100+
is_eq = is_eq & PartialEq::eq(&*a.add(i + 1), &*b.add(i + 1));
101+
is_eq = is_eq & PartialEq::eq(&*a.add(i + 2), &*b.add(i + 2));
102+
is_eq = is_eq & PartialEq::eq(&*a.add(i + 3), &*b.add(i + 3));
103+
i = i.unchecked_add(UNROLL);
104+
}
105+
}
106+
107+
is_eq
108+
}
106109
}
107110

108111
// When each element can be compared byte-wise, we can compare all the bytes

0 commit comments

Comments
 (0)