Skip to content

Commit eb92641

Browse files
authored
Rollup merge of rust-lang#39107 - llogiq:branchless_filter_count, r=alexcrichton
Branchless `.filter(_).count()`. I found that the branchless version is only slower when there are few or no branch mispredictions, which usually isn't the case. I measured speedups between -5% (perfect prediction) and 60% (real-world data).
2 parents 79027e9 + bfabe81 commit eb92641

File tree

2 files changed

+27
-1
lines changed

2 files changed

+27
-1
lines changed

src/libcore/iter/mod.rs

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1086,7 +1086,7 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
10861086

10871087
#[inline]
10881088
fn next(&mut self) -> Option<I::Item> {
1089-
for x in self.iter.by_ref() {
1089+
for x in &mut self.iter {
10901090
if (self.predicate)(&x) {
10911091
return Some(x);
10921092
}
@@ -1099,6 +1099,26 @@ impl<I: Iterator, P> Iterator for Filter<I, P> where P: FnMut(&I::Item) -> bool
10991099
let (_, upper) = self.iter.size_hint();
11001100
(0, upper) // can't know a lower bound, due to the predicate
11011101
}
1102+
1103+
// this special case allows the compiler to make `.filter(_).count()`
1104+
// branchless. Barring perfect branch prediction (which is unattainable in
1105+
// the general case), this will be much faster in >90% of cases (containing
1106+
// virtually all real workloads) and only a tiny bit slower in the rest.
1107+
//
1108+
// Having this specialization thus allows us to write `.filter(p).count()`
1109+
// where we would otherwise write `.map(|x| p(x) as usize).sum()`, which is
1110+
// less readable and also less backwards-compatible with Rust before 1.10.
1111+
//
1112+
// Using the branchless version will also simplify the generated LLVM IR, thus
1113+
// leaving more budget for LLVM optimizations.
1114+
#[inline]
1115+
fn count(mut self) -> usize {
1116+
let mut count = 0;
1117+
for x in &mut self.iter {
1118+
count += (self.predicate)(&x) as usize;
1119+
}
1120+
count
1121+
}
11021122
}
11031123

11041124
#[stable(feature = "rust1", since = "1.0.0")]

src/libcoretest/iter.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,12 @@ fn test_iterator_enumerate_count() {
191191
assert_eq!(xs.iter().count(), 6);
192192
}
193193

194+
#[test]
195+
fn test_iterator_filter_count() {
196+
let xs = [0, 1, 2, 3, 4, 5, 6, 7, 8];
197+
assert_eq!(xs.iter().filter(|&&x| x % 2 == 0).count(), 5);
198+
}
199+
194200
#[test]
195201
fn test_iterator_peekable() {
196202
let xs = vec![0, 1, 2, 3, 4, 5];

0 commit comments

Comments
 (0)