Commit 49ca53e

MaxHeap now uses Vec
Being generic over storage at the algorithm level is a neater strategy (see #654)
1 parent 7e0ff8b commit 49ca53e

1 file changed: +36 -59 lines changed

src/k_smallest.rs: +36 -59
@@ -1,19 +1,18 @@
 use alloc::vec::IntoIter;
 use core::cmp::{Ord, Ordering, Reverse};
-use core::mem::{replace, transmute, MaybeUninit};
-use core::ops::Range;
+use core::mem::replace;
 
-fn k_smallest_dynamic<T, I: Iterator<Item = T>>(
+/// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
+fn k_smallest_general<T, I: Iterator<Item = T>>(
     iter: I,
     k: usize,
     order: impl Fn(&T, &T) -> Ordering,
 ) -> IntoIter<T> {
-    let mut storage = Vec::new();
-    storage.resize_with(k, MaybeUninit::uninit);
-    let Range { end, .. } = capped_heapsort(iter, &mut storage, order);
-    storage.truncate(end);
-    let initialized: Vec<_> = unsafe { transmute(storage) };
-    initialized.into_iter()
+    if k == 0 {
+        return Vec::new().into_iter();
+    }
+    let heap = MaxHeap::from_iter(k, order, iter);
+    heap.unwrap_sorted().into_iter()
 }
 
 pub(crate) fn reverse_cmp<T, F>(cmp: F) -> impl Fn(&T, &T) -> Ordering
@@ -35,15 +34,15 @@ where
     T: Ord,
     I: Iterator<Item = T>,
 {
-    k_smallest_dynamic(iter, k, T::cmp)
+    k_smallest_general(iter, k, T::cmp)
 }
 
 pub(crate) fn k_smallest_by<T, I, F>(iter: I, k: usize, cmp: F) -> IntoIter<T>
 where
     I: Iterator<Item = T>,
     F: Fn(&T, &T) -> Ordering,
 {
-    k_smallest_dynamic(iter, k, cmp)
+    k_smallest_general(iter, k, cmp)
 }
 
 pub(crate) fn k_smallest_by_key<T, I, F, K>(iter: I, k: usize, key: F) -> IntoIter<T>
@@ -54,75 +53,54 @@ where
 {
     let iter = iter.map(|v| Pair(key(&v), v));
 
-    let results: Vec<_> = k_smallest_dynamic(iter, k, Ord::cmp)
+    let results: Vec<_> = k_smallest_general(iter, k, Ord::cmp)
         .map(|Pair(_, t)| t)
         .collect();
     results.into_iter()
 }
 
-/// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
-/// Returns the range of initialized elements
-fn capped_heapsort<T, I: Iterator<Item = T>>(
-    iter: I,
-    storage: &mut [MaybeUninit<T>],
-    order: impl Fn(&T, &T) -> Ordering,
-) -> Range<usize> {
-    if storage.is_empty() {
-        return 0..0;
-    }
-    let mut heap = MaxHeap::from_iter(storage, order, iter);
-
-    let valid_elements = 0..heap.len;
-    while heap.len > 0 {
-        heap.pop();
-    }
-    valid_elements
-}
-
 /// An efficient heapsort requires that the heap ordering is the inverse of the desired sort order
 /// So the basic case of retrieving minimum elements requires a max heap
 ///
 /// This type does not attempt to reproduce all the functionality of [std::collections::BinaryHeap] and instead only implements what is needed for iter operations,
 /// e.g. we do not need to insert single elements.
 /// Additionally, some minor optimizations used in the std BinaryHeap are not used here, e.g. elements are actually swapped rather than managing a "hole"
-///
-/// To be generic over the underlying storage, it takes a mutable reference to avoid having to define a storage trait.
-struct MaxHeap<'a, T, C> {
+struct MaxHeap<T, C> {
     // It may be not be possible to shrink the storage for smaller sequencess
     // so manually manage the initialization
     // This is **assumed not to be empty**
-    storage: &'a mut [MaybeUninit<T>],
+    storage: alloc::vec::Vec<T>,
     comparator: C,
-    // SAFETY: this must always be less or equal to the count of actually initialized elements
+    // this is always less or equal to the count of actual elements
+    // allowing it to be less means the heap property can cover only a subset of the vec
+    // while reusing the storage
     len: usize,
 }
 
-impl<'a, T, C> MaxHeap<'a, T, C>
+impl<T, C> MaxHeap<T, C>
 where
     C: Fn(&T, &T) -> Ordering,
 {
-    fn from_iter<I>(storage: &'a mut [MaybeUninit<T>], comparator: C, mut iter: I) -> Self
+    fn from_iter<I>(k: usize, comparator: C, mut iter: I) -> Self
     where
         I: Iterator<Item = T>,
     {
+        let storage: Vec<T> = iter.by_ref().take(k).collect();
+
         let mut heap = Self {
+            len: storage.len(),
             storage,
             comparator,
-            len: 0,
         };
-        for (i, initial_item) in iter.by_ref().take(heap.storage.len()).enumerate() {
-            heap.storage[i] = MaybeUninit::new(initial_item);
-            heap.len += 1;
-        }
         // Filling up the storage and only afterwards rearranging to form a valid heap is slightly more efficient
         // (But only by a factor of lg(k) and I'd love to hear of a usecase where that matters)
        heap.heapify();
 
-        if heap.len == heap.storage.len() {
+        if k == heap.storage.len() {
            // Nothing else needs done if we didn't fill the storage in the first place
            // Also avoids unexpected behaviour with restartable iterators
            for val in iter {
-                let _ = heap.push_pop(val);
+                heap.push_pop(val);
            }
        }
        heap
@@ -133,11 +111,7 @@ where
     /// element ordering.
     fn get(&self, index: usize) -> Option<&T> {
         if index < self.len {
-            let ptr = unsafe {
-                // There might be a better way to do this but assume_init_ref doesn't exist on MSRV
-                self.storage[index].as_ptr().as_ref().unwrap()
-            };
-            Some(ptr)
+            self.storage.get(index)
         } else {
             None
         }
@@ -168,7 +142,6 @@ where
         let (original_item, replacement_item) = (self.get(origin), self.get(replacement_idx));
 
         let cmp = self.compare(original_item, replacement_item);
-        // If the left item also doesn't exist, this comparison will fall through
         if Some(Ordering::Less) == cmp {
             self.storage.swap(origin, replacement_idx);
             self.sift_down(replacement_idx);
@@ -188,17 +161,13 @@ where
 
     /// Insert the given element into the heap without changing its size
     /// The displaced element is returned, i.e. either the input or previous max
-    fn push_pop(&mut self, val: T) -> Option<T> {
+    fn push_pop(&mut self, val: T) -> T {
         if self.compare(self.get(0), Some(&val)) == Some(Ordering::Greater) {
-            let out = replace(&mut self.storage[0], MaybeUninit::new(val));
+            let out = replace(&mut self.storage[0], val);
             self.sift_down(0);
-            // SAFETY: This has been moved out of storage[0]
-            // storage[0] will be uninitialized if and only if self.len == 0
-            // In that case, self.get(0) above will return None, and the comparison will fall through to None
-            // So to get here, self.len > 0 and therefore this element was initialized
-            unsafe { Some(out.assume_init()) }
+            out
         } else {
-            Some(val)
+            val
         }
     }
 
@@ -213,6 +182,14 @@ where
     fn compare(&self, a: Option<&T>, b: Option<&T>) -> Option<Ordering> {
         (self.comparator)(a?, b?).into()
     }
+
+    // Totally orders the elements and returns the raw storage
+    fn unwrap_sorted(mut self) -> Vec<T> {
+        while self.len > 1 {
+            self.pop();
+        }
+        self.storage
+    }
 }
 
 struct Pair<K, T>(K, T);
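
For orientation, the strategy the commit switches to can be mirrored in a small standalone sketch. This is not the crate's implementation: it substitutes std's BinaryHeap for the hand-rolled MaxHeap, and the names k_smallest_sketch and main are illustrative only. It shows the same bounded-heap idea: collect the first k elements into owned Vec-backed storage, keep the largest retained element at the root, replace the root whenever a smaller element arrives (the push_pop step above), then drain into ascending order (unwrap_sorted).

// A minimal sketch of the bounded max-heap strategy, assuming std is available
// (the crate itself uses its own Vec-backed MaxHeap as in the diff above).
use std::collections::BinaryHeap;

fn k_smallest_sketch<T: Ord, I: IntoIterator<Item = T>>(iter: I, k: usize) -> Vec<T> {
    if k == 0 {
        return Vec::new();
    }
    let mut iter = iter.into_iter();
    // Seed the heap with up to k elements; the largest retained element sits at the root.
    let mut heap: BinaryHeap<T> = iter.by_ref().take(k).collect();
    // For every remaining element, keep it only if it beats the current maximum.
    for val in iter {
        if let Some(mut top) = heap.peek_mut() {
            if val < *top {
                // Replacing through PeekMut re-sifts on drop, i.e. a push_pop.
                *top = val;
            }
        }
    }
    // Drain into ascending order.
    heap.into_sorted_vec()
}

fn main() {
    let data = vec![5, 1, 9, 3, 7, 2, 8];
    assert_eq!(k_smallest_sketch(data, 3), vec![1, 2, 3]);
}

Owning a Vec<T> means only ever-initialized elements live in storage, which is what lets the commit drop MaybeUninit, transmute, and the accompanying SAFETY comments.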
