Overhauling heap construction/destruction process

ejmount · ejmount · commit 58c0362f94ff · 2022-10-21T19:58:37.000+01:00
diff --git a/src/k_smallest.rs b/src/k_smallest.rs
@@ -1,7 +1,7 @@
 use alloc::vec::IntoIter;
 use core::cmp::{Ord, Ordering, Reverse};
 use core::mem::{replace, transmute, MaybeUninit};
-
+use core::ops::Range;
 
 fn k_smallest_dynamic<T, I: Iterator<Item = T>>(
     iter: I,
@@ -10,8 +10,8 @@ fn k_smallest_dynamic<T, I: Iterator<Item = T>>(
 ) -> IntoIter<T> {
     let mut storage = Vec::new();
     storage.resize_with(k, MaybeUninit::uninit);
-    let num_elements = capped_heapsort(iter, &mut storage, order);
-    storage.truncate(num_elements);
+    let Range { end, .. } = capped_heapsort(iter, &mut storage, order);
+    storage.truncate(end);
     let initialized: Vec<_> = unsafe { transmute(storage) };
     initialized.into_iter()
 }
@@ -61,15 +61,22 @@ where
 }
 
 /// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
-/// Returns the number of elements processed, up to the size of the storage
+/// Returns the range of indices in `storage` that hold initialized elements.
 fn capped_heapsort<T, I: Iterator<Item = T>>(
     iter: I,
     storage: &mut [MaybeUninit<T>],
     order: impl Fn(&T, &T) -> Ordering,
-) -> usize {
-    let mut heap = MaxHeap::new(&mut storage[..], order);
-    heap.extend(iter);
-    heap.total_sort()
+) -> Range<usize> {
+    if storage.is_empty() {
+        return 0..0;
+    }
+    let mut heap = MaxHeap::from_iter(storage, order, iter);
+
+    let valid_elements = 0..heap.len;
+    while heap.len > 0 {
+        heap.pop();
+    }
+    valid_elements
 }
 
 /// An efficient heapsort requires that the heap ordering is the inverse of the desired sort order
@@ -83,6 +90,7 @@ fn capped_heapsort<T, I: Iterator<Item = T>>(
 struct MaxHeap<'a, T, C> {
     // It may be not be possible to shrink the storage for smaller sequencess
     // so manually manage the initialization
+    // This slice is **assumed not to be empty**
     storage: &'a mut [MaybeUninit<T>],
     comparator: C,
     // SAFETY: this must always be less or equal to the count of actually initialized elements
@@ -93,37 +101,31 @@ impl<'a, T, C> MaxHeap<'a, T, C>
 where
     C: Fn(&T, &T) -> Ordering,
 {
-    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
-        let mut iter = iter.into_iter();
-
-        for initial_item in iter.by_ref().take(self.storage.len() - self.len) {
-            // This is the only point where the length is increased
-            // And the element we are increasing from is always initialized
-            self.storage[self.len] = MaybeUninit::new(initial_item);
-            self.len += 1;
+    fn from_iter<I>(storage: &'a mut [MaybeUninit<T>], comparator: C, mut iter: I) -> Self
+    where
+        I: Iterator<Item = T>,
+    {
+        let mut heap = Self {
+            storage,
+            comparator,
+            len: 0,
+        };
+        for (i, initial_item) in iter.by_ref().take(heap.storage.len()).enumerate() {
+            heap.storage[i] = MaybeUninit::new(initial_item);
+            heap.len += 1;
         }
         // Filling up the storage and only afterwards rearranging to form a valid heap is slightly more efficient
         // (But only by a factor of lg(k) and I'd love to hear of a usecase where that matters)
-        self.heapify();
+        heap.heapify();
 
-        if self.len == self.storage.len() {
+        if heap.len == heap.storage.len() {
             // Nothing else needs done if we didn't fill the storage in the first place
             // Also avoids unexpected behaviour with restartable iterators
             for val in iter {
-                let _ = self.push_pop(val);
+                let _ = heap.push_pop(val);
             }
         }
-    }
-}
-
-impl<'a, T, C> MaxHeap<'a, T, C> {
-    /// Creates an empty [`MaxHeap<T, C>`].
-    fn new(storage: &'a mut [MaybeUninit<T>], comparator: C) -> Self {
-        Self {
-            storage,
-            comparator,
-            len: 0,
-        }
+        heap
     }
 
     /// Retrieves the element at the given index.
@@ -169,9 +171,21 @@ impl<'a, T, C> MaxHeap<'a, T, C> {
         }
     }
 
+    /// Pops the greatest element by swapping it into the last slot of the heap,
+    /// shrinking the heap by 1, and sifting down from the root to restore the ordering.
+    fn pop(&mut self) {
+        debug_assert!(self.len > 0);
+        self.storage.swap(0, self.len - 1);
+        // Leaves the length shorter than the number of initialized elements
+        // so that sifting does not disturb already popped elements
+        self.len -= 1;
+        self.sift_down(0);
+    }
+
     /// Insert the given element into the heap without changing its size
     /// The displaced element is returned, i.e. either the input or previous max
     fn push_pop(&mut self, val: T) -> Option<T> {
+        if self.compare(self.get(0), Some(&val)) == Some(Ordering::Greater) {
             let out = replace(&mut self.storage[0], MaybeUninit::new(val));
             self.sift_down(0);
             // SAFETY: This has been moved out of storage[0]
@@ -195,23 +209,6 @@ impl<'a, T, C> MaxHeap<'a, T, C> {
     fn compare(&self, a: Option<&T>, b: Option<&T>) -> Option<Ordering> {
         (self.comparator)(a?, b?).into()
     }
-
-    /// Heapsorts the storage into **ascending** order by repeatedly popping.
-    /// The number of elements in the heap is returned.
-    fn total_sort(mut self) -> usize
-    where
-        C: Fn(&T, &T) -> Ordering,
-    {
-        let original_len = self.len;
-        while self.len > 1 {
-            self.storage.swap(0, self.len - 1);
-            // Leaves the length shorter than the number of initialized elements
-            // so that sifting does not disturb already popped elements
-            self.len -= 1;
-            self.sift_down(0);
-        }
-        original_len
-    }
 }
 
 struct Pair<K, T>(K, T);