use alloc::vec::IntoIter;
use core::cmp::{Ord, Ordering, Reverse};
- use core::mem::{replace, transmute, MaybeUninit};
- use core::ops::Range;
+ use core::mem::replace;

- fn k_smallest_dynamic<T, I: Iterator<Item = T>>(
+ /// Consumes a given iterator, returning the minimum elements in **ascending** order.
+ fn k_smallest_general<T, I: Iterator<Item = T>>(
    iter: I,
    k: usize,
    order: impl Fn(&T, &T) -> Ordering,
) -> IntoIter<T> {
-     let mut storage = Vec::new();
-     storage.resize_with(k, MaybeUninit::uninit);
-     let Range { end, .. } = capped_heapsort(iter, &mut storage, order);
-     storage.truncate(end);
-     let initialized: Vec<_> = unsafe { transmute(storage) };
-     initialized.into_iter()
+     if k == 0 {
+         return Vec::new().into_iter();
+     }
+     let heap = MaxHeap::from_iter(k, order, iter);
+     heap.unwrap_sorted().into_iter()
}

pub(crate) fn reverse_cmp<T, F>(cmp: F) -> impl Fn(&T, &T) -> Ordering
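For readers skimming the diff: the rewritten `k_smallest_general` keeps at most `k` elements in a max-heap ordered by the caller's comparator, lets every later element displace the current maximum if it is smaller, and finally sorts the survivors ascending. Below is a standalone sketch of the same bounded-max-heap idea using std's `BinaryHeap` (illustrative only, not the crate's code; it needs `T: Ord`, whereas the hand-rolled heap above exists precisely to take an arbitrary comparator):

```rust
// Illustrative sketch only: the bounded max-heap technique via std's BinaryHeap.
use std::collections::BinaryHeap;

fn k_smallest_sketch<T: Ord>(iter: impl IntoIterator<Item = T>, k: usize) -> Vec<T> {
    let mut iter = iter.into_iter();
    if k == 0 {
        return Vec::new();
    }
    // Seed the heap with the first k elements...
    let mut heap: BinaryHeap<T> = iter.by_ref().take(k).collect();
    // ...then let each remaining element displace the current maximum if it is
    // smaller, which is what `push_pop` does in the diff above.
    for val in iter {
        if let Some(mut top) = heap.peek_mut() {
            if val < *top {
                *top = val;
            }
        }
    }
    // The heap now holds the k smallest elements; return them ascending,
    // mirroring `unwrap_sorted`.
    let mut out = heap.into_vec();
    out.sort_unstable();
    out
}

// e.g. k_smallest_sketch([5, 1, 4, 2, 3], 3) == vec![1, 2, 3]
```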
@@ -35,15 +34,15 @@ where
    T: Ord,
    I: Iterator<Item = T>,
{
-     k_smallest_dynamic(iter, k, T::cmp)
+     k_smallest_general(iter, k, T::cmp)
}

pub(crate) fn k_smallest_by<T, I, F>(iter: I, k: usize, cmp: F) -> IntoIter<T>
where
    I: Iterator<Item = T>,
    F: Fn(&T, &T) -> Ordering,
{
-     k_smallest_dynamic(iter, k, cmp)
+     k_smallest_general(iter, k, cmp)
}

pub(crate) fn k_smallest_by_key<T, I, F, K>(iter: I, k: usize, key: F) -> IntoIter<T>
@@ -54,75 +53,54 @@ where
{
    let iter = iter.map(|v| Pair(key(&v), v));

-     let results: Vec<_> = k_smallest_dynamic(iter, k, Ord::cmp)
+     let results: Vec<_> = k_smallest_general(iter, k, Ord::cmp)
        .map(|Pair(_, t)| t)
        .collect();
    results.into_iter()
}

- /// Consumes a given iterator, leaving the minimum elements in the provided storage in **ascending** order.
- /// Returns the range of initialized elements
- fn capped_heapsort<T, I: Iterator<Item = T>>(
-     iter: I,
-     storage: &mut [MaybeUninit<T>],
-     order: impl Fn(&T, &T) -> Ordering,
- ) -> Range<usize> {
-     if storage.is_empty() {
-         return 0..0;
-     }
-     let mut heap = MaxHeap::from_iter(storage, order, iter);
-
-     let valid_elements = 0..heap.len;
-     while heap.len > 0 {
-         heap.pop();
-     }
-     valid_elements
- }
-
/// An efficient heapsort requires that the heap ordering is the inverse of the desired sort order
/// So the basic case of retrieving minimum elements requires a max heap
///
/// This type does not attempt to reproduce all the functionality of [std::collections::BinaryHeap] and instead only implements what is needed for iter operations,
/// e.g. we do not need to insert single elements.
/// Additionally, some minor optimizations used in the std BinaryHeap are not used here, e.g. elements are actually swapped rather than managing a "hole"
- ///
- /// To be generic over the underlying storage, it takes a mutable reference to avoid having to define a storage trait.
- struct MaxHeap<'a, T, C> {
+ struct MaxHeap<T, C> {
    // It may not be possible to shrink the storage for smaller sequences
    // so manually manage the initialization
    // This is **assumed not to be empty**
-     storage: &'a mut [MaybeUninit<T>],
+     storage: alloc::vec::Vec<T>,
    comparator: C,
-     // SAFETY: this must always be less or equal to the count of actually initialized elements
+     // this is always less than or equal to the count of actual elements
+     // allowing it to be less means the heap property can cover only a subset of the vec
+     // while reusing the storage
    len: usize,
}

- impl<'a, T, C> MaxHeap<'a, T, C>
+ impl<T, C> MaxHeap<T, C>
where
    C: Fn(&T, &T) -> Ordering,
{
-     fn from_iter<I>(storage: &'a mut [MaybeUninit<T>], comparator: C, mut iter: I) -> Self
+     fn from_iter<I>(k: usize, comparator: C, mut iter: I) -> Self
    where
        I: Iterator<Item = T>,
    {
+         let storage: Vec<T> = iter.by_ref().take(k).collect();
+
        let mut heap = Self {
+             len: storage.len(),
            storage,
            comparator,
-             len: 0,
        };
-         for (i, initial_item) in iter.by_ref().take(heap.storage.len()).enumerate() {
-             heap.storage[i] = MaybeUninit::new(initial_item);
-             heap.len += 1;
-         }
        // Filling up the storage and only afterwards rearranging to form a valid heap is slightly more efficient
        // (But only by a factor of lg(k) and I'd love to hear of a use case where that matters)
        heap.heapify();

-         if heap.len == heap.storage.len() {
+         if k == heap.storage.len() {
            // Nothing else needs to be done if we didn't fill the storage in the first place
            // Also avoids unexpected behaviour with restartable iterators
            for val in iter {
-                 let _ = heap.push_pop(val);
+                 heap.push_pop(val);
            }
        }
        heap
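`heapify` and `sift_down` are not part of this hunk. For reference, a generic textbook version of the pair is sketched below; the names match the methods called above, but the bodies are an assumption rather than the crate's exact implementation:

```rust
use core::cmp::Ordering;

// Establish the max-heap property over the whole slice by sifting down every
// internal node, starting from the last one; this is the O(k) bulk build that
// the "only by a factor of lg(k)" comment above alludes to.
fn heapify<T>(v: &mut [T], cmp: &impl Fn(&T, &T) -> Ordering) {
    for i in (0..v.len() / 2).rev() {
        sift_down(v, cmp, i);
    }
}

// Push the element at `i` down until neither child compares greater.
fn sift_down<T>(v: &mut [T], cmp: &impl Fn(&T, &T) -> Ordering, mut i: usize) {
    loop {
        let mut largest = i;
        for child in [2 * i + 1, 2 * i + 2] {
            if child < v.len() && cmp(&v[largest], &v[child]) == Ordering::Less {
                largest = child;
            }
        }
        if largest == i {
            return;
        }
        v.swap(i, largest);
        i = largest;
    }
}
```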
@@ -133,11 +111,7 @@ where
    /// element ordering.
    fn get(&self, index: usize) -> Option<&T> {
        if index < self.len {
-             let ptr = unsafe {
-                 // There might be a better way to do this but assume_init_ref doesn't exist on MSRV
-                 self.storage[index].as_ptr().as_ref().unwrap()
-             };
-             Some(ptr)
+             self.storage.get(index)
        } else {
            None
        }
@@ -168,7 +142,6 @@ where
        let (original_item, replacement_item) = (self.get(origin), self.get(replacement_idx));

        let cmp = self.compare(original_item, replacement_item);
-         // If the left item also doesn't exist, this comparison will fall through
        if Some(Ordering::Less) == cmp {
            self.storage.swap(origin, replacement_idx);
            self.sift_down(replacement_idx);
@@ -188,17 +161,13 @@ where

    /// Insert the given element into the heap without changing its size
    /// The displaced element is returned, i.e. either the input or previous max
-     fn push_pop(&mut self, val: T) -> Option<T> {
+     fn push_pop(&mut self, val: T) -> T {
        if self.compare(self.get(0), Some(&val)) == Some(Ordering::Greater) {
-             let out = replace(&mut self.storage[0], MaybeUninit::new(val));
+             let out = replace(&mut self.storage[0], val);
            self.sift_down(0);
-             // SAFETY: This has been moved out of storage[0]
-             // storage[0] will be uninitialized if and only if self.len == 0
-             // In that case, self.get(0) above will return None, and the comparison will fall through to None
-             // So to get here, self.len > 0 and therefore this element was initialized
-             unsafe { Some(out.assume_init()) }
+             out
        } else {
-             Some(val)
+             val
        }
    }

@@ -213,6 +182,14 @@ where
    fn compare(&self, a: Option<&T>, b: Option<&T>) -> Option<Ordering> {
        (self.comparator)(a?, b?).into()
    }
+
+     // Totally orders the elements and returns the raw storage
+     fn unwrap_sorted(mut self) -> Vec<T> {
+         while self.len > 1 {
+             self.pop();
+         }
+         self.storage
+     }
}

struct Pair<K, T>(K, T);
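The diff does not show how `Pair` is ordered, but `k_smallest_by_key` passes `Ord::cmp` while `T` carries no `Ord` bound, so `Pair` must compare by the key alone. A plausible shape of those impls (an assumption, shown only to make that call legible):

```rust
use core::cmp::Ordering;

struct Pair<K, T>(K, T);

// Assumed shape: since `T` is unconstrained in `k_smallest_by_key`,
// the ordering can only look at the key component.
impl<K: Ord, T> PartialEq for Pair<K, T> {
    fn eq(&self, other: &Self) -> bool {
        self.0 == other.0
    }
}

impl<K: Ord, T> Eq for Pair<K, T> {}

impl<K: Ord, T> PartialOrd for Pair<K, T> {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl<K: Ord, T> Ord for Pair<K, T> {
    fn cmp(&self, other: &Self) -> Ordering {
        self.0.cmp(&other.0)
    }
}
```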