8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
+ use array_vec:: ArrayVec ;
11
12
use std:: borrow:: { Borrow , BorrowMut , ToOwned } ;
12
13
use std:: fmt;
13
14
use std:: iter;
@@ -25,6 +26,8 @@ use rustc_serialize;
25
26
///
26
27
/// In other words, `T` is the type used to index into the bitvector
27
28
/// this type uses to represent the set of objects it holds.
29
+ ///
30
+ /// The representation is dense, using one bit per possible element.
28
31
#[ derive( Eq , PartialEq ) ]
29
32
pub struct IdxSetBuf < T : Idx > {
30
33
_pd : PhantomData < fn ( & T ) > ,
@@ -90,6 +93,8 @@ impl<T: Idx> ToOwned for IdxSet<T> {
90
93
}
91
94
}
92
95
96
/// Number of bits in one `Word` (its size in bytes times 8). Used to convert
/// between element counts / bit indices and word counts / word indices.
+ const BITS_PER_WORD : usize = mem:: size_of :: < Word > ( ) * 8 ;
97
+
93
98
impl < T : Idx > fmt:: Debug for IdxSetBuf < T > {
94
99
fn fmt ( & self , w : & mut fmt:: Formatter ) -> fmt:: Result {
95
100
w. debug_list ( )
@@ -108,8 +113,7 @@ impl<T: Idx> fmt::Debug for IdxSet<T> {
108
113
109
114
impl < T : Idx > IdxSetBuf < T > {
110
115
fn new ( init : Word , universe_size : usize ) -> Self {
111
- let bits_per_word = mem:: size_of :: < Word > ( ) * 8 ;
112
- let num_words = ( universe_size + ( bits_per_word - 1 ) ) / bits_per_word;
116
+ let num_words = ( universe_size + ( BITS_PER_WORD - 1 ) ) / BITS_PER_WORD ;
113
117
IdxSetBuf {
114
118
_pd : Default :: default ( ) ,
115
119
bits : vec ! [ init; num_words] ,
@@ -160,6 +164,16 @@ impl<T: Idx> IdxSet<T> {
160
164
}
161
165
}
162
166
167
+ /// Duplicates as a hybrid set.
168
+ pub fn to_hybrid ( & self ) -> HybridIdxSetBuf < T > {
169
+ // This universe_size may be slightly larger than the one specified
170
+ // upon creation, due to rounding up to a whole word. That's ok.
171
+ let universe_size = self . bits . len ( ) * BITS_PER_WORD ;
172
+
173
+ // Note: we currently don't bother trying to make a Sparse set.
174
+ HybridIdxSetBuf :: Dense ( self . to_owned ( ) , universe_size)
175
+ }
176
+
163
177
/// Removes all elements
164
178
pub fn clear ( & mut self ) {
165
179
for b in & mut self . bits {
@@ -177,21 +191,19 @@ impl<T: Idx> IdxSet<T> {
177
191
178
192
/// Clear all elements above `universe_size`.
179
193
fn trim_to ( & mut self , universe_size : usize ) {
180
- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
181
-
182
194
// `trim_block` is the first block where some bits have
183
195
// to be cleared.
184
- let trim_block = universe_size / word_bits ;
196
+ let trim_block = universe_size / BITS_PER_WORD ;
185
197
186
198
// all the blocks above it have to be completely cleared.
187
199
if trim_block < self . bits . len ( ) {
188
200
for b in & mut self . bits [ trim_block+1 ..] {
189
201
* b = 0 ;
190
202
}
191
203
192
- // at that block, the `universe_size % word_bits ` lsbs
204
+ // at that block, the `universe_size % BITS_PER_WORD ` lsbs
193
205
// should remain.
194
- let remaining_bits = universe_size % word_bits ;
206
+ let remaining_bits = universe_size % BITS_PER_WORD ;
195
207
let mask = ( 1 <<remaining_bits) -1 ;
196
208
self . bits [ trim_block] &= mask;
197
209
}
@@ -242,12 +254,46 @@ impl<T: Idx> IdxSet<T> {
242
254
bitwise ( self . words_mut ( ) , other. words ( ) , & Union )
243
255
}
244
256
257
+ /// Like `union()`, but takes a `SparseIdxSetBuf` argument.
258
+ fn union_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
259
+ let mut changed = false ;
260
+ for elem in other. iter ( ) {
261
+ changed |= self . add ( & elem) ;
262
+ }
263
+ changed
264
+ }
265
+
266
+ /// Like `union()`, but takes a `HybridIdxSetBuf` argument.
267
+ pub fn union_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
268
+ match other {
269
+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . union_sparse ( sparse) ,
270
+ HybridIdxSetBuf :: Dense ( dense, _) => self . union ( dense) ,
271
+ }
272
+ }
273
+
245
274
/// Set `self = self - other` and return true if `self` changed.
246
275
/// (i.e., if any bits were removed).
247
276
pub fn subtract ( & mut self , other : & IdxSet < T > ) -> bool {
248
277
bitwise ( self . words_mut ( ) , other. words ( ) , & Subtract )
249
278
}
250
279
280
+ /// Like `subtract()`, but takes a `SparseIdxSetBuf` argument.
281
+ fn subtract_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
282
+ let mut changed = false ;
283
+ for elem in other. iter ( ) {
284
+ changed |= self . remove ( & elem) ;
285
+ }
286
+ changed
287
+ }
288
+
289
+ /// Like `subtract()`, but takes a `HybridIdxSetBuf` argument.
290
+ pub fn subtract_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
291
+ match other {
292
+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . subtract_sparse ( sparse) ,
293
+ HybridIdxSetBuf :: Dense ( dense, _) => self . subtract ( dense) ,
294
+ }
295
+ }
296
+
251
297
/// Set `self = self & other` and return true if `self` changed.
252
298
/// (i.e., if any bits were removed).
253
299
pub fn intersect ( & mut self , other : & IdxSet < T > ) -> bool {
@@ -273,19 +319,200 @@ impl<'a, T: Idx> Iterator for Iter<'a, T> {
273
319
type Item = T ;
274
320
275
321
// Yields the next element of the set, or `None` once `self.iter` has no more
// words. Elements are produced word by word; within a word the bit found by
// `trailing_zeros` is reported first.
fn next ( & mut self ) -> Option < T > {
276
- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
277
322
loop {
278
323
// `cur` holds the bits of the current word that have not been reported
// yet, together with the element index of that word's bit 0.
if let Some ( ( ref mut word, offset) ) = self . cur {
279
324
let bit_pos = word. trailing_zeros ( ) as usize ;
280
- if bit_pos != word_bits {
325
// A position equal to the word width is treated as "no set bits left in
// this word"; anything else is a set bit to report.
+ if bit_pos != BITS_PER_WORD {
281
326
let bit = 1 << bit_pos;
282
327
// Clear the bit in the cached word so it is not reported again.
* word ^= bit;
283
328
return Some ( T :: new ( bit_pos + offset) )
284
329
}
285
330
}
286
331
287
332
// Current word exhausted (or none loaded yet): fetch the next
// (index, word) pair, ending iteration via `?` when there is none.
let ( i, word) = self . iter . next ( ) ?;
288
- self . cur = Some ( ( * word, word_bits * i) ) ;
333
+ self . cur = Some ( ( * word, BITS_PER_WORD * i) ) ;
334
+ }
335
+ }
336
+ }
337
+
338
/// Capacity of a `SparseIdxSetBuf`. `HybridIdxSetBuf::add` converts a sparse
/// set to the dense representation when a new element must be added and the
/// sparse set already holds this many.
+ const SPARSE_MAX : usize = 8 ;
339
+
340
+ /// A sparse index set holding at most `SPARSE_MAX` elements. Used by
341
+ /// `HybridIdxSetBuf`; do not use directly.
342
+ ///
343
+ /// The elements are stored in an `ArrayVec`, unsorted and with no duplicates
/// (`add` checks for membership before pushing).
344
+ #[ derive( Clone , Debug ) ]
345
+ pub struct SparseIdxSetBuf < T : Idx > ( ArrayVec < [ T ; SPARSE_MAX ] > ) ;
346
+
347
+ impl < T : Idx > SparseIdxSetBuf < T > {
348
+ fn new ( ) -> Self {
349
+ SparseIdxSetBuf ( ArrayVec :: new ( ) )
350
+ }
351
+
352
+ fn len ( & self ) -> usize {
353
+ self . 0 . len ( )
354
+ }
355
+
356
+ fn contains ( & self , elem : & T ) -> bool {
357
+ self . 0 . contains ( elem)
358
+ }
359
+
360
+ fn add ( & mut self , elem : & T ) -> bool {
361
+ // Ensure there are no duplicates.
362
+ if self . 0 . contains ( elem) {
363
+ false
364
+ } else {
365
+ self . 0 . push ( * elem) ;
366
+ true
367
+ }
368
+ }
369
+
370
+ fn remove ( & mut self , elem : & T ) -> bool {
371
+ if let Some ( i) = self . 0 . iter ( ) . position ( |e| e == elem) {
372
+ // Swap the found element to the end, then pop it.
373
+ let len = self . 0 . len ( ) ;
374
+ self . 0 . swap ( i, len - 1 ) ;
375
+ self . 0 . pop ( ) ;
376
+ true
377
+ } else {
378
+ false
379
+ }
380
+ }
381
+
382
+ fn to_dense ( & self , universe_size : usize ) -> IdxSetBuf < T > {
383
+ let mut dense = IdxSetBuf :: new_empty ( universe_size) ;
384
+ for elem in self . 0 . iter ( ) {
385
+ dense. add ( elem) ;
386
+ }
387
+ dense
388
+ }
389
+
390
+ fn iter ( & self ) -> SparseIter < T > {
391
+ SparseIter {
392
+ iter : self . 0 . iter ( ) ,
393
+ }
394
+ }
395
+ }
396
+
397
/// Iterator over the elements of a `SparseIdxSetBuf`; yields each `T` by
/// value.
+ pub struct SparseIter < ' a , T : Idx > {
398
/// Underlying slice iterator over the sparse set's stored elements.
+ iter : slice:: Iter < ' a , T > ,
399
+ }
400
+
401
+ impl < ' a , T : Idx > Iterator for SparseIter < ' a , T > {
402
+ type Item = T ;
403
+
404
+ fn next ( & mut self ) -> Option < T > {
405
+ self . iter . next ( ) . map ( |e| * e)
406
+ }
407
+ }
408
+
409
+ /// Like IdxSetBuf, but with a hybrid representation: sparse when there are few
410
+ /// elements in the set, but dense when there are many. It's especially
411
+ /// efficient for sets that typically have a small number of elements, but a
412
+ /// large `universe_size`, and are cleared frequently.
///
/// Both variants carry the set's `universe_size` as their second field.
413
+ #[ derive( Clone , Debug ) ]
414
+ pub enum HybridIdxSetBuf < T : Idx > {
415
/// Sparse representation plus the `universe_size` of the set.
+ Sparse ( SparseIdxSetBuf < T > , usize ) ,
416
/// Dense representation plus the `universe_size` of the set.
+ Dense ( IdxSetBuf < T > , usize ) ,
417
+ }
418
+
419
+ impl < T : Idx > HybridIdxSetBuf < T > {
420
+ pub fn new_empty ( universe_size : usize ) -> Self {
421
+ HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , universe_size)
422
+ }
423
+
424
+ fn universe_size ( & mut self ) -> usize {
425
+ match * self {
426
+ HybridIdxSetBuf :: Sparse ( _, size) => size,
427
+ HybridIdxSetBuf :: Dense ( _, size) => size,
428
+ }
429
+ }
430
+
431
+ pub fn clear ( & mut self ) {
432
+ let universe_size = self . universe_size ( ) ;
433
+ * self = HybridIdxSetBuf :: new_empty ( universe_size) ;
434
+ }
435
+
436
+ /// Returns true iff set `self` contains `elem`.
437
+ pub fn contains ( & self , elem : & T ) -> bool {
438
+ match self {
439
+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. contains ( elem) ,
440
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. contains ( elem) ,
441
+ }
442
+ }
443
+
444
+ /// Adds `elem` to the set `self`.
445
+ pub fn add ( & mut self , elem : & T ) -> bool {
446
+ match self {
447
+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. len ( ) < SPARSE_MAX => {
448
+ // The set is sparse and has space for `elem`.
449
+ sparse. add ( elem)
450
+ }
451
+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. contains ( elem) => {
452
+ // The set is sparse and does not have space for `elem`, but
453
+ // that doesn't matter because `elem` is already present.
454
+ false
455
+ }
456
+ HybridIdxSetBuf :: Sparse ( _, _) => {
457
+ // The set is sparse and full. Convert to a dense set.
458
+ //
459
+ // FIXME: This code is awful, but I can't work out how else to
460
+ // appease the borrow checker.
461
+ let dummy = HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , 0 ) ;
462
+ match mem:: replace ( self , dummy) {
463
+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => {
464
+ let mut dense = sparse. to_dense ( universe_size) ;
465
+ let changed = dense. add ( elem) ;
466
+ assert ! ( changed) ;
467
+ mem:: replace ( self , HybridIdxSetBuf :: Dense ( dense, universe_size) ) ;
468
+ changed
469
+ }
470
+ _ => panic ! ( "impossible" ) ,
471
+ }
472
+ }
473
+
474
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. add ( elem) ,
475
+ }
476
+ }
477
+
478
+ /// Removes `elem` from the set `self`.
479
+ pub fn remove ( & mut self , elem : & T ) -> bool {
480
+ // Note: we currently don't bother going from Dense back to Sparse.
481
+ match self {
482
+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. remove ( elem) ,
483
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. remove ( elem) ,
484
+ }
485
+ }
486
+
487
+ /// Converts to a dense set, consuming itself in the process.
488
+ pub fn to_dense ( self ) -> IdxSetBuf < T > {
489
+ match self {
490
+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => sparse. to_dense ( universe_size) ,
491
+ HybridIdxSetBuf :: Dense ( dense, _) => dense,
492
+ }
493
+ }
494
+
495
+ /// Iteration order is unspecified.
496
+ pub fn iter ( & self ) -> HybridIter < T > {
497
+ match self {
498
+ HybridIdxSetBuf :: Sparse ( sparse, _) => HybridIter :: Sparse ( sparse. iter ( ) ) ,
499
+ HybridIdxSetBuf :: Dense ( dense, _) => HybridIter :: Dense ( dense. iter ( ) ) ,
500
+ }
501
+ }
502
+ }
503
+
504
/// Iterator returned by `HybridIdxSetBuf::iter`; wraps the iterator of
/// whichever representation the set is currently using.
+ pub enum HybridIter < ' a , T : Idx > {
505
/// Iterating over a sparse set.
+ Sparse ( SparseIter < ' a , T > ) ,
506
/// Iterating over a dense set.
+ Dense ( Iter < ' a , T > ) ,
507
+ }
508
+
509
+ impl < ' a , T : Idx > Iterator for HybridIter < ' a , T > {
510
+ type Item = T ;
511
+
512
+ fn next ( & mut self ) -> Option < T > {
513
+ match self {
514
+ HybridIter :: Sparse ( sparse) => sparse. next ( ) ,
515
+ HybridIter :: Dense ( dense) => dense. next ( ) ,
289
516
}
290
517
}
291
518
}
0 commit comments