8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
+ use array_vec:: ArrayVec ;
11
12
use std:: borrow:: { Borrow , BorrowMut , ToOwned } ;
12
13
use std:: fmt;
13
14
use std:: iter;
@@ -25,6 +26,8 @@ use rustc_serialize;
25
26
///
26
27
/// In other words, `T` is the type used to index into the bitvector
27
28
/// this type uses to represent the set of objects it holds.
29
+ ///
30
+ /// The representation is dense, using one bit per possible element.
28
31
#[ derive( Eq , PartialEq ) ]
29
32
pub struct IdxSetBuf < T : Idx > {
30
33
_pd : PhantomData < fn ( & T ) > ,
@@ -93,6 +96,8 @@ impl<T: Idx> ToOwned for IdxSet<T> {
93
96
}
94
97
}
95
98
99
+ const BITS_PER_WORD : usize = mem:: size_of :: < Word > ( ) * 8 ;
100
+
96
101
impl < T : Idx > fmt:: Debug for IdxSetBuf < T > {
97
102
fn fmt ( & self , w : & mut fmt:: Formatter ) -> fmt:: Result {
98
103
w. debug_list ( )
@@ -111,8 +116,7 @@ impl<T: Idx> fmt::Debug for IdxSet<T> {
111
116
112
117
impl < T : Idx > IdxSetBuf < T > {
113
118
fn new ( init : Word , universe_size : usize ) -> Self {
114
- let bits_per_word = mem:: size_of :: < Word > ( ) * 8 ;
115
- let num_words = ( universe_size + ( bits_per_word - 1 ) ) / bits_per_word;
119
+ let num_words = ( universe_size + ( BITS_PER_WORD - 1 ) ) / BITS_PER_WORD ;
116
120
IdxSetBuf {
117
121
_pd : Default :: default ( ) ,
118
122
bits : vec ! [ init; num_words] ,
@@ -163,6 +167,16 @@ impl<T: Idx> IdxSet<T> {
163
167
}
164
168
}
165
169
170
+ /// Duplicates as a hybrid set.
171
+ pub fn to_hybrid ( & self ) -> HybridIdxSetBuf < T > {
172
+ // This universe_size may be slightly larger than the one specified
173
+ // upon creation, due to rounding up to a whole word. That's ok.
174
+ let universe_size = self . bits . len ( ) * BITS_PER_WORD ;
175
+
176
+ // Note: we currently don't bother trying to make a Sparse set.
177
+ HybridIdxSetBuf :: Dense ( self . to_owned ( ) , universe_size)
178
+ }
179
+
166
180
/// Removes all elements
167
181
pub fn clear ( & mut self ) {
168
182
for b in & mut self . bits {
@@ -180,21 +194,19 @@ impl<T: Idx> IdxSet<T> {
180
194
181
195
/// Clear all elements above `universe_size`.
182
196
fn trim_to ( & mut self , universe_size : usize ) {
183
- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
184
-
185
197
// `trim_block` is the first block where some bits have
186
198
// to be cleared.
187
- let trim_block = universe_size / word_bits ;
199
+ let trim_block = universe_size / BITS_PER_WORD ;
188
200
189
201
// all the blocks above it have to be completely cleared.
190
202
if trim_block < self . bits . len ( ) {
191
203
for b in & mut self . bits [ trim_block+1 ..] {
192
204
* b = 0 ;
193
205
}
194
206
195
- // at that block, the `universe_size % word_bits ` lsbs
207
+ // at that block, the `universe_size % BITS_PER_WORD ` lsbs
196
208
// should remain.
197
- let remaining_bits = universe_size % word_bits ;
209
+ let remaining_bits = universe_size % BITS_PER_WORD ;
198
210
let mask = ( 1 <<remaining_bits) -1 ;
199
211
self . bits [ trim_block] &= mask;
200
212
}
@@ -245,12 +257,46 @@ impl<T: Idx> IdxSet<T> {
245
257
bitwise ( self . words_mut ( ) , other. words ( ) , & Union )
246
258
}
247
259
260
+ /// Like `union()`, but takes a `SparseIdxSetBuf` argument.
261
+ fn union_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
262
+ let mut changed = false ;
263
+ for elem in other. iter ( ) {
264
+ changed |= self . add ( & elem) ;
265
+ }
266
+ changed
267
+ }
268
+
269
+ /// Like `union()`, but takes a `HybridIdxSetBuf` argument.
270
+ pub fn union_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
271
+ match other {
272
+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . union_sparse ( sparse) ,
273
+ HybridIdxSetBuf :: Dense ( dense, _) => self . union ( dense) ,
274
+ }
275
+ }
276
+
248
277
/// Set `self = self - other` and return true if `self` changed.
249
278
/// (i.e., if any bits were removed).
250
279
pub fn subtract ( & mut self , other : & IdxSet < T > ) -> bool {
251
280
bitwise ( self . words_mut ( ) , other. words ( ) , & Subtract )
252
281
}
253
282
283
+ /// Like `subtract()`, but takes a `SparseIdxSetBuf` argument.
284
+ fn subtract_sparse ( & mut self , other : & SparseIdxSetBuf < T > ) -> bool {
285
+ let mut changed = false ;
286
+ for elem in other. iter ( ) {
287
+ changed |= self . remove ( & elem) ;
288
+ }
289
+ changed
290
+ }
291
+
292
+ /// Like `subtract()`, but takes a `HybridIdxSetBuf` argument.
293
+ pub fn subtract_hybrid ( & mut self , other : & HybridIdxSetBuf < T > ) -> bool {
294
+ match other {
295
+ HybridIdxSetBuf :: Sparse ( sparse, _) => self . subtract_sparse ( sparse) ,
296
+ HybridIdxSetBuf :: Dense ( dense, _) => self . subtract ( dense) ,
297
+ }
298
+ }
299
+
254
300
/// Set `self = self & other` and return true if `self` changed.
255
301
/// (i.e., if any bits were removed).
256
302
pub fn intersect ( & mut self , other : & IdxSet < T > ) -> bool {
@@ -276,19 +322,200 @@ impl<'a, T: Idx> Iterator for Iter<'a, T> {
276
322
type Item = T ;
277
323
278
324
fn next ( & mut self ) -> Option < T > {
279
- let word_bits = mem:: size_of :: < Word > ( ) * 8 ;
280
325
loop {
281
326
if let Some ( ( ref mut word, offset) ) = self . cur {
282
327
let bit_pos = word. trailing_zeros ( ) as usize ;
283
- if bit_pos != word_bits {
328
+ if bit_pos != BITS_PER_WORD {
284
329
let bit = 1 << bit_pos;
285
330
* word ^= bit;
286
331
return Some ( T :: new ( bit_pos + offset) )
287
332
}
288
333
}
289
334
290
335
let ( i, word) = self . iter . next ( ) ?;
291
- self . cur = Some ( ( * word, word_bits * i) ) ;
336
+ self . cur = Some ( ( * word, BITS_PER_WORD * i) ) ;
337
+ }
338
+ }
339
+ }
340
+
341
+ const SPARSE_MAX : usize = 8 ;
342
+
343
+ /// A sparse index set with a maximum of SPARSE_MAX elements. Used by
344
+ /// HybridIdxSetBuf; do not use directly.
345
+ ///
346
+ /// The elements are stored as an unsorted vector with no duplicates.
347
+ #[ derive( Clone , Debug ) ]
348
+ pub struct SparseIdxSetBuf < T : Idx > ( ArrayVec < [ T ; SPARSE_MAX ] > ) ;
349
+
350
+ impl < T : Idx > SparseIdxSetBuf < T > {
351
+ fn new ( ) -> Self {
352
+ SparseIdxSetBuf ( ArrayVec :: new ( ) )
353
+ }
354
+
355
+ fn len ( & self ) -> usize {
356
+ self . 0 . len ( )
357
+ }
358
+
359
+ fn contains ( & self , elem : & T ) -> bool {
360
+ self . 0 . contains ( elem)
361
+ }
362
+
363
+ fn add ( & mut self , elem : & T ) -> bool {
364
+ // Ensure there are no duplicates.
365
+ if self . 0 . contains ( elem) {
366
+ false
367
+ } else {
368
+ self . 0 . push ( * elem) ;
369
+ true
370
+ }
371
+ }
372
+
373
+ fn remove ( & mut self , elem : & T ) -> bool {
374
+ if let Some ( i) = self . 0 . iter ( ) . position ( |e| e == elem) {
375
+ // Swap the found element to the end, then pop it.
376
+ let len = self . 0 . len ( ) ;
377
+ self . 0 . swap ( i, len - 1 ) ;
378
+ self . 0 . pop ( ) ;
379
+ true
380
+ } else {
381
+ false
382
+ }
383
+ }
384
+
385
+ fn to_dense ( & self , universe_size : usize ) -> IdxSetBuf < T > {
386
+ let mut dense = IdxSetBuf :: new_empty ( universe_size) ;
387
+ for elem in self . 0 . iter ( ) {
388
+ dense. add ( elem) ;
389
+ }
390
+ dense
391
+ }
392
+
393
+ fn iter ( & self ) -> SparseIter < T > {
394
+ SparseIter {
395
+ iter : self . 0 . iter ( ) ,
396
+ }
397
+ }
398
+ }
399
+
400
+ pub struct SparseIter < ' a , T : Idx > {
401
+ iter : slice:: Iter < ' a , T > ,
402
+ }
403
+
404
+ impl < ' a , T : Idx > Iterator for SparseIter < ' a , T > {
405
+ type Item = T ;
406
+
407
+ fn next ( & mut self ) -> Option < T > {
408
+ self . iter . next ( ) . map ( |e| * e)
409
+ }
410
+ }
411
+
412
+ /// Like IdxSetBuf, but with a hybrid representation: sparse when there are few
413
+ /// elements in the set, but dense when there are many. It's especially
414
+ /// efficient for sets that typically have a small number of elements, but a
415
+ /// large `universe_size`, and are cleared frequently.
416
+ #[ derive( Clone , Debug ) ]
417
+ pub enum HybridIdxSetBuf < T : Idx > {
418
+ Sparse ( SparseIdxSetBuf < T > , usize ) ,
419
+ Dense ( IdxSetBuf < T > , usize ) ,
420
+ }
421
+
422
+ impl < T : Idx > HybridIdxSetBuf < T > {
423
+ pub fn new_empty ( universe_size : usize ) -> Self {
424
+ HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , universe_size)
425
+ }
426
+
427
+ fn universe_size ( & mut self ) -> usize {
428
+ match * self {
429
+ HybridIdxSetBuf :: Sparse ( _, size) => size,
430
+ HybridIdxSetBuf :: Dense ( _, size) => size,
431
+ }
432
+ }
433
+
434
+ pub fn clear ( & mut self ) {
435
+ let universe_size = self . universe_size ( ) ;
436
+ * self = HybridIdxSetBuf :: new_empty ( universe_size) ;
437
+ }
438
+
439
+ /// Returns true iff set `self` contains `elem`.
440
+ pub fn contains ( & self , elem : & T ) -> bool {
441
+ match self {
442
+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. contains ( elem) ,
443
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. contains ( elem) ,
444
+ }
445
+ }
446
+
447
+ /// Adds `elem` to the set `self`.
448
+ pub fn add ( & mut self , elem : & T ) -> bool {
449
+ match self {
450
+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. len ( ) < SPARSE_MAX => {
451
+ // The set is sparse and has space for `elem`.
452
+ sparse. add ( elem)
453
+ }
454
+ HybridIdxSetBuf :: Sparse ( sparse, _) if sparse. contains ( elem) => {
455
+ // The set is sparse and does not have space for `elem`, but
456
+ // that doesn't matter because `elem` is already present.
457
+ false
458
+ }
459
+ HybridIdxSetBuf :: Sparse ( _, _) => {
460
+ // The set is sparse and full. Convert to a dense set.
461
+ //
462
+ // FIXME: This code is awful, but I can't work out how else to
463
+ // appease the borrow checker.
464
+ let dummy = HybridIdxSetBuf :: Sparse ( SparseIdxSetBuf :: new ( ) , 0 ) ;
465
+ match mem:: replace ( self , dummy) {
466
+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => {
467
+ let mut dense = sparse. to_dense ( universe_size) ;
468
+ let changed = dense. add ( elem) ;
469
+ assert ! ( changed) ;
470
+ mem:: replace ( self , HybridIdxSetBuf :: Dense ( dense, universe_size) ) ;
471
+ changed
472
+ }
473
+ _ => panic ! ( "impossible" ) ,
474
+ }
475
+ }
476
+
477
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. add ( elem) ,
478
+ }
479
+ }
480
+
481
+ /// Removes `elem` from the set `self`.
482
+ pub fn remove ( & mut self , elem : & T ) -> bool {
483
+ // Note: we currently don't bother going from Dense back to Sparse.
484
+ match self {
485
+ HybridIdxSetBuf :: Sparse ( sparse, _) => sparse. remove ( elem) ,
486
+ HybridIdxSetBuf :: Dense ( dense, _) => dense. remove ( elem) ,
487
+ }
488
+ }
489
+
490
+ /// Converts to a dense set, consuming itself in the process.
491
+ pub fn to_dense ( self ) -> IdxSetBuf < T > {
492
+ match self {
493
+ HybridIdxSetBuf :: Sparse ( sparse, universe_size) => sparse. to_dense ( universe_size) ,
494
+ HybridIdxSetBuf :: Dense ( dense, _) => dense,
495
+ }
496
+ }
497
+
498
+ /// Iteration order is unspecified.
499
+ pub fn iter ( & self ) -> HybridIter < T > {
500
+ match self {
501
+ HybridIdxSetBuf :: Sparse ( sparse, _) => HybridIter :: Sparse ( sparse. iter ( ) ) ,
502
+ HybridIdxSetBuf :: Dense ( dense, _) => HybridIter :: Dense ( dense. iter ( ) ) ,
503
+ }
504
+ }
505
+ }
506
+
507
+ pub enum HybridIter < ' a , T : Idx > {
508
+ Sparse ( SparseIter < ' a , T > ) ,
509
+ Dense ( Iter < ' a , T > ) ,
510
+ }
511
+
512
+ impl < ' a , T : Idx > Iterator for HybridIter < ' a , T > {
513
+ type Item = T ;
514
+
515
+ fn next ( & mut self ) -> Option < T > {
516
+ match self {
517
+ HybridIter :: Sparse ( sparse) => sparse. next ( ) ,
518
+ HybridIter :: Dense ( dense) => dense. next ( ) ,
292
519
}
293
520
}
294
521
}
0 commit comments