@@ -177,6 +177,51 @@ impl DefaultResizePolicy {
177
177
// element.
178
178
//
179
179
// FIXME(Gankro, pczarn): review the proof and put it all in a separate README.md
180
+ //
181
+ // Adaptive early resizing
182
+ // ----------------------
183
+ // To protect against degenerate performance scenarios (including DOS attacks),
184
+ // the implementation includes an adaptive behavior that can resize the map
185
+ // early (before its capacity is exceeded) when suspiciously long probe or
186
+ // forward shift sequences are encountered.
187
+ //
188
+ // With this algorithm in place it would be possible to turn a CPU attack into
189
+ // a memory attack due to the aggressive resizing. To prevent that, the
190
+ // adaptive behavior only triggers when the map occupancy is half the maximum occupancy.
191
+ // This reduces the effectiveness of the algorithm but also makes it completely safe.
192
+ //
193
+ // The previous safety measure also prevents degenerate interactions with
194
+ // really bad quality hash algorithms that can make normal inputs look like a
195
+ // DOS attack.
196
+ //
197
+ const DISPLACEMENT_THRESHOLD : usize = 128 ;
198
+ const FORWARD_SHIFT_THRESHOLD : usize = 512 ;
199
+ //
200
+ // The thresholds of 128 and 512 are chosen to minimize the chance of exceeding them.
201
+ // In particular, we want that chance to be less than 10^-8 with a load of 90%.
202
+ // For displacement, the smallest constant that fits our needs is 90,
203
+ // so we round that up to 128. For the number of forward-shifted buckets,
204
+ // we choose k=512. Keep in mind that the run length is a sum of the displacement and
205
+ // the number of forward-shifted buckets, so its threshold is 128+512=640.
206
+ // Even though the probability of having a run length of more than 640 buckets may be
207
+ // higher than the probability we want, it should be low enough.
208
+ //
209
+ // At a load factor of α, the odds of finding the target bucket after exactly n
210
+ // unsuccessful probes[1] are
211
+ //
212
+ // Pr_α{displacement = n} =
213
+ // (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1))
214
+ //
215
+ // We use this formula to find the probability of triggering the adaptive behavior
216
+ //
217
+ // Pr_0.909{displacement > 128} = 1.601 * 10^-11
218
+ //
219
+ // FIXME: Extend with math for shift threshold in [2]
220
+ //
221
+ // 1. Alfredo Viola (2005). Distributional analysis of Robin Hood linear probing
222
+ // hashing with buckets.
223
+ // 2. http://www.cs.tau.ac.il/~zwick/Adv-Alg-2015/Linear-Probing.pdf
224
+
180
225
181
226
/// A hash map implementation which uses linear probing with Robin Hood bucket
182
227
/// stealing.
@@ -360,6 +405,8 @@ pub struct HashMap<K, V, S = RandomState> {
360
405
table : RawTable < K , V > ,
361
406
362
407
resize_policy : DefaultResizePolicy ,
408
+
409
+ long_probes : bool ,
363
410
}
364
411
365
412
/// Search for a pre-hashed key.
@@ -385,7 +432,7 @@ fn search_hashed<K, V, M, F>(table: M, hash: SafeHash, mut is_match: F) -> Inter
385
432
// Found a hole!
386
433
return InternalEntry :: Vacant {
387
434
hash : hash,
388
- elem : NoElem ( bucket) ,
435
+ elem : NoElem ( bucket, displacement ) ,
389
436
} ;
390
437
}
391
438
Full ( bucket) => bucket,
@@ -447,15 +494,15 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>,
447
494
mut hash : SafeHash ,
448
495
mut key : K ,
449
496
mut val : V )
450
- -> & ' a mut V {
451
- let starting_index = bucket. index ( ) ;
497
+ -> ( usize , & ' a mut V ) {
498
+ let start_index = bucket. index ( ) ;
452
499
let size = bucket. table ( ) . size ( ) ;
453
500
// Save the *starting point*.
454
501
let mut bucket = bucket. stash ( ) ;
455
502
// There can be at most `size - dib` buckets to displace, because
456
503
// in the worst case, there are `size` elements and we already are
457
504
// `displacement` buckets away from the initial one.
458
- let idx_end = starting_index + size - bucket. displacement ( ) ;
505
+ let idx_end = start_index + size - bucket. displacement ( ) ;
459
506
460
507
loop {
461
508
let ( old_hash, old_key, old_val) = bucket. replace ( hash, key, val) ;
@@ -472,14 +519,15 @@ fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>,
472
519
Empty ( bucket) => {
473
520
// Found a hole!
474
521
let bucket = bucket. put ( hash, key, val) ;
522
+ let end_index = bucket. index ( ) ;
475
523
// Now that it's stolen, just read the value's pointer
476
524
// right out of the table! Go back to the *starting point*.
477
525
//
478
526
// This use of `into_table` is misleading. It turns the
479
527
// bucket, which is a FullBucket on top of a
480
528
// FullBucketMut, into just one FullBucketMut. The "table"
481
529
// refers to the inner FullBucketMut in this context.
482
- return bucket. into_table ( ) . into_mut_refs ( ) . 1 ;
530
+ return ( end_index - start_index , bucket. into_table ( ) . into_mut_refs ( ) . 1 ) ;
483
531
}
484
532
Full ( bucket) => bucket,
485
533
} ;
@@ -617,6 +665,7 @@ impl<K, V, S> HashMap<K, V, S>
617
665
hash_builder : hash_builder,
618
666
resize_policy : DefaultResizePolicy :: new ( ) ,
619
667
table : RawTable :: new ( 0 ) ,
668
+ long_probes : false ,
620
669
}
621
670
}
622
671
@@ -649,6 +698,7 @@ impl<K, V, S> HashMap<K, V, S>
649
698
hash_builder : hash_builder,
650
699
resize_policy : resize_policy,
651
700
table : RawTable :: new ( raw_cap) ,
701
+ long_probes : false ,
652
702
}
653
703
}
654
704
@@ -706,6 +756,11 @@ impl<K, V, S> HashMap<K, V, S>
706
756
let min_cap = self . len ( ) . checked_add ( additional) . expect ( "reserve overflow" ) ;
707
757
let raw_cap = self . resize_policy . raw_capacity ( min_cap) ;
708
758
self . resize ( raw_cap) ;
759
+ } else if self . long_probes && remaining <= self . len ( ) {
760
+ // Probe sequence is too long and table is half full,
761
+ // resize early to reduce probing length.
762
+ let new_capacity = self . table . capacity ( ) * 2 ;
763
+ self . resize ( new_capacity) ;
709
764
}
710
765
}
711
766
@@ -718,10 +773,11 @@ impl<K, V, S> HashMap<K, V, S>
718
773
assert ! ( self . table. size( ) <= new_raw_cap) ;
719
774
assert ! ( new_raw_cap. is_power_of_two( ) || new_raw_cap == 0 ) ;
720
775
776
+ self . long_probes = false ;
721
777
let mut old_table = replace ( & mut self . table , RawTable :: new ( new_raw_cap) ) ;
722
778
let old_size = old_table. size ( ) ;
723
779
724
- if old_table. capacity ( ) == 0 || old_table . size ( ) == 0 {
780
+ if old_table. size ( ) == 0 {
725
781
return ;
726
782
}
727
783
@@ -798,7 +854,8 @@ impl<K, V, S> HashMap<K, V, S>
798
854
/// If the key already exists, the hashtable will be returned untouched
799
855
/// and a reference to the existing element will be returned.
800
856
fn insert_hashed_nocheck ( & mut self , hash : SafeHash , k : K , v : V ) -> Option < V > {
801
- let entry = search_hashed ( & mut self . table , hash, |key| * key == k) . into_entry ( k) ;
857
+ let entry = search_hashed ( & mut self . table , hash, |key| * key == k)
858
+ . into_entry ( k, & mut self . long_probes ) ;
802
859
match entry {
803
860
Some ( Occupied ( mut elem) ) => Some ( elem. insert ( v) ) ,
804
861
Some ( Vacant ( elem) ) => {
@@ -953,7 +1010,9 @@ impl<K, V, S> HashMap<K, V, S>
953
1010
pub fn entry ( & mut self , key : K ) -> Entry < K , V > {
954
1011
// Gotta resize now.
955
1012
self . reserve ( 1 ) ;
956
- self . search_mut ( & key) . into_entry ( key) . expect ( "unreachable" )
1013
+ let hash = self . make_hash ( & key) ;
1014
+ search_hashed ( & mut self . table , hash, |q| q. eq ( & key) )
1015
+ . into_entry ( key, & mut self . long_probes ) . expect ( "unreachable" )
957
1016
}
958
1017
959
1018
/// Returns the number of elements in the map.
@@ -1407,7 +1466,7 @@ impl<K, V, M> InternalEntry<K, V, M> {
1407
1466
1408
1467
impl < ' a , K , V > InternalEntry < K , V , & ' a mut RawTable < K , V > > {
1409
1468
#[ inline]
1410
- fn into_entry ( self , key : K ) -> Option < Entry < ' a , K , V > > {
1469
+ fn into_entry ( self , key : K , long_probes : & ' a mut bool ) -> Option < Entry < ' a , K , V > > {
1411
1470
match self {
1412
1471
InternalEntry :: Occupied { elem } => {
1413
1472
Some ( Occupied ( OccupiedEntry {
@@ -1420,6 +1479,7 @@ impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
1420
1479
hash : hash,
1421
1480
key : key,
1422
1481
elem : elem,
1482
+ long_probes : long_probes,
1423
1483
} ) )
1424
1484
}
1425
1485
InternalEntry :: TableIsEmpty => None ,
@@ -1492,6 +1552,7 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
1492
1552
hash : SafeHash ,
1493
1553
key : K ,
1494
1554
elem : VacantEntryState < K , V , & ' a mut RawTable < K , V > > ,
1555
+ long_probes : & ' a mut bool ,
1495
1556
}
1496
1557
1497
1558
#[ stable( feature= "debug_hash_map" , since = "1.12.0" ) ]
@@ -1509,7 +1570,7 @@ enum VacantEntryState<K, V, M> {
1509
1570
/// and will kick the current one out on insertion.
1510
1571
NeqElem ( FullBucket < K , V , M > , usize ) ,
1511
1572
/// The index is genuinely vacant.
1512
- NoElem ( EmptyBucket < K , V , M > ) ,
1573
+ NoElem ( EmptyBucket < K , V , M > , usize ) ,
1513
1574
}
1514
1575
1515
1576
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
@@ -2066,8 +2127,20 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
2066
2127
#[ stable( feature = "rust1" , since = "1.0.0" ) ]
2067
2128
pub fn insert ( self , value : V ) -> & ' a mut V {
2068
2129
match self . elem {
2069
- NeqElem ( bucket, disp) => robin_hood ( bucket, disp, self . hash , self . key , value) ,
2070
- NoElem ( bucket) => bucket. put ( self . hash , self . key , value) . into_mut_refs ( ) . 1 ,
2130
+ NeqElem ( bucket, disp) => {
2131
+ let ( shift, v_ref) = robin_hood ( bucket, disp, self . hash , self . key , value) ;
2132
+ if disp >= DISPLACEMENT_THRESHOLD || shift >= FORWARD_SHIFT_THRESHOLD {
2133
+ * self . long_probes = true ;
2134
+ }
2135
+ v_ref
2136
+ } ,
2137
+ NoElem ( bucket, disp) => {
2138
+ if disp >= DISPLACEMENT_THRESHOLD {
2139
+ * self . long_probes = true ;
2140
+ }
2141
+ let bucket = bucket. put ( self . hash , self . key , value) ;
2142
+ bucket. into_mut_refs ( ) . 1
2143
+ } ,
2071
2144
}
2072
2145
}
2073
2146
}
@@ -3192,4 +3265,24 @@ mod test_map {
3192
3265
assert_eq ! ( map[ & 4 ] , 40 ) ;
3193
3266
assert_eq ! ( map[ & 6 ] , 60 ) ;
3194
3267
}
3268
+
3269
+ #[ test]
3270
+ fn test_adaptive ( ) {
3271
+ const TEST_LEN : usize = 5000 ;
3272
+ // by cloning we get maps with the same hasher seed
3273
+ let mut first = HashMap :: new ( ) ;
3274
+ let mut second = first. clone ( ) ;
3275
+ first. extend ( ( 0 ..TEST_LEN ) . map ( |i| ( i, i) ) ) ;
3276
+ second. extend ( ( TEST_LEN ..TEST_LEN * 2 ) . map ( |i| ( i, i) ) ) ;
3277
+
3278
+ for ( & k, & v) in & second {
3279
+ let prev_cap = first. capacity ( ) ;
3280
+ let expect_grow = first. len ( ) == prev_cap;
3281
+ first. insert ( k, v) ;
3282
+ if !expect_grow && first. capacity ( ) != prev_cap {
3283
+ return ;
3284
+ }
3285
+ }
3286
+ panic ! ( "Adaptive early resize failed" ) ;
3287
+ }
3195
3288
}
0 commit comments