@@ -49,6 +49,8 @@ module Data.HashMap.Internal
49
49
, update
50
50
, alter
51
51
, alterF
52
+ , isSubmapOf
53
+ , isSubmapOfBy
52
54
53
55
-- * Combine
54
56
-- ** Union
@@ -148,7 +150,7 @@ import qualified Data.Foldable as Foldable
148
150
import Data.Bifoldable
149
151
#endif
150
152
import qualified Data.List as L
151
- import GHC.Exts ((==#) , build , reallyUnsafePtrEquality #)
153
+ import GHC.Exts ((==#) , build , reallyUnsafePtrEquality #, inline )
152
154
import Prelude hiding (filter , foldl , foldr , lookup , map , null , pred )
153
155
import Text.Read hiding (step )
154
156
@@ -590,12 +592,12 @@ lookup k m = case lookup# k m of
590
592
{-# INLINE lookup #-}
591
593
592
594
lookup # :: (Eq k , Hashable k ) => k -> HashMap k v -> (# (# # ) | v # )
593
- lookup # k m = lookupCont (\ _ -> (# (# # ) | # )) (\ v _i -> (# | v # )) (hash k) k m
595
+ lookup # k m = lookupCont (\ _ -> (# (# # ) | # )) (\ v _i -> (# | v # )) (hash k) k 0 m
594
596
{-# INLINABLE lookup# #-}
595
597
596
598
#else
597
599
598
- lookup k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) (hash k) k m
600
+ lookup k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) (hash k) k 0 m
599
601
{-# INLINABLE lookup #-}
600
602
#endif
601
603
@@ -614,7 +616,7 @@ lookup' h k m = case lookupRecordCollision# h k m of
614
616
(# | (# a, _i # ) # ) -> Just a
615
617
{-# INLINE lookup' #-}
616
618
#else
617
- lookup' h k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) h k m
619
+ lookup' h k m = lookupCont (\ _ -> Nothing ) (\ v _i -> Just v) h k 0 m
618
620
{-# INLINABLE lookup' #-}
619
621
#endif
620
622
@@ -649,13 +651,13 @@ lookupRecordCollision h k m = case lookupRecordCollision# h k m of
649
651
-- into lookupCont because inlining takes care of that.
650
652
lookupRecordCollision# :: Eq k => Hash -> k -> HashMap k v -> (# (# # ) | (# v , Int # # ) # )
651
653
lookupRecordCollision# h k m =
652
- lookupCont (\ _ -> (# (# # ) | # )) (\ v (I # i) -> (# | (# v, i # ) # )) h k m
654
+ lookupCont (\ _ -> (# (# # ) | # )) (\ v (I # i) -> (# | (# v, i # ) # )) h k 0 m
653
655
-- INLINABLE to specialize to the Eq instance.
654
656
{-# INLINABLE lookupRecordCollision# #-}
655
657
656
658
#else /* GHC < 8.2 so there are no unboxed sums */
657
659
658
- lookupRecordCollision h k m = lookupCont (\ _ -> Absent ) Present h k m
660
+ lookupRecordCollision h k m = lookupCont (\ _ -> Absent ) Present h k 0 m
659
661
{-# INLINABLE lookupRecordCollision #-}
660
662
#endif
661
663
@@ -667,6 +669,10 @@ lookupRecordCollision h k m = lookupCont (\_ -> Absent) Present h k m
667
669
-- so we can be representation-polymorphic in the result type. Since
668
670
-- this whole thing is always inlined, we don't have to worry about
669
671
-- any extra CPS overhead.
672
+ --
673
+ -- The @Int@ argument is the offset of the subkey in the hash. When looking up
674
+ -- keys at the top-level of a hashmap, the offset should be 0. When looking up
675
+ -- keys at level @n@ of a hashmap, the offset should be @n * bitsPerSubkey@.
670
676
lookupCont ::
671
677
#if __GLASGOW_HASKELL__ >= 802
672
678
forall rep (r :: TYPE rep ) k v.
@@ -677,8 +683,10 @@ lookupCont ::
677
683
=> ((# # ) -> r) -- Absent continuation
678
684
-> (v -> Int -> r) -- Present continuation
679
685
-> Hash -- The hash of the key
680
- -> k -> HashMap k v -> r
681
- lookupCont absent present ! h0 ! k0 ! m0 = go h0 k0 0 m0
686
+ -> k
687
+ -> Int -- The offset of the subkey in the hash.
688
+ -> HashMap k v -> r
689
+ lookupCont absent present ! h0 ! k0 ! s0 ! m0 = go h0 k0 s0 m0
682
690
where
683
691
go :: Eq k => Hash -> k -> Int -> HashMap k v -> r
684
692
go ! _ ! _ ! _ Empty = absent (# # )
@@ -1409,6 +1417,116 @@ alterFEager f !k m = (<$> f mv) $ \fres ->
1409
1417
{-# INLINABLE alterFEager #-}
1410
1418
#endif
1411
1419
1420
+ -- | /O(n*log m)/ Inclusion of maps. A map is included in another map if the keys
1421
+ -- are subsets and the corresponding values are equal:
1422
+ --
1423
+ -- > isSubmapOf m1 m2 = keys m1 `isSubsetOf` keys m2 &&
1424
+ -- > and [ v1 == v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ]
1425
+ --
1426
+ -- ==== __Examples__
1427
+ --
1428
+ -- >>> fromList [(1,'a')] `isSubmapOf` fromList [(1,'a'),(2,'b')]
1429
+ -- True
1430
+ --
1431
+ -- >>> fromList [(1,'a'),(2,'b')] `isSubmapOf` fromList [(1,'a')]
1432
+ -- False
1433
+ isSubmapOf :: (Eq k , Hashable k , Eq v ) => HashMap k v -> HashMap k v -> Bool
1434
+ isSubmapOf = (inline isSubmapOfBy) (==)
1435
+ {-# INLINABLE isSubmapOf #-}
1436
+
1437
+ -- | /O(n*log m)/ Inclusion of maps with value comparison. A map is included in
1438
+ -- another map if the keys are subsets and if the comparison function is true
1439
+ -- for the corresponding values:
1440
+ --
1441
+ -- > isSubmapOfBy cmpV m1 m2 = keys m1 `isSubsetOf` keys m2 &&
1442
+ -- > and [ v1 `cmpV` v2 | (k1,v1) <- toList m1; let v2 = m2 ! k1 ]
1443
+ --
1444
+ -- ==== __Examples__
1445
+ --
1446
+ -- >>> isSubmapOfBy (<=) (fromList [(1,'a')]) (fromList [(1,'b'),(2,'c')])
1447
+ -- True
1448
+ --
1449
+ -- >>> isSubmapOfBy (<=) (fromList [(1,'b')]) (fromList [(1,'a'),(2,'c')])
1450
+ -- False
1451
+ isSubmapOfBy :: (Eq k , Hashable k ) => (v1 -> v2 -> Bool ) -> HashMap k v1 -> HashMap k v2 -> Bool
1452
+ -- For maps without collisions the complexity is O(n*log m), where n is the size
1453
+ -- of m1 and m the size of m2: the inclusion operation visits every leaf in m1 at least once.
1454
+ -- For each leaf in m1, it looks up the key in m2.
1455
+ --
1456
+ -- The worst case complexity is O(n*m). The worst case is when both hashmaps m1
1457
+ -- and m2 are collision nodes for the same hash. Since collision nodes are
1458
+ -- unsorted arrays, it requires for every key in m1 a linear search to to find a
1459
+ -- matching key in m2, hence O(n*m).
1460
+ isSubmapOfBy comp ! m1 ! m2 = go 0 m1 m2
1461
+ where
1462
+ -- An empty map is always a submap of any other map.
1463
+ go _ Empty _ = True
1464
+
1465
+ -- If the second map is empty and the first is not, it cannot be a submap.
1466
+ go _ _ Empty = False
1467
+
1468
+ -- If the first map contains only one entry, lookup the key in the second map.
1469
+ go s (Leaf h1 (L k1 v1)) t2 = lookupCont (\ _ -> False ) (\ v2 _ -> comp v1 v2) h1 k1 s t2
1470
+
1471
+ -- In this case, we need to check that for each x in ls1, there is a y in
1472
+ -- ls2 such that x `comp` y. This is the worst case complexity-wise since it
1473
+ -- requires a O(m*n) check.
1474
+ go _ (Collision h1 ls1) (Collision h2 ls2) =
1475
+ h1 == h2 && subsetArray comp ls1 ls2
1476
+
1477
+ -- In this case, we only need to check the entries in ls2 with the hash h1.
1478
+ go s t1@ (Collision h1 _) (BitmapIndexed b ls2)
1479
+ | b .&. m == 0 = False
1480
+ | otherwise =
1481
+ go (s+ bitsPerSubkey) t1 (A. index ls2 (sparseIndex b m))
1482
+ where m = mask h1 s
1483
+
1484
+ -- Similar to the previous case we need to traverse l2 at the index for the hash h1.
1485
+ go s t1@ (Collision h1 _) (Full ls2) =
1486
+ go (s+ bitsPerSubkey) t1 (A. index ls2 (index h1 s))
1487
+
1488
+ -- In cases where the first and second map are BitmapIndexed or Full,
1489
+ -- traverse down the tree at the appropriate indices.
1490
+ go s (BitmapIndexed b1 ls1) (BitmapIndexed b2 ls2) =
1491
+ submapBitmapIndexed (go (s+ bitsPerSubkey)) b1 ls1 b2 ls2
1492
+ go s (BitmapIndexed b1 ls1) (Full ls2) =
1493
+ submapBitmapIndexed (go (s+ bitsPerSubkey)) b1 ls1 fullNodeMask ls2
1494
+ go s (Full ls1) (Full ls2) =
1495
+ submapBitmapIndexed (go (s+ bitsPerSubkey)) fullNodeMask ls1 fullNodeMask ls2
1496
+
1497
+ -- Collision and Full nodes always contain at least two entries. Hence it
1498
+ -- cannot be a map of a leaf.
1499
+ go _ (Collision {}) (Leaf {}) = False
1500
+ go _ (BitmapIndexed {}) (Leaf {}) = False
1501
+ go _ (Full {}) (Leaf {}) = False
1502
+ go _ (BitmapIndexed {}) (Collision {}) = False
1503
+ go _ (Full {}) (Collision {}) = False
1504
+ go _ (Full {}) (BitmapIndexed {}) = False
1505
+ {-# INLINABLE isSubmapOfBy #-}
1506
+
1507
+ -- | /O(min n m))/ Checks if a bitmap indexed node is a submap of another.
1508
+ submapBitmapIndexed :: (HashMap k v1 -> HashMap k v2 -> Bool ) -> Bitmap -> A. Array (HashMap k v1 ) -> Bitmap -> A. Array (HashMap k v2 ) -> Bool
1509
+ submapBitmapIndexed comp ! b1 ! ary1 ! b2 ! ary2 = subsetBitmaps && go 0 0 (b1Orb2 .&. negate b1Orb2)
1510
+ where
1511
+ go :: Int -> Int -> Bitmap -> Bool
1512
+ go ! i ! j ! m
1513
+ | m > b1Orb2 = True
1514
+
1515
+ -- In case a key is both in ary1 and ary2, check ary1[i] <= ary2[j] and
1516
+ -- increment the indices i and j.
1517
+ | b1Andb2 .&. m /= 0 = comp (A. index ary1 i) (A. index ary2 j) &&
1518
+ go (i+ 1 ) (j+ 1 ) (m `unsafeShiftL` 1 )
1519
+
1520
+ -- In case a key occurs in ary1, but not ary2, only increment index j.
1521
+ | b2 .&. m /= 0 = go i (j+ 1 ) (m `unsafeShiftL` 1 )
1522
+
1523
+ -- In case a key neither occurs in ary1 nor ary2, continue.
1524
+ | otherwise = go i j (m `unsafeShiftL` 1 )
1525
+
1526
+ b1Andb2 = b1 .&. b2
1527
+ b1Orb2 = b1 .|. b2
1528
+ subsetBitmaps = b1Orb2 == b2
1529
+ {-# INLINABLE submapBitmapIndexed #-}
1412
1530
1413
1531
------------------------------------------------------------------------
1414
1532
-- * Combine
@@ -2076,6 +2194,13 @@ updateOrConcatWithKey f ary1 ary2 = A.run $ do
2076
2194
return mary
2077
2195
{-# INLINABLE updateOrConcatWithKey #-}
2078
2196
2197
+ -- | /O(n*m)/ Check if the first array is a subset of the second array.
2198
+ subsetArray :: Eq k => (v1 -> v2 -> Bool ) -> A. Array (Leaf k v1 ) -> A. Array (Leaf k v2 ) -> Bool
2199
+ subsetArray cmpV ary1 ary2 = A. length ary1 <= A. length ary2 && A. all inAry2 ary1
2200
+ where
2201
+ inAry2 (L k1 v1) = lookupInArrayCont (\ _ -> False ) (\ v2 _ -> cmpV v1 v2) k1 ary2
2202
+ {-# INLINE inAry2 #-}
2203
+
2079
2204
------------------------------------------------------------------------
2080
2205
-- Manually unrolled loops
2081
2206
0 commit comments