Skip to content

Commit 70fdaa7

Browse files
authored
Add invariant checking (#444)
Context: #366
1 parent 163a3ec commit 70fdaa7

File tree

4 files changed

+207
-13
lines changed

4 files changed

+207
-13
lines changed

Data/HashMap/Internal.hs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,10 @@ module Data.HashMap.Internal
117117
, mask
118118
, index
119119
, bitsPerSubkey
120+
, maxChildren
121+
, isLeafOrCollision
120122
, fullBitmap
123+
, subkeyMask
121124
, nextShift
122125
, sparseIndex
123126
, two
@@ -210,10 +213,42 @@ instance NFData2 Leaf where
210213
-- each key can map to at most one value.
211214
data HashMap k v
212215
= Empty
216+
-- ^ Invariants:
217+
--
218+
-- * 'Empty' is not a valid sub-node. It can only appear at the root. (INV1)
213219
| BitmapIndexed !Bitmap !(A.Array (HashMap k v))
220+
-- ^ Invariants:
221+
--
222+
-- * Only the lower @maxChildren@ bits of the 'Bitmap' may be set. The
223+
-- remaining upper bits must be 0. (INV2)
224+
-- * The array of a 'BitmapIndexed' node stores at least 1 and at most
225+
-- @'maxChildren' - 1@ sub-nodes. (INV3)
226+
-- * The number of sub-nodes is equal to the number of 1-bits in its
227+
-- 'Bitmap'. (INV4)
228+
-- * If a 'BitmapIndexed' node has only one sub-node, this sub-node must
229+
-- be a 'BitmapIndexed' or a 'Full' node. (INV5)
214230
| Leaf !Hash !(Leaf k v)
231+
-- ^ Invariants:
232+
--
233+
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
234+
-- compatible with its 'Hash'. (INV6)
235+
-- (TODO: Document this properly (#425))
236+
-- * The 'Hash' of a 'Leaf' node must be the 'hash' of its key. (INV7)
215237
| Full !(A.Array (HashMap k v))
238+
-- ^ Invariants:
239+
--
240+
-- * The array of a 'Full' node stores exactly 'maxChildren' sub-nodes. (INV8)
216241
| Collision !Hash !(A.Array (Leaf k v))
242+
-- ^ Invariants:
243+
--
244+
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
245+
-- compatible with its 'Hash'. (INV6)
246+
-- (TODO: Document this properly (#425))
247+
-- * The array of a 'Collision' node must contain at least two sub-nodes. (INV9)
248+
-- * The 'hash' of each key in a 'Collision' node must be the one stored in
249+
-- the node. (INV7)
250+
-- * No two keys stored in a 'Collision' can be equal according to their
251+
-- 'Eq' instance. (INV10)
217252

218253
type role HashMap nominal representational
219254

Data/HashMap/Internal/Debug.hs

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
{-# LANGUAGE CPP #-}
2+
{-# LANGUAGE TypeApplications #-}
3+
4+
-- | = WARNING
5+
--
6+
-- This module is considered __internal__.
7+
--
8+
-- The Package Versioning Policy __does not apply__.
9+
--
10+
-- The contents of this module may change __in any way whatsoever__
11+
-- and __without any warning__ between minor versions of this package.
12+
--
13+
-- Authors importing this module are expected to track development
14+
-- closely.
15+
--
16+
-- = Description
17+
--
18+
-- Debugging utilities for 'HashMap's.
19+
20+
module Data.HashMap.Internal.Debug
21+
( valid
22+
, Validity(..)
23+
, Error(..)
24+
, SubHash
25+
, SubHashPath
26+
) where
27+
28+
import Data.Bits (complement, countTrailingZeros, popCount, shiftL,
29+
unsafeShiftL, (.&.), (.|.))
30+
import Data.Hashable (Hashable)
31+
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..),
32+
bitsPerSubkey, fullBitmap, hash,
33+
isLeafOrCollision, maxChildren, sparseIndex)
34+
import Data.Semigroup (Sum (..))
35+
36+
import qualified Data.HashMap.Internal.Array as A
37+
38+
39+
#if !MIN_VERSION_base(4,11,0)
40+
import Data.Semigroup (Semigroup (..))
41+
#endif
42+
43+
data Validity k = Invalid (Error k) SubHashPath | Valid
44+
deriving (Eq, Show)
45+
46+
instance Semigroup (Validity k) where
47+
Valid <> y = y
48+
x <> _ = x
49+
50+
instance Monoid (Validity k) where
51+
mempty = Valid
52+
mappend = (<>)
53+
54+
-- | An error corresponding to a broken invariant.
55+
--
56+
-- See 'HashMap' for the documentation of the invariants.
57+
data Error k
58+
= INV1_internal_Empty
59+
| INV2_Bitmap_unexpected_1_bits !Bitmap
60+
| INV3_bad_BitmapIndexed_size !Int
61+
| INV4_bitmap_array_size_mismatch !Bitmap !Int
62+
| INV5_BitmapIndexed_invalid_single_subtree
63+
| INV6_misplaced_hash !Hash
64+
| INV7_key_hash_mismatch k !Hash
65+
| INV8_bad_Full_size !Int
66+
| INV9_Collision_size !Int
67+
| INV10_Collision_duplicate_key k !Hash
68+
deriving (Eq, Show)
69+
70+
-- TODO: Name this 'Index'?!
71+
-- (https://github.com/haskell-unordered-containers/unordered-containers/issues/425)
72+
-- | A part of a 'Hash' with 'bitsPerSubkey' bits.
73+
type SubHash = Word
74+
75+
data SubHashPath = SubHashPath
76+
{ partialHash :: !Word
77+
-- ^ The bits we already know, starting from the lower bits.
78+
-- The unknown upper bits are @0@.
79+
, lengthInBits :: !Int
80+
-- ^ The number of bits known.
81+
} deriving (Eq, Show)
82+
83+
initialSubHashPath :: SubHashPath
84+
initialSubHashPath = SubHashPath 0 0
85+
86+
addSubHash :: SubHashPath -> SubHash -> SubHashPath
87+
addSubHash (SubHashPath ph l) sh =
88+
SubHashPath (ph .|. (sh `unsafeShiftL` l)) (l + bitsPerSubkey)
89+
90+
hashMatchesSubHashPath :: SubHashPath -> Hash -> Bool
91+
hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph
92+
where
93+
-- Note: This needs to use `shiftL` instead of `unsafeShiftL` because
94+
-- @l'@ may be greater than 32/64 at the deepest level.
95+
maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l')
96+
97+
valid :: Hashable k => HashMap k v -> Validity k
98+
valid Empty = Valid
99+
valid t = validInternal initialSubHashPath t
100+
where
101+
validInternal p Empty = Invalid INV1_internal_Empty p
102+
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
103+
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
104+
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
105+
validInternal p (Full ary) = validFull p ary
106+
107+
validHash p h | hashMatchesSubHashPath p h = Valid
108+
| otherwise = Invalid (INV6_misplaced_hash h) p
109+
110+
validLeaf p h (L k _) | hash k == h = Valid
111+
| otherwise = Invalid (INV7_key_hash_mismatch k h) p
112+
113+
validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
114+
where
115+
n = A.length ary
116+
validCollisionSize | n < 2 = Invalid (INV9_Collision_size n) p
117+
| otherwise = Valid
118+
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
119+
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
120+
| otherwise = Invalid (INV10_Collision_duplicate_key k h) p
121+
122+
validBitmapIndexed p b ary = validBitmap <> validArraySize <> validSubTrees p b ary
123+
where
124+
validBitmap | b .&. complement fullBitmap == 0 = Valid
125+
| otherwise = Invalid (INV2_Bitmap_unexpected_1_bits b) p
126+
n = A.length ary
127+
validArraySize | n < 1 || n >= maxChildren = Invalid (INV3_bad_BitmapIndexed_size n) p
128+
| popCount b == n = Valid
129+
| otherwise = Invalid (INV4_bitmap_array_size_mismatch b n) p
130+
131+
validSubTrees p b ary
132+
| A.length ary == 1
133+
, isLeafOrCollision (A.index ary 0)
134+
= Invalid INV5_BitmapIndexed_invalid_single_subtree p
135+
| otherwise = go b
136+
where
137+
go 0 = Valid
138+
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
139+
where
140+
c = countTrailingZeros b'
141+
m = 1 `unsafeShiftL` c
142+
i = sparseIndex b m
143+
b'' = b' .&. complement m
144+
145+
validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
146+
where
147+
n = A.length ary
148+
validArraySize | n == maxChildren = Valid
149+
| otherwise = Invalid (INV8_bad_Full_size n) p

tests/Properties/HashMapLazy.hs

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,21 @@
1414

1515
module MODULE_NAME (tests) where
1616

17-
import Control.Applicative (Const (..))
18-
import Control.Monad (guard)
17+
import Control.Applicative (Const (..))
18+
import Control.Monad (guard)
1919
import Data.Bifoldable
20-
import Data.Function (on)
21-
import Data.Functor.Identity (Identity (..))
22-
import Data.Hashable (Hashable (hashWithSalt))
23-
import Data.Ord (comparing)
24-
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
25-
property, (===), (==>))
26-
import Test.QuickCheck.Function (Fun, apply)
27-
import Test.QuickCheck.Poly (A, B)
28-
import Test.Tasty (TestTree, testGroup)
29-
import Test.Tasty.QuickCheck (testProperty)
30-
import Util.Key (Key, keyToInt)
20+
import Data.Function (on)
21+
import Data.Functor.Identity (Identity (..))
22+
import Data.Hashable (Hashable (hashWithSalt))
23+
import Data.HashMap.Internal.Debug (Validity (..), valid)
24+
import Data.Ord (comparing)
25+
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
26+
property, (===), (==>))
27+
import Test.QuickCheck.Function (Fun, apply)
28+
import Test.QuickCheck.Poly (A, B)
29+
import Test.Tasty (TestTree, testGroup)
30+
import Test.Tasty.QuickCheck (testProperty)
31+
import Util.Key (Key, keyToInt)
3132

3233
import qualified Data.Foldable as Foldable
3334
import qualified Data.List as List
@@ -319,6 +320,9 @@ pIntersection xs ys =
319320
`eq_` HM.intersection (HM.fromList xs)
320321
$ ys
321322

323+
pIntersectionValid :: HashMap Key () -> HashMap Key () -> Property
324+
pIntersectionValid x y = valid (HM.intersection x y) === Valid
325+
322326
pIntersectionWith :: [(Key, Int)] -> [(Key, Int)] -> Property
323327
pIntersectionWith xs ys = M.intersectionWith (-) (M.fromList xs) `eq_`
324328
HM.intersectionWith (-) (HM.fromList xs) $ ys
@@ -421,6 +425,9 @@ instance Hashable a => Hashable (Magma a) where
421425
pFromList :: [(Key, Int)] -> Property
422426
pFromList = id `eq_` id
423427

428+
pFromListValid :: [(Key, ())] -> Property
429+
pFromListValid xs = valid (HM.fromList xs) === Valid
430+
424431
pFromListWith :: [(Key, Int)] -> Property
425432
pFromListWith kvs = (M.toAscList $ M.fromListWith Op kvsM) ===
426433
(toAscList $ HM.fromListWith Op kvsM)
@@ -529,6 +536,7 @@ tests =
529536
[ testProperty "difference" pDifference
530537
, testProperty "differenceWith" pDifferenceWith
531538
, testProperty "intersection" pIntersection
539+
, testProperty "intersection produces valid HashMap" pIntersectionValid
532540
, testProperty "intersectionWith" pIntersectionWith
533541
, testProperty "intersectionWithKey" pIntersectionWithKey
534542
]
@@ -544,6 +552,7 @@ tests =
544552
[ testProperty "elems" pElems
545553
, testProperty "keys" pKeys
546554
, testProperty "fromList" pFromList
555+
, testProperty "fromList.valid" pFromListValid
547556
, testProperty "fromListWith" pFromListWith
548557
, testProperty "fromListWithKey" pFromListWithKey
549558
, testProperty "toList" pToList

unordered-containers.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ library
4545
exposed-modules:
4646
Data.HashMap.Internal
4747
Data.HashMap.Internal.Array
48+
Data.HashMap.Internal.Debug
4849
Data.HashMap.Internal.List
4950
Data.HashMap.Internal.Strict
5051
Data.HashMap.Lazy

0 commit comments

Comments
 (0)