Skip to content

Add invariant checking #444

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
May 3, 2022
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Data/HashMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ module Data.HashMap.Internal
, mask
, index
, bitsPerSubkey
, maxChildren
, isLeafOrCollision
, fullBitmap
, subkeyMask
, nextShift
, sparseIndex
, two
Expand Down Expand Up @@ -210,10 +213,40 @@ instance NFData2 Leaf where
-- each key can map to at most one value.
data HashMap k v
= Empty
-- ^ Invariants:
--
-- * 'Empty' is not a valid sub-node. It can only appear at the root. (INV1)
| BitmapIndexed !Bitmap !(A.Array (HashMap k v))
-- ^ Invariants:
--
-- * The array of a 'BitmapIndexed' node stores at least 1 and at most
-- @'maxChildren' - 1@ sub-nodes. (INV2)
-- * The number of sub-nodes is equal to the number of 1-bits in its
-- 'Bitmap'. (INV3)
-- * If a 'BitmapIndexed' node has only one sub-node, this sub-node must
-- be a 'BitmapIndexed' or a 'Full' node. (INV4)
| Leaf !Hash !(Leaf k v)
-- ^ Invariants:
--
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
-- compatible with its 'Hash'. (INV5)
-- (TODO: Document this properly (#425))
-- * The 'Hash' of a 'Leaf' node must be the 'hash' of its key. (INV6)
| Full !(A.Array (HashMap k v))
-- ^ Invariants:
--
-- * The array of a 'Full' node stores exactly 'maxChildren' sub-nodes. (INV7)
| Collision !Hash !(A.Array (Leaf k v))
-- ^ Invariants:
--
-- * The location of a 'Leaf' or 'Collision' node in the tree must be
-- compatible with its 'Hash'. (INV5)
-- (TODO: Document this properly (#425))
-- * The array of a 'Collision' node must contain at least two sub-nodes. (INV8)
-- * The 'hash' of each key in a 'Collision' node must be the one stored in
-- the node. (INV6)
-- * No two keys stored in a 'Collision' can be equal according to their
-- 'Eq' instance. (INV9)

type role HashMap nominal representational

Expand Down
146 changes: 146 additions & 0 deletions Data/HashMap/Internal/Debug.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeApplications #-}

-- | = WARNING
--
-- This module is considered __internal__.
--
-- The Package Versioning Policy __does not apply__.
--
-- The contents of this module may change __in any way whatsoever__
-- and __without any warning__ between minor versions of this package.
--
-- Authors importing this module are expected to track development
-- closely.
--
-- = Description
--
-- Debugging utilities for 'HashMap's.

module Data.HashMap.Internal.Debug
( valid
, Validity(..)
, Error(..)
, SubHash
, SubHashPath
) where

import Data.Bits (complement, countTrailingZeros, popCount, shiftL,
unsafeShiftL, (.&.), (.|.))
import Data.Hashable (Hashable)
import Data.HashMap.Internal (Bitmap, Hash, HashMap (..), Leaf (..),
bitsPerSubkey, fullBitmap, hash,
isLeafOrCollision, maxChildren, sparseIndex)
import Data.Semigroup (Sum (..))

import qualified Data.HashMap.Internal.Array as A


#if !MIN_VERSION_base(4,11,0)
import Data.Semigroup (Semigroup (..))
#endif

data Validity k = Invalid (Error k) SubHashPath | Valid
deriving (Eq, Show)

instance Semigroup (Validity k) where
Valid <> y = y
x <> _ = x

instance Monoid (Validity k) where
mempty = Valid
mappend = (<>)

-- | An error corresponding to a broken invariant.
--
-- See 'HashMap' for the documentation of the invariants.
data Error k
= INV1_internal_Empty
| INV2_bad_BitmapIndexed_size !Int
| INV3_bitmap_array_size_mismatch !Bitmap !Int
| INV4_BitmapIndexed_invalid_single_subtree
| INV5_misplaced_hash !Hash
| INV6_key_hash_mismatch k !Hash
| INV7_bad_Full_size !Int
| INV8_Collision_size !Int
| INV9_Collision_duplicate_key k !Hash
deriving (Eq, Show)

-- TODO: Name this 'Index'?!
-- (https://github.com/haskell-unordered-containers/unordered-containers/issues/425)
-- | A part of a 'Hash' with 'bitsPerSubkey' bits.
type SubHash = Word

data SubHashPath = SubHashPath
{ partialHash :: !Word
-- ^ The bits we already know, starting from the lower bits.
-- The unknown upper bits are @0@.
, lengthInBits :: !Int
-- ^ The number of bits known.
} deriving (Eq, Show)

initialSubHashPath :: SubHashPath
initialSubHashPath = SubHashPath 0 0

addSubHash :: SubHashPath -> SubHash -> SubHashPath
addSubHash (SubHashPath ph l) sh =
SubHashPath (ph .|. (sh `unsafeShiftL` l)) (l + bitsPerSubkey)

hashMatchesSubHashPath :: SubHashPath -> Hash -> Bool
hashMatchesSubHashPath (SubHashPath ph l) h = maskToLength h l == ph
where
-- Note: This needs to use `shiftL` instead of `unsafeShiftL` because
-- @l'@ may be greater than 32/64 at the deepest level.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Greater than, or just equal to?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

l is 65 when the deepest leaves are checked on a 64-bit computer.

maskToLength h' l' = h' .&. complement (complement 0 `shiftL` l')

valid :: Hashable k => HashMap k v -> Validity k
valid Empty = Valid
valid t = validInternal initialSubHashPath t
where
validInternal p Empty = Invalid INV1_internal_Empty p
validInternal p (Leaf h l) = validHash p h <> validLeaf p h l
validInternal p (Collision h ary) = validHash p h <> validCollision p h ary
validInternal p (BitmapIndexed b ary) = validBitmapIndexed p b ary
validInternal p (Full ary) = validFull p ary

validHash p h | hashMatchesSubHashPath p h = Valid
| otherwise = Invalid (INV5_misplaced_hash h) p

validLeaf p h (L k _) | hash k == h = Valid
| otherwise = Invalid (INV6_key_hash_mismatch k h) p

validCollision p h ary = validCollisionSize <> A.foldMap (validLeaf p h) ary <> distinctKeys
where
n = A.length ary
validCollisionSize | n < 2 = Invalid (INV8_Collision_size n) p
| otherwise = Valid
distinctKeys = A.foldMap (\(L k _) -> appearsOnce k) ary
appearsOnce k | A.foldMap (\(L k' _) -> if k' == k then Sum @Int 1 else Sum 0) ary == 1 = Valid
| otherwise = Invalid (INV9_Collision_duplicate_key k h) p

validBitmapIndexed p b ary = validArraySize <> validSubTrees p b ary
where
n = A.length ary
validArraySize | n < 1 || n >= maxChildren = Invalid (INV2_bad_BitmapIndexed_size n) p
| popCount b == n = Valid
| otherwise = Invalid (INV3_bitmap_array_size_mismatch b n) p

validSubTrees p b ary
| A.length ary == 1
, isLeafOrCollision (A.index ary 0)
= Invalid INV4_BitmapIndexed_invalid_single_subtree p
| otherwise = go b
where
go 0 = Valid
go b' = validInternal (addSubHash p (fromIntegral c)) (A.index ary i) <> go b''
where
c = countTrailingZeros b'
m = 1 `unsafeShiftL` c
i = sparseIndex b m
b'' = b' .&. complement m

validFull p ary = validArraySize <> validSubTrees p fullBitmap ary
where
n = A.length ary
validArraySize | n == maxChildren = Valid
| otherwise = Invalid (INV7_bad_Full_size n) p
35 changes: 22 additions & 13 deletions tests/Properties/HashMapLazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,21 @@

module MODULE_NAME (tests) where

import Control.Applicative (Const (..))
import Control.Monad (guard)
import Control.Applicative (Const (..))
import Control.Monad (guard)
import Data.Bifoldable
import Data.Function (on)
import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable (hashWithSalt))
import Data.Ord (comparing)
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
property, (===), (==>))
import Test.QuickCheck.Function (Fun, apply)
import Test.QuickCheck.Poly (A, B)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Util.Key (Key, keyToInt)
import Data.Function (on)
import Data.Functor.Identity (Identity (..))
import Data.Hashable (Hashable (hashWithSalt))
import Data.HashMap.Internal.Debug (Validity (..), valid)
import Data.Ord (comparing)
import Test.QuickCheck (Arbitrary (..), Property, elements, forAll,
property, (===), (==>))
import Test.QuickCheck.Function (Fun, apply)
import Test.QuickCheck.Poly (A, B)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Util.Key (Key, keyToInt)

import qualified Data.Foldable as Foldable
import qualified Data.List as List
Expand Down Expand Up @@ -319,6 +320,9 @@ pIntersection xs ys =
`eq_` HM.intersection (HM.fromList xs)
$ ys

pIntersectionValid :: HashMap Key () -> HashMap Key () -> Property
pIntersectionValid x y = valid (HM.intersection x y) === Valid

pIntersectionWith :: [(Key, Int)] -> [(Key, Int)] -> Property
pIntersectionWith xs ys = M.intersectionWith (-) (M.fromList xs) `eq_`
HM.intersectionWith (-) (HM.fromList xs) $ ys
Expand Down Expand Up @@ -421,6 +425,9 @@ instance Hashable a => Hashable (Magma a) where
pFromList :: [(Key, Int)] -> Property
pFromList = id `eq_` id

pFromListValid :: [(Key, ())] -> Property
pFromListValid xs = valid (HM.fromList xs) === Valid

pFromListWith :: [(Key, Int)] -> Property
pFromListWith kvs = (M.toAscList $ M.fromListWith Op kvsM) ===
(toAscList $ HM.fromListWith Op kvsM)
Expand Down Expand Up @@ -529,6 +536,7 @@ tests =
[ testProperty "difference" pDifference
, testProperty "differenceWith" pDifferenceWith
, testProperty "intersection" pIntersection
, testProperty "intersection produces valid HashMap" pIntersectionValid
, testProperty "intersectionWith" pIntersectionWith
, testProperty "intersectionWithKey" pIntersectionWithKey
]
Expand All @@ -544,6 +552,7 @@ tests =
[ testProperty "elems" pElems
, testProperty "keys" pKeys
, testProperty "fromList" pFromList
, testProperty "fromList.valid" pFromListValid
, testProperty "fromListWith" pFromListWith
, testProperty "fromListWithKey" pFromListWithKey
, testProperty "toList" pToList
Expand Down
1 change: 1 addition & 0 deletions unordered-containers.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ library
exposed-modules:
Data.HashMap.Internal
Data.HashMap.Internal.Array
Data.HashMap.Internal.Debug
Data.HashMap.Internal.List
Data.HashMap.Internal.Strict
Data.HashMap.Lazy
Expand Down