Skip to content

Commit b86f32c

Browse files
authored
Improve docs of bit twiddling functions (#396)
Fixes #394.
1 parent 4da2c20 commit b86f32c

File tree

1 file changed

+59
-11
lines changed

1 file changed

+59
-11
lines changed

Data/HashMap/Internal.hs

+59-11
Original file line numberDiff line numberDiff line change
@@ -311,8 +311,20 @@ fromListConstr = Data.mkConstr hashMapDataType "fromList" [] Data.Prefix
311311
hashMapDataType :: DataType
312312
hashMapDataType = Data.mkDataType "Data.HashMap.Internal.HashMap" [fromListConstr]
313313

314+
-- | This type is used to store the hash of a key, as produced with 'hash'.
314315
type Hash = Word
316+
317+
-- | A bitmap as contained by a 'BitmapIndexed' node, or a 'fullNodeMask'
318+
-- corresponding to a 'Full' node.
319+
--
320+
-- Only the lower 'maxChildren' bits are used. The remaining bits must be zeros.
315321
type Bitmap = Word
322+
323+
-- | 'Shift' values correspond to the level of the tree that we're currently
324+
-- operating at. At the root level the 'Shift' is @0@. For the subsequent
325+
-- levels the 'Shift' values are 'bitsPerSubkey', @2*'bitsPerSubkey'@ etc.
326+
--
327+
-- Valid values are non-negative and less than @bitSize (0 :: Word)@.
316328
type Shift = Int
317329

318330
instance Show2 HashMap where
@@ -2358,36 +2370,72 @@ clone ary =
23582370
------------------------------------------------------------------------
23592371
-- Bit twiddling
23602372

2373+
-- TODO: Name this 'bitsPerLevel'?! What is a "subkey"?
2374+
-- https://github.com/haskell-unordered-containers/unordered-containers/issues/425
2375+
2376+
-- | Number of bits that are inspected at each level of the hash tree.
2377+
--
2378+
-- This constant is named /t/ in the original /Ideal Hash Trees/ paper.
23612379
bitsPerSubkey :: Int
23622380
bitsPerSubkey = 5
23632381

2382+
-- | The size of a 'Full' node, i.e. @2 ^ 'bitsPerSubkey'@.
23642383
maxChildren :: Int
23652384
maxChildren = 1 `unsafeShiftL` bitsPerSubkey
23662385

2367-
subkeyMask :: Bitmap
2386+
-- | Bit mask with the lowest 'bitsPerSubkey' bits set, i.e. @0b11111@.
2387+
subkeyMask :: Word
23682388
subkeyMask = 1 `unsafeShiftL` bitsPerSubkey - 1
23692389

2370-
sparseIndex :: Bitmap -> Bitmap -> Int
2371-
sparseIndex b m = popCount (b .&. (m - 1))
2372-
{-# INLINE sparseIndex #-}
2390+
-- | Given a 'Hash' and a 'Shift' that indicates the level in the tree, compute
2391+
-- the index into a 'Full' node or into the bitmap of a `BitmapIndexed` node.
2392+
--
2393+
-- >>> index 0b0010_0010 0
2394+
-- 0b0000_0010
2395+
index :: Hash -> Shift -> Int
2396+
index w s = fromIntegral $ unsafeShiftR w s .&. subkeyMask
2397+
{-# INLINE index #-}
23732398

2374-
mask :: Word -> Shift -> Bitmap
2399+
-- | Given a 'Hash' and a 'Shift' that indicates the level in the tree, compute
2400+
-- the bitmap that contains only the 'index' of the hash at this level.
2401+
--
2402+
-- The result can be used for constructing one-element 'BitmapIndexed' nodes or
2403+
-- to check whether a 'BitmapIndexed' node may possibly contain the given 'Hash'.
2404+
--
2405+
-- >>> mask 0b0010_0010 0
2406+
-- 0b0100
2407+
mask :: Hash -> Shift -> Bitmap
23752408
mask w s = 1 `unsafeShiftL` index w s
23762409
{-# INLINE mask #-}
23772410

2378-
-- | Mask out the 'bitsPerSubkey' bits used for indexing at this level
2379-
-- of the tree.
2380-
index :: Hash -> Shift -> Int
2381-
index w s = fromIntegral $ unsafeShiftR w s .&. subkeyMask
2382-
{-# INLINE index #-}
2411+
-- | This array index is computed by counting the number of bits below the
2412+
-- 'index' represented by the mask.
2413+
--
2414+
-- >>> sparseIndex 0b0110_0110 0b0010_0000
2415+
-- 2
2416+
sparseIndex
2417+
:: Bitmap
2418+
-- ^ Bitmap of a 'BitmapIndexed' node
2419+
-> Bitmap
2420+
-- ^ One-bit 'mask' corresponding to the 'index' of a hash
2421+
-> Int
2422+
-- ^ Index into the array of the 'BitmapIndexed' node
2423+
sparseIndex b m = popCount (b .&. (m - 1))
2424+
{-# INLINE sparseIndex #-}
2425+
2426+
-- TODO: Should be named _(bit)map_ instead of _mask_
23832427

2384-
-- | A bitmask with the 'bitsPerSubkey' least significant bits set.
2428+
-- | A bitmap with the 'maxChildren' least significant bits set, i.e.
2429+
-- @0xFF_FF_FF_FF@.
23852430
fullNodeMask :: Bitmap
23862431
-- This needs to use 'shiftL' instead of 'unsafeShiftL', to avoid UB.
23872432
-- See issue #412.
23882433
fullNodeMask = complement (complement 0 `shiftL` maxChildren)
23892434
{-# INLINE fullNodeMask #-}
23902435

2436+
------------------------------------------------------------------------
2437+
-- Pointer equality
2438+
23912439
-- | Check if two the two arguments are the same value. N.B. This
23922440
-- function might give false negatives (due to GC moving objects.)
23932441
ptrEq :: a -> a -> Bool

0 commit comments

Comments
 (0)