Skip to content

Optimization potential in intersection #415

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
2 tasks
sjakobi opened this issue Apr 15, 2022 · 0 comments
Open
2 tasks

Optimization potential in intersection #415

sjakobi opened this issue Apr 15, 2022 · 0 comments

Comments

@sjakobi
Copy link
Member

sjakobi commented Apr 15, 2022

These are some follow-up tasks based on the code introduced in #406:

-- | /O(n*log m)/ Intersection of two maps. Return elements of the first
-- map for keys existing in the second.
intersection :: (Eq k, Hashable k) => HashMap k v -> HashMap k w -> HashMap k v
intersection = Exts.inline intersectionWith const
{-# INLINABLE intersection #-}
-- | /O(n*log m)/ Intersection of two maps. If a key occurs in both maps
-- the provided function is used to combine the values from the two
-- maps.
intersectionWith :: (Eq k, Hashable k) => (v1 -> v2 -> v3) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3
intersectionWith f = Exts.inline intersectionWithKey $ const f
{-# INLINABLE intersectionWith #-}
-- | /O(n*log m)/ Intersection of two maps. If a key occurs in both maps
-- the provided function is used to combine the values from the two
-- maps.
intersectionWithKey :: (Eq k, Hashable k) => (k -> v1 -> v2 -> v3) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3
intersectionWithKey f = intersectionWithKey# $ \k v1 v2 -> (# f k v1 v2 #)
{-# INLINABLE intersectionWithKey #-}
intersectionWithKey# :: Eq k => (k -> v1 -> v2 -> (# v3 #)) -> HashMap k v1 -> HashMap k v2 -> HashMap k v3
intersectionWithKey# f = go 0
where
-- empty vs. anything
go !_ _ Empty = Empty
go _ Empty _ = Empty
-- leaf vs. anything
go s (Leaf h1 (L k1 v1)) t2 =
lookupCont
(\_ -> Empty)
(\v _ -> case f k1 v1 v of (# v' #) -> Leaf h1 $ L k1 v')
h1 k1 s t2
go s t1 (Leaf h2 (L k2 v2)) =
lookupCont
(\_ -> Empty)
(\v _ -> case f k2 v v2 of (# v' #) -> Leaf h2 $ L k2 v')
h2 k2 s t1
-- collision vs. collision
go _ (Collision h1 ls1) (Collision h2 ls2) = intersectionCollisions f h1 h2 ls1 ls2
-- branch vs. branch
go s (BitmapIndexed b1 ary1) (BitmapIndexed b2 ary2) =
intersectionArrayBy (go (s + bitsPerSubkey)) b1 b2 ary1 ary2
go s (BitmapIndexed b1 ary1) (Full ary2) =
intersectionArrayBy (go (s + bitsPerSubkey)) b1 fullNodeMask ary1 ary2
go s (Full ary1) (BitmapIndexed b2 ary2) =
intersectionArrayBy (go (s + bitsPerSubkey)) fullNodeMask b2 ary1 ary2
go s (Full ary1) (Full ary2) =
intersectionArrayBy (go (s + bitsPerSubkey)) fullNodeMask fullNodeMask ary1 ary2
-- collision vs. branch
go s (BitmapIndexed b1 ary1) t2@(Collision h2 _ls2)
| b1 .&. m2 == 0 = Empty
| otherwise = go (s + bitsPerSubkey) (A.index ary1 i) t2
where
m2 = mask h2 s
i = sparseIndex b1 m2
go s t1@(Collision h1 _ls1) (BitmapIndexed b2 ary2)
| b2 .&. m1 == 0 = Empty
| otherwise = go (s + bitsPerSubkey) t1 (A.index ary2 i)
where
m1 = mask h1 s
i = sparseIndex b2 m1
go s (Full ary1) t2@(Collision h2 _ls2) = go (s + bitsPerSubkey) (A.index ary1 i) t2
where
i = index h2 s
go s t1@(Collision h1 _ls1) (Full ary2) = go (s + bitsPerSubkey) t1 (A.index ary2 i)
where
i = index h1 s
{-# INLINE intersectionWithKey# #-}
intersectionArrayBy ::
( HashMap k v1 ->
HashMap k v2 ->
HashMap k v3
) ->
Bitmap ->
Bitmap ->
A.Array (HashMap k v1) ->
A.Array (HashMap k v2) ->
HashMap k v3
intersectionArrayBy f !b1 !b2 !ary1 !ary2
| b1 .&. b2 == 0 = Empty
| otherwise = runST $ do
mary <- A.new_ $ popCount bIntersect
-- iterate over nonzero bits of b1 .|. b2
let go !i !i1 !i2 !b !bFinal
| b == 0 = pure (i, bFinal)
| testBit $ b1 .&. b2 = do
x1 <- A.indexM ary1 i1
x2 <- A.indexM ary2 i2
case f x1 x2 of
Empty -> go i (i1 + 1) (i2 + 1) b' (bFinal .&. complement m)
_ -> do
A.write mary i $! f x1 x2
go (i + 1) (i1 + 1) (i2 + 1) b' bFinal
| testBit b1 = go i (i1 + 1) i2 b' bFinal
| otherwise = go i i1 (i2 + 1) b' bFinal
where
m = 1 `unsafeShiftL` countTrailingZeros b
testBit x = x .&. m /= 0
b' = b .&. complement m
(len, bFinal) <- go 0 0 0 bCombined bIntersect
case len of
0 -> pure Empty
1 -> A.read mary 0
_ -> bitmapIndexedOrFull bFinal <$> (A.unsafeFreeze =<< A.shrink mary len)
where
bCombined = b1 .|. b2
bIntersect = b1 .&. b2
{-# INLINE intersectionArrayBy #-}
intersectionCollisions :: Eq k => (k -> v1 -> v2 -> (# v3 #)) -> Hash -> Hash -> A.Array (Leaf k v1) -> A.Array (Leaf k v2) -> HashMap k v3
intersectionCollisions f h1 h2 ary1 ary2
| h1 == h2 = runST $ do
mary2 <- A.thaw ary2 0 $ A.length ary2
mary <- A.new_ $ min (A.length ary1) (A.length ary2)
let go i j
| i >= A.length ary1 || j >= A.lengthM mary2 = pure j
| otherwise = do
L k1 v1 <- A.indexM ary1 i
searchSwap k1 j mary2 >>= \case
Just (L _k2 v2) -> do
let !(# v3 #) = f k1 v1 v2
A.write mary j $ L k1 v3
go (i + 1) (j + 1)
Nothing -> do
go (i + 1) j
len <- go 0 0
case len of
0 -> pure Empty
1 -> Leaf h1 <$> A.read mary 0
_ -> Collision h1 <$> (A.unsafeFreeze =<< A.shrink mary len)
| otherwise = Empty
{-# INLINE intersectionCollisions #-}
-- | Say we have
-- @
-- 1 2 3 4
-- @
-- and we search for @3@. Then we can mutate the array to
-- @
-- undefined 2 1 4
-- @
-- We don't actually need to write undefined, we just have to make sure that the next search starts 1 after the current one.
searchSwap :: Eq k => k -> Int -> A.MArray s (Leaf k v) -> ST s (Maybe (Leaf k v))
searchSwap toFind start = go start toFind start
where
go i0 k i mary
| i >= A.lengthM mary = pure Nothing
| otherwise = do
l@(L k' _v) <- A.read mary i
if k == k'
then do
A.write mary i =<< A.read mary i0
pure $ Just l
else go i0 k (i + 1) mary
{-# INLINE searchSwap #-}

  • It would be good to avoid allocating fresh Leaf nodes – we can simply use the ones from the first argument.
  • In intersectionCollisions it should be possible to perform the search-and-swap operations on the output array itself, so we don't have to allocate the intermediate mary2 array.

To preserve code sharing with intersectionWith[Key], it may be possible to generalize intersectionWithKey# to have a type similar to filterMapAux:

-- | Common implementation for 'filterWithKey' and 'mapMaybeWithKey',
-- allowing the former to former to reuse terms.
filterMapAux :: forall k v1 v2
. (HashMap k v1 -> Maybe (HashMap k v2))
-> (Leaf k v1 -> Maybe (Leaf k v2))
-> HashMap k v1
-> HashMap k v2
filterMapAux onLeaf onColl = go

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant