@@ -144,7 +144,7 @@ import Control.DeepSeq (NFData (..), NFData1 (..), NFData2 (..))
144
144
import Control.Monad.ST (ST , runST )
145
145
import Data.Bifoldable (Bifoldable (.. ))
146
146
import Data.Bits (complement , popCount , unsafeShiftL ,
147
- unsafeShiftR , (.&.) , (.|.) )
147
+ unsafeShiftR , (.&.) , (.|.) , countTrailingZeros )
148
148
import Data.Coerce (coerce )
149
149
import Data.Data (Constr , Data (.. ), DataType )
150
150
import Data.Functor.Classes (Eq1 (.. ), Eq2 (.. ), Ord1 (.. ), Ord2 (.. ),
@@ -1622,26 +1622,27 @@ unionArrayBy :: (a -> a -> a) -> Bitmap -> Bitmap -> A.Array a -> A.Array a
1622
1622
-- Core size reductions with GHC 9.2.2. See the Core diffs in
1623
1623
-- https://github.com/haskell-unordered-containers/unordered-containers/pull/376.
1624
1624
unionArrayBy f ! b1 ! b2 ! ary1 ! ary2 = A. run $ do
1625
- let b' = b1 .|. b2
1626
- mary <- A. new_ (popCount b' )
1625
+ let bCombined = b1 .|. b2
1626
+ mary <- A. new_ (popCount bCombined )
1627
1627
-- iterate over nonzero bits of b1 .|. b2
1628
- -- it would be nice if we could shift m by more than 1 each time
1629
- let ba = b1 .&. b2
1630
- go ! i ! i1 ! i2 ! m
1631
- | m > b' = return ()
1632
- | b' .&. m == 0 = go i i1 i2 (m `unsafeShiftL` 1 )
1633
- | ba .&. m /= 0 = do
1628
+ let go ! i ! i1 ! i2 ! b
1629
+ | b == 0 = return ()
1630
+ | testBit (b1 .&. b2) = do
1634
1631
x1 <- A. indexM ary1 i1
1635
1632
x2 <- A. indexM ary2 i2
1636
1633
A. write mary i $! f x1 x2
1637
- go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) (m `unsafeShiftL` 1 )
1638
- | b1 .&. m /= 0 = do
1634
+ go (i+ 1 ) (i1+ 1 ) (i2+ 1 ) b'
1635
+ | testBit b1 = do
1639
1636
A. write mary i =<< A. indexM ary1 i1
1640
- go (i+ 1 ) (i1+ 1 ) i2 (m `unsafeShiftL` 1 )
1641
- | otherwise = do
1637
+ go (i+ 1 ) (i1+ 1 ) i2 b'
1638
+ | otherwise = do
1642
1639
A. write mary i =<< A. indexM ary2 i2
1643
- go (i+ 1 ) i1 (i2+ 1 ) (m `unsafeShiftL` 1 )
1644
- go 0 0 0 (b' .&. negate b') -- XXX: b' must be non-zero
1640
+ go (i+ 1 ) i1 (i2+ 1 ) b'
1641
+ where
1642
+ m = 1 `unsafeShiftL` (countTrailingZeros b)
1643
+ testBit x = x .&. m /= 0
1644
+ b' = b .&. complement m
1645
+ go 0 0 0 bCombined
1645
1646
return mary
1646
1647
-- TODO: For the case where b1 .&. b2 == b1, i.e. when one is a
1647
1648
-- subset of the other, we could use a slightly simpler algorithm,
0 commit comments