Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Equivalence classes on requests. #40

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions Haxl/Core/Monad.hs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ module Haxl.Core.Monad (

-- * Data fetching and caching
dataFetch, uncachedRequest,
dataFetchEquiv,
cacheRequest, cacheResult, cachedComputation,
dumpCacheAsHaskell,

Expand Down Expand Up @@ -79,6 +80,7 @@ import Control.Arrow (left)
import Control.Exception (bracket_)
import Debug.Trace (traceEventIO)
#endif
import Unsafe.Coerce

-- -----------------------------------------------------------------------------
-- The environment
Expand Down Expand Up @@ -388,6 +390,23 @@ cached env req = do
Right _ -> "Cached request: " ++ show req
return (Cached r)

-- | Use not the request itself, but the image of the request under a
-- mapping as a key in the cache.
--
-- This can be used to collect different requests that are known
-- priori to give the same results and only perform one request, as in
-- 'dataFetchEquiv'.
--
-- Another possible use case is to reduce the overall size of the
-- cache if the requests have a large memory footprint. In this case,
-- @f@ could be a function that calculates a cryptographic hash of the
-- request parameters.
cachedKeyTransform :: (Request r a)
=> (r a -> r a)
-- ^ Function @f@ that transforms the request.
-> Env u -> r a -> IO (CacheResult a)
cachedKeyTransform f env req = cached env (f req)

-- | Performs actual fetching of data for a 'Request' from a 'DataSource'.
dataFetch :: (DataSource u r, Request r a) => r a -> GenHaxl u a
dataFetch req = GenHaxl $ \env ref -> do
Expand All @@ -409,6 +428,52 @@ dataFetch req = GenHaxl $ \env ref -> do
Cached (Left ex) -> return (Throw ex)
Cached (Right a) -> return (Done a)

-- | Allows building equivalence classes of requests (classes of
-- requests that are known a priori to give the same result), so that
-- at most one request will be performed in the same round for each
-- equivalence class.
--
-- Given an equivalence relation and a request @req@, the currently
-- blocked requests will be checked for an eqivalent request @req'@.
-- If found, the request will be replaced by @req'@.
--
-- In addition to the equivalence relation, one can also provide a
-- function that maps each request from one equivalence class to a
-- /unique/ representative of that class. This representative will be
-- used as the key in the cache. This will ensure that only one
-- request from each class is performed ever, not only in a given
-- round.
dataFetchEquiv :: forall r a u . (DataSource u r, Request r a)
=> (r a -> r a -> Bool)
-- ^ Equivalence relation on requests.
-> (r a -> r a)
-- ^ Function that selects a unique representative from a class.
-> r a
-> GenHaxl u a
dataFetchEquiv equiv f req = GenHaxl $ \ env ref -> do
res <- cachedKeyTransform f env req
case res of
-- No request equivalent to req is cached yet
Uncached rvar -> do
allRequests <- readIORef ref
let test (BlockedFetch r _) = unsafeCoerce r `equiv` req
req' = find test (requestsOfType req allRequests)
-- test if there is already an equivalent request in the current round.
case req' of
-- if not, enter this request
Nothing -> do
modifyIORef' ref $ \ bs -> addRequest (BlockedFetch req rvar) bs
return $ Blocked (Cont (continueFetch req rvar))
-- if there is an equivalent request, use the same result variable
Just (BlockedFetch r rvar) ->
let r' = unsafeCoerce r :: r a
rvar' = unsafeCoerce rvar
in return $ Blocked (Cont (continueFetch r' rvar'))
-- below just as in dataFetch
CachedNotFetched rvar -> return $ Blocked (Cont (continueFetch req rvar))
Cached (Left ex) -> return (Throw ex)
Cached (Right a) -> return (Done a)

-- | A data request that is not cached. This is not what you want for
-- normal read requests, because then multiple identical requests may
-- return different results, and this invalidates some of the
Expand Down
13 changes: 12 additions & 1 deletion Haxl/Core/RequestStore.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ConstraintKinds #-}

-- | Bucketing requests by 'DataSource'.
--
Expand All @@ -20,7 +21,8 @@
-- users should not need to import it.
module Haxl.Core.RequestStore (
BlockedFetches(..), RequestStore,
noRequests, addRequest, contents
noRequests, addRequest, contents,
requestsOfType
) where

import Haxl.Core.Types
Expand Down Expand Up @@ -70,3 +72,12 @@ addRequest bf (RequestStore m) =
-- | Retrieves the whole contents of the 'RequestStore'.
contents :: RequestStore u -> [BlockedFetches u]
contents (RequestStore m) = Map.elems m

-- | Retrieves requests in the 'RequestStore' that have the same type
-- as a given request.
requestsOfType :: forall r a u . (DataSource u r, Request r a) => r a -> RequestStore u -> [BlockedFetch r]
requestsOfType _ (RequestStore rs) =
let ty = typeOf1 (undefined :: r a)
in case Map.lookup ty rs of
Just (BlockedFetches result) -> map unsafeCoerce result
Nothing -> []
104 changes: 104 additions & 0 deletions tests/EquivDataSource.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneDeriving #-}
{-# LANGUAGE TypeFamilies #-}

module EquivDataSource (
MyData (..)
, getSimilar
, initGlobalState
) where

import Haxl.Prelude
import Prelude ()

import Haxl.Core
import Haxl.Core.Monad (dataFetchEquiv)

import Control.Concurrent.MVar
import Data.Hashable
import Data.Map (Map)
import qualified Data.Map as Map
import Data.Typeable

-- Example data source using equivalent requests. The data source
-- contains some numbers. Calling @getSimilar x@ queries if there is
-- a number stored that is equal to @x@ modulo @7@. If so, this number
-- is returned, if not, @x@ itself is stored (and returned).
--
-- This allows to perform a restricted class of write operations that
-- can safely be reordered by Haxl.
--
-- This is a simplified version of the real-world usecase, which
-- involves categorising pieces of data using some minhash-like method
-- to measure similarity.

newtype MyKey = MyKey Int
deriving (Eq, Ord, Hashable, Typeable, Show)
newtype MyData = MyData Int
deriving (Eq, Hashable, Typeable, Show)

data MyReq a where
EnterOrRetrieveData :: MyData -> MyReq MyData
RetrieveData :: MyKey -> MyReq MyData
deriving Typeable

key :: MyData -> MyKey
key (MyData x) = MyKey (x `mod` 7)

deriving instance Eq (MyReq a)
deriving instance Show (MyReq a)
instance Show1 MyReq where show1 = show
instance Hashable (MyReq a) where
hashWithSalt s (EnterOrRetrieveData x) = hashWithSalt s (0::Int, x)
hashWithSalt s (RetrieveData x) = hashWithSalt s (1::Int, x)

instance StateKey MyReq where
data State MyReq = MyState { myData :: MVar (Map MyKey MyData) }

instance DataSourceName MyReq where
dataSourceName _ = "Datasource with equivalent requests"

instance DataSource u MyReq where
fetch = myFetch

initGlobalState :: IO (State MyReq)
initGlobalState = do
myMVar <- newMVar Map.empty
return (MyState myMVar)

myFetch :: State MyReq -> Flags -> u -> [BlockedFetch MyReq] -> PerformFetch
myFetch state _flags _user bfs = SyncFetch $ mapM_ (fetch1 state) bfs

fetch1 :: State MyReq -> BlockedFetch MyReq -> IO ()
fetch1 state (BlockedFetch (EnterOrRetrieveData val) m) =
modifyMVar_ (myData state) $ \valMap ->
case Map.lookup k valMap of
Nothing ->
putSuccess m val
>> return (Map.insert k val valMap)
Just val' ->
putSuccess m val'
>> return valMap
where k = key val

fetch1 state (BlockedFetch (RetrieveData k) m) = do
valMap <- readMVar (myData state)
case Map.lookup k valMap of
Just val -> putSuccess m val
Nothing -> putFailure m (FetchError "This should not be possible.")


getSimilar :: MyData -> (GenHaxl ()) MyData
getSimilar =
let equiv :: MyReq a -> MyReq a -> Bool
equiv (EnterOrRetrieveData x) (EnterOrRetrieveData y) = key x == key y
equiv _ _ = error "impossible"
representative :: MyReq a -> MyReq a
representative (EnterOrRetrieveData x) = RetrieveData (key x)
representative _ = error "impossible"
in dataFetchEquiv equiv representative . EnterOrRetrieveData
2 changes: 2 additions & 0 deletions tests/Main.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import DataCacheTest
#ifdef HAVE_APPLICATIVEDO
import AdoTests
#endif
import TestEquivDataSource

import Data.String
import Test.HUnit
Expand All @@ -23,4 +24,5 @@ main = runTestTT $ TestList
#ifdef HAVE_APPLICATIVEDO
, TestLabel "AdoTests" AdoTests.tests
#endif
, TestLabel "EquivDataSource" TestEquivDataSource.tests
]
44 changes: 44 additions & 0 deletions tests/TestEquivDataSource.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
module TestEquivDataSource (tests) where

import Haxl.Prelude as Haxl
import Prelude ()

import Haxl.Core

import EquivDataSource

import Data.IORef
import Data.List (nub)
import Test.HUnit

testEnv :: IO (Env ())
testEnv = do
myState <- EquivDataSource.initGlobalState
let st = stateSet myState stateEmpty
initEnv st ()

tests :: Test
tests = TestList
[ TestLabel "singleFetchTest" singleFetchTest
, TestLabel "multiFetchTest" multiFetchTest
]

singleFetchTest :: Test
singleFetchTest = TestCase $ do
env <- testEnv
x <- runHaxl env $ mapM (getSimilar . MyData) [0, 7, 14, 21, 28]
-- the numbers are all congruent modulo 7, so we expect one unique result for all of them
assertEqual "unique result" 1 $ length (nub x)
stats <- readIORef (statsRef env)
-- ... and only one fetch
assertEqual "fetches" 1 (numFetches stats)

multiFetchTest :: Test
multiFetchTest = TestCase $ do
env <- testEnv
x <- runHaxl env $ mapM (getSimilar . MyData) [0 .. 13]
-- expect seven unique results
assertEqual "unique result" 7 $ length (nub x)
stats <- readIORef (statsRef env)
-- ... in seven fetches
assertEqual "fetches" 7 (numFetches stats)