diff --git a/Data/Bson/Size.hs b/Data/Bson/Size.hs new file mode 100644 index 0000000..f33594d --- /dev/null +++ b/Data/Bson/Size.hs @@ -0,0 +1,71 @@ +{-# LANGUAGE MagicHash #-} +module Data.Bson.Size (sizeOfDocument) where + +import Data.Bson +import qualified Data.ByteString as BS +import Data.Text (Text) +import Data.Text.Unsafe (lengthWord8) +import GHC.Num.Integer (integerSizeInBase#) +import GHC.Word (Word (..)) + +-- | Size in bytes of the BSON document when serialized to binary format. +sizeOfDocument :: Document -> Int +sizeOfDocument doc = sum (map sizeOfField doc) + 5 +-- Additional 5 bytes = 4 bytes (size) + 1 byte (null terminator) + +sizeOfField :: Field -> Int +sizeOfField (k := v) = sizeOfLabel k + sizeOfValue v + 1 +-- Additional 1 byte = element type + +sizeOfLabel :: Text -> Int +sizeOfLabel = sizeOfCString + +sizeOfValue :: Value -> Int +sizeOfValue v = case v of + Float _ -> 8 -- 64-bit binary floating point + String x -> sizeOfString x + Doc x -> sizeOfDocument x + Array x -> sizeOfArray x + Bin (Binary x) -> sizeOfBinary x + Fun (Function x) -> sizeOfBinary x + Uuid (UUID x) -> sizeOfBinary x + Md5 (MD5 x) -> sizeOfBinary x + UserDef (UserDefined x) -> sizeOfBinary x + ObjId _ -> 12 + Bool _ -> 1 + UTC _ -> 8 + Null -> 0 + RegEx (Regex pattern opts) -> sizeOfCString pattern + sizeOfCString opts + JavaScr (Javascript [] code) -> sizeOfString code + JavaScr (Javascript env code) -> sizeOfClosure env code -- "code with scope" + Sym (Symbol x) -> sizeOfString x + Int32 _ -> 4 + Int64 _ -> 8 + Stamp _ -> 8 + MinMax _ -> 0 + +sizeOfCString :: Text -> Int +sizeOfCString t = lengthWord8 t + 1 +-- Additional 1 byte = null terminator + +sizeOfString :: Text -> Int +sizeOfString t = sizeOfCString t + 4 +-- Additional 4 bytes = size of string + +sizeOfClosure :: Document -> Text -> Int +sizeOfClosure env code = 4 + sizeOfDocument env + sizeOfString code + +sizeOfBinary :: BS.ByteString -> Int +sizeOfBinary x = 5 + BS.length x +-- Additional 5 bytes = 4 bytes (size) + 1 byte (subtype) + +sizeOfArray :: [Value] -> Int +sizeOfArray vs = 5 + sum (zipWith go [0..] vs) +-- Additional 5 bytes = 4 bytes (size) + 1 byte (null terminator) + where + go i v = 2 + intLen i + sizeOfValue v + -- Additional 2 bytes = 1 byte (element type) + 1 byte (null terminator of key name) + + -- Length of the string representing the integer in base 10 + intLen 0 = 1 + intLen i = fromIntegral $ W# (integerSizeInBase# 10## i) diff --git a/bson.cabal b/bson.cabal index 4892bf0..b075be2 100644 --- a/bson.cabal +++ b/bson.cabal @@ -48,7 +48,8 @@ Library Default-Extensions: BangPatterns, CPP Exposed-modules: Data.Bson, - Data.Bson.Binary + Data.Bson.Binary, + Data.Bson.Size Source-repository head Type: git @@ -59,6 +60,7 @@ Test-suite bson-tests Hs-source-dirs: tests Main-is: Tests.hs Other-modules: Data.Bson.Binary.Tests + Data.Bson.Size.Tests Data.Bson.Tests Ghc-options: -Wall -fno-warn-orphans @@ -72,6 +74,7 @@ Test-suite bson-tests , base , time , bytestring + , binary >= 0.5 && < 0.9 , text Default-Language: Haskell2010 diff --git a/tests/Data/Bson/Size/Tests.hs b/tests/Data/Bson/Size/Tests.hs new file mode 100644 index 0000000..0f56234 --- /dev/null +++ b/tests/Data/Bson/Size/Tests.hs @@ -0,0 +1,26 @@ +{-# LANGUAGE FlexibleContexts #-} +module Data.Bson.Size.Tests + ( tests + ) where + +import Data.Binary.Put (runPut) +import Data.Bson +import Data.Bson.Binary (putDocument) +import Data.Bson.Size (sizeOfDocument) +import qualified Data.ByteString.Lazy as BL +import Test.Framework (Test, testGroup) +import Test.Framework.Providers.QuickCheck2 (testProperty) +import Test.Framework (TestOptions' (..), plusTestOptions) +import Test.QuickCheck (Arbitrary(..)) + +testDoc :: Document -> Bool +testDoc doc = + sizeOfDocument doc == (fromIntegral $ BL.length $ runPut $ putDocument doc) + +withTestOpts :: Test -> Test +withTestOpts = plusTestOptions $ TestOptions Nothing Nothing Nothing (Just 13) (Just 5) Nothing + +tests :: Arbitrary Field => Test +tests = testGroup "Data.Bson.Size.Tests" + [ withTestOpts $ testProperty "sizeOfDocument" testDoc + ] diff --git a/tests/Tests.hs b/tests/Tests.hs index e1f1109..b0e3705 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -4,9 +4,11 @@ import Test.Framework (defaultMain) import qualified Data.Bson.Tests import qualified Data.Bson.Binary.Tests +import qualified Data.Bson.Size.Tests main :: IO () main = defaultMain [ Data.Bson.Tests.tests , Data.Bson.Binary.Tests.tests + , Data.Bson.Size.Tests.tests ]