From 7fba507c965721b4273fd4b39efe6faf92df7e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5kan=20Th=C3=B6rngren?= Date: Sat, 13 May 2023 16:10:53 -0700 Subject: [PATCH] Add some upper case hex functions This is a small subset of functions that can be used to get uppercase hex output which is useful for certain ancient binary file formats, such as Intel HEX and Motorola S-record. --- Data/ByteString/Builder/ASCII.hs | 21 +++++++++++ Data/ByteString/Builder/Prim/ASCII.hs | 35 +++++++++++++++++++ bytestring.cabal | 10 +++--- cbits/itoa.c | 24 ++++++++++++- .../Data/ByteString/Builder/Prim/TestUtils.hs | 7 ++++ .../builder/Data/ByteString/Builder/Tests.hs | 20 ++++++++++- 6 files changed, 110 insertions(+), 7 deletions(-) diff --git a/Data/ByteString/Builder/ASCII.hs b/Data/ByteString/Builder/ASCII.hs index 694b5c8f5..345524481 100644 --- a/Data/ByteString/Builder/ASCII.hs +++ b/Data/ByteString/Builder/ASCII.hs @@ -74,6 +74,10 @@ module Data.ByteString.Builder.ASCII , byteStringHex , lazyByteStringHex + , word8HexUpperFixed + , word64HexUpperFixedWidth + , byteStringHexUpper + ) where import Data.ByteString as S @@ -250,6 +254,23 @@ byteStringHex = P.primMapByteStringFixed P.word8HexFixed lazyByteStringHex :: L.ByteString -> Builder lazyByteStringHex = P.primMapLazyByteStringFixed P.word8HexFixed +-- | Hexadecimal encoding of a 'Word8' using 2 upper-case characters. +{-# INLINE word8HexUpperFixed #-} +word8HexUpperFixed :: Word8 -> Builder +word8HexUpperFixed = P.primFixed P.word8HexUpperFixed + +-- | Hexadecimal encoding of a 'Word64' using a specified number of +-- upper-case characters. +{-# INLINE word64HexUpperFixedWidth #-} +word64HexUpperFixedWidth :: Int -> Word64 -> Builder +word64HexUpperFixedWidth = P.primFixed . P.word64HexUpperFixedWidth + +-- | Encode each byte of a 'S.ByteString' using its fixed-width hex +-- upper-case encoding. +{-# NOINLINE byteStringHexUpper #-} -- share code +byteStringHexUpper :: S.ByteString -> Builder +byteStringHexUpper = P.primMapByteStringFixed P.word8HexUpperFixed + ------------------------------------------------------------------------------ -- Fast decimal 'Integer' encoding. diff --git a/Data/ByteString/Builder/Prim/ASCII.hs b/Data/ByteString/Builder/Prim/ASCII.hs index 8c5e3d1a2..72d22e635 100644 --- a/Data/ByteString/Builder/Prim/ASCII.hs +++ b/Data/ByteString/Builder/Prim/ASCII.hs @@ -75,6 +75,9 @@ module Data.ByteString.Builder.Prim.ASCII , floatHexFixed , doubleHexFixed + , word8HexUpperFixed + , word64HexUpperFixedWidth + ) where import Data.ByteString.Builder.Prim.Binary @@ -283,3 +286,35 @@ floatHexFixed = encodeFloatViaWord32F word32HexFixed {-# INLINE doubleHexFixed #-} doubleHexFixed :: FixedPrim Double doubleHexFixed = encodeDoubleViaWord64F word64HexFixed + +-- fixed width; leading zeroes; upper-case +------------------------------------------ + +foreign import ccall unsafe "static _hs_bytestring_builder_uint32_fixed_width_hex_upper" c_uint32_fixed_hex_upper + :: CInt -> Word32 -> Ptr Word8 -> IO () + +foreign import ccall unsafe "static _hs_bytestring_builder_uint64_fixed_width_hex_upper" c_uint64_fixed_hex_upper + :: CInt -> Word64 -> Ptr Word8 -> IO () + +{-# INLINE encodeWordHexUpperFixedWidth #-} +encodeWordHexUpperFixedWidth :: forall a. (Storable a, Integral a) => Int -> FixedPrim a +encodeWordHexUpperFixedWidth width = fixedPrim width $ c_uint32_fixed_hex_upper (CInt (fromIntegral width)) . fromIntegral + +{-# INLINE encodeWordHexUpperFixed #-} +encodeWordHexUpperFixed :: forall a. (Storable a, Integral a) => FixedPrim a +encodeWordHexUpperFixed = encodeWordHexUpperFixedWidth (2 * sizeOf (undefined :: a)) + +{-# INLINE encodeWord64HexUpperFixedWidth #-} +encodeWord64HexUpperFixedWidth :: forall a. (Storable a, Integral a) => Int -> FixedPrim a +encodeWord64HexUpperFixedWidth width = fixedPrim width $ c_uint64_fixed_hex_upper (CInt (fromIntegral width)) . fromIntegral + +-- | Hexadecimal encoding of a 'Word8' using 2 upper-case characters. +{-# INLINE word8HexUpperFixed #-} +word8HexUpperFixed :: FixedPrim Word8 +word8HexUpperFixed = encodeWordHexUpperFixed + +-- | Hexadecimal encoding of a 'Word64' using a specified number of +-- upper-case characters. +{-# INLINE word64HexUpperFixedWidth #-} +word64HexUpperFixedWidth :: Int -> FixedPrim Word64 +word64HexUpperFixedWidth = encodeWord64HexUpperFixedWidth diff --git a/bytestring.cabal b/bytestring.cabal index fbc7efd9f..924ccb08f 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -1,5 +1,5 @@ Name: bytestring -Version: 0.11.4.0 +Version: 0.11.4.0.1 Synopsis: Fast, compact, strict and lazy byte strings with a list interface Description: An efficient compact, immutable byte string type (both strict and lazy) @@ -121,19 +121,19 @@ library -fmax-simplifier-iterations=10 -fdicts-cheap -fspec-constr-count=6 - + c-sources: cbits/fpstring.c cbits/itoa.c cbits/shortbytestring.c - + if (arch(aarch64)) c-sources: cbits/aarch64/is-valid-utf8.c else c-sources: cbits/is-valid-utf8.c - + -- DNDEBUG disables asserts in cbits/ cc-options: -std=c11 -DNDEBUG=1 - + -- No need to link to libgcc on ghc-9.4 and later which uses a clang-based -- toolchain. if os(windows) && impl(ghc < 9.3) diff --git a/cbits/itoa.c b/cbits/itoa.c index f69fa6b66..1a1f14a2f 100644 --- a/cbits/itoa.c +++ b/cbits/itoa.c @@ -4,13 +4,15 @@ // inspired by: http://www.jb.man.ac.uk/~slowe/cpp/itoa.html // /////////////////////////////////////////////////////////////// -#include +#include // Decimal Encoding /////////////////// static const char* digits = "0123456789abcdef"; +static const char* digits_upper = "0123456789ABCDEF"; + // signed integers char* _hs_bytestring_int_dec (int x, char* buf) { @@ -213,3 +215,23 @@ char* _hs_bytestring_long_long_uint_hex (long long unsigned int x, char* buf) { } return next_free; }; + +// unsigned ints (32 bit words) +void _hs_bytestring_builder_uint32_fixed_width_hex_upper (int width, + uint32_t x, + char* buf) { + while (--width >= 0) { + buf[width] = digits_upper[x & 0xf]; + x >>= 4; + } +}; + +// unsigned ints (64 bit words) +void _hs_bytestring_builder_uint64_fixed_width_hex_upper (int width, + uint64_t x, + char* buf) { + while (--width >= 0) { + buf[width] = digits_upper[x & 0xf]; + x >>= 4; + } +}; diff --git a/tests/builder/Data/ByteString/Builder/Prim/TestUtils.hs b/tests/builder/Data/ByteString/Builder/Prim/TestUtils.hs index 61cf6af5f..fb3780990 100644 --- a/tests/builder/Data/ByteString/Builder/Prim/TestUtils.hs +++ b/tests/builder/Data/ByteString/Builder/Prim/TestUtils.hs @@ -47,6 +47,7 @@ module Data.ByteString.Builder.Prim.TestUtils ( , int16HexFixed_list , int32HexFixed_list , int64HexFixed_list + , wordHexFixedWidth_list , floatHexFixed_list , doubleHexFixed_list @@ -305,6 +306,9 @@ wordHexFixed_list x = where pad n cs = replicate (n - length cs) '0' ++ cs +pruneWidth :: Int -> [a] -> [a] +pruneWidth width xs = drop (length xs - width) xs + int8HexFixed_list :: Int8 -> [Word8] int8HexFixed_list = wordHexFixed_list . (fromIntegral :: Int8 -> Word8 ) @@ -317,6 +321,9 @@ int32HexFixed_list = wordHexFixed_list . (fromIntegral :: Int32 -> Word32) int64HexFixed_list :: Int64 -> [Word8] int64HexFixed_list = wordHexFixed_list . (fromIntegral :: Int64 -> Word64) +wordHexFixedWidth_list :: (Storable a, Integral a, Show a) => Int -> a -> [Word8] +wordHexFixedWidth_list width = pruneWidth width . wordHexFixed_list + floatHexFixed_list :: Float -> [Word8] floatHexFixed_list = float_list wordHexFixed_list diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs index fa58645e4..7348eb45a 100644 --- a/tests/builder/Data/ByteString/Builder/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Tests.hs @@ -56,7 +56,7 @@ import Test.Tasty (TestTree, TestName, testGroup) import Test.Tasty.QuickCheck ( Arbitrary(..), oneof, choose, listOf, elements , counterexample, ioProperty, UnicodeString(..), Property, testProperty - , (===), (.&&.), conjoin ) + , (===), (.&&.), conjoin, forAll ) tests :: [TestTree] @@ -553,6 +553,16 @@ testBuilderConstr name ref mkBuilder = where ws = ref x +testBuilderConstrWidth :: (Arbitrary a, Show a) => + TestName -> Int -> (Int -> a -> [Word8]) -> (Int -> a -> Builder) -> TestTree +testBuilderConstrWidth name maxDigits ref mkBuilder = + testProperty name check + where + widths = choose (0, maxDigits) + check x = forAll widths $ \width -> + let ws = ref width x + in (ws ++ ws) == + (L.unpack $ toLazyByteString $ mkBuilder width x `BI.append` mkBuilder width x) testsBinary :: [TestTree] testsBinary = @@ -637,9 +647,17 @@ testsASCII = , testBuilderConstr "floatHexFixed" floatHexFixed_list floatHexFixed , testBuilderConstr "doubleHexFixed" doubleHexFixed_list doubleHexFixed + + , testBuilderConstr "word8UpperHexFixed" (uphex . wordHexFixed_list) word8HexUpperFixed + , testBuilderConstrWidth "word64HexUpperFixedWidth" 16 + (\width -> uphex . wordHexFixedWidth_list width) + word64HexUpperFixedWidth ] where enlarge (n, e) = n ^ (abs (e `mod` (50 :: Integer))) + uphex = map uphex1 + uphex1 n | n <= 57 = n -- '9' or below + | otherwise = n - 32 -- otherwise assume lower case a-f, convert to A-F testsFloating :: [TestTree] testsFloating =