From 2ec84e2b86550cf1c1ffc8230c93af5ebe78dcff Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Fri, 10 Feb 2023 08:50:27 -0500 Subject: [PATCH 01/10] Add Text.IO.Utf8 module --- src/Data/Text/IO/Utf8.hs | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 src/Data/Text/IO/Utf8.hs diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs new file mode 100644 index 00000000..d7575dc4 --- /dev/null +++ b/src/Data/Text/IO/Utf8.hs @@ -0,0 +1,44 @@ +-- | +-- Module : Data.Text.IO.Utf8 +-- Copyright : (c) 2009, 2010 Bryan O'Sullivan, +-- (c) 2009 Simon Marlow +-- License : BSD-style +-- Maintainer : bos@serpentine.com +-- Portability : GHC +-- +-- Efficient UTF-8 support for text I\/O. +module Data.Text.IO.Utf8 + ( + readFile + , writeFile + , appendFile + ) where + +import Prelude hiding (readFile, writeFile, appendFile) +import Control.Exception (evaluate) +import Control.Monad ((<=<)) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Data.Text (Text) +import Data.Text.Encoding (decodeUtf8, encodeUtf8) + +decodeUtf8IO :: ByteString -> IO Text +decodeUtf8IO = evaluate . decodeUtf8 + +encodeUtf8IO :: Text -> IO ByteString +encodeUtf8IO = evaluate . encodeUtf8 + +-- | The 'readFile' function reads a file and returns the contents of +-- the file as a string. The entire file is read strictly, as with +-- 'getContents'. +readFile :: FilePath -> IO Text +readFile = decodeUtf8IO <=< B.readFile + +-- | Write a string to a file. The file is truncated to zero length +-- before writing begins. +writeFile :: FilePath -> Text -> IO () +writeFile fp = B.writeFile fp <=< encodeUtf8IO + +-- | Write a string to the end of a file. 
+appendFile :: FilePath -> Text -> IO () +appendFile fp = B.appendFile fp <=< encodeUtf8IO From a868f4bcb0943cb2ce5f5fe38e38292be4ed3436 Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Wed, 15 Feb 2023 09:48:22 -0500 Subject: [PATCH 02/10] add more functions --- src/Data/Text/IO/Utf8.hs | 72 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs index d7575dc4..8af240ec 100644 --- a/src/Data/Text/IO/Utf8.hs +++ b/src/Data/Text/IO/Utf8.hs @@ -9,25 +9,36 @@ -- Efficient UTF-8 support for text I\/O. module Data.Text.IO.Utf8 ( + -- * File-at-a-time operations readFile , writeFile , appendFile + -- * Operations on handles + , hGetContents + , hGetLine + , hPutStr + , hPutStrLn + -- * Special cases for standard input and output + , interact + , getContents + , getLine + , putStr + , putStrLn ) where -import Prelude hiding (readFile, writeFile, appendFile) +import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) import Control.Exception (evaluate) import Control.Monad ((<=<)) import Data.ByteString (ByteString) import qualified Data.ByteString as B import Data.Text (Text) import Data.Text.Encoding (decodeUtf8, encodeUtf8) +import GHC.IO.Handle (Handle) +import qualified Data.ByteString.Char8 as B.Char8 decodeUtf8IO :: ByteString -> IO Text decodeUtf8IO = evaluate . decodeUtf8 -encodeUtf8IO :: Text -> IO ByteString -encodeUtf8IO = evaluate . encodeUtf8 - -- | The 'readFile' function reads a file and returns the contents of -- the file as a string. The entire file is read strictly, as with -- 'getContents'. @@ -37,8 +48,57 @@ readFile = decodeUtf8IO <=< B.readFile -- | Write a string to a file. The file is truncated to zero length -- before writing begins. writeFile :: FilePath -> Text -> IO () -writeFile fp = B.writeFile fp <=< encodeUtf8IO +writeFile fp = B.writeFile fp . encodeUtf8 -- | Write a string to the end of a file. 
appendFile :: FilePath -> Text -> IO () -appendFile fp = B.appendFile fp <=< encodeUtf8IO +appendFile fp = B.appendFile fp . encodeUtf8 + +-- | Read the remaining contents of a 'Handle' as a string. The +-- 'Handle' is closed once the contents have been read, or if an +-- exception is thrown. +-- +-- Internally, this function reads a chunk at a time from the +-- lower-level buffering abstraction, and concatenates the chunks into +-- a single string once the entire file has been read. +-- +-- As a result, it requires approximately twice as much memory as its +-- result to construct its result. For files more than a half of +-- available RAM in size, this may result in memory exhaustion. +hGetContents :: Handle -> IO Text +hGetContents = decodeUtf8IO <=< B.hGetContents + +-- | Read a single line from a handle. +hGetLine :: Handle -> IO Text +hGetLine = decodeUtf8IO <=< B.hGetLine + +-- | Write a string to a handle. +hPutStr :: Handle -> Text -> IO () +hPutStr h = B.hPutStr h . encodeUtf8 + +-- | Write a string to a handle, followed by a newline. +hPutStrLn :: Handle -> Text -> IO () +hPutStrLn h t = hPutStr h t >> B.hPutStr h (B.Char8.singleton '\n') + +-- | The 'interact' function takes a function of type @Text -> Text@ +-- as its argument. The entire input from the standard input device is +-- passed to this function as its argument, and the resulting string +-- is output on the standard output device. +interact :: (Text -> Text) -> IO () +interact f = B.interact (encodeUtf8 . f . decodeUtf8) + +-- | Read all user input on 'stdin' as a single string. +getContents :: IO Text +getContents = decodeUtf8IO =<< B.getContents + +-- | Read a single line of user input from 'stdin'. +getLine :: IO Text +getLine = decodeUtf8IO =<< B.getLine + +-- | Write a string to 'stdout'. +putStr :: Text -> IO () +putStr = B.putStr . encodeUtf8 + +-- | Write a string to 'stdout', followed by a newline. 
+putStrLn :: Text -> IO () +putStrLn t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') From 66dab030a0449e7797a1385ead82a97de7d81808 Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Wed, 15 Feb 2023 09:50:49 -0500 Subject: [PATCH 03/10] make decoding strict in interact --- src/Data/Text/IO/Utf8.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs index 8af240ec..754995fb 100644 --- a/src/Data/Text/IO/Utf8.hs +++ b/src/Data/Text/IO/Utf8.hs @@ -85,7 +85,7 @@ hPutStrLn h t = hPutStr h t >> B.hPutStr h (B.Char8.singleton '\n') -- passed to this function as its argument, and the resulting string -- is output on the standard output device. interact :: (Text -> Text) -> IO () -interact f = B.interact (encodeUtf8 . f . decodeUtf8) +interact f = putStr . f =<< getContents -- | Read all user input on 'stdin' as a single string. getContents :: IO Text From 306f7fa5a14b5bc163fcf1322c7043915acf3a28 Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Wed, 15 Feb 2023 10:03:01 -0500 Subject: [PATCH 04/10] move back into Data.Text.IO --- src/Data/Text/IO.hs | 90 +++++++++++++++++++++++++++++++++ src/Data/Text/IO/Utf8.hs | 104 --------------------------------------- 2 files changed, 90 insertions(+), 104 deletions(-) delete mode 100644 src/Data/Text/IO/Utf8.hs diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index ad8389d7..17e7dd49 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -21,6 +21,7 @@ module Data.Text.IO ( + -- * locale-sensitive I\/O -- * File-at-a-time operations readFile , writeFile @@ -37,6 +38,21 @@ module Data.Text.IO , getLine , putStr , putStrLn + -- * File-at-a-time operations + , readFileUtf8 + , writeFileUtf8 + , appendFileUtf8 + -- * Operations on handles + , hGetContentsUtf8 + , hGetLineUtf8 + , hPutStrUtf8 + , hPutStrLnUtf8 + -- * Special cases for standard input and output + , interactUtf8 + , getContentsUtf8 + , getLineUtf8 + , putStrUtf8 + , putStrLnUtf8 ) 
where import Data.Text (Text) @@ -62,6 +78,13 @@ import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), HandleType(..), Newline(..)) import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell) import System.IO.Error (isEOFError) +import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) +import Control.Exception (evaluate) +import Control.Monad ((<=<)) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Data.Text.Encoding (decodeUtf8, encodeUtf8) +import qualified Data.ByteString.Char8 as B.Char8 -- | The 'readFile' function reads a file and returns the contents of -- the file as a string. The entire file is read strictly, as with @@ -307,3 +330,70 @@ putStr = hPutStr stdout -- | Write a string to 'stdout', followed by a newline. putStrLn :: Text -> IO () putStrLn = hPutStrLn stdout + +-- | The 'readFile' function reads a file and returns the contents of +-- the file as a string. The entire file is read strictly, as with +-- 'getContents'. +readFileUtf8 :: FilePath -> IO Text +readFileUtf8 = decodeUtf8IO <=< B.readFile + +-- | Write a string to a file. The file is truncated to zero length +-- before writing begins. +writeFileUtf8 :: FilePath -> Text -> IO () +writeFileUtf8 fp = B.writeFile fp . encodeUtf8 + +-- | Write a string to the end of a file. +appendFileUtf8 :: FilePath -> Text -> IO () +appendFileUtf8 fp = B.appendFile fp . encodeUtf8 + +-- | Read the remaining contents of a 'Handle' as a string. The +-- 'Handle' is closed once the contents have been read, or if an +-- exception is thrown. +-- +-- Internally, this function reads a chunk at a time from the +-- lower-level buffering abstraction, and concatenates the chunks into +-- a single string once the entire file has been read. +-- +-- As a result, it requires approximately twice as much memory as its +-- result to construct its result. 
For files more than a half of +-- available RAM in size, this may result in memory exhaustion. +hGetContentsUtf8 :: Handle -> IO Text +hGetContentsUtf8 = decodeUtf8IO <=< B.hGetContents + +-- | Read a single line from a handle. +hGetLineUtf8 :: Handle -> IO Text +hGetLineUtf8 = decodeUtf8IO <=< B.hGetLine + +-- | Write a string to a handle. +hPutStrUtf8 :: Handle -> Text -> IO () +hPutStrUtf8 h = B.hPutStr h . encodeUtf8 + +-- | Write a string to a handle, followed by a newline. +hPutStrLnUtf8 :: Handle -> Text -> IO () +hPutStrLnUtf8 h t = hPutStrUtf8 h t >> B.hPutStr h (B.Char8.singleton '\n') + +-- | The 'interact' function takes a function of type @Text -> Text@ +-- as its argument. The entire input from the standard input device is +-- passed to this function as its argument, and the resulting string +-- is output on the standard output device. +interactUtf8 :: (Text -> Text) -> IO () +interactUtf8 f = putStrUtf8 . f =<< getContentsUtf8 + +-- | Read all user input on 'stdin' as a single string. +getContentsUtf8 :: IO Text +getContentsUtf8 = decodeUtf8IO =<< B.getContents + +-- | Read a single line of user input from 'stdin'. +getLineUtf8 :: IO Text +getLineUtf8 = decodeUtf8IO =<< B.getLine + +-- | Write a string to 'stdout'. +putStrUtf8 :: Text -> IO () +putStrUtf8 = B.putStr . encodeUtf8 + +-- | Write a string to 'stdout', followed by a newline. +putStrLnUtf8 :: Text -> IO () +putStrLnUtf8 t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') + +decodeUtf8IO :: ByteString -> IO Text +decodeUtf8IO = evaluate . decodeUtf8 diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs deleted file mode 100644 index 754995fb..00000000 --- a/src/Data/Text/IO/Utf8.hs +++ /dev/null @@ -1,104 +0,0 @@ --- | --- Module : Data.Text.IO.Utf8 --- Copyright : (c) 2009, 2010 Bryan O'Sullivan, --- (c) 2009 Simon Marlow --- License : BSD-style --- Maintainer : bos@serpentine.com --- Portability : GHC --- --- Efficient UTF-8 support for text I\/O. 
-module Data.Text.IO.Utf8 - ( - -- * File-at-a-time operations - readFile - , writeFile - , appendFile - -- * Operations on handles - , hGetContents - , hGetLine - , hPutStr - , hPutStrLn - -- * Special cases for standard input and output - , interact - , getContents - , getLine - , putStr - , putStrLn - ) where - -import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) -import Control.Exception (evaluate) -import Control.Monad ((<=<)) -import Data.ByteString (ByteString) -import qualified Data.ByteString as B -import Data.Text (Text) -import Data.Text.Encoding (decodeUtf8, encodeUtf8) -import GHC.IO.Handle (Handle) -import qualified Data.ByteString.Char8 as B.Char8 - -decodeUtf8IO :: ByteString -> IO Text -decodeUtf8IO = evaluate . decodeUtf8 - --- | The 'readFile' function reads a file and returns the contents of --- the file as a string. The entire file is read strictly, as with --- 'getContents'. -readFile :: FilePath -> IO Text -readFile = decodeUtf8IO <=< B.readFile - --- | Write a string to a file. The file is truncated to zero length --- before writing begins. -writeFile :: FilePath -> Text -> IO () -writeFile fp = B.writeFile fp . encodeUtf8 - --- | Write a string to the end of a file. -appendFile :: FilePath -> Text -> IO () -appendFile fp = B.appendFile fp . encodeUtf8 - --- | Read the remaining contents of a 'Handle' as a string. The --- 'Handle' is closed once the contents have been read, or if an --- exception is thrown. --- --- Internally, this function reads a chunk at a time from the --- lower-level buffering abstraction, and concatenates the chunks into --- a single string once the entire file has been read. --- --- As a result, it requires approximately twice as much memory as its --- result to construct its result. For files more than a half of --- available RAM in size, this may result in memory exhaustion. 
-hGetContents :: Handle -> IO Text -hGetContents = decodeUtf8IO <=< B.hGetContents - --- | Read a single line from a handle. -hGetLine :: Handle -> IO Text -hGetLine = decodeUtf8IO <=< B.hGetLine - --- | Write a string to a handle. -hPutStr :: Handle -> Text -> IO () -hPutStr h = B.hPutStr h . encodeUtf8 - --- | Write a string to a handle, followed by a newline. -hPutStrLn :: Handle -> Text -> IO () -hPutStrLn h t = hPutStr h t >> B.hPutStr h (B.Char8.singleton '\n') - --- | The 'interact' function takes a function of type @Text -> Text@ --- as its argument. The entire input from the standard input device is --- passed to this function as its argument, and the resulting string --- is output on the standard output device. -interact :: (Text -> Text) -> IO () -interact f = putStr . f =<< getContents - --- | Read all user input on 'stdin' as a single string. -getContents :: IO Text -getContents = decodeUtf8IO =<< B.getContents - --- | Read a single line of user input from 'stdin'. -getLine :: IO Text -getLine = decodeUtf8IO =<< B.getLine - --- | Write a string to 'stdout'. -putStr :: Text -> IO () -putStr = B.putStr . encodeUtf8 - --- | Write a string to 'stdout', followed by a newline. -putStrLn :: Text -> IO () -putStrLn t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') From 7e9d7e4e6332ec3f811bd2ea9c59ddb0681698b2 Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Tue, 21 Feb 2023 12:03:58 -0500 Subject: [PATCH 05/10] Revert "move back into Data.Text.IO" This reverts commit 306f7fa5a14b5bc163fcf1322c7043915acf3a28. 
--- src/Data/Text/IO.hs | 90 --------------------------------- src/Data/Text/IO/Utf8.hs | 104 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 90 deletions(-) create mode 100644 src/Data/Text/IO/Utf8.hs diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index 17e7dd49..ad8389d7 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -21,7 +21,6 @@ module Data.Text.IO ( - -- * locale-sensitive I\/O -- * File-at-a-time operations readFile , writeFile @@ -38,21 +37,6 @@ module Data.Text.IO , getLine , putStr , putStrLn - -- * File-at-a-time operations - , readFileUtf8 - , writeFileUtf8 - , appendFileUtf8 - -- * Operations on handles - , hGetContentsUtf8 - , hGetLineUtf8 - , hPutStrUtf8 - , hPutStrLnUtf8 - -- * Special cases for standard input and output - , interactUtf8 - , getContentsUtf8 - , getLineUtf8 - , putStrUtf8 - , putStrLnUtf8 ) where import Data.Text (Text) @@ -78,13 +62,6 @@ import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..), HandleType(..), Newline(..)) import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell) import System.IO.Error (isEOFError) -import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) -import Control.Exception (evaluate) -import Control.Monad ((<=<)) -import Data.ByteString (ByteString) -import qualified Data.ByteString as B -import Data.Text.Encoding (decodeUtf8, encodeUtf8) -import qualified Data.ByteString.Char8 as B.Char8 -- | The 'readFile' function reads a file and returns the contents of -- the file as a string. The entire file is read strictly, as with @@ -330,70 +307,3 @@ putStr = hPutStr stdout -- | Write a string to 'stdout', followed by a newline. putStrLn :: Text -> IO () putStrLn = hPutStrLn stdout - --- | The 'readFile' function reads a file and returns the contents of --- the file as a string. The entire file is read strictly, as with --- 'getContents'. 
-readFileUtf8 :: FilePath -> IO Text -readFileUtf8 = decodeUtf8IO <=< B.readFile - --- | Write a string to a file. The file is truncated to zero length --- before writing begins. -writeFileUtf8 :: FilePath -> Text -> IO () -writeFileUtf8 fp = B.writeFile fp . encodeUtf8 - --- | Write a string to the end of a file. -appendFileUtf8 :: FilePath -> Text -> IO () -appendFileUtf8 fp = B.appendFile fp . encodeUtf8 - --- | Read the remaining contents of a 'Handle' as a string. The --- 'Handle' is closed once the contents have been read, or if an --- exception is thrown. --- --- Internally, this function reads a chunk at a time from the --- lower-level buffering abstraction, and concatenates the chunks into --- a single string once the entire file has been read. --- --- As a result, it requires approximately twice as much memory as its --- result to construct its result. For files more than a half of --- available RAM in size, this may result in memory exhaustion. -hGetContentsUtf8 :: Handle -> IO Text -hGetContentsUtf8 = decodeUtf8IO <=< B.hGetContents - --- | Read a single line from a handle. -hGetLineUtf8 :: Handle -> IO Text -hGetLineUtf8 = decodeUtf8IO <=< B.hGetLine - --- | Write a string to a handle. -hPutStrUtf8 :: Handle -> Text -> IO () -hPutStrUtf8 h = B.hPutStr h . encodeUtf8 - --- | Write a string to a handle, followed by a newline. -hPutStrLnUtf8 :: Handle -> Text -> IO () -hPutStrLnUtf8 h t = hPutStrUtf8 h t >> B.hPutStr h (B.Char8.singleton '\n') - --- | The 'interact' function takes a function of type @Text -> Text@ --- as its argument. The entire input from the standard input device is --- passed to this function as its argument, and the resulting string --- is output on the standard output device. -interactUtf8 :: (Text -> Text) -> IO () -interactUtf8 f = putStrUtf8 . f =<< getContentsUtf8 - --- | Read all user input on 'stdin' as a single string. 
-getContentsUtf8 :: IO Text -getContentsUtf8 = decodeUtf8IO =<< B.getContents - --- | Read a single line of user input from 'stdin'. -getLineUtf8 :: IO Text -getLineUtf8 = decodeUtf8IO =<< B.getLine - --- | Write a string to 'stdout'. -putStrUtf8 :: Text -> IO () -putStrUtf8 = B.putStr . encodeUtf8 - --- | Write a string to 'stdout', followed by a newline. -putStrLnUtf8 :: Text -> IO () -putStrLnUtf8 t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') - -decodeUtf8IO :: ByteString -> IO Text -decodeUtf8IO = evaluate . decodeUtf8 diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs new file mode 100644 index 00000000..754995fb --- /dev/null +++ b/src/Data/Text/IO/Utf8.hs @@ -0,0 +1,104 @@ +-- | +-- Module : Data.Text.IO.Utf8 +-- Copyright : (c) 2009, 2010 Bryan O'Sullivan, +-- (c) 2009 Simon Marlow +-- License : BSD-style +-- Maintainer : bos@serpentine.com +-- Portability : GHC +-- +-- Efficient UTF-8 support for text I\/O. +module Data.Text.IO.Utf8 + ( + -- * File-at-a-time operations + readFile + , writeFile + , appendFile + -- * Operations on handles + , hGetContents + , hGetLine + , hPutStr + , hPutStrLn + -- * Special cases for standard input and output + , interact + , getContents + , getLine + , putStr + , putStrLn + ) where + +import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) +import Control.Exception (evaluate) +import Control.Monad ((<=<)) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Data.Text (Text) +import Data.Text.Encoding (decodeUtf8, encodeUtf8) +import GHC.IO.Handle (Handle) +import qualified Data.ByteString.Char8 as B.Char8 + +decodeUtf8IO :: ByteString -> IO Text +decodeUtf8IO = evaluate . decodeUtf8 + +-- | The 'readFile' function reads a file and returns the contents of +-- the file as a string. The entire file is read strictly, as with +-- 'getContents'. 
+readFile :: FilePath -> IO Text +readFile = decodeUtf8IO <=< B.readFile + +-- | Write a string to a file. The file is truncated to zero length +-- before writing begins. +writeFile :: FilePath -> Text -> IO () +writeFile fp = B.writeFile fp . encodeUtf8 + +-- | Write a string to the end of a file. +appendFile :: FilePath -> Text -> IO () +appendFile fp = B.appendFile fp . encodeUtf8 + +-- | Read the remaining contents of a 'Handle' as a string. The +-- 'Handle' is closed once the contents have been read, or if an +-- exception is thrown. +-- +-- Internally, this function reads a chunk at a time from the +-- lower-level buffering abstraction, and concatenates the chunks into +-- a single string once the entire file has been read. +-- +-- As a result, it requires approximately twice as much memory as its +-- result to construct its result. For files more than a half of +-- available RAM in size, this may result in memory exhaustion. +hGetContents :: Handle -> IO Text +hGetContents = decodeUtf8IO <=< B.hGetContents + +-- | Read a single line from a handle. +hGetLine :: Handle -> IO Text +hGetLine = decodeUtf8IO <=< B.hGetLine + +-- | Write a string to a handle. +hPutStr :: Handle -> Text -> IO () +hPutStr h = B.hPutStr h . encodeUtf8 + +-- | Write a string to a handle, followed by a newline. +hPutStrLn :: Handle -> Text -> IO () +hPutStrLn h t = hPutStr h t >> B.hPutStr h (B.Char8.singleton '\n') + +-- | The 'interact' function takes a function of type @Text -> Text@ +-- as its argument. The entire input from the standard input device is +-- passed to this function as its argument, and the resulting string +-- is output on the standard output device. +interact :: (Text -> Text) -> IO () +interact f = putStr . f =<< getContents + +-- | Read all user input on 'stdin' as a single string. +getContents :: IO Text +getContents = decodeUtf8IO =<< B.getContents + +-- | Read a single line of user input from 'stdin'. 
+getLine :: IO Text +getLine = decodeUtf8IO =<< B.getLine + +-- | Write a string to 'stdout'. +putStr :: Text -> IO () +putStr = B.putStr . encodeUtf8 + +-- | Write a string to 'stdout', followed by a newline. +putStrLn :: Text -> IO () +putStrLn t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') From 5e0bd99b6eae49687d229c5179a57a0a74c005cd Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Thu, 2 Mar 2023 20:46:06 -0500 Subject: [PATCH 06/10] remove unnecessary documentation --- src/Data/Text/IO/Utf8.hs | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs index 754995fb..4b0b997f 100644 --- a/src/Data/Text/IO/Utf8.hs +++ b/src/Data/Text/IO/Utf8.hs @@ -1,9 +1,6 @@ -- | -- Module : Data.Text.IO.Utf8 --- Copyright : (c) 2009, 2010 Bryan O'Sullivan, --- (c) 2009 Simon Marlow -- License : BSD-style --- Maintainer : bos@serpentine.com -- Portability : GHC -- -- Efficient UTF-8 support for text I\/O. @@ -54,17 +51,7 @@ writeFile fp = B.writeFile fp . encodeUtf8 appendFile :: FilePath -> Text -> IO () appendFile fp = B.appendFile fp . encodeUtf8 --- | Read the remaining contents of a 'Handle' as a string. The --- 'Handle' is closed once the contents have been read, or if an --- exception is thrown. --- --- Internally, this function reads a chunk at a time from the --- lower-level buffering abstraction, and concatenates the chunks into --- a single string once the entire file has been read. --- --- As a result, it requires approximately twice as much memory as its --- result to construct its result. For files more than a half of --- available RAM in size, this may result in memory exhaustion. +-- | Read the remaining contents of a 'Handle' as a string. 
hGetContents :: Handle -> IO Text hGetContents = decodeUtf8IO <=< B.hGetContents From acec5dc6bc4c9db98335c9de026a46ae56d9f04a Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Thu, 2 Mar 2023 20:55:49 -0500 Subject: [PATCH 07/10] add module to cabal file and add note --- src/Data/Text/IO.hs | 5 ++++- text.cabal | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index ad8389d7..a4e5b2d2 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -13,8 +13,11 @@ -- The functions in this module obey the runtime system's locale, -- character set encoding, and line ending conversion settings. -- +-- If you want to do I\/O using the UTF-8 encoding, use "Data.Text.IO.Utf8", +-- which is faster than this module. +-- -- If you know in advance that you will be working with data that has --- a specific encoding (e.g. UTF-8), and your application is highly +-- a specific encoding, and your application is highly -- performance sensitive, you may find that it is faster to perform -- I\/O with bytestrings and to encode and decode yourself than to use -- the functions in this module.
diff --git a/text.cabal b/text.cabal index 05946247..9d340387 100644 --- a/text.cabal +++ b/text.cabal @@ -143,6 +143,7 @@ library Data.Text.Encoding.Error Data.Text.Foreign Data.Text.IO + Data.Text.IO.Utf8 Data.Text.Internal Data.Text.Internal.Builder Data.Text.Internal.Builder.Functions From a59e97e7cabe5a0dae8d64ff47be3f25413dd35a Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Sun, 5 Mar 2023 19:54:23 -0500 Subject: [PATCH 08/10] add tests --- tests/Tests/Properties/LowLevel.hs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/Tests/Properties/LowLevel.hs b/tests/Tests/Properties/LowLevel.hs index c3b0a605..11b72c9d 100644 --- a/tests/Tests/Properties/LowLevel.hs +++ b/tests/Tests/Properties/LowLevel.hs @@ -38,6 +38,7 @@ import Test.Tasty.Inspection (inspectObligations, hasNoTypes, doesNotUseAnyOf) import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S import qualified GHC.CString as GHC +import qualified Data.Text.IO.Utf8 as TU #endif mulRef :: (Integral a, Bounded a) => a -> a -> Maybe a @@ -107,6 +108,9 @@ t_write_read_line m b t = write_read (T.concat . take 1) T.filter T.hPutStrLn tl_write_read_line m b t = write_read (TL.concat . take 1) TL.filter TL.hPutStrLn TL.hGetLine m b [t] +utf8_write_read = write_read T.unlines T.filter TU.hPutStr TU.hGetContents +utf8_write_read_line m b t = write_read (T.concat . 
take 1) T.filter TU.hPutStrLn + TU.hGetLine m b [t] testLowLevel :: TestTree testLowLevel = @@ -142,7 +146,9 @@ testLowLevel = testProperty "t_write_read" t_write_read, testProperty "tl_write_read" tl_write_read, testProperty "t_write_read_line" t_write_read_line, - testProperty "tl_write_read_line" tl_write_read_line + testProperty "tl_write_read_line" tl_write_read_line, + testProperty "utf8_write_read" utf8_write_read, + testProperty "utf8_write_read_line" utf8_write_read_line -- These tests are subject to I/O race conditions -- testProperty "t_put_get" t_put_get, -- testProperty "tl_put_get" tl_put_get From e29b6359e1e367596d924da3c21865e2bcab22aa Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Fri, 10 Mar 2023 13:54:38 -0500 Subject: [PATCH 09/10] fix import --- tests/Tests/Properties/LowLevel.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Tests/Properties/LowLevel.hs b/tests/Tests/Properties/LowLevel.hs index 11b72c9d..5a7c7ceb 100644 --- a/tests/Tests/Properties/LowLevel.hs +++ b/tests/Tests/Properties/LowLevel.hs @@ -31,6 +31,7 @@ import qualified Data.Text as T import qualified Data.Text.IO as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.IO as TL +import qualified Data.Text.IO.Utf8 as TU import qualified System.IO as IO #ifdef MIN_VERSION_tasty_inspection_testing @@ -38,7 +39,6 @@ import Test.Tasty.Inspection (inspectObligations, hasNoTypes, doesNotUseAnyOf) import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S import qualified GHC.CString as GHC -import qualified Data.Text.IO.Utf8 as TU #endif mulRef :: (Integral a, Bounded a) => a -> a -> Maybe a From 23c69de44ad6310fdba4faa87383d1839ad797ec Mon Sep 17 00:00:00 2001 From: Brian Shu Date: Fri, 10 Mar 2023 14:02:43 -0500 Subject: [PATCH 10/10] add docs --- src/Data/Text/IO/Utf8.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs index 
4b0b997f..2d0cd495 100644 --- a/src/Data/Text/IO/Utf8.hs +++ b/src/Data/Text/IO/Utf8.hs @@ -4,6 +4,8 @@ -- Portability : GHC -- -- Efficient UTF-8 support for text I\/O. +-- Unlike "Data.Text.IO", these functions do not depend on the locale +-- and perform no line-ending conversion. module Data.Text.IO.Utf8 ( -- * File-at-a-time operations