Skip to content

Commit 08b378f

Browse files
MirceaSttuegel
andauthored
Improve the performance of the Kore parser (#2296)
* Inline some primitive parsers * Kore.Parser.Lexer: Remove unused primitive parsers * Run Parser over Text, not String Using Text instead of String reduces peak memory use by 43% and total allocation by 24%. Never use String. Co-authored-by: Thomas Tuegel <[email protected]>
1 parent 3ba6fd3 commit 08b378f

File tree

16 files changed

+118
-119
lines changed

16 files changed

+118
-119
lines changed

kore/app/format/Main.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module Main (main) where
22

33
import Prelude.Kore
44

5+
import qualified Data.Text.IO as Text
56
import Options.Applicative
67
import System.IO
78
( stdout
@@ -78,4 +79,5 @@ main =
7879
-- | Read a 'KoreDefinition' from the given file name or signal an error.
7980
readKoreOrDie :: FilePath -> IO ParsedDefinition
8081
readKoreOrDie fileName =
81-
readFile fileName >>= either error return . parseKoreDefinition fileName
82+
Text.readFile fileName
83+
>>= either error return . parseKoreDefinition fileName

kore/app/share/GlobalMain.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ import Data.Text
4848
( Text
4949
, pack
5050
)
51+
import qualified Data.Text.IO as Text
5152
import Data.Time.Format
5253
( defaultTimeLocale
5354
, formatTime
@@ -477,12 +478,14 @@ parseDefinition :: FilePath -> Main ParsedDefinition
477478
parseDefinition = mainParse parseKoreDefinition
478479

479480
mainParse
480-
:: (FilePath -> String -> Either String a)
481+
:: (FilePath -> Text -> Either String a)
481482
-> String
482483
-> Main a
483484
mainParse parser fileName = do
484485
contents <-
485-
clockSomethingIO "Reading the input file" $ liftIO $ readFile fileName
486+
Text.readFile fileName
487+
& liftIO
488+
& clockSomethingIO "Reading the input file"
486489
parseResult <-
487490
clockSomething "Parsing the file" (parser fileName contents)
488491
case parseResult of

kore/src/Kore/Log/KoreLogOptions.hs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -210,17 +210,17 @@ parseEntryTypes =
210210

211211
parseCommaSeparatedEntries :: Options.ReadM EntryTypes
212212
parseCommaSeparatedEntries =
213-
Options.maybeReader $ Parser.parseMaybe parseEntryTypes'
213+
Options.maybeReader $ Parser.parseMaybe parseEntryTypes' . Text.pack
214214
where
215-
parseEntryTypes' :: Parser.Parsec String String EntryTypes
215+
parseEntryTypes' :: Parser.Parsec String Text EntryTypes
216216
parseEntryTypes' = Set.fromList <$> Parser.sepEndBy parseSomeTypeRep comma
217217

218218
comma = void (Parser.char ',')
219219

220-
parseSomeTypeRep :: Parser.Parsec String String SomeTypeRep
220+
parseSomeTypeRep :: Parser.Parsec String Text SomeTypeRep
221221
parseSomeTypeRep =
222222
Parser.takeWhile1P (Just "SomeTypeRep") (flip notElem [',', ' '])
223-
>>= parseEntryType . Text.pack
223+
>>= parseEntryType
224224

225225
parseSeverity :: Parser Severity
226226
parseSeverity =

kore/src/Kore/Log/Registry.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ lookupTextFromTypeWithError type' =
235235
<> show type'
236236
<> " It should be added to Kore.Log.Registry.registry."
237237

238-
parseEntryType :: Ord e => Text -> Parser.Parsec e String SomeTypeRep
238+
parseEntryType :: Ord e => Text -> Parser.Parsec e Text SomeTypeRep
239239
parseEntryType entryText =
240240
maybe empty return
241241
$ Map.lookup entryText (textToType registry)

kore/src/Kore/Parser.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ module Kore.Parser
3434

3535
import Prelude.Kore
3636

37+
import Data.Text
38+
( Text
39+
)
3740
import Text.Megaparsec
3841
( eof
3942
)
@@ -58,7 +61,7 @@ else.
5861
-}
5962
parseKoreDefinition
6063
:: FilePath -- ^ Filename used for error messages
61-
-> String -- ^ The concrete syntax of a valid Kore definition
64+
-> Text -- ^ The concrete syntax of a valid Kore definition
6265
-> Either String ParsedDefinition
6366
parseKoreDefinition = parseOnly (Lexer.space *> koreParser)
6467

@@ -70,6 +73,6 @@ message otherwise. The input must contain a valid Kore pattern and nothing else.
7073
-}
7174
parseKorePattern
7275
:: FilePath -- ^ Filename used for error messages
73-
-> String -- ^ The concrete syntax of a valid Kore pattern
76+
-> Text -- ^ The concrete syntax of a valid Kore pattern
7477
-> Either String ParsedPattern
7578
parseKorePattern = parseOnly (Lexer.space *> Parser.parsePattern)

kore/src/Kore/Parser/Lexer.hs

Lines changed: 43 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ module Kore.Parser.Lexer
2525
, parseId
2626
, parseAnyId, parseSetId, isSymbolId
2727
, isElementVariableId, isSetVariableId
28-
, elementVariableIdParser
29-
, setVariableIdParser
3028
, parseSortId
3129
, parseSymbolId
3230
, parseModuleName
@@ -48,6 +46,9 @@ import Data.Map.Strict
4846
( Map
4947
)
5048
import qualified Data.Map.Strict as Map
49+
import Data.Text
50+
( Text
51+
)
5152
import qualified Data.Text as Text
5253
import Text.Megaparsec
5354
( SourcePos (..)
@@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
8485
where
8586
lineComment = L.skipLineComment "//"
8687
blockComment = L.skipBlockComment "/*" "*/"
88+
{-# INLINE space #-}
8789

8890
{- | Parse the character, but skip its result.
8991
-}
@@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
9799
See also: 'L.symbol', 'space'
98100
99101
-}
100-
symbol :: String -> Parser ()
102+
symbol :: Text -> Parser ()
101103
symbol = Monad.void . L.symbol space
102104

103105
colon :: Parser ()
@@ -163,7 +165,7 @@ consumes any trailing whitespace.
163165
See also: 'space'
164166
165167
-}
166-
keyword :: String -> Parser ()
168+
keyword :: Text -> Parser ()
167169
keyword s = lexeme $ do
168170
_ <- Parser.chunk s
169171
-- Check that the next character cannot be part of an @id@, i.e. check that
@@ -183,19 +185,16 @@ sourcePosToFileLocation
183185
, column = unPos column'
184186
}
185187

186-
{- Takes a parser for the string of the identifier
187-
and returns an 'Id' annotated with position.
188-
-}
189-
stringParserToIdParser :: Parser String -> Parser Id
190-
stringParserToIdParser stringRawParser = do
188+
{- | Annotate a 'Text' parser with an 'AstLocation'.
189+
-}
190+
parseIntoId :: Parser Text -> Parser Id
191+
parseIntoId stringRawParser = do
191192
!pos <- sourcePosToFileLocation <$> getSourcePos
192-
name <- lexeme stringRawParser
193-
return Id
194-
{ getId = Text.pack name
195-
, idLocation = AstLocationFile pos
196-
}
193+
getId <- lexeme stringRawParser
194+
return Id { getId, idLocation = AstLocationFile pos }
195+
{-# INLINE parseIntoId #-}
197196

198-
koreKeywordsSet :: HashSet String
197+
koreKeywordsSet :: HashSet Text
199198
koreKeywordsSet = HashSet.fromList keywords
200199
where
201200
keywords =
@@ -224,17 +223,18 @@ genericIdRawParser
224223
:: (Char -> Bool) -- ^ contains the characters allowed for @⟨prefix-char⟩@.
225224
-> (Char -> Bool) -- ^ contains the characters allowed for @⟨body-char⟩@.
226225
-> IdKeywordParsing
227-
-> Parser String
226+
-> Parser Text
228227
genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
229-
c <- Parser.satisfy isFirstChar <?> "first identifier character"
230-
cs <- Parser.takeWhileP (Just "identifier character") isBodyChar
231-
let genericId = c : cs
232-
keywordsForbidden = idKeywordParsing == KeywordsForbidden
228+
(genericId, _) <- Parser.match $ do
229+
_ <- Parser.satisfy isFirstChar <?> "first identifier character"
230+
_ <- Parser.takeWhileP (Just "identifier character") isBodyChar
231+
pure ()
232+
let keywordsForbidden = idKeywordParsing == KeywordsForbidden
233233
isKeyword = HashSet.member genericId koreKeywordsSet
234234
when (keywordsForbidden && isKeyword)
235235
$ fail
236236
( "Identifiers should not be keywords: '"
237-
++ genericId
237+
++ Text.unpack genericId
238238
++ "'."
239239
)
240240
return genericId
@@ -293,11 +293,14 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
293293
An identifier cannot be a keyword.
294294
-}
295295
parseId :: Parser Id
296-
parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden)
296+
parseId = parseIntoId parseIdText
297297

298-
parseIdRaw :: IdKeywordParsing -> Parser String
298+
parseIdRaw :: IdKeywordParsing -> Parser Text
299299
parseIdRaw = genericIdRawParser isIdFirstChar isIdChar
300300

301+
parseIdText :: Parser Text
302+
parseIdText = parseIdRaw KeywordsForbidden
303+
301304
{- | Parse a module name.
302305
303306
@
@@ -309,7 +312,7 @@ parseModuleName = lexeme moduleNameRawParser
309312

310313
moduleNameRawParser :: Parser ModuleName
311314
moduleNameRawParser =
312-
ModuleName . Text.pack <$> parseIdRaw KeywordsForbidden
315+
ModuleName <$> parseIdRaw KeywordsForbidden
313316

314317
{- | Parses a 'Sort' 'Id'
315318
@@ -321,7 +324,9 @@ parseSortId :: Parser Id
321324
parseSortId = parseId <?> "sort identifier"
322325

323326
parseAnyId :: Parser Id
324-
parseAnyId = (parseSpecialId <|> parseSetId <|> parseId) <?> "identifier"
327+
parseAnyId = parseIntoId
328+
(parseSpecialIdText <|> parseSetIdText <|> parseIdText)
329+
<?> "identifier"
325330

326331
isSymbolId :: Id -> Bool
327332
isSymbolId Id { getId } =
@@ -336,19 +341,16 @@ isElementVariableId Id { getId } =
336341
isSetVariableId :: Id -> Bool
337342
isSetVariableId Id { getId } = Text.head getId == '@'
338343

339-
parseSpecialId :: Parser Id
340-
parseSpecialId =
341-
stringParserToIdParser parseSpecialIdString
342-
where
343-
parseSpecialIdString =
344-
(:) <$> Parser.char '\\' <*> parseIdRaw KeywordsPermitted
344+
parseSpecialIdText :: Parser Text
345+
parseSpecialIdText = fst <$> Parser.match
346+
(Parser.char '\\' >> parseIdRaw KeywordsPermitted)
347+
348+
parseSetIdText :: Parser Text
349+
parseSetIdText = fst <$> Parser.match
350+
(Parser.char '@' >> parseIdRaw KeywordsPermitted)
345351

346352
parseSetId :: Parser Id
347-
parseSetId =
348-
stringParserToIdParser parseSetIdString
349-
where
350-
parseSetIdString =
351-
(:) <$> Parser.char '@' <*> parseIdRaw KeywordsPermitted
353+
parseSetId = parseIntoId parseSetIdText
352354

353355
{- | Parses a 'Symbol' 'Id'
354356
@@ -357,41 +359,12 @@ parseSetId =
357359
@
358360
-}
359361
parseSymbolId :: Parser Id
360-
parseSymbolId =
361-
stringParserToIdParser symbolIdRawParser <?> "symbol or alias identifier"
362-
363-
symbolIdRawParser :: Parser String
364-
symbolIdRawParser = do
365-
c <- peekChar'
366-
if c == '\\'
367-
then do
368-
skipChar '\\'
369-
(c :) <$> parseIdRaw KeywordsPermitted
370-
else parseIdRaw KeywordsForbidden
371-
372-
{-|Parses a @set-variable-id@, which always starts with @\@@.
373-
374-
@
375-
<set-variable-id> ::= ['@'] <id>
376-
@
377-
-}
378-
setVariableIdParser :: Parser Id
379-
setVariableIdParser = stringParserToIdParser setVariableIdRawParser
362+
parseSymbolId = parseIntoId symbolIdRawParser <?> "symbol or alias identifier"
380363

381-
setVariableIdRawParser :: Parser String
382-
setVariableIdRawParser = do
383-
start <- Parser.char '@'
384-
end <- parseIdRaw KeywordsPermitted
385-
return (start:end)
386-
387-
{-| Parses an @element-variable-id@
388-
389-
@
390-
<element-variable-id> ::= <id>
391-
@
392-
-}
393-
elementVariableIdParser :: Parser Id
394-
elementVariableIdParser = parseId
364+
symbolIdRawParser :: Parser Text
365+
symbolIdRawParser = fmap fst $ Parser.match $
366+
(Parser.char '\\' >> parseIdRaw KeywordsPermitted)
367+
<|> parseIdRaw KeywordsForbidden
395368

396369
{- | Parses a C-style string literal, unescaping it.
397370

kore/src/Kore/Parser/ParserUtils.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ import Prelude.Kore hiding
2121
( takeWhile
2222
)
2323

24+
import Data.Text
25+
( Text
26+
)
2427
import Data.Void
2528
( Void
2629
)
@@ -34,7 +37,7 @@ import Text.Megaparsec.Error
3437
( errorBundlePretty
3538
)
3639

37-
type Parser = Parsec Void String
40+
type Parser = Parsec Void Text
3841

3942
{-|'peekChar' is similar to Attoparsec's 'peekChar'. It returns the next
4043
available character in the input, without consuming it. Returns 'Nothing'
@@ -55,7 +58,7 @@ peekChar' = lookAhead anySingle
5558
a FilePath that is used for generating error messages and an input string
5659
and produces either a parsed object, or an error message.
5760
-}
58-
parseOnly :: Parser a -> FilePath -> String -> Either String a
61+
parseOnly :: Parser a -> FilePath -> Text -> Either String a
5962
parseOnly parser filePathForErrors input =
6063
case parse parser filePathForErrors input of
6164
Left err -> Left (errorBundlePretty err)

kore/src/Kore/Repl.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import Data.List
4545
)
4646
import qualified Data.Map.Strict as Map
4747
import qualified Data.Sequence as Seq
48+
import qualified Data.Text as Text
4849
import Kore.Attribute.RuleIndex
4950
( RuleIndex (..)
5051
)
@@ -176,7 +177,7 @@ runRepl
176177
repl0 = do
177178
str <- prompt
178179
let command =
179-
fromMaybe ShowUsage $ parseMaybe commandParser str
180+
fromMaybe ShowUsage $ parseMaybe commandParser (Text.pack str)
180181
when (shouldStore command) $ field @"commands" Lens.%= (Seq.|> str)
181182
void $ replInterpreter printIfNotEmpty command
182183

kore/src/Kore/Repl/Interpreter.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ tryAlias replAlias@ReplAlias { name } printAux printKore = do
12511251
parsedCommand =
12521252
fromMaybe
12531253
ShowUsage
1254-
$ parseMaybe commandParser command
1254+
$ parseMaybe commandParser (Text.pack command)
12551255
config <- ask
12561256
(cont, st') <- get >>= runInterpreter parsedCommand config
12571257
put st'
@@ -1500,7 +1500,7 @@ parseEvalScript file scriptModeOutput = do
15001500
if exists
15011501
then do
15021502
contents <- lift . liftIO $ readFile file
1503-
let result = runParser scriptParser file contents
1503+
let result = runParser scriptParser file (Text.pack contents)
15041504
either parseFailed executeScript result
15051505
else lift . liftIO . putStrLn $ "Cannot find " <> file
15061506

kore/src/Kore/Repl/Parser.hs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ import qualified Data.Set as Set
3131
import Data.String
3232
( IsString (..)
3333
)
34+
import Data.Text
35+
( Text
36+
)
3437
import qualified Data.Text as Text
3538
import Text.Megaparsec
3639
( Parsec
@@ -57,7 +60,7 @@ import qualified Kore.Log as Log
5760
import qualified Kore.Log.Registry as Log
5861
import Kore.Repl.Data
5962

60-
type Parser = Parsec ReplParseError String
63+
type Parser = Parsec ReplParseError Text
6164

6265
newtype ReplParseError = ReplParseError { unReplParseError :: String }
6366
deriving (Eq, Ord)
@@ -449,7 +452,7 @@ spaceNoNewline :: Parser ()
449452
spaceNoNewline =
450453
void . many $ oneOf [' ', '\t', '\r', '\f', '\v']
451454

452-
literal :: String -> Parser ()
455+
literal :: Text -> Parser ()
453456
literal str = void $ Char.string str <* spaceNoNewline
454457

455458
decimal :: Integral a => Parser a

0 commit comments

Comments
 (0)