Skip to content

Fix consumed semantics #239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Notable changes to this project are documented in this file. The format is based

Bugfixes:
- `float` parser of `GenTokenParser` does not parse negative numbers (by @mstream)
- Fixes `consumed` semantics which could cause unexpected backtracking instead of a failure (by @natefaubion)

Breaking changes:

Expand Down
36 changes: 23 additions & 13 deletions src/Parsing.purs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ derive instance ordParseError :: Ord ParseError
-- | - If the left parser fails *without consuming any input*, then backtrack and try the right parser.
-- | - If the left parser fails and consumes input, then fail immediately.
data ParseState s = ParseState s Position Boolean

-- ParseState constructor has three parameters,
-- s: the remaining input
-- Position: the current position
Expand All @@ -93,6 +94,12 @@ data ParseState s = ParseState s Position Boolean
--
-- http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/

appendConsumed :: forall a. ParseState a -> ParseState a -> ParseState a
appendConsumed (ParseState _ _ consumed1) state@(ParseState a b consumed2) =
case consumed1, consumed2 of
true, false -> ParseState a b true
_, _ -> state

-- | The `Parser s` monad with a monad transformer parameter `m`.
newtype ParserT s m a = ParserT
-- The parser is implemented using continuation-passing-style with uncurried
Expand Down Expand Up @@ -231,11 +238,12 @@ instance Apply (ParserT s m) where
more \_ ->
runFn5 k1 state1 more lift throw
( mkFn2 \state2 f ->
more \_ ->
runFn5 k2 state2 more lift throw
more \_ -> do
let state2' = state1 `appendConsumed` state2
runFn5 k2 state2' more lift throw
( mkFn2 \state3 a ->
more \_ ->
runFn2 done state3 (f a)
runFn2 done (state2' `appendConsumed` state3) (f a)
)
)
)
Expand All @@ -254,7 +262,7 @@ instance Bind (ParserT s m) where
( mkFn2 \state2 a ->
more \_ -> do
let (ParserT k2) = next a
runFn5 k2 state2 more lift throw done
runFn5 k2 (state1 `appendConsumed` state2) more lift throw done
)
)

Expand All @@ -271,15 +279,17 @@ instance MonadRec (ParserT s m) where
loop = mkFn3 \state2 arg gas -> do
let (ParserT k1) = next arg
runFn5 k1 state2 more lift throw
( mkFn2 \state3 step -> case step of
Loop nextArg ->
if gas == 0 then
more \_ ->
runFn3 loop state3 nextArg 30
else
runFn3 loop state3 nextArg (gas - 1)
Done res ->
runFn2 done state3 res
( mkFn2 \state3 step -> do
let state3' = state2 `appendConsumed` state3
case step of
Loop nextArg ->
if gas == 0 then
more \_ ->
runFn3 loop state3' nextArg 30
else
runFn3 loop state3' nextArg (gas - 1)
Done res ->
runFn2 done state3' res
)
runFn3 loop state1 initArg 30
)
Expand Down
4 changes: 2 additions & 2 deletions src/Parsing/String.purs
Original file line number Diff line number Diff line change
Expand Up @@ -282,12 +282,12 @@ consumeWith
. (String -> Either String { value :: a, consumed :: String, remainder :: String })
-> ParserT String m a
consumeWith f = ParserT
( mkFn5 \state1@(ParseState input pos oldConsumed) _ _ throw done ->
( mkFn5 \state1@(ParseState input pos _) _ _ throw done ->
case f input of
Left err ->
runFn2 throw state1 (ParseError err pos)
Right { value, consumed, remainder } ->
runFn2 done (ParseState remainder (updatePosString pos consumed remainder) (oldConsumed || not (String.null consumed))) value
runFn2 done (ParseState remainder (updatePosString pos consumed remainder) (not (String.null consumed))) value
)

-- | Combinator which finds the first position in the input `String` where the
Expand Down
51 changes: 48 additions & 3 deletions test/Test/Main.purs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@

module Test.Main where

import Prelude (class Eq, class Show, Unit, append, bind, const, discard, div, flip, identity, map, negate, pure, show, unit, void, ($), ($>), (*), (*>), (+), (-), (/), (/=), (<$), (<$>), (<*), (<*>), (<>), (==), (>>=))
import Prelude hiding (between, when)

import Control.Alt ((<|>))
import Control.Lazy (fix, defer)
import Control.Monad.Rec.Class (Step(..), tailRecM)
import Control.Monad.State (State, lift, modify, runState)
import Data.Array (some, toUnfoldable)
import Data.Array as Array
import Data.Bifunctor (lmap, rmap)
import Data.CodePoint.Unicode as CodePoint.Unicode
import Data.Either (Either(..), either, fromLeft, hush)
import Data.Foldable (oneOf)
import Data.Function.Uncurried (mkFn5, runFn2)
import Data.List (List(..), fromFoldable, (:))
import Data.List as List
import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons')
Expand All @@ -35,7 +37,7 @@ import Effect (Effect)
import Effect.Console (log, logShow)
import Effect.Unsafe (unsafePerformEffect)
import Node.Process (lookupEnv)
import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorPosition, position, region, runParser)
import Parsing (ParseError(..), ParseState(..), Parser, ParserT(..), Position(..), consume, fail, getParserT, initialPos, parseErrorPosition, position, region, runParser)
import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optional, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, tryRethrow, (<?>), (<??>), (<~?>))
import Parsing.Combinators.Array as Combinators.Array
import Parsing.Expr (Assoc(..), Operator(..), buildExprParser)
Expand All @@ -49,7 +51,7 @@ import Parsing.Token as Token
import Partial.Unsafe (unsafePartial)
import Test.Assert (assert', assertEqual')
import Test.IndentationTests as IndentationTests
import Test.Lib
import Test.Lib (class ParseErrorHuman__OnlyString, TestM, mkParseErrorTestMessage, mkParseErrorTestPosition, mkParseTest)

parseTest :: forall s a. Show a => Eq a => ParseErrorHuman__OnlyString s => s -> a -> Parser s a -> Effect Unit
parseTest = mkParseTest runParser
Expand All @@ -60,6 +62,13 @@ parseErrorTestPosition = mkParseErrorTestPosition runParser
parseErrorTestMessage :: forall s a. Show a => Parser s a -> s -> String -> Effect Unit
parseErrorTestMessage = mkParseErrorTestMessage runParser

parseState :: forall m s a. (ParseState s -> Tuple (ParseState s) a) -> ParserT s m a
parseState k = ParserT
( mkFn5 \state1 _ _ _ done -> do
let Tuple state2 res = k state1
runFn2 done state2 res
)

parens :: forall m a. ParserT String m a -> ParserT String m a
parens = between (string "(") (string ")")

Expand Down Expand Up @@ -581,8 +590,44 @@ takeWhilePropagateFail = do
"f"
(Position { index: 1, line: 1, column: 2 })

applicativeSemantics :: Parser String String
applicativeSemantics =
( string "foo"
<* parseState (\(ParseState a b _) -> Tuple (ParseState a b false) unit)
<* fail "fail"
)
<|> pure ""

bindSemantics :: Parser String String
bindSemantics =
( do
_ <- string "foo"
parseState (\(ParseState a b _) -> Tuple (ParseState a b false) unit)
fail "fail"
)
<|> pure ""

monadRecSemantics :: Parser String String
monadRecSemantics = loop <|> pure ""
where
loop = tailRecM
( case _ of
1 -> do
_ <- string "foo"
pure (Loop 2)
2 ->
parseState (\(ParseState a b _) -> Tuple (ParseState a b false) (Loop 3))
_ ->
fail "fail"
)
1

main :: Effect Unit
main = do
log "\nTESTS Semantics\n"
parseErrorTestMessage applicativeSemantics "foo" "fail"
parseErrorTestMessage bindSemantics "foo" "fail"
parseErrorTestMessage monadRecSemantics "foo" "fail"

log "\nTESTS Indentation\n"
IndentationTests.testIndentationParser
Expand Down