From 3bfc9d40c2e9f1e6473f443dcb0c8a77c82bd9d3 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 6 Mar 2025 06:06:47 -0800 Subject: [PATCH 1/3] Fix consumed semantics This effectively treats the consumed flag as a monoid appended on each sequential action (ie, Writer). This means it can always be a local decision so low-level combinators don't need to consult the previous state. Fixes #235 --- src/Parsing.purs | 35 +++++++++++++++++----------- src/Parsing/String.purs | 4 ++-- test/Test/Main.purs | 51 ++++++++++++++++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/src/Parsing.purs b/src/Parsing.purs index 11e0d67..a6d5110 100644 --- a/src/Parsing.purs +++ b/src/Parsing.purs @@ -93,6 +93,12 @@ data ParseState s = ParseState s Position Boolean -- -- http://blog.ezyang.com/2014/05/parsec-try-a-or-b-considered-harmful/ +appendConsumed :: forall a. ParseState a -> ParseState a -> ParseState a +appendConsumed (ParseState _ _ consumed1) state@(ParseState a b consumed2) = + case consumed1, consumed2 of + true, false -> ParseState a b true + _, _ -> state + -- | The `Parser s` monad with a monad transformer parameter `m`. newtype ParserT s m a = ParserT -- The parser is implemented using continuation-passing-style with uncurried @@ -231,11 +237,12 @@ instance Apply (ParserT s m) where more \_ -> runFn5 k1 state1 more lift throw ( mkFn2 \state2 f -> - more \_ -> - runFn5 k2 state2 more lift throw + more \_ -> do + let state2' = state1 `appendConsumed` state2 + runFn5 k2 state2' more lift throw ( mkFn2 \state3 a -> more \_ -> - runFn2 done state3 (f a) + runFn2 done (state2' `appendConsumed` state3) (f a) ) ) ) @@ -254,7 +261,7 @@ instance Bind (ParserT s m) where ( mkFn2 \state2 a -> more \_ -> do let (ParserT k2) = next a - runFn5 k2 state2 more lift throw done + runFn5 k2 (state1 `appendConsumed` state2) more lift throw done ) ) @@ -271,15 +278,17 @@ instance MonadRec (ParserT s m) where loop = mkFn3 \state2 arg gas -> do let (ParserT k1) = next arg runFn5 k1 state2 more lift throw - ( mkFn2 \state3 step -> case step of - Loop nextArg -> - if gas == 0 then - more \_ -> - runFn3 loop state3 nextArg 30 - else - runFn3 loop state3 nextArg (gas - 1) - Done res -> - runFn2 done state3 res + ( mkFn2 \state3 step -> do + let state3' = state2 `appendConsumed` state3 + case step of + Loop nextArg -> + if gas == 0 then + more \_ -> + runFn3 loop state3' nextArg 30 + else + runFn3 loop state3' nextArg (gas - 1) + Done res -> + runFn2 done state3' res ) runFn3 loop state1 initArg 30 ) diff --git a/src/Parsing/String.purs b/src/Parsing/String.purs index 3ce0854..29c7652 100644 --- a/src/Parsing/String.purs +++ b/src/Parsing/String.purs @@ -282,12 +282,12 @@ consumeWith . (String -> Either String { value :: a, consumed :: String, remainder :: String }) -> ParserT String m a consumeWith f = ParserT - ( mkFn5 \state1@(ParseState input pos oldConsumed) _ _ throw done -> + ( mkFn5 \state1@(ParseState input pos _) _ _ throw done -> case f input of Left err -> runFn2 throw state1 (ParseError err pos) Right { value, consumed, remainder } -> - runFn2 done (ParseState remainder (updatePosString pos consumed remainder) (oldConsumed || not (String.null consumed))) value + runFn2 done (ParseState remainder (updatePosString pos consumed remainder) (not (String.null consumed))) value ) -- | Combinator which finds the first position in the input `String` where the diff --git a/test/Test/Main.purs b/test/Test/Main.purs index 68fdb6f..0c78b66 100644 --- a/test/Test/Main.purs +++ b/test/Test/Main.purs @@ -5,10 +5,11 @@ module Test.Main where -import Prelude (class Eq, class Show, Unit, append, bind, const, discard, div, flip, identity, map, negate, pure, show, unit, void, ($), ($>), (*), (*>), (+), (-), (/), (/=), (<$), (<$>), (<*), (<*>), (<>), (==), (>>=)) +import Prelude hiding (between, when) import Control.Alt ((<|>)) import Control.Lazy (fix, defer) +import Control.Monad.Rec.Class (Step(..), tailRecM) import Control.Monad.State (State, lift, modify, runState) import Data.Array (some, toUnfoldable) import Data.Array as Array @@ -16,6 +17,7 @@ import Data.Bifunctor (lmap, rmap) import Data.CodePoint.Unicode as CodePoint.Unicode import Data.Either (Either(..), either, fromLeft, hush) import Data.Foldable (oneOf) +import Data.Function.Uncurried (mkFn5, runFn2) import Data.List (List(..), fromFoldable, (:)) import Data.List as List import Data.List.NonEmpty (NonEmptyList(..), catMaybes, cons, cons') @@ -35,7 +37,7 @@ import Effect (Effect) import Effect.Console (log, logShow) import Effect.Unsafe (unsafePerformEffect) import Node.Process (lookupEnv) -import Parsing (ParseError(..), ParseState(..), Parser, ParserT, Position(..), consume, fail, getParserT, initialPos, parseErrorPosition, position, region, runParser) +import Parsing (ParseError(..), ParseState(..), Parser, ParserT(..), Position(..), consume, fail, getParserT, initialPos, parseErrorPosition, position, region, runParser) import Parsing.Combinators (advance, between, chainl, chainl1, chainr, chainr1, choice, empty, endBy, endBy1, lookAhead, many, many1, many1Till, many1Till_, manyIndex, manyTill, manyTill_, notFollowedBy, optional, optionMaybe, replicateA, sepBy, sepBy1, sepEndBy, sepEndBy1, skipMany, skipMany1, try, tryRethrow, (), (), (<~?>)) import Parsing.Combinators.Array as Combinators.Array import Parsing.Expr (Assoc(..), Operator(..), buildExprParser) @@ -49,7 +51,7 @@ import Parsing.Token as Token import Partial.Unsafe (unsafePartial) import Test.Assert (assert', assertEqual') import Test.IndentationTests as IndentationTests -import Test.Lib +import Test.Lib (class ParseErrorHuman__OnlyString, TestM, mkParseErrorTestMessage, mkParseErrorTestPosition, mkParseTest) parseTest :: forall s a. Show a => Eq a => ParseErrorHuman__OnlyString s => s -> a -> Parser s a -> Effect Unit parseTest = mkParseTest runParser @@ -60,6 +62,13 @@ parseErrorTestPosition = mkParseErrorTestPosition runParser parseErrorTestMessage :: forall s a. Show a => Parser s a -> s -> String -> Effect Unit parseErrorTestMessage = mkParseErrorTestMessage runParser +parseState :: forall m s a. (ParseState s -> Tuple (ParseState s) a) -> ParserT s m a +parseState k = ParserT + ( mkFn5 \state1 _ _ _ done -> do + let Tuple state2 res = k state1 + runFn2 done state2 res + ) + parens :: forall m a. ParserT String m a -> ParserT String m a parens = between (string "(") (string ")") @@ -581,8 +590,44 @@ takeWhilePropagateFail = do "f" (Position { index: 1, line: 1, column: 2 }) +applicativeSemantics :: Parser String String +applicativeSemantics = + ( string "foo" + <* parseState (\(ParseState a b _) -> Tuple (ParseState a b false) unit) + <* fail "fail" + ) + <|> pure "" + +bindSemantics :: Parser String String +bindSemantics = + ( do + _ <- string "foo" + parseState (\(ParseState a b _) -> Tuple (ParseState a b false) unit) + fail "fail" + ) + <|> pure "" + +monadRecSemantics :: Parser String String +monadRecSemantics = loop <|> pure "" + where + loop = tailRecM + ( case _ of + 1 -> do + _ <- string "foo" + pure (Loop 2) + 2 -> + parseState (\(ParseState a b _) -> Tuple (ParseState a b false) (Loop 3)) + _ -> + fail "fail" + ) + 1 + main :: Effect Unit main = do + log "\nTESTS Semantics\n" + parseErrorTestMessage applicativeSemantics "foo" "fail" + parseErrorTestMessage bindSemantics "foo" "fail" + parseErrorTestMessage monadRecSemantics "foo" "fail" log "\nTESTS Indentation\n" IndentationTests.testIndentationParser From fd49c54604c078424a4e930dc252f7be3439d06e Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 6 Mar 2025 06:12:33 -0800 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index aa272ed..abf682d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ Notable changes to this project are documented in this file. The format is based Bugfixes: - `float` parser of `GenTokenParser` does not parse negative numbers (by @mstream) +- Fixes `consumed` semantics which could cause unexpected backtracking instead of a failure (by @natefaubion) Breaking changes: From 9650d39760558a23896b7892015748d08e083901 Mon Sep 17 00:00:00 2001 From: Nathan Faubion Date: Thu, 6 Mar 2025 06:14:12 -0800 Subject: [PATCH 3/3] Formatting --- src/Parsing.purs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsing.purs b/src/Parsing.purs index a6d5110..1cf811e 100644 --- a/src/Parsing.purs +++ b/src/Parsing.purs @@ -80,6 +80,7 @@ derive instance ordParseError :: Ord ParseError -- | - If the left parser fails *without consuming any input*, then backtrack and try the right parser. -- | - If the left parser fails and consumes input, then fail immediately. data ParseState s = ParseState s Position Boolean + -- ParseState constructor has three parameters, -- s: the remaining input -- Position: the current position