From 7a3ca8bd20812866b226c94aba89c6e85691c4ad Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Tue, 30 Nov 2021 20:13:18 -0300 Subject: [PATCH 1/5] Add tests --- test/Test/Language/Javascript/Lexer.hs | 9 +++++++++ test/Test/Language/Javascript/ProgramParser.hs | 12 ++++++++++++ test/Test/Language/Javascript/RoundTrip.hs | 3 +++ 3 files changed, 24 insertions(+) diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index 1eda282d..8a9eefd1 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -15,6 +15,15 @@ testLexer = describe "Lexer:" $ do testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" + it "return mixed with comments" $ do + testLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + testLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + testLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + testLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + testLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + testLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + testLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" + it "numbers" $ do testLex "123" `shouldBe` "[DecimalToken 123]" testLex "037" `shouldBe` "[OctalToken 037]" diff --git a/test/Test/Language/Javascript/ProgramParser.hs b/test/Test/Language/Javascript/ProgramParser.hs index b7dc900c..21ef8fe2 100644 --- a/test/Test/Language/Javascript/ProgramParser.hs +++ b/test/Test/Language/Javascript/ProgramParser.hs @@ -18,12 +18,24 @@ testProgramParser = describe "Program parser:" $ do it "function" $ do testProg "function a(){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" testProg "function a(b,c){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + it "comments" $ do testProg "//blah\nx=1;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon,JSIdentifier 'a'])" testProg "/*x=1\ny=2\n*/z=2;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'z',JSDecimal '2'),JSSemicolon,JSIdentifier 'a'])" testProg "/* */\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" testProg "/* **/\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" + it "function with comments" $ do + testProg "function a(){/* return */}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" + testProg "function a(b,c/*d*/){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + + it "return with comments" $ do + testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + testProg "function a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal 
'4'])])" + testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "if" $ do testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])" testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])" diff --git a/test/Test/Language/Javascript/RoundTrip.hs b/test/Test/Language/Javascript/RoundTrip.hs index d116d565..b6b96d45 100644 --- a/test/Test/Language/Javascript/RoundTrip.hs +++ b/test/Test/Language/Javascript/RoundTrip.hs @@ -136,6 +136,9 @@ testRoundTrip = describe "Roundtrip:" $ do testRT "var [x, y]=z;" testRT "let {x: [y]}=z;" testRT "let yield=1" + testRT "return x" + testRT "return /**/ x" + testRT "return /*\n*/ x" it "module" $ do testRTModule "import def from 'mod'" From 63f9a9cb7445cc10aa0ab6966b66caf58e59f9d3 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Tue, 30 Nov 2021 22:54:08 -0300 Subject: [PATCH 2/5] Also consider comments as semi producers --- src/Language/JavaScript/Parser/Lexer.x | 13 +++++++++++-- test/Test/Language/Javascript/ProgramParser.hs | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index d28be52c..2e13dbbc 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -432,13 +432,17 @@ lexCont cont = where lexLoop = do tok <- lexToken + ltok <- getLastToken case tok of CommentToken {} -> do addComment tok - lexLoop + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop WsToken {} -> do addComment tok - ltok <- getLastToken case ltok of BreakToken {} -> maybeAutoSemi tok ContinueToken {} -> maybeAutoSemi tok @@ -456,6 +460,10 @@ lexCont cont = if any (== '\n') tl then cont $ AutoSemiToken sp tl cmt else lexLoop + maybeAutoSemi (CommentToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else lexLoop maybeAutoSemi _ = lexLoop @@ -483,6 +491,7 @@ getLastToken = Alex $ \s@AlexState{alex_ust=ust} -> Right (s, previousToken ust) setLastToken :: Token -> Alex () setLastToken (WsToken {}) = Alex $ \s -> Right (s, ()) +setLastToken (CommentToken {}) = Alex $ \s -> Right (s, ()) setLastToken tok = Alex $ \s -> Right (s{alex_ust=(alex_ust s){previousToken=tok}}, ()) getComment :: Alex [Token] diff --git a/test/Test/Language/Javascript/ProgramParser.hs b/test/Test/Language/Javascript/ProgramParser.hs index 21ef8fe2..817630db 100644 --- a/test/Test/Language/Javascript/ProgramParser.hs +++ b/test/Test/Language/Javascript/ProgramParser.hs @@ -33,9 +33,15 @@ testProgramParser = describe "Program parser:" $ do testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + + it "return with comments and trailing expression" $ do testProg "function 
a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "return without spaces and but comments and trailing expression" $ do + testProg "function a(b,c){ return//\n4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return/*\n*/4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "if" $ do testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])" testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])" @@ -103,4 +109,3 @@ testProg str = showStrippedMaybe (parseUsing parseProgram str "src") testFileUtf8 :: FilePath -> IO String testFileUtf8 fileName = showStripped <$> parseFileUtf8 fileName - From f328daff3106f50cb3f8ab512ec7b1001d65a488 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Thu, 2 Dec 2021 14:35:25 -0300 Subject: [PATCH 3/5] Generalize function --- src/Language/JavaScript/Parser/Lexer.x | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index 2e13dbbc..dfbac4f1 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -431,6 +431,7 @@ lexCont cont = lexLoop where lexLoop = do + let maybeAutoSemi = tryInsertAutoSemi cont lexLoop tok <- lexToken ltok <- getLastToken case tok of @@ -454,17 +455,18 @@ lexCont cont = setComment [] cont tok' - -- If the token is a WsToken and it contains a newline, convert it to an - -- AutoSemiToken and call the continuation, otherwise, just lexLoop. - maybeAutoSemi (WsToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else lexLoop - maybeAutoSemi (CommentToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else lexLoop - maybeAutoSemi _ = lexLoop +-- If the token is a WsToken and it contains a newline, convert it to an +-- AutoSemiToken and call the continuation, otherwise, just lexLoop. 
+tryInsertAutoSemi :: (Token -> Alex a) -> Alex a -> Token -> Alex a +tryInsertAutoSemi cont loop (WsToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else loop +tryInsertAutoSemi cont loop (CommentToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else loop +tryInsertAutoSemi _ loop _ = loop toCommentAnnotation :: [Token] -> [CommentAnnotation] From 018a9832f555c257ae1a471a9c2a1500e7fdcf61 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Thu, 2 Dec 2021 15:18:56 -0300 Subject: [PATCH 4/5] Generalize lexing step and test happy lexer rules --- src/Language/JavaScript/Parser/Lexer.x | 93 +++++----- test/Test/Language/Javascript/Lexer.hs | 238 +++++++++++++++++-------- 2 files changed, 217 insertions(+), 114 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index dfbac4f1..ff4bf502 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -14,6 +14,7 @@ module Language.JavaScript.Parser.Lexer , lexCont , alexError , runAlex + , happyTestTokeniser , alexTestTokeniser , setInTemplate ) where @@ -425,49 +426,61 @@ alexTestTokeniser input = xs -> reverse xs _ -> loop (tok:acc) +-- Test variant of alexTestTokeniser +-- that tokenizes using the same rules as those used by the happy parser +happyTestTokeniser :: String -> Either String [Token] +happyTestTokeniser input = runAlex input $ loop [] + where + loop :: [Token] -> Alex [Token] + loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ -> + return $ case acc of + [] -> [] + (TailToken{}:xs) -> reverse xs + xs -> reverse xs) + -- This is called by the Happy parser. lexCont :: (Token -> Alex a) -> Alex a -lexCont cont = - lexLoop +lexCont cont = lexLoop where - lexLoop = do - let maybeAutoSemi = tryInsertAutoSemi cont lexLoop - tok <- lexToken - ltok <- getLastToken - case tok of - CommentToken {} -> do - addComment tok - case ltok of - BreakToken {} -> maybeAutoSemi tok - ContinueToken {} -> maybeAutoSemi tok - ReturnToken {} -> maybeAutoSemi tok - _otherwise -> lexLoop - WsToken {} -> do - addComment tok - case ltok of - BreakToken {} -> maybeAutoSemi tok - ContinueToken {} -> maybeAutoSemi tok - ReturnToken {} -> maybeAutoSemi tok - _otherwise -> lexLoop - _other -> do - cs <- getComment - let tok' = tok{ tokenComment=(toCommentAnnotation cs) } - setComment [] - cont tok' - --- If the token is a WsToken and it contains a newline, convert it to an --- AutoSemiToken and call the continuation, otherwise, just lexLoop. 
-tryInsertAutoSemi :: (Token -> Alex a) -> Alex a -> Token -> Alex a -tryInsertAutoSemi cont loop (WsToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else loop -tryInsertAutoSemi cont loop (CommentToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else loop -tryInsertAutoSemi _ loop _ = loop - + lexLoop = genericLexStep cont lexLoop (addCommentAnnotation cont) + +genericLexStep :: (Token -> Alex a) -> Alex a -> (Token -> Alex a) -> Alex a +genericLexStep cont lexLoop eof = do + tok <- lexToken + ltok <- getLastToken + case tok of + CommentToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + WsToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + EOFToken {} -> eof tok + _other -> addCommentAnnotation cont tok + where + -- If the token is a WsToken or CommentToken and it contains a newline, convert it to an + -- AutoSemiToken and call the continuation, otherwise, just lexLoop. + maybeAutoSemi (WsToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi (CommentToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi _ = lexLoop + + hasNewline :: String -> Bool + hasNewline = any (== '\n') + +addCommentAnnotation :: (Token -> Alex a) -> Token -> Alex a +addCommentAnnotation cont tok = do + cs <- getComment + let tok' = tok{ tokenComment=(toCommentAnnotation cs) } + setComment [] + cont tok' toCommentAnnotation :: [Token] -> [CommentAnnotation] toCommentAnnotation [] = [] diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index 8a9eefd1..b9adae9b 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -11,80 +11,170 @@ import Language.JavaScript.Parser.Lexer testLexer :: Spec testLexer = describe "Lexer:" $ do - it "comments" $ do - testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" - testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" - - it "return mixed with comments" $ do - testLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" - testLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" - testLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" - testLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" - testLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" - testLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" - testLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" - - it "numbers" $ do - testLex "123" `shouldBe` "[DecimalToken 123]" - testLex "037" `shouldBe` "[OctalToken 037]" - testLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" - testLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" - - it "invalid numbers" $ do - testLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" - testLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" - - it "string" $ do - testLex "'cat'" `shouldBe` "[StringToken 'cat']" - testLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" - - it "strings with escape chars" $ do - testLex "'\t'" `shouldBe` "[StringToken '\t']" - testLex "'\\n'" `shouldBe` "[StringToken '\\n']" - testLex "'\\\\n'" `shouldBe` 
"[StringToken '\\\\n']" - testLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" - testLex "'\\0'" `shouldBe` "[StringToken '\\0']" - testLex "'\\12'" `shouldBe` "[StringToken '\\12']" - testLex "'\\s'" `shouldBe` "[StringToken '\\s']" - testLex "'\\-'" `shouldBe` "[StringToken '\\-']" - - it "strings with non-escaped chars" $ - testLex "'\\/'" `shouldBe` "[StringToken '\\/']" - - it "strings with escaped quotes" $ do - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" - testLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" - - it "spread token" $ do - testLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" - - it "assignment" $ do - testLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" - - it "break/continue/return" $ do - testLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - - it "var/let" $ do - testLex "var\n" `shouldBe` "[VarToken,WsToken]" - testLex "let\n" `shouldBe` "[LetToken,WsToken]" - - it "in/of" $ do - testLex "in\n" `shouldBe` "[InToken,WsToken]" - testLex "of\n" `shouldBe` "[OfToken,WsToken]" - - it "function" $ do - testLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" - - -testLex :: String -> String -testLex str = - either id stringify $ alexTestTokeniser str + describe "with Alex rules" $ do + it "comments" $ do + alexTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" + alexTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" + + it "return mixed with comments" $ do + alexTestLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" + + it "numbers" $ do + alexTestLex "123" `shouldBe` "[DecimalToken 123]" + alexTestLex "037" `shouldBe` "[OctalToken 037]" + alexTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + alexTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + alexTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + alexTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + alexTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + alexTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" + + it "strings with escape chars" $ do + alexTestLex "'\t'" `shouldBe` "[StringToken '\t']" + alexTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + alexTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + alexTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + alexTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + 
alexTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + alexTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + alexTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + alexTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + alexTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + alexTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + alexTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + alexTestLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + alexTestLex "var\n" `shouldBe` "[VarToken,WsToken]" + alexTestLex "let\n" `shouldBe` "[LetToken,WsToken]" + + it "in/of" $ do + alexTestLex "in\n" `shouldBe` "[InToken,WsToken]" + alexTestLex "of\n" `shouldBe` "[OfToken,WsToken]" + + it "function" $ do + alexTestLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" + + + describe "with Happy rules" $ do + it "comments" $ do + happyTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */ // foo" `shouldBe` "[]" + + it "return that doesn't produce autosemi" $ do + happyTestLex "return 1" `shouldBe` "[ReturnToken,DecimalToken 1]" + happyTestLex "return //hello" `shouldBe` "[ReturnToken]" + happyTestLex "return/*hello*/1" `shouldBe` "[ReturnToken,DecimalToken 1]" + + it "return mixed with newlines that produce autosemi" $ do + happyTestLex "return \n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "return mixed with comments that produce autosemi and trailing expressions" $ do + happyTestLex "return /*hello \n */" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return /*hello \n 1*/" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return //hello\n" `shouldBe` "[ReturnToken,AutoSemiToken]" + + it "return mixed with comments that produce autosemi but no trailing expressions" $ do + happyTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return//hello\n1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "numbers" $ do + happyTestLex "123" `shouldBe` "[DecimalToken 123]" + happyTestLex "037" `shouldBe` "[OctalToken 037]" + happyTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + happyTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + happyTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + happyTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + happyTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + happyTestLex "\"dog\"" `shouldBe` "[StringToken 
\"dog\"]" + + it "strings with escape chars" $ do + happyTestLex "'\t'" `shouldBe` "[StringToken '\t']" + happyTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + happyTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + happyTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + happyTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + happyTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + happyTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + happyTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + happyTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + happyTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + happyTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + happyTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + happyTestLex "break\nx=1" `shouldBe` "[BreakToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "continue\nx=1" `shouldBe` "[ContinueToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "return\nx=1" `shouldBe` "[ReturnToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + happyTestLex "var\n" `shouldBe` "[VarToken]" + happyTestLex "let\n" `shouldBe` "[LetToken]" + + it "in/of" $ do + happyTestLex "in\n" `shouldBe` "[InToken]" + happyTestLex "of\n" `shouldBe` "[OfToken]" + + it "function" $ do + happyTestLex "async function\n" `shouldBe` "[AsyncToken,FunctionToken]" + + + +alexTestLex :: String -> String +alexTestLex = genericTestLex alexTestTokeniser + +happyTestLex :: String -> String +happyTestLex = genericTestLex happyTestTokeniser + +genericTestLex :: (String -> Either String [Token]) -> String -> String +genericTestLex lexer str = + either id stringify $ lexer str where stringify xs = "[" ++ intercalate "," (map showToken xs) ++ "]" From 71fcdf5ee7bd1800f3e214e2ff6f64083799326c Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Fri, 11 Mar 2022 23:06:01 -0300 Subject: [PATCH 5/5] Fixed typo --- src/Language/JavaScript/Parser/Lexer.x | 14 +++++++------- test/Test/Language/Javascript/Lexer.hs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index ff4bf502..858087b8 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -14,8 +14,8 @@ module Language.JavaScript.Parser.Lexer , lexCont , alexError , runAlex - , happyTestTokeniser - , alexTestTokeniser + , happyTestTokenizer + , alexTestTokenizer , setInTemplate ) where @@ -412,8 +412,8 @@ lexToken = do return tok -- For tesing. 
-alexTestTokeniser :: String -> Either String [Token] -alexTestTokeniser input = +alexTestTokenizer :: String -> Either String [Token] +alexTestTokenizer input = runAlex input $ loop [] where loop acc = do @@ -426,10 +426,10 @@ alexTestTokeniser input = xs -> reverse xs _ -> loop (tok:acc) --- Test variant of alexTestTokeniser +-- Test variant of alexTestTokenizer -- that tokenizes using the same rules as those used by the happy parser -happyTestTokeniser :: String -> Either String [Token] -happyTestTokeniser input = runAlex input $ loop [] +happyTestTokenizer :: String -> Either String [Token] +happyTestTokenizer input = runAlex input $ loop [] where loop :: [Token] -> Alex [Token] loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ -> diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index b9adae9b..8c67ecfa 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -167,10 +167,10 @@ testLexer = describe "Lexer:" $ do alexTestLex :: String -> String -alexTestLex = genericTestLex alexTestTokeniser +alexTestLex = genericTestLex alexTestTokenizer happyTestLex :: String -> String -happyTestLex = genericTestLex happyTestTokeniser +happyTestLex = genericTestLex happyTestTokenizer genericTestLex :: (String -> Either String [Token]) -> String -> String genericTestLex lexer str =
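A minimal sketch of the behaviour this series adds (not part of the patches; it assumes a build with all five applied and that the lexer module still exports Token(..) alongside the renamed happyTestTokenizer). It mirrors the new "Happy rules" expectation "[ReturnToken,AutoSemiToken,DecimalToken 1]" for the input "return //hello\n 1":

-- Sketch only: the constructor names and the expected three-token shape come
-- from the tests added in this series; the import list is an assumption.
import Language.JavaScript.Parser.Lexer (Token (..), happyTestTokenizer)

main :: IO ()
main =
    case happyTestTokenizer "return //hello\n 1" of
        -- Comments no longer overwrite the remembered last token, so the
        -- newline after the comment still sees ReturnToken and triggers
        -- automatic semicolon insertion before the trailing expression.
        Right [ReturnToken {}, AutoSemiToken {}, DecimalToken {}] ->
            putStrLn "AutoSemiToken inserted after the comment"
        other ->
            putStrLn ("unexpected lexer output: " ++ either id (show . length) other)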