diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index d28be52..858087b 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -14,7 +14,8 @@ module Language.JavaScript.Parser.Lexer , lexCont , alexError , runAlex - , alexTestTokeniser + , happyTestTokenizer + , alexTestTokenizer , setInTemplate ) where @@ -411,8 +412,8 @@ lexToken = do return tok -- For tesing. -alexTestTokeniser :: String -> Either String [Token] -alexTestTokeniser input = +alexTestTokenizer :: String -> Either String [Token] +alexTestTokenizer input = runAlex input $ loop [] where loop acc = do @@ -425,39 +426,61 @@ alexTestTokeniser input = xs -> reverse xs _ -> loop (tok:acc) +-- Test variant of alexTestTokenizer +-- that tokenizes using the same rules as those used by the happy parser +happyTestTokenizer :: String -> Either String [Token] +happyTestTokenizer input = runAlex input $ loop [] + where + loop :: [Token] -> Alex [Token] + loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ -> + return $ case acc of + [] -> [] + (TailToken{}:xs) -> reverse xs + xs -> reverse xs) + -- This is called by the Happy parser. lexCont :: (Token -> Alex a) -> Alex a -lexCont cont = - lexLoop +lexCont cont = lexLoop where - lexLoop = do - tok <- lexToken - case tok of - CommentToken {} -> do - addComment tok - lexLoop - WsToken {} -> do - addComment tok - ltok <- getLastToken - case ltok of - BreakToken {} -> maybeAutoSemi tok - ContinueToken {} -> maybeAutoSemi tok - ReturnToken {} -> maybeAutoSemi tok - _otherwise -> lexLoop - _other -> do - cs <- getComment - let tok' = tok{ tokenComment=(toCommentAnnotation cs) } - setComment [] - cont tok' - - -- If the token is a WsToken and it contains a newline, convert it to an - -- AutoSemiToken and call the continuation, otherwise, just lexLoop. - maybeAutoSemi (WsToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else lexLoop - maybeAutoSemi _ = lexLoop - + lexLoop = genericLexStep cont lexLoop (addCommentAnnotation cont) + +genericLexStep :: (Token -> Alex a) -> Alex a -> (Token -> Alex a) -> Alex a +genericLexStep cont lexLoop eof = do + tok <- lexToken + ltok <- getLastToken + case tok of + CommentToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + WsToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + EOFToken {} -> eof tok + _other -> addCommentAnnotation cont tok + where + -- If the token is a WsToken or CommentToken and it contains a newline, convert it to an + -- AutoSemiToken and call the continuation, otherwise, just lexLoop. + maybeAutoSemi (WsToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi (CommentToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi _ = lexLoop + + hasNewline :: String -> Bool + hasNewline = any (== '\n') + +addCommentAnnotation :: (Token -> Alex a) -> Token -> Alex a +addCommentAnnotation cont tok = do + cs <- getComment + let tok' = tok{ tokenComment=(toCommentAnnotation cs) } + setComment [] + cont tok' toCommentAnnotation :: [Token] -> [CommentAnnotation] toCommentAnnotation [] = [] @@ -483,6 +506,7 @@ getLastToken = Alex $ \s@AlexState{alex_ust=ust} -> Right (s, previousToken ust) setLastToken :: Token -> Alex () setLastToken (WsToken {}) = Alex $ \s -> Right (s, ()) +setLastToken (CommentToken {}) = Alex $ \s -> Right (s, ()) setLastToken tok = Alex $ \s -> Right (s{alex_ust=(alex_ust s){previousToken=tok}}, ()) getComment :: Alex [Token] diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index 1eda282..8c67ecf 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -11,71 +11,170 @@ import Language.JavaScript.Parser.Lexer testLexer :: Spec testLexer = describe "Lexer:" $ do - it "comments" $ do - testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" - testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" - - it "numbers" $ do - testLex "123" `shouldBe` "[DecimalToken 123]" - testLex "037" `shouldBe` "[OctalToken 037]" - testLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" - testLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" - - it "invalid numbers" $ do - testLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" - testLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" - - it "string" $ do - testLex "'cat'" `shouldBe` "[StringToken 'cat']" - testLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" - - it "strings with escape chars" $ do - testLex "'\t'" `shouldBe` "[StringToken '\t']" - testLex "'\\n'" `shouldBe` "[StringToken '\\n']" - testLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" - testLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" - testLex "'\\0'" `shouldBe` "[StringToken '\\0']" - testLex "'\\12'" `shouldBe` "[StringToken '\\12']" - testLex "'\\s'" `shouldBe` "[StringToken '\\s']" - testLex "'\\-'" `shouldBe` "[StringToken '\\-']" - - it "strings with non-escaped chars" $ - testLex "'\\/'" `shouldBe` "[StringToken '\\/']" - - it "strings with escaped quotes" $ do - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" - testLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" - - it "spread token" $ do - testLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" - - it "assignment" $ do - testLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" - - it "break/continue/return" $ do - testLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - - it "var/let" $ do - testLex "var\n" `shouldBe` "[VarToken,WsToken]" - testLex "let\n" `shouldBe` "[LetToken,WsToken]" - - it "in/of" $ do - testLex "in\n" `shouldBe` "[InToken,WsToken]" - testLex "of\n" `shouldBe` "[OfToken,WsToken]" - - it "function" $ do - testLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" - - -testLex :: String -> String -testLex str = - either id stringify $ alexTestTokeniser str + describe "with Alex rules" $ do + it "comments" $ do + alexTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" + alexTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" + + it "return mixed with comments" $ do + alexTestLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" + + it "numbers" $ do + alexTestLex "123" `shouldBe` "[DecimalToken 123]" + alexTestLex "037" `shouldBe` "[OctalToken 037]" + alexTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + alexTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + alexTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + alexTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + alexTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + alexTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" + + it "strings with escape chars" $ do + alexTestLex "'\t'" `shouldBe` "[StringToken '\t']" + alexTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + alexTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + alexTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + alexTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + alexTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + alexTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + alexTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + alexTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + alexTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + alexTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + alexTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + alexTestLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + alexTestLex "var\n" `shouldBe` "[VarToken,WsToken]" + alexTestLex "let\n" `shouldBe` "[LetToken,WsToken]" + + it "in/of" $ do + alexTestLex "in\n" `shouldBe` "[InToken,WsToken]" + alexTestLex "of\n" `shouldBe` "[OfToken,WsToken]" + + it "function" $ do + alexTestLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" + + + describe "with Happy rules" $ do + it "comments" $ do + happyTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */ // foo" `shouldBe` "[]" + + it "return that doesn't produce autosemi" $ do + happyTestLex "return 1" `shouldBe` "[ReturnToken,DecimalToken 1]" + happyTestLex "return //hello" `shouldBe` "[ReturnToken]" + happyTestLex "return/*hello*/1" `shouldBe` "[ReturnToken,DecimalToken 1]" + + it "return mixed with newlines that produce autosemi" $ do + happyTestLex "return \n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "return mixed with comments that produce autosemi and trailing expressions" $ do + happyTestLex "return /*hello \n */" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return /*hello \n 1*/" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return //hello\n" `shouldBe` "[ReturnToken,AutoSemiToken]" + + it "return mixed with comments that produce autosemi but no trailing expressions" $ do + happyTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return//hello\n1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "numbers" $ do + happyTestLex "123" `shouldBe` "[DecimalToken 123]" + happyTestLex "037" `shouldBe` "[OctalToken 037]" + happyTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + happyTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + happyTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + happyTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + happyTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + happyTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" + + it "strings with escape chars" $ do + happyTestLex "'\t'" `shouldBe` "[StringToken '\t']" + happyTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + happyTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + happyTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + happyTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + happyTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + happyTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + happyTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + happyTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + happyTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + happyTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + happyTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + happyTestLex "break\nx=1" `shouldBe` "[BreakToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "continue\nx=1" `shouldBe` "[ContinueToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "return\nx=1" `shouldBe` "[ReturnToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + happyTestLex "var\n" `shouldBe` "[VarToken]" + happyTestLex "let\n" `shouldBe` "[LetToken]" + + it "in/of" $ do + happyTestLex "in\n" `shouldBe` "[InToken]" + happyTestLex "of\n" `shouldBe` "[OfToken]" + + it "function" $ do + happyTestLex "async function\n" `shouldBe` "[AsyncToken,FunctionToken]" + + + +alexTestLex :: String -> String +alexTestLex = genericTestLex alexTestTokenizer + +happyTestLex :: String -> String +happyTestLex = genericTestLex happyTestTokenizer + +genericTestLex :: (String -> Either String [Token]) -> String -> String +genericTestLex lexer str = + either id stringify $ lexer str where stringify xs = "[" ++ intercalate "," (map showToken xs) ++ "]" diff --git a/test/Test/Language/Javascript/ProgramParser.hs b/test/Test/Language/Javascript/ProgramParser.hs index b7dc900..817630d 100644 --- a/test/Test/Language/Javascript/ProgramParser.hs +++ b/test/Test/Language/Javascript/ProgramParser.hs @@ -18,12 +18,30 @@ testProgramParser = describe "Program parser:" $ do it "function" $ do testProg "function a(){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" testProg "function a(b,c){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + it "comments" $ do testProg "//blah\nx=1;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon,JSIdentifier 'a'])" testProg "/*x=1\ny=2\n*/z=2;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'z',JSDecimal '2'),JSSemicolon,JSIdentifier 'a'])" testProg "/* */\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" testProg "/* **/\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" + it "function with comments" $ do + testProg "function a(){/* return */}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" + testProg "function a(b,c/*d*/){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + + it "return with comments" $ do + testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + + it "return with comments and trailing expression" $ do + testProg "function a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + + it "return without spaces and but comments and trailing expression" $ do + testProg "function a(b,c){ return//\n4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return/*\n*/4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "if" $ do testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])" testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])" @@ -91,4 +109,3 @@ testProg str = showStrippedMaybe (parseUsing parseProgram str "src") testFileUtf8 :: FilePath -> IO String testFileUtf8 fileName = showStripped <$> parseFileUtf8 fileName - diff --git a/test/Test/Language/Javascript/RoundTrip.hs b/test/Test/Language/Javascript/RoundTrip.hs index d116d56..b6b96d4 100644 --- a/test/Test/Language/Javascript/RoundTrip.hs +++ b/test/Test/Language/Javascript/RoundTrip.hs @@ -136,6 +136,9 @@ testRoundTrip = describe "Roundtrip:" $ do testRT "var [x, y]=z;" testRT "let {x: [y]}=z;" testRT "let yield=1" + testRT "return x" + testRT "return /**/ x" + testRT "return /*\n*/ x" it "module" $ do testRTModule "import def from 'mod'"