From 7a3ca8bd20812866b226c94aba89c6e85691c4ad Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Tue, 30 Nov 2021 20:13:18 -0300 Subject: [PATCH 1/5] Add tests --- test/Test/Language/Javascript/Lexer.hs | 9 +++++++++ test/Test/Language/Javascript/ProgramParser.hs | 12 ++++++++++++ test/Test/Language/Javascript/RoundTrip.hs | 3 +++ 3 files changed, 24 insertions(+) diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index 1eda282d..8a9eefd1 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -15,6 +15,15 @@ testLexer = describe "Lexer:" $ do testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" + it "return mixed with comments" $ do + testLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + testLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + testLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + testLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + testLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + testLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + testLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" + it "numbers" $ do testLex "123" `shouldBe` "[DecimalToken 123]" testLex "037" `shouldBe` "[OctalToken 037]" diff --git a/test/Test/Language/Javascript/ProgramParser.hs b/test/Test/Language/Javascript/ProgramParser.hs index b7dc900c..21ef8fe2 100644 --- a/test/Test/Language/Javascript/ProgramParser.hs +++ b/test/Test/Language/Javascript/ProgramParser.hs @@ -18,12 +18,24 @@ testProgramParser = describe "Program parser:" $ do it "function" $ do testProg "function a(){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" testProg "function a(b,c){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + it "comments" $ do testProg "//blah\nx=1;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon,JSIdentifier 'a'])" testProg "/*x=1\ny=2\n*/z=2;//foo\na" `shouldBe` "Right (JSAstProgram [JSOpAssign ('=',JSIdentifier 'z',JSDecimal '2'),JSSemicolon,JSIdentifier 'a'])" testProg "/* */\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" testProg "/* **/\nfunction f() {\n/* */\n}\n" `shouldBe` "Right (JSAstProgram [JSFunction 'f' () (JSBlock [])])" + it "function with comments" $ do + testProg "function a(){/* return */}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' () (JSBlock [])])" + testProg "function a(b,c/*d*/){}" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [])])" + + it "return with comments" $ do + testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + testProg "function a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal 
'4'])])" + testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "if" $ do testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])" testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])" diff --git a/test/Test/Language/Javascript/RoundTrip.hs b/test/Test/Language/Javascript/RoundTrip.hs index d116d565..b6b96d45 100644 --- a/test/Test/Language/Javascript/RoundTrip.hs +++ b/test/Test/Language/Javascript/RoundTrip.hs @@ -136,6 +136,9 @@ testRoundTrip = describe "Roundtrip:" $ do testRT "var [x, y]=z;" testRT "let {x: [y]}=z;" testRT "let yield=1" + testRT "return x" + testRT "return /**/ x" + testRT "return /*\n*/ x" it "module" $ do testRTModule "import def from 'mod'" From 63f9a9cb7445cc10aa0ab6966b66caf58e59f9d3 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Tue, 30 Nov 2021 22:54:08 -0300 Subject: [PATCH 2/5] Also consider comments as semi producers --- src/Language/JavaScript/Parser/Lexer.x | 13 +++++++++++-- test/Test/Language/Javascript/ProgramParser.hs | 7 ++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index d28be52c..2e13dbbc 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -432,13 +432,17 @@ lexCont cont = where lexLoop = do tok <- lexToken + ltok <- getLastToken case tok of CommentToken {} -> do addComment tok - lexLoop + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop WsToken {} -> do addComment tok - ltok <- getLastToken case ltok of BreakToken {} -> maybeAutoSemi tok ContinueToken {} -> maybeAutoSemi tok @@ -456,6 +460,10 @@ lexCont cont = if any (== '\n') tl then cont $ AutoSemiToken sp tl cmt else lexLoop + maybeAutoSemi (CommentToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else lexLoop maybeAutoSemi _ = lexLoop @@ -483,6 +491,7 @@ getLastToken = Alex $ \s@AlexState{alex_ust=ust} -> Right (s, previousToken ust) setLastToken :: Token -> Alex () setLastToken (WsToken {}) = Alex $ \s -> Right (s, ()) +setLastToken (CommentToken {}) = Alex $ \s -> Right (s, ()) setLastToken tok = Alex $ \s -> Right (s{alex_ust=(alex_ust s){previousToken=tok}}, ()) getComment :: Alex [Token] diff --git a/test/Test/Language/Javascript/ProgramParser.hs b/test/Test/Language/Javascript/ProgramParser.hs index 21ef8fe2..817630db 100644 --- a/test/Test/Language/Javascript/ProgramParser.hs +++ b/test/Test/Language/Javascript/ProgramParser.hs @@ -33,9 +33,15 @@ testProgramParser = describe "Program parser:" $ do testProg "function a(b,c){ return \n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" testProg "function a(b,c){ return // 4\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" testProg "function a(b,c){ return /* 4*/\n }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ])])" + + it "return with comments and trailing expression" $ do testProg "function 
a(b,c){ return //\n 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" testProg "function a(b,c){ return /*\n*/ 4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "return without spaces and but comments and trailing expression" $ do + testProg "function a(b,c){ return//\n4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + testProg "function a(b,c){ return/*\n*/4 }" `shouldBe` "Right (JSAstProgram [JSFunction 'a' (JSIdentifier 'b',JSIdentifier 'c') (JSBlock [JSReturn ,JSDecimal '4'])])" + it "if" $ do testProg "if(x);x=1" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'x') (JSEmptyStatement),JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1')])" testProg "if(a)x=1;y=2" `shouldBe` "Right (JSAstProgram [JSIf (JSIdentifier 'a') (JSOpAssign ('=',JSIdentifier 'x',JSDecimal '1'),JSSemicolon),JSOpAssign ('=',JSIdentifier 'y',JSDecimal '2')])" @@ -103,4 +109,3 @@ testProg str = showStrippedMaybe (parseUsing parseProgram str "src") testFileUtf8 :: FilePath -> IO String testFileUtf8 fileName = showStripped <$> parseFileUtf8 fileName - From f328daff3106f50cb3f8ab512ec7b1001d65a488 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Thu, 2 Dec 2021 14:35:25 -0300 Subject: [PATCH 3/5] Generalize function --- src/Language/JavaScript/Parser/Lexer.x | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index 2e13dbbc..dfbac4f1 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -431,6 +431,7 @@ lexCont cont = lexLoop where lexLoop = do + let maybeAutoSemi = tryInsertAutoSemi cont lexLoop tok <- lexToken ltok <- getLastToken case tok of @@ -454,17 +455,18 @@ lexCont cont = setComment [] cont tok' - -- If the token is a WsToken and it contains a newline, convert it to an - -- AutoSemiToken and call the continuation, otherwise, just lexLoop. - maybeAutoSemi (WsToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else lexLoop - maybeAutoSemi (CommentToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else lexLoop - maybeAutoSemi _ = lexLoop +-- If the token is a WsToken and it contains a newline, convert it to an +-- AutoSemiToken and call the continuation, otherwise, just lexLoop. 
+tryInsertAutoSemi :: (Token -> Alex a) -> Alex a -> Token -> Alex a +tryInsertAutoSemi cont loop (WsToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else loop +tryInsertAutoSemi cont loop (CommentToken sp tl cmt) = + if any (== '\n') tl + then cont $ AutoSemiToken sp tl cmt + else loop +tryInsertAutoSemi _ loop _ = loop toCommentAnnotation :: [Token] -> [CommentAnnotation] From 018a9832f555c257ae1a471a9c2a1500e7fdcf61 Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Thu, 2 Dec 2021 15:18:56 -0300 Subject: [PATCH 4/5] Generalize lexing step and test happy lexer rules --- src/Language/JavaScript/Parser/Lexer.x | 93 +++++----- test/Test/Language/Javascript/Lexer.hs | 238 +++++++++++++++++-------- 2 files changed, 217 insertions(+), 114 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index dfbac4f1..ff4bf502 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -14,6 +14,7 @@ module Language.JavaScript.Parser.Lexer , lexCont , alexError , runAlex + , happyTestTokeniser , alexTestTokeniser , setInTemplate ) where @@ -425,49 +426,61 @@ alexTestTokeniser input = xs -> reverse xs _ -> loop (tok:acc) +-- Test variant of alexTestTokeniser +-- that tokenizes using the same rules as those used by the happy parser +happyTestTokeniser :: String -> Either String [Token] +happyTestTokeniser input = runAlex input $ loop [] + where + loop :: [Token] -> Alex [Token] + loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ -> + return $ case acc of + [] -> [] + (TailToken{}:xs) -> reverse xs + xs -> reverse xs) + -- This is called by the Happy parser. lexCont :: (Token -> Alex a) -> Alex a -lexCont cont = - lexLoop +lexCont cont = lexLoop where - lexLoop = do - let maybeAutoSemi = tryInsertAutoSemi cont lexLoop - tok <- lexToken - ltok <- getLastToken - case tok of - CommentToken {} -> do - addComment tok - case ltok of - BreakToken {} -> maybeAutoSemi tok - ContinueToken {} -> maybeAutoSemi tok - ReturnToken {} -> maybeAutoSemi tok - _otherwise -> lexLoop - WsToken {} -> do - addComment tok - case ltok of - BreakToken {} -> maybeAutoSemi tok - ContinueToken {} -> maybeAutoSemi tok - ReturnToken {} -> maybeAutoSemi tok - _otherwise -> lexLoop - _other -> do - cs <- getComment - let tok' = tok{ tokenComment=(toCommentAnnotation cs) } - setComment [] - cont tok' - --- If the token is a WsToken and it contains a newline, convert it to an --- AutoSemiToken and call the continuation, otherwise, just lexLoop. 
-tryInsertAutoSemi :: (Token -> Alex a) -> Alex a -> Token -> Alex a -tryInsertAutoSemi cont loop (WsToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else loop -tryInsertAutoSemi cont loop (CommentToken sp tl cmt) = - if any (== '\n') tl - then cont $ AutoSemiToken sp tl cmt - else loop -tryInsertAutoSemi _ loop _ = loop - + lexLoop = genericLexStep cont lexLoop (addCommentAnnotation cont) + +genericLexStep :: (Token -> Alex a) -> Alex a -> (Token -> Alex a) -> Alex a +genericLexStep cont lexLoop eof = do + tok <- lexToken + ltok <- getLastToken + case tok of + CommentToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + WsToken {} -> do + addComment tok + case ltok of + BreakToken {} -> maybeAutoSemi tok + ContinueToken {} -> maybeAutoSemi tok + ReturnToken {} -> maybeAutoSemi tok + _otherwise -> lexLoop + EOFToken {} -> eof tok + _other -> addCommentAnnotation cont tok + where + -- If the token is a WsToken or CommentToken and it contains a newline, convert it to an + -- AutoSemiToken and call the continuation, otherwise, just lexLoop. + maybeAutoSemi (WsToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi (CommentToken sp tl cmt) | hasNewline tl = cont $ AutoSemiToken sp tl cmt + maybeAutoSemi _ = lexLoop + + hasNewline :: String -> Bool + hasNewline = any (== '\n') + +addCommentAnnotation :: (Token -> Alex a) -> Token -> Alex a +addCommentAnnotation cont tok = do + cs <- getComment + let tok' = tok{ tokenComment=(toCommentAnnotation cs) } + setComment [] + cont tok' toCommentAnnotation :: [Token] -> [CommentAnnotation] toCommentAnnotation [] = [] diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index 8a9eefd1..b9adae9b 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -11,80 +11,170 @@ import Language.JavaScript.Parser.Lexer testLexer :: Spec testLexer = describe "Lexer:" $ do - it "comments" $ do - testLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" - testLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" - - it "return mixed with comments" $ do - testLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" - testLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" - testLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" - testLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" - testLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" - testLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" - testLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" - - it "numbers" $ do - testLex "123" `shouldBe` "[DecimalToken 123]" - testLex "037" `shouldBe` "[OctalToken 037]" - testLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" - testLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" - - it "invalid numbers" $ do - testLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" - testLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" - - it "string" $ do - testLex "'cat'" `shouldBe` "[StringToken 'cat']" - testLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" - - it "strings with escape chars" $ do - testLex "'\t'" `shouldBe` "[StringToken '\t']" - testLex "'\\n'" `shouldBe` "[StringToken '\\n']" - testLex "'\\\\n'" `shouldBe` 
"[StringToken '\\\\n']" - testLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" - testLex "'\\0'" `shouldBe` "[StringToken '\\0']" - testLex "'\\12'" `shouldBe` "[StringToken '\\12']" - testLex "'\\s'" `shouldBe` "[StringToken '\\s']" - testLex "'\\-'" `shouldBe` "[StringToken '\\-']" - - it "strings with non-escaped chars" $ - testLex "'\\/'" `shouldBe` "[StringToken '\\/']" - - it "strings with escaped quotes" $ do - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" - testLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" - testLex "'\"'" `shouldBe` "[StringToken '\"']" - testLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" - - it "spread token" $ do - testLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" - - it "assignment" $ do - testLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" - - it "break/continue/return" $ do - testLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - testLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" - - it "var/let" $ do - testLex "var\n" `shouldBe` "[VarToken,WsToken]" - testLex "let\n" `shouldBe` "[LetToken,WsToken]" - - it "in/of" $ do - testLex "in\n" `shouldBe` "[InToken,WsToken]" - testLex "of\n" `shouldBe` "[OfToken,WsToken]" - - it "function" $ do - testLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" - - -testLex :: String -> String -testLex str = - either id stringify $ alexTestTokeniser str + describe "with Alex rules" $ do + it "comments" $ do + alexTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[CommentToken]" + alexTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[CommentToken]" + + it "return mixed with comments" $ do + alexTestLex "return 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return \n 1" `shouldBe` "[ReturnToken,WsToken,DecimalToken 1]" + alexTestLex "return //hello" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return /*hello*/" `shouldBe` "[ReturnToken,WsToken,CommentToken]" + alexTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken,DecimalToken 1]" + alexTestLex "return /*hello 1*/\n" `shouldBe` "[ReturnToken,WsToken,CommentToken,WsToken]" + + it "numbers" $ do + alexTestLex "123" `shouldBe` "[DecimalToken 123]" + alexTestLex "037" `shouldBe` "[OctalToken 037]" + alexTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + alexTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + alexTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + alexTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + alexTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + alexTestLex "\"dog\"" `shouldBe` "[StringToken \"dog\"]" + + it "strings with escape chars" $ do + alexTestLex "'\t'" `shouldBe` "[StringToken '\t']" + alexTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + alexTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + alexTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + alexTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + 
alexTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + alexTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + alexTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + alexTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + alexTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + alexTestLex "'\"'" `shouldBe` "[StringToken '\"']" + alexTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + alexTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + alexTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,WsToken,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + alexTestLex "break\nx=1" `shouldBe` "[BreakToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "continue\nx=1" `shouldBe` "[ContinueToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + alexTestLex "return\nx=1" `shouldBe` "[ReturnToken,WsToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + alexTestLex "var\n" `shouldBe` "[VarToken,WsToken]" + alexTestLex "let\n" `shouldBe` "[LetToken,WsToken]" + + it "in/of" $ do + alexTestLex "in\n" `shouldBe` "[InToken,WsToken]" + alexTestLex "of\n" `shouldBe` "[OfToken,WsToken]" + + it "function" $ do + alexTestLex "async function\n" `shouldBe` "[AsyncToken,WsToken,FunctionToken,WsToken]" + + + describe "with Happy rules" $ do + it "comments" $ do + happyTestLex "// 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 " `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */" `shouldBe` "[]" + happyTestLex "/* 𝟘𝟙𝟚𝟛𝟜𝟝𝟞𝟟𝟠𝟡 */ // foo" `shouldBe` "[]" + + it "return that doesn't produce autosemi" $ do + happyTestLex "return 1" `shouldBe` "[ReturnToken,DecimalToken 1]" + happyTestLex "return //hello" `shouldBe` "[ReturnToken]" + happyTestLex "return/*hello*/1" `shouldBe` "[ReturnToken,DecimalToken 1]" + + it "return mixed with newlines that produce autosemi" $ do + happyTestLex "return \n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "return mixed with comments that produce autosemi and trailing expressions" $ do + happyTestLex "return /*hello \n */" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return /*hello \n 1*/" `shouldBe` "[ReturnToken,AutoSemiToken]" + happyTestLex "return //hello\n" `shouldBe` "[ReturnToken,AutoSemiToken]" + + it "return mixed with comments that produce autosemi but no trailing expressions" $ do + happyTestLex "return //hello\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return /*hello*/\n 1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + happyTestLex "return//hello\n1" `shouldBe` "[ReturnToken,AutoSemiToken,DecimalToken 1]" + + it "numbers" $ do + happyTestLex "123" `shouldBe` "[DecimalToken 123]" + happyTestLex "037" `shouldBe` "[OctalToken 037]" + happyTestLex "0xab" `shouldBe` "[HexIntegerToken 0xab]" + happyTestLex "0xCD" `shouldBe` "[HexIntegerToken 0xCD]" + + it "invalid numbers" $ do + happyTestLex "089" `shouldBe` "[DecimalToken 0,DecimalToken 89]" + happyTestLex "0xGh" `shouldBe` "[DecimalToken 0,IdentifierToken 'xGx']" + + it "string" $ do + happyTestLex "'cat'" `shouldBe` "[StringToken 'cat']" + happyTestLex "\"dog\"" `shouldBe` "[StringToken 
\"dog\"]" + + it "strings with escape chars" $ do + happyTestLex "'\t'" `shouldBe` "[StringToken '\t']" + happyTestLex "'\\n'" `shouldBe` "[StringToken '\\n']" + happyTestLex "'\\\\n'" `shouldBe` "[StringToken '\\\\n']" + happyTestLex "'\\\\'" `shouldBe` "[StringToken '\\\\']" + happyTestLex "'\\0'" `shouldBe` "[StringToken '\\0']" + happyTestLex "'\\12'" `shouldBe` "[StringToken '\\12']" + happyTestLex "'\\s'" `shouldBe` "[StringToken '\\s']" + happyTestLex "'\\-'" `shouldBe` "[StringToken '\\-']" + + it "strings with non-escaped chars" $ + happyTestLex "'\\/'" `shouldBe` "[StringToken '\\/']" + + it "strings with escaped quotes" $ do + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\\"\"" `shouldBe` "[StringToken \"\\\\\"\"]" + happyTestLex "'\\\''" `shouldBe` "[StringToken '\\\\'']" + happyTestLex "'\"'" `shouldBe` "[StringToken '\"']" + happyTestLex "\"\\'\"" `shouldBe` "[StringToken \"\\'\"]" + + it "spread token" $ do + happyTestLex "...a" `shouldBe` "[SpreadToken,IdentifierToken 'a']" + + it "assignment" $ do + happyTestLex "x=1" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "x=1\ny=2" `shouldBe` "[IdentifierToken 'x',SimpleAssignToken,DecimalToken 1,IdentifierToken 'y',SimpleAssignToken,DecimalToken 2]" + + it "break/continue/return" $ do + happyTestLex "break\nx=1" `shouldBe` "[BreakToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "continue\nx=1" `shouldBe` "[ContinueToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + happyTestLex "return\nx=1" `shouldBe` "[ReturnToken,AutoSemiToken,IdentifierToken 'x',SimpleAssignToken,DecimalToken 1]" + + it "var/let" $ do + happyTestLex "var\n" `shouldBe` "[VarToken]" + happyTestLex "let\n" `shouldBe` "[LetToken]" + + it "in/of" $ do + happyTestLex "in\n" `shouldBe` "[InToken]" + happyTestLex "of\n" `shouldBe` "[OfToken]" + + it "function" $ do + happyTestLex "async function\n" `shouldBe` "[AsyncToken,FunctionToken]" + + + +alexTestLex :: String -> String +alexTestLex = genericTestLex alexTestTokeniser + +happyTestLex :: String -> String +happyTestLex = genericTestLex happyTestTokeniser + +genericTestLex :: (String -> Either String [Token]) -> String -> String +genericTestLex lexer str = + either id stringify $ lexer str where stringify xs = "[" ++ intercalate "," (map showToken xs) ++ "]" From 71fcdf5ee7bd1800f3e214e2ff6f64083799326c Mon Sep 17 00:00:00 2001 From: Franco Leonardo Bulgarelli Date: Fri, 11 Mar 2022 23:06:01 -0300 Subject: [PATCH 5/5] Fixed typo --- src/Language/JavaScript/Parser/Lexer.x | 14 +++++++------- test/Test/Language/Javascript/Lexer.hs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Language/JavaScript/Parser/Lexer.x b/src/Language/JavaScript/Parser/Lexer.x index ff4bf502..858087b8 100644 --- a/src/Language/JavaScript/Parser/Lexer.x +++ b/src/Language/JavaScript/Parser/Lexer.x @@ -14,8 +14,8 @@ module Language.JavaScript.Parser.Lexer , lexCont , alexError , runAlex - , happyTestTokeniser - , alexTestTokeniser + , happyTestTokenizer + , alexTestTokenizer , setInTemplate ) where @@ -412,8 +412,8 @@ lexToken = do return tok -- For tesing. 
-alexTestTokeniser :: String -> Either String [Token] -alexTestTokeniser input = +alexTestTokenizer :: String -> Either String [Token] +alexTestTokenizer input = runAlex input $ loop [] where loop acc = do @@ -426,10 +426,10 @@ alexTestTokeniser input = xs -> reverse xs _ -> loop (tok:acc) --- Test variant of alexTestTokeniser +-- Test variant of alexTestTokenizer -- that tokenizes using the same rules as those used by the happy parser -happyTestTokeniser :: String -> Either String [Token] -happyTestTokeniser input = runAlex input $ loop [] +happyTestTokenizer :: String -> Either String [Token] +happyTestTokenizer input = runAlex input $ loop [] where loop :: [Token] -> Alex [Token] loop acc = genericLexStep (loop . (:acc)) (loop acc) (\_ -> diff --git a/test/Test/Language/Javascript/Lexer.hs b/test/Test/Language/Javascript/Lexer.hs index b9adae9b..8c67ecfa 100644 --- a/test/Test/Language/Javascript/Lexer.hs +++ b/test/Test/Language/Javascript/Lexer.hs @@ -167,10 +167,10 @@ testLexer = describe "Lexer:" $ do alexTestLex :: String -> String -alexTestLex = genericTestLex alexTestTokeniser +alexTestLex = genericTestLex alexTestTokenizer happyTestLex :: String -> String -happyTestLex = genericTestLex happyTestTokeniser +happyTestLex = genericTestLex happyTestTokenizer genericTestLex :: (String -> Either String [Token]) -> String -> String genericTestLex lexer str =
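A minimal sketch of the behaviour this series adds (not part of the patches; it assumes a build with all five applied and that the lexer module still exports Token(..) alongside the renamed happyTestTokenizer). It mirrors the new "Happy rules" expectation "[ReturnToken,AutoSemiToken,DecimalToken 1]" for the input "return //hello\n 1":

-- Sketch only: the constructor names and the expected three-token shape come
-- from the tests added in this series; the import list is an assumption.
import Language.JavaScript.Parser.Lexer (Token (..), happyTestTokenizer)

main :: IO ()
main =
    case happyTestTokenizer "return //hello\n 1" of
        -- Comments no longer overwrite the remembered last token, so the
        -- newline after the comment still sees ReturnToken and triggers
        -- automatic semicolon insertion before the trailing expression.
        Right [ReturnToken {}, AutoSemiToken {}, DecimalToken {}] ->
            putStrLn "AutoSemiToken inserted after the comment"
        other ->
            putStrLn ("unexpected lexer output: " ++ either id (show . length) other)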