diff --git a/shared/src/main/scala/scala/util/parsing/combinator/lexical/StdLexical.scala b/shared/src/main/scala/scala/util/parsing/combinator/lexical/StdLexical.scala
index 4aa852c3..0b21b02f 100644
--- a/shared/src/main/scala/scala/util/parsing/combinator/lexical/StdLexical.scala
+++ b/shared/src/main/scala/scala/util/parsing/combinator/lexical/StdLexical.scala
@@ -36,13 +36,11 @@ import scala.collection.mutable
 class StdLexical extends Lexical with StdTokens {
   // see `token` in `Scanners`
   def token: Parser[Token] =
-    ( identChar ~ rep( identChar | digit )              ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
-    | digit ~ rep( digit )                              ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
-    | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
-    | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
-    | EofCh                                             ^^^ EOF
-    | '\'' ~> failure("unclosed string literal")
-    | '\"' ~> failure("unclosed string literal")
+    ( identChar ~ rep( identChar | digit )              ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
+    | digit ~ rep( digit )                              ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
+    | '\'' ~> rep( chrExcept('\'', '\n') ) >> { chars => stringEnd('\'', chars) }
+    | '\"' ~> rep( chrExcept('\"', '\n') ) >> { chars => stringEnd('\"', chars) }
+    | EofCh                                             ^^^ EOF
     | delim
     | failure("illegal character")
     )
@@ -50,6 +48,11 @@ class StdLexical extends Lexical with StdTokens {
   /** Returns the legal identifier chars, except digits. */
   def identChar = letter | elem('_')
 
+  /** Parses the final quote of a string literal or fails if it is unterminated. */
+  private def stringEnd(quoteChar: Char, chars: List[Char]): Parser[Token] = {
+    { elem(quoteChar) ^^^ StringLit(chars mkString "") } | err("unclosed string literal")
+  }
+
   // see `whitespace in `Scanners`
   def whitespace: Parser[Any] = rep[Any](
       whitespaceChar
diff --git a/shared/src/test/scala/scala/util/parsing/combinator/lexical/StdLexicalTest.scala b/shared/src/test/scala/scala/util/parsing/combinator/lexical/StdLexicalTest.scala
new file mode 100644
index 00000000..9d31c42a
--- /dev/null
+++ b/shared/src/test/scala/scala/util/parsing/combinator/lexical/StdLexicalTest.scala
@@ -0,0 +1,125 @@
+package scala.util.parsing.combinator.lexical
+
+import org.junit.Test
+import org.junit.Assert.assertEquals
+
+import scala.util.parsing.input.Reader
+
+import scala.collection.mutable.ListBuffer
+
+class StdLexicalTest {
+  private def lex[Lexer <: StdLexical](lexer: Lexer, input: String): List[lexer.Token] = {
+    var scanner: Reader[lexer.Token] = new lexer.Scanner(input)
+    val listBuffer = ListBuffer[lexer.Token]()
+    while (!scanner.atEnd) {
+      listBuffer += scanner.first
+      scanner = scanner.rest
+    }
+    listBuffer.toList
+  }
+
+  @Test
+  def parseKeyword: Unit = {
+    object Lexer extends StdLexical
+    Lexer.reserved add "keyword"
+    import Lexer._
+    assertEquals(
+      List(Keyword("keyword"), Identifier("id")),
+      lex(Lexer, "keyword id")
+    )
+  }
+
+  @Test
+  def parseDelimiters: Unit = {
+    object Lexer extends StdLexical
+    Lexer.delimiters ++= List("(", ")", "=>")
+    import Lexer._
+    assertEquals(
+      List(Keyword("("), Identifier("id1"), Keyword(")"), Keyword("=>"), Identifier("id2")),
+      lex(Lexer, "(id1) => id2")
+    )
+  }
+
+  @Test
+  def parseNumericLiterals: Unit = {
+    object Lexer extends StdLexical
+    import Lexer._
+    assertEquals(
+      List(NumericLit("1"), NumericLit("21"), NumericLit("321")),
NumericLit("21"), NumericLit("321")), + lex(Lexer, " 1 21 321 ") + ) + } + + @Test + def parseStringLiterals: Unit = { + object Lexer extends StdLexical + import Lexer._ + assertEquals( + List(StringLit("double double"), StringLit("single single"), StringLit("double'double"), StringLit("single\"single")), + lex(Lexer, """ + "double double" + 'single single' + "double'double" + 'single"single' + """) + ) + } + + @Test + def parseUnclosedStringLiterals: Unit = { + object Lexer extends StdLexical + import Lexer._ + + // Unclosed double quoted string at end of input. + assertEquals( + List(Identifier("id"), ErrorToken("unclosed string literal")), + lex(Lexer, """id """") + ) + + // Unclosed single quoted string at end of input. + assertEquals( + List(Identifier("id"), ErrorToken("unclosed string literal")), + lex(Lexer, "id '") + ) + + // Unclosed double quoted string _not_ at end of input. + assertEquals( + List(Identifier("id"), ErrorToken("unclosed string literal")), + lex(Lexer, """id "string""") + ) + + // Unclosed single quoted string _not_ at end of input. + assertEquals( + List(Identifier("id"), ErrorToken("unclosed string literal")), + lex(Lexer, "id 'string") + ) + } + + @Test + def parseIllegalCharacter: Unit = { + object Lexer extends StdLexical + import Lexer._ + assertEquals( + List(Identifier("we"), ErrorToken("illegal character"), Identifier("scala")), + lex(Lexer, "we\u2665scala") + ) + } + + @Test + def parseComments: Unit = { + object Lexer extends StdLexical + import Lexer._ + + // Single-line comments. + assertEquals( + List(Identifier("id")), + lex(Lexer, "//\n// comment\nid // ") + ) + + // Multi-line comments. + assertEquals( + List(Identifier("id1"), Identifier("id2")), + lex(Lexer, "/* single */ id1 /* multi \n line */ id2") + ) + } +}