Skip to content

StdLexical now emits ErrorToken on unterminated string literals. #402

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,23 @@ import scala.collection.mutable
class StdLexical extends Lexical with StdTokens {
// see `token` in `Scanners`
// Recognises one token at the current input position. The alternatives are
// listed in priority order:
//   1. identifiers / keywords: an `identChar` followed by `identChar`s or digits,
//      classified by `processIdent`;
//   2. numeric literals: one or more digits;
//   3. string literals opened by a single or a double quote. The body may not
//      contain the opening quote character or a newline; `stringEnd` then
//      either consumes the closing quote or reports "unclosed string literal";
//   4. the end-of-input marker `EofCh`, mapped to `EOF`;
//   5. delimiters, via `delim`;
//   6. anything else is reported as "illegal character".
def token: Parser[Token] =
  ( identChar ~ rep( identChar | digit )              ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
  | digit ~ rep( digit )                              ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
  // The quote is consumed first, then the body; the closing quote is handled
  // by `stringEnd` so that a missing one becomes an error rather than a
  // plain alternative failure.
  | '\'' ~> rep( chrExcept('\'', '\n') ) >> { chars => stringEnd('\'', chars) }
  | '\"' ~> rep( chrExcept('\"', '\n') ) >> { chars => stringEnd('\"', chars) }
  | EofCh                                             ^^^ EOF
  | delim
  | failure("illegal character")
  )

/** Returns the legal identifier chars, except digits.
  *
  * Accepts whatever `letter` accepts, or an underscore. The result type is
  * spelled out because this is a public member that `token` builds on.
  */
def identChar: Parser[Elem] = letter | elem('_')

/** Finishes a string literal whose opening quote and body were already read.
  *
  * @param quoteChar the quote character that opened the literal and must close it
  * @param chars     the characters collected between the quotes so far
  * @return a parser yielding `StringLit` of `chars` when the next character is
  *         `quoteChar`, and otherwise the error "unclosed string literal"
  */
private def stringEnd(quoteChar: Char, chars: List[Char]): Parser[Token] = {
  val terminated: Parser[Token] = elem(quoteChar) ^^^ StringLit(chars.mkString(""))
  val unterminated: Parser[Token] = err("unclosed string literal")
  terminated | unterminated
}

// see `whitespace` in `Scanners`
def whitespace: Parser[Any] = rep[Any](
whitespaceChar
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package scala.util.parsing.combinator.lexical

import org.junit.Test
import org.junit.Assert.assertEquals

import scala.util.parsing.input.Reader

import scala.collection.mutable.ListBuffer
import java.awt.RenderingHints.Key

/** Token-level tests for `StdLexical`: keywords, delimiters, literals,
  * error tokens and comment skipping.
  */
class StdLexicalTest {

  /** Runs `lexer` over `input` and collects the tokens it yields.
    *
    * Starting from a fresh `Scanner`, the reader is advanced with `rest`
    * until it reports `atEnd`; the `first` token of every reader visited
    * before that point is returned, in order.
    */
  private def lex[L <: StdLexical](lexer: L, input: String): List[lexer.Token] =
    Iterator
      .iterate[Reader[lexer.Token]](new lexer.Scanner(input))(_.rest)
      .takeWhile(reader => !reader.atEnd)
      .map(_.first)
      .toList

  /** A word registered in `reserved` lexes as `Keyword`; other words as `Identifier`. */
  @Test
  def parseKeyword: Unit = {
    object Lexer extends StdLexical
    Lexer.reserved.add("keyword")
    import Lexer._

    val expected: List[Token] = List(Keyword("keyword"), Identifier("id"))
    assertEquals(expected, lex(Lexer, "keyword id"))
  }

  /** Registered delimiters lex as `Keyword` tokens, including the two-character "=>". */
  @Test
  def parseDelimiters: Unit = {
    object Lexer extends StdLexical
    Lexer.delimiters ++= Seq("(", ")", "=>")
    import Lexer._

    val expected: List[Token] =
      List(Keyword("("), Identifier("id1"), Keyword(")"), Keyword("=>"), Identifier("id2"))
    assertEquals(expected, lex(Lexer, "(id1) => id2"))
  }

  /** Digit runs separated by blanks each become one `NumericLit`. */
  @Test
  def parseNumericLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] = List(NumericLit("1"), NumericLit("21"), NumericLit("321"))
    assertEquals(expected, lex(Lexer, " 1 21 321 "))
  }

  /** Both quote styles yield `StringLit`, and each may contain the other quote character. */
  @Test
  def parseStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] = List(
      StringLit("double double"),
      StringLit("single single"),
      StringLit("double'double"),
      StringLit("single\"single")
    )
    // One literal per line of the input below.
    val source = """
"double double"
'single single'
"double'double"
'single"single'
"""
    assertEquals(expected, lex(Lexer, source))
  }

  /** A string literal with no closing quote yields an `ErrorToken` after the preceding identifier. */
  @Test
  def parseUnclosedStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] = List(Identifier("id"), ErrorToken("unclosed string literal"))

    val unterminatedInputs = List(
      """id """",        // double quote is the last character of the input
      "id '",            // single quote is the last character of the input
      """id "string""",  // double quote followed by more characters, never closed
      "id 'string"       // single quote followed by more characters, never closed
    )

    unterminatedInputs.foreach { input =>
      assertEquals(expected, lex(Lexer, input))
    }
  }

  /** A character that starts no token yields an `ErrorToken`, and lexing resumes after it. */
  @Test
  def parseIllegalCharacter: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] =
      List(Identifier("we"), ErrorToken("illegal character"), Identifier("scala"))
    assertEquals(expected, lex(Lexer, "we\u2665scala"))
  }

  /** Line and block comments produce no tokens. */
  @Test
  def parseComments: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    // Single-line comments, including an empty one and one at the end of the input.
    val afterLineComments: List[Token] = List(Identifier("id"))
    assertEquals(afterLineComments, lex(Lexer, "//\n// comment\nid // "))

    // Block comments, one of them spanning a newline.
    val afterBlockComments: List[Token] = List(Identifier("id1"), Identifier("id2"))
    assertEquals(afterBlockComments, lex(Lexer, "/* single */ id1 /* multi \n line */ id2"))
  }
}