Skip to content

Commit e952ff4

Browse files
authored
Merge pull request #402 from martingd/issue-397
StdLexical now emits ErrorToken on unterminated string literals.
2 parents 2983cbb + fcb6afa commit e952ff4

File tree

2 files changed

+136
-7
lines changed

2 files changed

+136
-7
lines changed

Diff for: shared/src/main/scala/scala/util/parsing/combinator/lexical/StdLexical.scala

+10-7
Original file line number | Diff line number | Diff line change
@@ -36,20 +36,23 @@ import scala.collection.mutable
3636
class StdLexical extends Lexical with StdTokens {
3737
// see `token` in `Scanners`
3838
// Tokenizes one lexeme. The alternatives are tried in order: identifier/keyword (via
// processIdent), numeric literal, single- or double-quoted string literal, end of input,
// delimiter; anything else is reported as "illegal character".
// NOTE(review): this span is a rendered diff. A line following an `NN-` gutter marker is a
// removed (pre-change) alternative; a line following an `NN+` marker is its replacement.
def token: Parser[Token] =
39-
( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
40-
| digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
41-
| '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
42-
| '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
43-
| EofCh ^^^ EOF
44-
| '\'' ~> failure("unclosed string literal")
45-
| '\"' ~> failure("unclosed string literal")
39+
( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
40+
| digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
41+
// Replacement string alternatives: after the opening quote, collect characters up to the next
// matching quote or newline, then hand them to stringEnd, which yields either a StringLit or
// the "unclosed string literal" error.
| '\'' ~> rep( chrExcept('\'', '\n') ) >> { chars => stringEnd('\'', chars) }
42+
| '\"' ~> rep( chrExcept('\"', '\n') ) >> { chars => stringEnd('\"', chars) }
43+
| EofCh ^^^ EOF
4644
| delim
4745
| failure("illegal character")
4846
)
4947

5048
/** Returns the legal identifier chars, except digits. */
5149
// '_' is accepted in addition to letters; `token` uses this parser for the first character
// of an identifier and, together with `digit`, for the remaining ones.
def identChar = letter | elem('_')
5250

51+
/** Parses the final quote of a string literal or fails if it is unterminated. */
52+
private def stringEnd(quoteChar: Char, chars: List[Char]): Parser[Token] = {
  // Success path: the matching closing quote turns the collected characters into a StringLit.
  val closingQuote = elem(quoteChar) ^^^ StringLit(chars.mkString(""))
  // No closing quote: report the literal as unclosed through `err` instead of `failure`.
  closingQuote | err("unclosed string literal")
}
55+
5356
// see `whitespace` in `Scanners`
5457
def whitespace: Parser[Any] = rep[Any](
5558
whitespaceChar
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,126 @@
1+
package scala.util.parsing.combinator.lexical
2+
3+
import org.junit.Test
4+
import org.junit.Assert.assertEquals
5+
6+
import scala.util.parsing.input.Reader
7+
8+
import scala.collection.mutable.ListBuffer
9+
import java.awt.RenderingHints.Key
10+
11+
/** Token-level tests for `StdLexical`: keywords, delimiters, literals, comments and error tokens. */
class StdLexicalTest {

  /** Feeds `input` through `lexer`'s scanner and returns every token read before the reader is at its end. */
  private def lex[Lexer <: StdLexical](lexer: Lexer, input: String): List[lexer.Token] =
    Iterator
      .iterate[Reader[lexer.Token]](new lexer.Scanner(input))(_.rest)
      .takeWhile(reader => !reader.atEnd)
      .map(_.first)
      .toList

  /** A word registered in `reserved` lexes as a Keyword; any other word lexes as an Identifier. */
  @Test
  def parseKeyword: Unit = {
    object Lexer extends StdLexical
    Lexer.reserved.add("keyword")
    import Lexer._

    val expected: List[Token] = List(Keyword("keyword"), Identifier("id"))
    assertEquals(expected, lex(Lexer, "keyword id"))
  }

  /** Registered delimiters, including the two-character "=>", lex as Keyword tokens. */
  @Test
  def parseDelimiters: Unit = {
    object Lexer extends StdLexical
    Lexer.delimiters ++= List("(", ")", "=>")
    import Lexer._

    val expected: List[Token] =
      List(Keyword("("), Identifier("id1"), Keyword(")"), Keyword("=>"), Identifier("id2"))
    assertEquals(expected, lex(Lexer, "(id1) => id2"))
  }

  /** Digit runs separated by whitespace each become one NumericLit. */
  @Test
  def parseNumericLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] = List(NumericLit("1"), NumericLit("21"), NumericLit("321"))
    assertEquals(expected, lex(Lexer, " 1 21 321 "))
  }

  /** Both quote styles produce StringLit, and the other quote character may appear inside. */
  @Test
  def parseStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] = List(
      StringLit("double double"),
      StringLit("single single"),
      StringLit("double'double"),
      StringLit("single\"single")
    )
    val input =
      """
      "double double"
      'single single'
      "double'double"
      'single"single'
      """
    assertEquals(expected, lex(Lexer, input))
  }

  /** A string literal without a closing quote yields an ErrorToken instead of aborting the scan. */
  @Test
  def parseUnclosedStringLiterals: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    // Every case below is expected to lex to the same two tokens.
    val expected: List[Token] = List(Identifier("id"), ErrorToken("unclosed string literal"))

    // Opening double quote is the last character of the input.
    assertEquals(expected, lex(Lexer, """id """"))

    // Opening single quote is the last character of the input.
    assertEquals(expected, lex(Lexer, "id '"))

    // Opening double quote is followed by string content but never closed.
    assertEquals(expected, lex(Lexer, """id "string"""))

    // Opening single quote is followed by string content but never closed.
    assertEquals(expected, lex(Lexer, "id 'string"))
  }

  /** A character matching no token rule yields an ErrorToken and lexing continues after it. */
  @Test
  def parseIllegalCharacter: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    val expected: List[Token] =
      List(Identifier("we"), ErrorToken("illegal character"), Identifier("scala"))
    assertEquals(expected, lex(Lexer, "we\u2665scala"))
  }

  /** Single-line and multi-line comments produce no tokens. */
  @Test
  def parseComments: Unit = {
    object Lexer extends StdLexical
    import Lexer._

    // Single-line comments.
    val afterLineComments: List[Token] = List(Identifier("id"))
    assertEquals(afterLineComments, lex(Lexer, "//\n// comment\nid // "))

    // Multi-line comments.
    val aroundBlockComments: List[Token] = List(Identifier("id1"), Identifier("id2"))
    assertEquals(aroundBlockComments, lex(Lexer, "/* single */ id1 /* multi \n line */ id2"))
  }
}

0 commit comments

Comments
 (0)