Skip to content

Commit f92ae49

Browse files
committed
Do not ignore XML comments when parsing:
fixes scala#508 - set `org.xml.sax.ext.LexicalHandler` on the `SAXParser` that `XMLLoader` uses; - override `LexicalHandler.comment()` in `FactoryAdapter` so that comments are added to the element content; - add unit tests; - add excludes for the binary compatibility checks.
1 parent deff368 commit f92ae49

File tree

6 files changed

+63
-20
lines changed

6 files changed

+63
-20
lines changed

build.sbt

+9
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,15 @@ lazy val xml = crossProject(JSPlatform, JVMPlatform, NativePlatform)
6868
// we compare classes built on JDK 16 (which we only do on CI, not at release time)
6969
// to previous-version artifacts that were built on 8. see scala/scala-xml#501
7070
exclude[DirectMissingMethodProblem]("scala.xml.include.sax.XIncluder.declaration"),
71+
72+
// caused by the switch from DefaultHandler to DefaultHandler2:
73+
exclude[MissingTypesProblem]("scala.xml.parsing.FactoryAdapter"),
74+
exclude[MissingTypesProblem]("scala.xml.parsing.NoBindingFactoryAdapter"),
75+
76+
exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.comment"),
77+
exclude[ReversedMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
78+
exclude[DirectMissingMethodProblem]("scala.xml.parsing.FactoryAdapter.createComment"),
79+
exclude[DirectMissingMethodProblem]("scala.xml.parsing.NoBindingFactoryAdapter.createComment")
7180
)
7281
},
7382

jvm/src/test/scala/scala/xml/XMLTest.scala

+15-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ import org.junit.{Test => UnitTest}
66
import org.junit.Assert.assertTrue
77
import org.junit.Assert.assertFalse
88
import org.junit.Assert.assertEquals
9-
import scala.xml.parsing.ConstructingParser
109
import java.io.StringWriter
1110
import java.io.ByteArrayOutputStream
1211
import java.io.StringReader
@@ -581,6 +580,21 @@ class XMLTestJVM {
581580
XML.loadString(broken)
582581
}
583582

583+
@UnitTest
584+
def issue508: Unit = {
585+
def check(xml: String): Unit = assertEquals(xml, XML.loadString(xml).toString)
586+
587+
check("<a><!-- comment --> suffix</a>")
588+
check("<a>prefix <!-- comment --> suffix</a>")
589+
check("<a>prefix <b><!-- comment --></b> suffix</a>")
590+
591+
// TODO since XMLLoader retrieves FactoryAdapter.rootNode,
592+
// capturing comments before and after the root element is not currently possible
593+
// (by the way, the same applies to processing instructions).
594+
//check("<!-- prologue --><a>text</a>")
595+
//check("<a>text</a><!-- epilogue -->")
596+
}
597+
584598
@UnitTest
585599
def nodeSeqNs: Unit = {
586600
val x = {
@@ -746,5 +760,4 @@ class XMLTestJVM {
746760

747761
assertEquals("", x.xEntityValue())
748762
}
749-
750763
}

shared/src/main/scala/scala/xml/factory/NodeFactory.scala

+2-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ trait NodeFactory[A <: Node] {
3434
def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean =
3535
ch1.view.zipAll(ch2.view, null, null) forall { case (x, y) => x eq y }
3636

37-
def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]) =
37+
def nodeEquals(n: Node, pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): Boolean =
3838
n.prefix == pre &&
3939
n.label == name &&
4040
n.attributes == attrSeq &&
@@ -55,7 +55,7 @@ trait NodeFactory[A <: Node] {
5555
}
5656
}
5757

58-
def makeText(s: String) = Text(s)
58+
def makeText(s: String): Text = Text(s)
5959
def makeComment(s: String): Seq[Comment] =
6060
if (ignoreComments) Nil else List(Comment(s))
6161
def makeProcInstr(t: String, s: String): Seq[ProcInstr] =

shared/src/main/scala/scala/xml/factory/XMLLoader.scala

+10-3
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,10 @@ package scala
1414
package xml
1515
package factory
1616

17+
import org.xml.sax.SAXNotRecognizedException
1718
import javax.xml.parsers.SAXParserFactory
18-
import parsing.{ FactoryAdapter, NoBindingFactoryAdapter }
19-
import java.io.{ InputStream, Reader, File, FileDescriptor }
19+
import parsing.{FactoryAdapter, NoBindingFactoryAdapter}
20+
import java.io.{File, FileDescriptor, InputStream, Reader}
2021
import java.net.URL
2122

2223
/**
@@ -28,7 +29,7 @@ trait XMLLoader[T <: Node] {
2829
def adapter: FactoryAdapter = new NoBindingFactoryAdapter()
2930

3031
private lazy val parserInstance = new ThreadLocal[SAXParser] {
31-
override def initialValue = {
32+
override def initialValue: SAXParser = {
3233
val parser = SAXParserFactory.newInstance()
3334
parser.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true)
3435
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false)
@@ -52,6 +53,12 @@ trait XMLLoader[T <: Node] {
5253
def loadXML(source: InputSource, parser: SAXParser): T = {
5354
val newAdapter = adapter
5455

56+
try {
57+
parser.setProperty("http://xml.org/sax/properties/lexical-handler", newAdapter)
58+
} catch {
59+
case _: SAXNotRecognizedException =>
60+
}
61+
5562
newAdapter.scopeStack = TopScope :: newAdapter.scopeStack
5663
parser.parse(source, newAdapter)
5764
newAdapter.scopeStack = newAdapter.scopeStack.tail

shared/src/main/scala/scala/xml/parsing/FactoryAdapter.scala

+19-8
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ package parsing
1616

1717
import scala.collection.Seq
1818
import org.xml.sax.Attributes
19-
import org.xml.sax.helpers.DefaultHandler
19+
import org.xml.sax.ext.DefaultHandler2
2020

2121
// can be mixed into FactoryAdapter if desired
22-
trait ConsoleErrorHandler extends DefaultHandler {
22+
trait ConsoleErrorHandler extends DefaultHandler2 {
2323
// ignore warning, crimson warns even for entity resolution!
2424
override def warning(ex: SAXParseException): Unit = {}
2525
override def error(ex: SAXParseException): Unit = printError("Error", ex)
@@ -39,8 +39,8 @@ trait ConsoleErrorHandler extends DefaultHandler {
3939
* namespace bindings, without relying on namespace handling of the
4040
* underlying SAX parser.
4141
*/
42-
abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
43-
var rootElem: Node = null
42+
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
43+
var rootElem: Node = _
4444

4545
val buffer = new StringBuilder()
4646
/** List of attributes
@@ -72,7 +72,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
7272
*/
7373
var scopeStack = List.empty[NamespaceBinding]
7474

75-
var curTag: String = null
75+
var curTag: String = _
7676
var capture: Boolean = false
7777

7878
// abstract methods
@@ -105,6 +105,11 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
105105
*/
106106
def createProcInstr(target: String, data: String): Seq[ProcInstr]
107107

108+
/**
109+
* creates a new comment node.
110+
*/
111+
def createComment(characters: String): Seq[Comment]
112+
108113
//
109114
// ContentHandler methods
110115
//
@@ -118,7 +123,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
118123
* @param length
119124
*/
120125
override def characters(ch: Array[Char], offset: Int, length: Int): Unit = {
121-
if (!capture) return
126+
if (!capture) ()
122127
// compliant: report every character
123128
else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
124129
// normalizing whitespace is not compliant, but useful
@@ -170,7 +175,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
170175

171176
if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
172177
val arg = if (pre == null) null else key
173-
scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe)
178+
scpe = NamespaceBinding(arg, nullIfEmpty(value), scpe)
174179
} else
175180
m = Attribute(Option(pre), key, Text(value), m)
176181
}
@@ -183,7 +188,7 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
183188
* captures text, possibly normalizing whitespace
184189
*/
185190
def captureText(): Unit = {
186-
if (capture && buffer.length > 0)
191+
if (capture && buffer.nonEmpty)
187192
hStack = createText(buffer.toString) :: hStack
188193

189194
buffer.clear()
@@ -226,4 +231,10 @@ abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node
226231
captureText()
227232
hStack = hStack.reverse_:::(createProcInstr(target, data).toList)
228233
}
234+
235+
override def comment(ch: Array[Char], start: Int, length: Int): Unit = {
236+
captureText()
237+
val commentText: String = String.valueOf(ch.slice(start, start + length))
238+
hStack = hStack.reverse_:::(createComment(commentText).toList)
239+
}
229240
}

shared/src/main/scala/scala/xml/parsing/NoBindingFactoryAdapter.scala

+8-5
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,23 @@ import factory.NodeFactory
2323
*/
2424
class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] {
2525
/** True. Every XML node may contain text that the application needs */
26-
def nodeContainsText(label: String) = true
26+
override def nodeContainsText(label: String) = true
2727

2828
/** From NodeFactory. Constructs an instance of scala.xml.Elem -- TODO: deprecate as in Elem */
29-
protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
29+
override protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
3030
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)
3131

3232
/** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing.
3333
TODO: deprecate as in Elem, or forward to create?? */
34-
def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
34+
override def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
3535
Elem(pre, label, attrs, scope, children.isEmpty, children: _*)
3636

3737
/** Creates a text node. */
38-
def createText(text: String) = Text(text)
38+
override def createText(text: String): Text = makeText(text)
3939

4040
/** Creates a processing instruction. */
41-
def createProcInstr(target: String, data: String) = makeProcInstr(target, data)
41+
override def createProcInstr(target: String, data: String): Seq[ProcInstr] = makeProcInstr(target, data)
42+
43+
/** Creates a comment. */
44+
override def createComment(characters: String): Seq[Comment] = makeComment(characters)
4245
}

0 commit comments

Comments
 (0)