Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix automatic space insertion between entity references in ConstructingParser. #140

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions jvm/src/test/scala/scala/xml/XMLTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,71 @@ class XMLTestJVM {
assertHonorsIterableContract(<a a="" y={ null: String }/>.attributes)
}

@UnitTest
def preserveSpaceTextOptionDisabledIssue107: Unit = {
// This test is rhetorical, but is the argument for being
// consistent with entities.
val x = "<div>tt</div>"
val preserveWS = false
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
assertEquals(x, d.toString)
}

@UnitTest
def preserveNoSpaceBetweenEntitiesOptionDisabledIssue107: Unit = {
// This is the example given in the original post.
val x = "<div>&lt;&lt;</div>"
val preserveWS = false
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
// Should:
assertEquals(x, d.toString)
// But was adding a space:
// assertEquals("<div>&lt; &lt;</div>", d.toString)
}

@UnitTest
def preserveNoSpaceBetweenEntitiesOptionEnabledIssue107: Unit = {
val x = "<div>&lt;&lt;</div>"
// Shouldn't add space when this option is enabled, either.
val preserveWS = true
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
// Should:
assertEquals(x, d.toString)
// But was adding a space:
// assertEquals("<div>&lt; &lt;</div>", d.toString)
}

@UnitTest
def preserveSpaceBetweenEntitiesOptionEnabledIssue107: Unit = {
val x = "<div>&lt; &lt;</div>"
val preserveWS = true
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
// This was already correct in 1.0.5
assertEquals(x, d.toString)
}

@UnitTest
def preserveSpaceBetweenEntitiesOptionDisabledIssue107: Unit = {
val x = "<div>&lt; &lt;</div>"
val preserveWS = false
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
// This was already correct in 1.0.5
assertEquals(x, d.toString)
}

@UnitTest
def issue77: Unit = {
val preserveWS = false

val x = "<x>a &amp; b</x>"
val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
assertEquals(x, d.toString)

val y = "<y>&amp; a &amp;</y>"
val e = ConstructingParser.fromSource(scala.io.Source.fromString(y), preserveWS).document
assertEquals(y, e.toString)
}

@UnitTest
def t5843 {
val foo = scala.xml.Attribute(null, "foo", "1", scala.xml.Null)
Expand Down
11 changes: 10 additions & 1 deletion shared/src/main/scala/scala/xml/Utility.scala
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,12 @@ object Utility extends AnyRef with parsing.TokenTests {
}
}

// Checks if node when converted to string is an entity ref
def checkNodeForEntityRef(n: Node): Boolean = {
val st = n.toString
st.startsWith("&") && st.endsWith(";")
}

def sequenceToXML(
children: Seq[Node],
pscope: NamespaceBinding = TopScope,
Expand All @@ -256,10 +262,13 @@ object Utility extends AnyRef with parsing.TokenTests {
else if (children forall isAtomAndNotText) { // add space
val it = children.iterator
val f = it.next()
var prev: Node = f
serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
while (it.hasNext) {
val x = it.next()
sb.append(' ')
// No need to append if space is between two EntityRefs. This is taken care in appendText in MarkupParser
if (!checkNodeForEntityRef(prev) && !checkNodeForEntityRef(x)) sb.append(' ')
prev = x
serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
}
} else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
Expand Down
21 changes: 18 additions & 3 deletions shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,25 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = {
if (preserveWS)
ts &+ handle.text(pos, txt)
else
for (t <- TextBuffer.fromString(txt).toText) {
ts &+ handle.text(pos, t.text)
else {
// If 'txt' is just made up of one or more spaces and 'ts' is not empty
if (!ts.isEmpty && TextBuffer.fromString(txt).toText == Nil) {
// Check if the last node in 'ts' was an 'Atom' and the next node to be parsed is an entity or character ref
if(ts.last.isAtom && ch == '&')
ts &+ handle.text(pos, " ") // Append a text node consisting of a single space
}
else {
// If 'txt 'starts with a space and follows an 'Atom'
if(txt.startsWith(" ") && !ts.isEmpty && ts.last.isAtom)
ts &+ handle.text(pos, " ")
for (t <- TextBuffer.fromString(txt).toText) {
ts &+ handle.text(pos, t.text)
}
// If txt ends with a space and is followed by an entity or character ref
if(txt.endsWith(" ") && ch == '&')
ts &+ handle.text(pos, " ")
}
}
}

/**
Expand Down