diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala
index aea258486..53c9d6bf7 100644
--- a/jvm/src/test/scala/scala/xml/XMLTest.scala
+++ b/jvm/src/test/scala/scala/xml/XMLTest.scala
@@ -415,6 +415,71 @@ class XMLTestJVM {
     assertHonorsIterableContract(.attributes)
   }
 
+  @UnitTest
+  def preserveSpaceTextOptionDisabledIssue107: Unit = {
+    // This test is rhetorical, but is the argument for being
+    // consistent with entities.
+    val x = "
tt
"
+    val preserveWS = false
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    assertEquals(x, d.toString)
+  }
+
+  @UnitTest
+  def preserveNoSpaceBetweenEntitiesOptionDisabledIssue107: Unit = {
+    // This is the example given in the original post.
+    val x = "
<<
"
+    val preserveWS = false
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    // Should:
+    assertEquals(x, d.toString)
+    // But was adding a space:
+    // assertEquals("
< <
", d.toString)
+  }
+
+  @UnitTest
+  def preserveNoSpaceBetweenEntitiesOptionEnabledIssue107: Unit = {
+    val x = "
<<
"
+    // Shouldn't add space when this option is enabled, either.
+    val preserveWS = true
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    // Should:
+    assertEquals(x, d.toString)
+    // But was adding a space:
+    // assertEquals("
< <
", d.toString)
+  }
+
+  @UnitTest
+  def preserveSpaceBetweenEntitiesOptionEnabledIssue107: Unit = {
+    val x = "
< <
"
+    val preserveWS = true
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    // This was already correct in 1.0.5
+    assertEquals(x, d.toString)
+  }
+
+  @UnitTest
+  def preserveSpaceBetweenEntitiesOptionDisabledIssue107: Unit = {
+    val x = "
< <
"
+    val preserveWS = false
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    // This was already correct in 1.0.5
+    assertEquals(x, d.toString)
+  }
+
+  @UnitTest
+  def issue77: Unit = {
+    val preserveWS = false
+
+    val x = "a & b"
+    val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document
+    assertEquals(x, d.toString)
+
+    val y = "& a &"
+    val e = ConstructingParser.fromSource(scala.io.Source.fromString(y), preserveWS).document
+    assertEquals(y, e.toString)
+  }
+
   @UnitTest
   def t5843 {
     val foo = scala.xml.Attribute(null, "foo", "1", scala.xml.Null)
diff --git a/shared/src/main/scala/scala/xml/Utility.scala b/shared/src/main/scala/scala/xml/Utility.scala
index 5b96ccefb..a66a80ea7 100755
--- a/shared/src/main/scala/scala/xml/Utility.scala
+++ b/shared/src/main/scala/scala/xml/Utility.scala
@@ -243,6 +243,23 @@ object Utility extends AnyRef with parsing.TokenTests {
     }
   }
 
+  /**
+   * Heuristically decides whether a node serializes to an entity reference.
+   *
+   * The node is rendered with `toString` and the result is tested textually:
+   * it must start with `&` and end with `;`.
+   *
+   * NOTE(review): this is a purely textual test, so any node whose string form
+   * happens to start with `&` and end with `;` is reported as an entity ref.
+   *
+   * @param n the node to inspect
+   * @return `true` when the string form of `n` starts with `&` and ends with `;`
+   */
+  def checkNodeForEntityRef(n: Node): Boolean = {
+    val st = n.toString
+    st.startsWith("&") && st.endsWith(";")
+  }
+
   def sequenceToXML(
     children: Seq[Node],
     pscope: NamespaceBinding = TopScope,
@@ -256,10 +273,14 @@ object Utility extends AnyRef with parsing.TokenTests {
       else if (children forall isAtomAndNotText) { // add space
         val it = children.iterator
         val f = it.next()
+        var prev: Node = f // the sibling serialized just before `x`
         serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
         while (it.hasNext) {
           val x = it.next()
-          sb.append(' ')
+          // Skip the separating space only between two EntityRefs; a space
+          // written between them in the input is kept by appendText in MarkupParser.
+          if (!(checkNodeForEntityRef(prev) && checkNodeForEntityRef(x))) sb.append(' ')
+          prev = x
           serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
         }
       } else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
diff --git a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
index a0831249b..64ee425fd 100755
--- a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
+++ b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala
@@ -405,10 +405,28 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
   def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = {
     if (preserveWS)
       ts &+ handle.text(pos, txt)
-    else
-      for (t <- TextBuffer.fromString(txt).toText) {
-        ts &+ handle.text(pos, t.text)
+    else {
+      // Build the text nodes once; both branches below need the result.
+      val normalized = TextBuffer.fromString(txt).toText
+      // 'txt' is just made up of one or more spaces and 'ts' is not empty
+      if (ts.nonEmpty && normalized == Nil) {
+        // Append a single-space text node when the last node in 'ts' is an
+        // 'Atom' and the next node to be parsed is an entity or character ref.
+        if (ts.last.isAtom && ch == '&')
+          ts &+ handle.text(pos, " ")
       }
+      else {
+        // 'txt' starts with a space and follows an 'Atom'
+        if (txt.startsWith(" ") && ts.nonEmpty && ts.last.isAtom)
+          ts &+ handle.text(pos, " ")
+        for (t <- normalized) {
+          ts &+ handle.text(pos, t.text)
+        }
+        // 'txt' ends with a space and is followed by an entity or character ref
+        if (txt.endsWith(" ") && ch == '&')
+          ts &+ handle.text(pos, " ")
+      }
+    }
   }
 
   /**