From 307d33636bd0887bf61efc7a1ed451bfd91c7cd1 Mon Sep 17 00:00:00 2001 From: piyush-jaiswal Date: Sun, 2 Apr 2017 20:17:39 +0530 Subject: [PATCH 1/2] Fix automatic space insertion between EntityRef --- jvm/src/test/scala/scala/xml/XMLTest.scala | 52 +++++++++++++++++++ shared/src/main/scala/scala/xml/Utility.scala | 11 +++- .../scala/xml/parsing/MarkupParser.scala | 15 ++++-- 3 files changed, 74 insertions(+), 4 deletions(-) diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala index aea258486..60fa90e2d 100644 --- a/jvm/src/test/scala/scala/xml/XMLTest.scala +++ b/jvm/src/test/scala/scala/xml/XMLTest.scala @@ -415,6 +415,58 @@ class XMLTestJVM { assertHonorsIterableContract(.attributes) } + @UnitTest + def preserveSpaceTextOptionDisabledIssue107: Unit = { + // This test is rhetorical, but is the argument for being + // consistent with entities. + val x = "
tt
" + val preserveWS = false + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + assertEquals(x, d.toString) + } + + @UnitTest + def preserveNoSpaceBetweenEntitiesOptionDisabledIssue107: Unit = { + // This is the example given in the original post. + val x = "
<<
" + val preserveWS = false + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + // Should: + assertEquals(x, d.toString) + // But was adding a space: + // assertEquals("
< <
", d.toString) + } + + @UnitTest + def preserveNoSpaceBetweenEntitiesOptionEnabledIssue107: Unit = { + val x = "
<<
" + // Shouldn't add space when this option is enabled, either. + val preserveWS = true + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + // Should: + assertEquals(x, d.toString) + // But was adding a space: + // assertEquals("
< <
", d.toString) + } + + @UnitTest + def preserveSpaceBetweenEntitiesOptionEnabledIssue107: Unit = { + val x = "
< <
" + val preserveWS = true + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + // This was already correct in 1.0.5 + assertEquals(x, d.toString) + } + + @UnitTest + def preserveSpaceBetweenEntitiesOptionDisabledIssue107: Unit = { + val x = "
< <
" + val preserveWS = false + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + // This was already correct in 1.0.5 + assertEquals(x, d.toString) + } + @UnitTest def t5843 { val foo = scala.xml.Attribute(null, "foo", "1", scala.xml.Null) diff --git a/shared/src/main/scala/scala/xml/Utility.scala b/shared/src/main/scala/scala/xml/Utility.scala index 5b96ccefb..a66a80ea7 100755 --- a/shared/src/main/scala/scala/xml/Utility.scala +++ b/shared/src/main/scala/scala/xml/Utility.scala @@ -243,6 +243,12 @@ object Utility extends AnyRef with parsing.TokenTests { } } + // Checks if node when converted to string is an entity ref + def checkNodeForEntityRef(n: Node): Boolean = { + val st = n.toString + st.startsWith("&") && st.endsWith(";") + } + def sequenceToXML( children: Seq[Node], pscope: NamespaceBinding = TopScope, @@ -256,10 +262,13 @@ object Utility extends AnyRef with parsing.TokenTests { else if (children forall isAtomAndNotText) { // add space val it = children.iterator val f = it.next() + var prev: Node = f serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) while (it.hasNext) { val x = it.next() - sb.append(' ') + // No need to append if space is between two EntityRefs. This is taken care in appendText in MarkupParser + if (!checkNodeForEntityRef(prev) && !checkNodeForEntityRef(x)) sb.append(' ') + prev = x serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } } else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } diff --git a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala index a0831249b..4934482aa 100755 --- a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala +++ b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala @@ -405,10 +405,19 @@ trait MarkupParser extends MarkupParserCommon with TokenTests { def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = { if (preserveWS) ts &+ handle.text(pos, txt) - else - for (t <- TextBuffer.fromString(txt).toText) { - ts &+ handle.text(pos, t.text) + else { + // If 'txt' is just made up of one or more spaces and 'ts' is not empty + if (!ts.isEmpty && TextBuffer.fromString(txt).toText == Nil) { + // Check if the last node in 'ts' was an 'Atom' and the next node to be parsed is an entity or character ref + if(ts.last.isAtom && ch == '&') + ts &+ handle.text(pos, " ") // Append a text node consisting of a single space } + else { + for (t <- TextBuffer.fromString(txt).toText) { + ts &+ handle.text(pos, t.text) + } + } + } } /** From cfbc087090e7fb44ec16ecca3e95837ce77be671 Mon Sep 17 00:00:00 2001 From: piyush-jaiswal Date: Sun, 30 Apr 2017 00:15:06 +0530 Subject: [PATCH 2/2] Fix issue #77 --- jvm/src/test/scala/scala/xml/XMLTest.scala | 13 +++++++++++++ .../main/scala/scala/xml/parsing/MarkupParser.scala | 6 ++++++ 2 files changed, 19 insertions(+) diff --git a/jvm/src/test/scala/scala/xml/XMLTest.scala b/jvm/src/test/scala/scala/xml/XMLTest.scala index 60fa90e2d..53c9d6bf7 100644 --- a/jvm/src/test/scala/scala/xml/XMLTest.scala +++ b/jvm/src/test/scala/scala/xml/XMLTest.scala @@ -467,6 +467,19 @@ class XMLTestJVM { assertEquals(x, d.toString) } + @UnitTest + def issue77: Unit = { + val preserveWS = false + + val x = "a & b" + val d = ConstructingParser.fromSource(scala.io.Source.fromString(x), preserveWS).document + assertEquals(x, d.toString) + + val y = "& a &" + val e = ConstructingParser.fromSource(scala.io.Source.fromString(y), preserveWS).document + assertEquals(y, e.toString) + } + @UnitTest def t5843 { val foo = scala.xml.Attribute(null, "foo", "1", scala.xml.Null) diff --git a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala index 4934482aa..64ee425fd 100755 --- a/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala +++ b/shared/src/main/scala/scala/xml/parsing/MarkupParser.scala @@ -413,9 +413,15 @@ trait MarkupParser extends MarkupParserCommon with TokenTests { ts &+ handle.text(pos, " ") // Append a text node consisting of a single space } else { + // If 'txt 'starts with a space and follows an 'Atom' + if(txt.startsWith(" ") && !ts.isEmpty && ts.last.isAtom) + ts &+ handle.text(pos, " ") for (t <- TextBuffer.fromString(txt).toText) { ts &+ handle.text(pos, t.text) } + // If txt ends with a space and is followed by an entity or character ref + if(txt.endsWith(" ") && ch == '&') + ts &+ handle.text(pos, " ") } } }