Skip to content

Commit c000880

Browse files
authored
Fixing a bug when multi-byte characters were split (#75)
1 parent 3d8bafd commit c000880

File tree

2 files changed

+69
-8
lines changed

2 files changed

+69
-8
lines changed

src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java

+12-8
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ public final void writeStartTagEnd()
390390
flushBuffer();
391391
}
392392
_outputBuffer[_outputPtr++] = BYTE_GT;
393-
}
393+
}
394394

395395
@Override
396396
public void writeStartTagEmptyEnd()
@@ -435,7 +435,7 @@ public final void writeEndTag(WName name)
435435
ptr += name.appendBytes(bbuf, ptr);
436436
bbuf[ptr++] = BYTE_GT;
437437
_outputPtr = ptr;
438-
}
438+
}
439439

440440
/*
441441
/**********************************************************************
@@ -572,6 +572,8 @@ protected final void writeAttrValue(char[] vbuf, int offset, int len)
572572
{
573573
if (_surrogate != 0) {
574574
outputSurrogates(_surrogate, vbuf[offset]);
575+
// reset the temporary surrogate storage
576+
_surrogate = 0;
575577
++offset;
576578
--len;
577579
}
@@ -785,7 +787,7 @@ public int writeCData(char[] cbuf, int offset, int len)
785787
writeCDataEnd(); // will check surrogates
786788
}
787789
return ix;
788-
}
790+
}
789791

790792
protected int writeCDataContents(char[] cbuf, int offset, int len)
791793
throws IOException, XMLStreamException
@@ -865,7 +867,7 @@ protected int writeCDataContents(char[] cbuf, int offset, int len)
865867
}
866868
}
867869
return -1;
868-
}
870+
}
869871

870872
@Override
871873
public final void writeCharacters(String text)
@@ -908,6 +910,8 @@ public final void writeCharacters(char[] cbuf, int offset, int len)
908910
{
909911
if (_surrogate != 0) {
910912
outputSurrogates(_surrogate, cbuf[offset]);
913+
// reset the temporary surrogate storage
914+
_surrogate = 0;
911915
++offset;
912916
--len;
913917
}
@@ -1088,7 +1092,7 @@ private final void writeSplitCharacters(char[] cbuf, int offset, int len)
10881092
}
10891093
_outputBuffer[_outputPtr++] = (byte)ch;
10901094
}
1091-
}
1095+
}
10921096

10931097
/*
10941098
/**********************************************************************
@@ -1439,7 +1443,7 @@ public void writeXmlDeclaration(String version, String encoding, String standalo
14391443
// !!! TBI: check validity
14401444
writeRaw(version, 0, version.length());
14411445
writeRaw(BYTE_APOS);
1442-
1446+
14431447
if (encoding != null && encoding.length() > 0) {
14441448
writeRaw(BYTES_XMLDECL_ENCODING);
14451449
// !!! TBI: check validity
@@ -1453,7 +1457,7 @@ public void writeXmlDeclaration(String version, String encoding, String standalo
14531457
writeRaw(BYTE_APOS);
14541458
}
14551459
writeRaw(BYTE_QMARK, BYTE_GT);
1456-
}
1460+
}
14571461

14581462
/*
14591463
/**********************************************************************
@@ -1594,7 +1598,7 @@ protected final void flushBuffer()
15941598
protected final void writeAsEntity(int c)
15951599
throws IOException
15961600
{
1597-
// Quickie check to avoid
1601+
// Quickie check to avoid
15981602

15991603
byte[] buf = _outputBuffer;
16001604
int ptr = _outputPtr;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.fasterxml.aalto.sax;
2+
3+
import com.fasterxml.aalto.out.Utf8XmlWriter;
4+
import com.fasterxml.aalto.out.WriterConfig;
5+
6+
import java.io.ByteArrayOutputStream;
7+
8+
public class TestSaxWriter extends base.BaseTestCase {
9+
10+
public void testSurrogateMemory1() throws Exception {
11+
// Regression test: before the fix, writing this attribute value failed with
12+
// javax.xml.stream.XMLStreamException: Incomplete surrogate pair in content: first char 0xd835, second 0x78
13+
// The failure was provoked by a surrogate pair (U+1D7CE, UTF-16 chars 0xD835 0xDFCE) whose high
14+
// surrogate was the last char of one internal char-buffer chunk (the 511 'x' chars pad it there; chunk size presumably 512 - confirm),
15+
// followed by enough further data to spill into a third chunk. The writer then tried to combine the stale
16+
// high surrogate with the first char of that third chunk (an 'x', 0x78) because
17+
// ByteXmlWriter#_surrogate was not set back to 0 after the complete pair had been written.
18+
StringBuilder testText = new StringBuilder();
19+
for (int i = 0; i < 511; i++)
20+
testText.append('x');
21+
testText.append("\uD835\uDFCE");
22+
for (int i = 0; i < 512; i++)
23+
testText.append('x');
24+
25+
WriterConfig writerConfig = new WriterConfig();
26+
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
27+
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
28+
writer.writeStartTagStart(writer.constructName("testelement"));
29+
writer.writeAttribute(writer.constructName("testattr"), testText.toString());
30+
writer.writeStartTagEnd();
31+
writer.writeEndTag(writer.constructName("testelement"));
32+
writer.close(false);
33+
34+
}
35+
36+
public void testSurrogateMemory2() throws Exception {
37+
// Regression test: before the fix, writing this attribute value failed with
38+
// java.io.IOException: Unpaired surrogate character (0xd835)
39+
// The failure was provoked by a surrogate pair (U+1D7CE, UTF-16 chars 0xD835 0xDFCE) whose high
40+
// surrogate was the last char of one internal char-buffer chunk while the next chunk
41+
// held all of the remaining data. Because ByteXmlWriter#_surrogate was not reset after the pair
42+
// had been written, the writer still expected a low surrogate that never came.
43+
StringBuilder testText = new StringBuilder();
44+
for (int i = 0; i < 511; i++)
45+
testText.append('x');
46+
testText.append("\uD835\uDFCE");
47+
48+
WriterConfig writerConfig = new WriterConfig();
49+
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
50+
Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream);
51+
writer.writeStartTagStart(writer.constructName("testelement"));
52+
writer.writeAttribute(writer.constructName("testattr"), testText.toString());
53+
writer.writeStartTagEnd();
54+
writer.writeEndTag(writer.constructName("testelement"));
55+
writer.close(false);
56+
}
57+
}

0 commit comments

Comments
 (0)