diff --git a/metafacture-xml/src/main/java/org/metafacture/xml/SerializeEncoder.java b/metafacture-xml/src/main/java/org/metafacture/xml/SerializeEncoder.java
new file mode 100644
index 000000000..92a4785b6
--- /dev/null
+++ b/metafacture-xml/src/main/java/org/metafacture/xml/SerializeEncoder.java
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2018 Deutsche Nationalbibliothek
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.metafacture.xml;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.ObjectReceiver;
+import org.metafacture.framework.StreamReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultStreamPipe;
+
+/**
+ * Serializes a Metafacture event stream into text chunks and passes each chunk to
+ * the downstream receiver via {@code process(String)}.
+ *
+ * <p>With pretty printing enabled (the default) every event is emitted as its own
+ * indented chunk; otherwise a whole record is buffered and emitted as a single
+ * chunk when the record ends.
+ *
+ * <p>NOTE(review): the markup string literals in this class are empty in the copy
+ * under review; presumably their angle-bracket content was stripped by whatever
+ * produced this copy. They are kept unchanged here -- confirm against the commit.
+ * The type argument of the superclass was missing as well and has been
+ * reconstructed from the {@code process(String)} calls and {@code @Out}.
+ */
+@Description("Converts a Metafacture stream into its XML equivalent.")
+@In(StreamReceiver.class)
+@Out(String.class)
+@FluxCommand("serialize-to-xml")
+public class SerializeEncoder extends DefaultStreamPipe<ObjectReceiver<String>>
+{
+    private static final String XML_DECLARATION = "";
+    private static final String SPACER = " ";
+
+    private boolean initStream;
+    private boolean prettyPrint;
+    private int indentationLevel;
+    private StringBuilder stringBuilder;
+
+    private boolean omitDeclaration = false;
+    private boolean omitRoot = false;
+
+    public SerializeEncoder()
+    {
+        this.indentationLevel = 0;
+        this.prettyPrint = true;
+        this.initStream = true;
+    }
+
+    /** Switches between one indented chunk per event (default) and one chunk per record. */
+    public void setPrettyPrint(boolean prettyPrint)
+    {
+        this.prettyPrint = prettyPrint;
+    }
+
+    /** If {@code true}, the declaration chunk at the start of the stream is suppressed. */
+    public void setOmitDeclaration(boolean omitDeclaration)
+    {
+        this.omitDeclaration = omitDeclaration;
+    }
+
+    /** If {@code true}, the opening and closing root chunks are suppressed. */
+    public void setOmitRoot(boolean omitRoot)
+    {
+        this.omitRoot = omitRoot;
+    }
+
+    @Override
+    public void startRecord(final String identifier)
+    {
+        writeHeaderIfPending();
+
+        final String elem = "";
+        if (prettyPrint)
+        {
+            indentationLevel++;
+            getReceiver().process(SPACER + elem);
+        }
+        else
+        {
+            stringBuilder = new StringBuilder();
+            stringBuilder.append(elem);
+        }
+    }
+
+    @Override
+    public void startEntity(final String name)
+    {
+        final String elem = "";
+        if (prettyPrint)
+        {
+            indentationLevel++;
+            getReceiver().process(repeat(SPACER, indentationLevel) + elem);
+        }
+        else
+        {
+            stringBuilder.append(elem);
+        }
+    }
+
+    @Override
+    public void literal(final String name, final String value)
+    {
+        final String elem = "" + escape(value) + "";
+        if (prettyPrint)
+        {
+            getReceiver().process(repeat(SPACER, indentationLevel + 1) + elem);
+        }
+        else
+        {
+            stringBuilder.append(elem);
+        }
+    }
+
+    @Override
+    public void endEntity()
+    {
+        final String elem = "";
+        if (prettyPrint)
+        {
+            getReceiver().process(repeat(SPACER, indentationLevel) + elem);
+            indentationLevel--;
+        }
+        else
+        {
+            stringBuilder.append(elem);
+        }
+    }
+
+    @Override
+    public void endRecord()
+    {
+        final String elem = "";
+        if (prettyPrint)
+        {
+            getReceiver().process(SPACER + elem);
+            indentationLevel--;
+        }
+        else
+        {
+            stringBuilder.append(elem);
+            getReceiver().process(stringBuilder.toString().trim());
+        }
+    }
+
+    @Override
+    public void onCloseStream()
+    {
+        if (!omitRoot)
+        {
+            // A stream without records has not written its header yet; do it now so
+            // the closing root chunk is never emitted without the opening one.
+            writeHeaderIfPending();
+            getReceiver().process("");
+        }
+    }
+
+    @Override
+    public void onResetStream()
+    {
+        this.indentationLevel = 0;
+        this.initStream = true;
+    }
+
+    /**
+     * Emits the declaration and the opening root chunk the first time it is called
+     * after construction or reset, honouring the two omit flags.
+     */
+    private void writeHeaderIfPending()
+    {
+        if (!initStream)
+        {
+            return;
+        }
+        if (!omitDeclaration)
+        {
+            getReceiver().process(XML_DECLARATION);
+        }
+        if (!omitRoot)
+        {
+            getReceiver().process("");
+        }
+        initStream = false;
+    }
+
+    /**
+     * Builds a String consisting of {@code n} repetitions of {@code s}.
+     * Returns the empty String when {@code n} is zero or negative.
+     */
+    private String repeat(String s, int n)
+    {
+        final StringBuilder sb = new StringBuilder();
+        for (int i = 0; i < n; i++)
+        {
+            sb.append(s);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Replaces the characters {@code <}, {@code >}, {@code "}, {@code '} and {@code &}
+     * with the entities lt, gt, quot, apos and amp.
+     *
+     * @param s the text to escape, may be {@code null}
+     * @return the escaped text with leading and trailing whitespace trimmed, or the
+     *         empty String if {@code s} is {@code null} or empty
+     */
+    private String escape(final String s)
+    {
+        if (s == null || s.isEmpty())
+        {
+            return "";
+        }
+
+        final StringBuilder result = new StringBuilder();
+        final int len = s.length();
+        for (int i = 0; i < len; ++i)
+        {
+            final char c = s.charAt(i);
+            final String entityName;
+            switch (c)
+            {
+                case '&':
+                    entityName = "amp";
+                    break;
+                case '<':
+                    entityName = "lt";
+                    break;
+                case '>':
+                    entityName = "gt";
+                    break;
+                case '\'':
+                    entityName = "apos";
+                    break;
+                case '"':
+                    entityName = "quot";
+                    break;
+                default:
+                    entityName = null;
+                    break;
+            }
+
+            if (entityName == null)
+            {
+                result.append(c);
+            }
+            else
+            {
+                result.append('&');
+                result.append(entityName);
+                result.append(';');
+            }
+        }
+        return result.toString().trim();
+    }
+}
diff --git a/metafacture-xml/src/main/java/org/metafacture/xml/SerializeXmlHandler.java b/metafacture-xml/src/main/java/org/metafacture/xml/SerializeXmlHandler.java
new file mode 100644
index 000000000..350cd55ad
--- /dev/null
+++ b/metafacture-xml/src/main/java/org/metafacture/xml/SerializeXmlHandler.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Deutsche Nationalbibliothek
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.metafacture.xml;
+
+import org.metafacture.framework.FluxCommand;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.StreamReceiver;
+import org.metafacture.framework.XmlReceiver;
+import org.metafacture.framework.annotations.Description;
+import org.metafacture.framework.annotations.In;
+import org.metafacture.framework.annotations.Out;
+import org.metafacture.framework.helpers.DefaultXmlPipe;
+import org.xml.sax.Attributes;
+
+import java.util.Locale;
+
+/**
+ * SAX handler that turns 'stream', 'record', 'entity' and 'literal' elements into
+ * Metafacture stream events. Element names are matched case-insensitively.
+ *
+ * <p>NOTE(review): the type argument of {@code DefaultXmlPipe} was missing in the
+ * copy under review and has been reconstructed from {@code @Out} -- confirm.
+ */
+@Description("Deserialize a XML encoded Metafacture stream.")
+@In(XmlReceiver.class)
+@Out(StreamReceiver.class)
+@FluxCommand("handle-serialize-xml")
+public class SerializeXmlHandler extends DefaultXmlPipe<StreamReceiver>
+{
+    private static final String ID = "id";
+    private static final String NAME = "name";
+    private static final String UNKNOWN_TAG =
+        "Unknown tag '%s'. Expected 'stream', 'record', 'entity' or 'literal'.";
+
+    private int streamTagCount;
+    private String currentTag;
+    private String currentLiteralName;
+    private StringBuilder stringBuilder;
+
+    public SerializeXmlHandler()
+    {
+        streamTagCount = 0;
+        stringBuilder = new StringBuilder();
+    }
+
+    /**
+     * Opens a record or entity, or starts collecting a literal's text. The root
+     * 'stream' element is only counted; it does not produce a stream event.
+     *
+     * @throws MetafactureException if 'stream' is opened twice or the tag is unknown
+     */
+    @Override
+    public void startElement(final String uri, final String localName, final String qName, final Attributes attributes)
+    {
+        currentTag = toTag(localName);
+        switch (currentTag)
+        {
+            case "STREAM":
+                if (streamTagCount > 0)
+                {
+                    throw new MetafactureException("Root tag 'stream' opened a second time.");
+                }
+                streamTagCount += 1;
+                // Without this break the root tag fell through and opened a record.
+                break;
+            case "RECORD":
+                getReceiver().startRecord(attributes.getValue(ID));
+                break;
+            case "ENTITY":
+                getReceiver().startEntity(attributes.getValue(NAME));
+                break;
+            case "LITERAL":
+                currentLiteralName = attributes.getValue(NAME);
+                // Drop text seen since the previous end tag so it cannot leak into this value.
+                stringBuilder.setLength(0);
+                break;
+            default:
+                throw new MetafactureException(String.format(UNKNOWN_TAG, currentTag));
+        }
+    }
+
+    /** Collects character data while the most recently seen tag is 'literal'. */
+    @Override
+    public void characters(final char[] chars, final int start, final int length)
+    {
+        // Constant-first comparison: currentTag is null until the first element is seen.
+        if ("LITERAL".equals(currentTag))
+        {
+            stringBuilder.append(chars, start, length);
+        }
+    }
+
+    /**
+     * Closes a record or entity, emits a finished literal (value trimmed), or closes
+     * the stream when the root element ends.
+     *
+     * @throws MetafactureException if 'stream' is closed while not open or the tag is unknown
+     */
+    @Override
+    public void endElement(final String uri, final String localName, final String qName)
+    {
+        currentTag = toTag(localName);
+        switch (currentTag)
+        {
+            case "STREAM":
+                streamTagCount -= 1;
+                if (streamTagCount != 0)
+                {
+                    throw new MetafactureException("Root tag 'stream' closed a second time.");
+                }
+                getReceiver().closeStream();
+                break;
+            case "RECORD":
+                getReceiver().endRecord();
+                break;
+            case "ENTITY":
+                getReceiver().endEntity();
+                break;
+            case "LITERAL":
+                getReceiver().literal(currentLiteralName, stringBuilder.toString().trim());
+                stringBuilder.setLength(0);
+                break;
+            default:
+                throw new MetafactureException(String.format(UNKNOWN_TAG, currentTag));
+        }
+    }
+
+    /** Upper-cases a tag name independently of the default locale (e.g. Turkish dotted I). */
+    private static String toTag(final String localName)
+    {
+        return localName.toUpperCase(Locale.ROOT);
+    }
+}
diff --git a/metafacture-xml/src/main/resources/flux-commands.properties b/metafacture-xml/src/main/resources/flux-commands.properties
index c69356ce0..b297736fb 100644
--- a/metafacture-xml/src/main/resources/flux-commands.properties
+++ b/metafacture-xml/src/main/resources/flux-commands.properties
@@ -15,6 +15,8 @@
 #
 handle-cg-xml org.metafacture.xml.CGXmlHandler
 handle-generic-xml org.metafacture.xml.GenericXmlHandler
+handle-serialize-xml org.metafacture.xml.SerializeXmlHandler
+serialize-to-xml org.metafacture.xml.SerializeEncoder
 stream-to-xml org.metafacture.xml.SimpleXmlEncoder
 decode-xml org.metafacture.xml.XmlDecoder
 split-xml-elements org.metafacture.xml.XmlElementSplitter
diff --git a/metafacture-xml/src/test/java/org/metafacture/xml/SerializeEncoderTest.java b/metafacture-xml/src/test/java/org/metafacture/xml/SerializeEncoderTest.java
new file mode 100644
index 000000000..6847c0ff6
--- /dev/null
+++ b/metafacture-xml/src/test/java/org/metafacture/xml/SerializeEncoderTest.java
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2018 Deutsche Nationalbibliothek
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.metafacture.xml;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.metafacture.framework.ObjectReceiver;
+import org.mockito.InOrder;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import static org.mockito.Mockito.inOrder;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+
+/**
+ * Tests for {@link SerializeEncoder}.
+ *
+ * <p>NOTE(review): the expected-output literals below are empty or whitespace-only
+ * in the copy under review; presumably their markup was stripped by whatever
+ * produced this copy. They are reproduced unchanged -- confirm against the commit.
+ */
+public class SerializeEncoderTest
+{
+    private SerializeEncoder encoder;
+
+    @Mock
+    private ObjectReceiver<String> receiver;
+
+    @Before
+    public void setUp()
+    {
+        MockitoAnnotations.initMocks(this);
+        encoder = new SerializeEncoder();
+        encoder.setReceiver(receiver);
+    }
+
+    @Test
+    public void serialize()
+    {
+        emitRecord("<>");
+        emitRecord("<>");
+        encoder.closeStream();
+
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" 1");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" joe");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" 1");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" joe");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process(" ");
+        ordered.verify(receiver).process("");
+    }
+
+    @Test
+    public void serializeWithoutPrettyPrinting()
+    {
+        encoder.setPrettyPrint(false);
+
+        emitRecord("names");
+        emitRecord("names");
+        encoder.closeStream();
+
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver, times(2)).process(
+            "" +
+            "1" +
+            "joe" +
+            ""
+        );
+        ordered.verify(receiver).process("");
+    }
+
+    @Test
+    public void serializeWithNullValue()
+    {
+        encoder.setPrettyPrint(false);
+
+        emitRecordWithNullLiteralAndClose();
+
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+    }
+
+    @Test
+    public void omitDeclaration()
+    {
+        encoder.setPrettyPrint(false);
+        encoder.setOmitDeclaration(true);
+
+        emitRecordWithNullLiteralAndClose();
+
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+        ordered.verify(receiver).process("");
+    }
+
+    @Test
+    public void omitRoot()
+    {
+        encoder.setPrettyPrint(false);
+        encoder.setOmitDeclaration(true);
+        encoder.setOmitRoot(true);
+
+        emitRecordWithNullLiteralAndClose();
+
+        verify(receiver).process("");
+    }
+
+    /**
+     * Sends one record with id "1" to the encoder: a literal "id" followed by an
+     * entity of the given name that holds a literal "name".
+     */
+    private void emitRecord(final String entityName)
+    {
+        encoder.startRecord("1");
+        encoder.literal("id", "1");
+        encoder.startEntity(entityName);
+        encoder.literal("name", "joe");
+        encoder.endEntity();
+        encoder.endRecord();
+    }
+
+    /** Sends one record whose only literal has a {@code null} value, then closes the stream. */
+    private void emitRecordWithNullLiteralAndClose()
+    {
+        encoder.startRecord("1");
+        encoder.literal("value", null);
+        encoder.endRecord();
+        encoder.closeStream();
+    }
+}
diff --git a/metafacture-xml/src/test/java/org/metafacture/xml/SerializeXmlHandlerTest.java b/metafacture-xml/src/test/java/org/metafacture/xml/SerializeXmlHandlerTest.java
new file mode 100644
index 000000000..96db7a8d8
--- /dev/null
+++ b/metafacture-xml/src/test/java/org/metafacture/xml/SerializeXmlHandlerTest.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2018 Deutsche Nationalbibliothek
+ *
+ * Licensed under the Apache License, 
Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.metafacture.xml;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.metafacture.framework.MetafactureException;
+import org.metafacture.framework.StreamReceiver;
+import org.mockito.InOrder;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.xml.sax.helpers.AttributesImpl;
+import static org.mockito.Mockito.inOrder;
+
+/**
+ * Tests for {@link SerializeXmlHandler}: feeds SAX events to the handler by hand
+ * and checks the stream events that reach the mocked receiver.
+ */
+public class SerializeXmlHandlerTest
+{
+
+    private SerializeXmlHandler handler;
+
+    @Mock
+    private StreamReceiver receiver;
+
+    @Before
+    public void setUp()
+    {
+        MockitoAnnotations.initMocks(this);
+        handler = new SerializeXmlHandler();
+        handler.setReceiver(receiver);
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void complainAboutMissingRootTagNamedStream()
+    {
+        open("notStream", new AttributesImpl());
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void complainAboutASecondlyOpenedRootTag()
+    {
+        open("stream", new AttributesImpl());
+        open("stream", new AttributesImpl());
+    }
+
+    @Test(expected = MetafactureException.class)
+    public void complainAboutAboutASecondlyClosedRootTag()
+    {
+        close("stream");
+        close("stream");
+    }
+
+    @Test
+    public void readXmlStream()
+    {
+        // Root element, then one record with id "1".
+        open("stream", new AttributesImpl());
+        open("record", attribute("id", "ID", "1"));
+
+        // A literal directly inside the record.
+        literal("id", "1");
+
+        // An entity holding one literal whose text contains double quotes.
+        open("entity", attribute("name", "CDATA", "names"));
+        literal("name", "\"joe\"");
+        close("entity");
+
+        close("record");
+        close("stream");
+
+        final InOrder ordered = inOrder(receiver);
+        ordered.verify(receiver).startRecord("1");
+        ordered.verify(receiver).literal("id", "1");
+        ordered.verify(receiver).startEntity("names");
+        ordered.verify(receiver).literal("name", "\"joe\"");
+        ordered.verify(receiver).endEntity();
+        ordered.verify(receiver).endRecord();
+        ordered.verify(receiver).closeStream();
+        ordered.verifyNoMoreInteractions();
+    }
+
+    /** Sends a start-element event with an empty namespace URI and identical local and qualified name. */
+    private void open(final String tag, final AttributesImpl attributes)
+    {
+        handler.startElement("", tag, tag, attributes);
+    }
+
+    /** Sends the end-element event matching {@link #open}. */
+    private void close(final String tag)
+    {
+        handler.endElement("", tag, tag);
+    }
+
+    /** Sends a complete 'literal' element with the given name attribute and character data. */
+    private void literal(final String name, final String text)
+    {
+        open("literal", attribute("name", "CDATA", name));
+        handler.characters(text.toCharArray(), 0, text.length());
+        close("literal");
+    }
+
+    /** Builds an attribute list that contains exactly one attribute. */
+    private static AttributesImpl attribute(final String name, final String type, final String value)
+    {
+        final AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", name, name, type, value);
+        return attributes;
+    }
+}