/*
* Copyright (c) 2008, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.xml;
import com.twelvemonkeys.lang.
StringUtil;
import org.w3c.dom.*;
import org.xml.sax.
SAXException;
import javax.xml.parsers.
DocumentBuilder;
import javax.xml.parsers.
DocumentBuilderFactory;
import javax.xml.parsers.
ParserConfigurationException;
import java.io.*;
import java.nio.charset.
Charset;
import java.util.
Date;
/**
* XMLSerializer
*
* @author <a href="mailto:harald.kuhr@gmail.com">Harald Kuhr</a>
* @author last modified by $Author: haku $
* @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/xml/XMLSerializer.java#1 $
*/
public class
XMLSerializer {
// TODO: Replace with DOMSerializer? Test performance, pretty printing etc...
// Main problem: Sun's Java 5 does not have LS 3.0 support
// This class has no dependencies, which probably makes it more useful
// TODO: Don't insert initial and ending line-break for text-nodes
// TODO: Support not inserting line-breaks, to preserve space
// TODO: Support line breaking (at configurable width)
// TODO: Support standalone?
// TODO: Support more than version 1.0?
// TODO: Consider using IOException to communicate trouble, rather than RTE,
// to be more compatible...
private final
OutputStream output;
private final
Charset encoding;
private final
SerializationContext context;
public
XMLSerializer(final
OutputStream pOutput, final
String pEncoding) {
output =
pOutput;
encoding =
Charset.
forName(
pEncoding);
context = new
SerializationContext();
}
public final
XMLSerializer indentation(
String pIndent) {
// TODO: Verify that indent value is only whitespace?
context.
indent =
pIndent != null ?
pIndent : "\t";
return this;
}
public final
XMLSerializer stripComments(boolean
pStrip) {
context.
stripComments =
pStrip;
return this;
}
/**
* Serializes the entire document, along with the XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pDocument the document to serialize.
*/
public void
serialize(final
Document pDocument) {
serialize(
pDocument, true);
}
/**
* Serializes the entire sub tree starting at {@code pRootNode}, along with an optional XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pRootNode the root node to serialize.
* @param pWriteXMLDeclaration {@code true} if the XML declaration should be included, otherwise {@code false}.
*/
public void
serialize(final
Node pRootNode, final boolean
pWriteXMLDeclaration) {
PrintWriter out = new
PrintWriter(new
OutputStreamWriter(
output,
encoding));
try {
if (
pWriteXMLDeclaration) {
writeXMLDeclaration(
out);
}
writeXML(
out,
pRootNode,
context.
copy());
}
finally {
out.
flush();
}
}
private void
writeXMLDeclaration(final
PrintWriter pOut) {
pOut.
print("<?xml version=\"1.0\" encoding=\"");
pOut.
print(
encoding.
name());
pOut.
println("\"?>");
}
private void
writeXML(final
PrintWriter pOut, final
Node pDocument, final
SerializationContext pContext) {
writeNodeRecursive(
pOut,
pDocument,
pContext);
}
private void
writeNodeRecursive(final
PrintWriter pOut, final
Node pNode, final
SerializationContext pContext) {
if (
pNode.
getNodeType() !=
Node.
TEXT_NODE) {
indentToLevel(
pOut,
pContext);
}
switch (
pNode.
getNodeType()) {
case
Node.
DOCUMENT_NODE:
case
Node.
DOCUMENT_FRAGMENT_NODE:
writeDocument(
pOut,
pNode,
pContext);
break;
case
Node.
DOCUMENT_TYPE_NODE:
writeDoctype(
pOut, (
DocumentType)
pNode);
break;
case
Node.
ELEMENT_NODE:
boolean
preserveSpace =
pContext.
preserveSpace;
updatePreserveSpace(
pNode,
pContext);
writeElement(
pOut, (
Element)
pNode,
pContext);
pContext.
preserveSpace =
preserveSpace;
break;
case
Node.
CDATA_SECTION_NODE:
writeCData(
pOut,
pNode);
break;
case
Node.
TEXT_NODE:
writeText(
pOut,
pNode,
pContext);
break;
case
Node.
COMMENT_NODE:
writeComment(
pOut,
pNode,
pContext);
break;
case
Node.
PROCESSING_INSTRUCTION_NODE:
writeProcessingInstruction(
pOut, (
ProcessingInstruction)
pNode);
break;
case
Node.
ATTRIBUTE_NODE:
throw new
IllegalArgumentException("Malformed input Document: Attribute nodes should only occur inside Element nodes");
case
Node.
ENTITY_NODE:
// '<!ENTITY ' + getNodeName + ... + '>'
case
Node.
ENTITY_REFERENCE_NODE:
// ( '&' | '%' ) + getNodeName + ';'
case
Node.
NOTATION_NODE:
// '<!NOTATION ' + getNodeName + ( ExternalID | PublicID ) + '>'
default:
throw new
InternalError("Lazy programmer never implemented serialization of " +
pNode.
getClass());
}
}
private void
writeProcessingInstruction(final
PrintWriter pOut, final
ProcessingInstruction pNode) {
pOut.
print("\n<?");
pOut.
print(
pNode.
getTarget());
String value =
pNode.
getData();
if (
value != null) {
pOut.
print(" ");
pOut.
print(
value);
}
pOut.
println("?>");
}
private void
writeText(final
PrintWriter pOut, final
Node pNode, final
SerializationContext pContext) {
// TODO: Is this really as specified?
String value =
pNode.
getNodeValue();
if (
pContext.
preserveSpace) {
pOut.
print(
maybeEscapeElementValue(
value));
}
else if (!
StringUtil.
isEmpty(
value)) {
String escapedValue =
maybeEscapeElementValue(
value.
trim());
//if (escapedValue.length() + (pContext.level * pContext.indent.length()) > 78) {
indentToLevel(
pOut,
pContext);
//}
pOut.
println(
escapedValue);
}
}
private void
writeCData(final
PrintWriter pOut, final
Node pNode) {
pOut.
print("<![CDATA[");
pOut.
print(
validateCDataValue(
pNode.
getNodeValue()));
pOut.
println("]]>");
}
private static void
updatePreserveSpace(final
Node pNode, final
SerializationContext pContext) {
NamedNodeMap attributes =
pNode.
getAttributes();
if (
attributes != null) {
Node space =
attributes.
getNamedItem("xml:space");
if (
space != null) {
if ("preserve".
equals(
space.
getNodeValue())) {
pContext.
preserveSpace = true;
}
else if ("default".
equals(
space.
getNodeValue())) {
pContext.
preserveSpace = false;
}
// No other values are allowed per spec, ignore
}
}
}
private static void
indentToLevel(final
PrintWriter pOut, final
SerializationContext pContext) {
for (int
i = 0;
i <
pContext.
level;
i++) {
pOut.
print(
pContext.
indent);
}
}
private void
writeComment(final
PrintWriter pOut, final
Node pNode, final
SerializationContext pContext) {
if (
pContext.
stripComments) {
return;
}
String value =
pNode.
getNodeValue();
validateCommentValue(
value);
if (
value.
startsWith(" ")) {
pOut.
print("<!--");
}
else {
pOut.
print("<!-- ");
}
pOut.
print(
value);
if (
value.
endsWith(" ")) {
pOut.
println("-->");
}
else {
pOut.
println(" -->");
}
}
/**
* Returns an escaped version of the input string. The string is guaranteed
* to not contain illegal XML characters ({@code &<>}).
* If no escaping is needed, the input string is returned as is.
*
* @param pValue the input string that might need escaping.
* @return an escaped version of the input string.
*/
static
String maybeEscapeElementValue(final
String pValue) {
int
startEscape =
needsEscapeElement(
pValue);
if (
startEscape < 0) {
// If no escaping is needed, simply return original
return
pValue;
}
else {
// Otherwise, start replacing
StringBuilder builder = new
StringBuilder(
pValue.
substring(0,
startEscape));
builder.
ensureCapacity(
pValue.
length() + 30);
int
pos =
startEscape;
for (int
i =
pos;
i <
pValue.
length();
i++) {
switch (
pValue.
charAt(
i)) {
case '&':
pos =
appendAndEscape(
pValue,
pos,
i,
builder, "&");
break;
case '<':
pos =
appendAndEscape(
pValue,
pos,
i,
builder, "<");
break;
case '>':
pos =
appendAndEscape(
pValue,
pos,
i,
builder, ">");
break;
//case '\'':
//case '"':
default:
break;
}
}
builder.
append(
pValue.
substring(
pos));
return
builder.
toString();
}
}
private static int
appendAndEscape(final
String pString, int
pStart, final int
pEnd, final
StringBuilder pBuilder, final
String pEntity) {
pBuilder.
append(
pString.
substring(
pStart,
pEnd));
pBuilder.
append(
pEntity);
return
pEnd + 1;
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int
needsEscapeElement(final
String pString) {
for (int
i = 0;
i <
pString.
length();
i++) {
switch (
pString.
charAt(
i)) {
case '&':
case '<':
case '>':
//case '\'':
//case '"':
return
i;
default:
}
}
return -1;
}
private static
String maybeEscapeAttributeValue(final
String pValue) {
int
startEscape =
needsEscapeAttribute(
pValue);
if (
startEscape < 0) {
return
pValue;
}
else {
StringBuilder builder = new
StringBuilder(
pValue.
substring(0,
startEscape));
builder.
ensureCapacity(
pValue.
length() + 16);
int
pos =
startEscape;
for (int
i =
pos;
i <
pValue.
length();
i++) {
switch (
pValue.
charAt(
i)) {
case '&':
pos =
appendAndEscape(
pValue,
pos,
i,
builder, "&");
break;
case '"':
pos =
appendAndEscape(
pValue,
pos,
i,
builder, """);
break;
default:
break;
}
}
builder.
append(
pValue.
substring(
pos));
return
builder.
toString();
}
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int
needsEscapeAttribute(final
String pString) {
for (int
i = 0;
i <
pString.
length();
i++) {
switch (
pString.
charAt(
i)) {
case '&':
//case '<':
//case '>':
//case '\'':
case '"':
return
i;
default:
}
}
return -1;
}
private static
String validateCDataValue(final
String pValue) {
if (
pValue.
contains("]]>")) {
throw new
IllegalArgumentException("Malformed input document: CDATA block may not contain the string ']]>'");
}
return
pValue;
}
private static
String validateCommentValue(final
String pValue) {
if (
pValue.
contains("--")) {
throw new
IllegalArgumentException("Malformed input document: Comment may not contain the string '--'");
}
return
pValue;
}
private void
writeDocument(final
PrintWriter pOut, final
Node pNode, final
SerializationContext pContext) {
// Document fragments might not have child nodes...
if (
pNode.
hasChildNodes()) {
NodeList nodes =
pNode.
getChildNodes();
for (int
i = 0;
i <
nodes.
getLength();
i++) {
writeNodeRecursive(
pOut,
nodes.
item(
i),
pContext);
}
}
}
private void
writeElement(final
PrintWriter pOut, final
Element pNode, final
SerializationContext pContext) {
pOut.
print("<");
pOut.
print(
pNode.
getTagName());
// TODO: Attributes should probably include namespaces, so that it works
// even if the document was created using attributes instead of namespaces...
// In that case, prefix will be null...
// Handle namespace
String namespace =
pNode.
getNamespaceURI();
if (
namespace != null && !
namespace.
equals(
pContext.
defaultNamespace)) {
String prefix =
pNode.
getPrefix();
if (
prefix == null) {
pContext.
defaultNamespace =
namespace;
pOut.
print(" xmlns");
}
else {
pOut.
print(" xmlns:");
pOut.
print(
prefix);
}
pOut.
print("=\"");
pOut.
print(
namespace);
pOut.
print("\"");
}
// Iterate attributes if any
if (
pNode.
hasAttributes()) {
NamedNodeMap attributes =
pNode.
getAttributes();
for (int
i = 0;
i <
attributes.
getLength();
i++) {
Attr attribute = (
Attr)
attributes.
item(
i);
String name =
attribute.
getName();
if (!(
name.
startsWith("xmlns") && (
name.
length() == 5 ||
name.
charAt(5) == ':'))) {
pOut.
print(" ");
pOut.
print(
name);
pOut.
print("=\"");
pOut.
print(
maybeEscapeAttributeValue(
attribute.
getValue()));
pOut.
print("\"");
}
//else {
// System.err.println("attribute.getName(): " + name);
//}
}
}
// TODO: Consider not indenting/newline if the first child is a text node
// Iterate children if any
if (
pNode.
hasChildNodes()) {
pOut.
print(">");
if (!
pContext.
preserveSpace) {
pOut.
println();
}
NodeList children =
pNode.
getChildNodes();
for (int
i = 0;
i <
children.
getLength();
i++) {
writeNodeRecursive(
pOut,
children.
item(
i),
pContext.
push());
}
if (!
pContext.
preserveSpace) {
indentToLevel(
pOut,
pContext);
}
pOut.
print("</");
pOut.
print(
pNode.
getTagName());
pOut.
println(">");
}
else if (
pNode.
getNodeValue() != null) {
// NOTE: This is NOT AS SPECIFIED, but we do this to support
// the weirdness that is the javax.imageio.metadata.IIOMetadataNode.
// According to the spec, the nodeValue of an Element is null.
pOut.
print(">");
pOut.
print(
pNode.
getNodeValue());
pOut.
print("</");
pOut.
print(
pNode.
getTagName());
pOut.
println(">");
}
else {
pOut.
println("/>");
}
}
private void
writeDoctype(final
PrintWriter pOut, final
DocumentType pDoctype) {
// NOTE: The DOMImplementationLS LSSerializer actually inserts SYSTEM or
// PUBLIC identifiers even if they are empty strings. The result is, it
// will create invalid documents.
// Testing for empty strings seems to be more compatible.
if (
pDoctype != null) {
pOut.
print("<!DOCTYPE ");
pOut.
print(
pDoctype.
getName());
String publicId =
pDoctype.
getPublicId();
if (!
StringUtil.
isEmpty(
publicId)) {
pOut.
print(" PUBLIC ");
pOut.
print(
publicId);
}
String systemId =
pDoctype.
getSystemId();
if (!
StringUtil.
isEmpty(
systemId)) {
if (
StringUtil.
isEmpty(
publicId)) {
pOut.
print(" SYSTEM \"");
}
else {
pOut.
print(" \"");
}
pOut.
print(
systemId);
pOut.
print("\"");
}
String internalSubset =
pDoctype.
getInternalSubset();
if (!
StringUtil.
isEmpty(
internalSubset)) {
pOut.
print(" [ ");
pOut.
print(
internalSubset);
pOut.
print(" ]");
}
pOut.
println(">");
}
}
public static void
main(
String[]
pArgs) throws
IOException,
SAXException {
// Build XML tree (Document) and write
// Find the implementation
DocumentBuilderFactory factory =
DocumentBuilderFactory.
newInstance();
factory.
setNamespaceAware(true);
DocumentBuilder builder;
try {
builder =
factory.
newDocumentBuilder();
}
catch (
ParserConfigurationException e) {
//noinspection ThrowableInstanceNeverThrown BOGUS
throw (
IOException) new
IOException(
e.
getMessage()).
initCause(
e);
}
DOMImplementation dom =
builder.
getDOMImplementation();
Document document =
dom.
createDocument("http://www.twelvemonkeys.com/xml/test", "test",
dom.
createDocumentType("test", null, null));
Element root =
document.
getDocumentElement();
// This is probably not the correct way of setting a default namespace
//root.setAttribute("xmlns", "http://www.twelvemonkeys.com/xml/test");
// Create and insert the normal Properties headers as XML comments
document.
insertBefore(
document.
createComment(new
Date().
toString()),
root);
Element test =
document.
createElement("sub");
root.
appendChild(
test);
Element more =
document.
createElementNS("http://more.com/1999/namespace", "more:more");
more.
setAttribute("foo", "test");
more.
setAttribute("bar", "'really' \"legal\" & ok");
test.
appendChild(
more);
more.
appendChild(
document.
createTextNode("Simply some text."));
more.
appendChild(
document.
createCDATASection("&something escaped;"));
more.
appendChild(
document.
createTextNode("More & <more>!"));
more.
appendChild(
document.
createTextNode("\"<<'&'>>\""));
Element another =
document.
createElement("another");
test.
appendChild(
another);
Element yet =
document.
createElement("yet-another");
yet.
setAttribute("this-one", "with-params");
test.
appendChild(
yet);
Element pre =
document.
createElementNS("http://www.twelvemonkeys.com/xml/test", "pre");
pre.
setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre.
appendChild(
document.
createTextNode(" \t \n\r some text & white ' ' \n "));
test.
appendChild(
pre);
Element pre2 =
document.
createElementNS("http://www.twelvemonkeys.com/xml/test", "tight");
pre2.
setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre2.
appendChild(
document.
createTextNode("no-space-around-me"));
test.
appendChild(
pre2);
// Create serializer and output document
//XMLSerializer serializer = new XMLSerializer(pOutput, new OutputFormat(document, UTF_8_ENCODING, true));
System.
out.
println("XMLSerializer:");
XMLSerializer serializer = new
XMLSerializer(
System.
out, "UTF-8");
serializer.
serialize(
document);
System.
out.
println();
System.
out.
println("DOMSerializer:");
DOMSerializer serializerD = new
DOMSerializer(
System.
out, "UTF-8");
serializerD.
setPrettyPrint(true);
serializerD.
serialize(
document);
System.
out.
println();
System.
out.
println("\n");
ByteArrayOutputStream out = new
ByteArrayOutputStream();
XMLSerializer serializer2 = new
XMLSerializer(
out, "UTF-8");
serializer2.
serialize(
document);
ByteArrayOutputStream outD = new
ByteArrayOutputStream();
DOMSerializer serializer2D = new
DOMSerializer(
outD, "UTF-8");
serializer2D.
serialize(
document);
Document document2 =
builder.
parse(new
ByteArrayInputStream(
out.
toByteArray()));
System.
out.
println("XMLSerializer reparsed XMLSerializer:");
serializer.
serialize(
document2);
System.
out.
println();
System.
out.
println("DOMSerializer reparsed XMLSerializer:");
serializerD.
serialize(
document2);
System.
out.
println();
Document documentD =
builder.
parse(new
ByteArrayInputStream(
outD.
toByteArray()));
System.
out.
println("XMLSerializer reparsed DOMSerializer:");
serializer.
serialize(
documentD);
System.
out.
println();
System.
out.
println("DOMSerializer reparsed DOMSerializer:");
serializerD.
serialize(
documentD);
System.
out.
println();
}
static class
SerializationContext implements
Cloneable {
String indent = "\t";
int
level = 0;
boolean
preserveSpace = false;
boolean
stripComments = false;
String defaultNamespace;
public
SerializationContext copy() {
try {
return (
SerializationContext)
clone();
}
catch (
CloneNotSupportedException e) {
throw new
Error(
e);
}
}
public
SerializationContext push() {
SerializationContext context =
copy();
context.
level++;
return
context;
}
}
}