You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by tr...@apache.org on 2013/08/10 07:53:54 UTC
svn commit: r1512568 [24/39] - in /jackrabbit/commons/filevault/trunk: ./
parent/ vault-cli/ vault-cli/src/ vault-cli/src/main/
vault-cli/src/main/appassembler/ vault-cli/src/main/assembly/
vault-cli/src/main/java/ vault-cli/src/main/java/org/ vault-cl...
Added: jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/BaseMarkupSerializer.java
URL: http://svn.apache.org/viewvc/jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/BaseMarkupSerializer.java?rev=1512568&view=auto
==============================================================================
--- jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/BaseMarkupSerializer.java (added)
+++ jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/BaseMarkupSerializer.java Sat Aug 10 05:53:42 2013
@@ -0,0 +1,1780 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Sep 14, 2000:
+// Fixed comments to preserve whitespaces and add a line break
+// when indenting. Reported by Gervase Markham <ge...@gerv.net>
+// Sep 14, 2000:
+// Fixed serializer to report IO exception directly, instead at
+// the end of document processing.
+// Reported by Patrick Higgins <ph...@transzap.com>
+// Sep 13, 2000:
+// CR in character data will print as &#0D;
+// Aug 25, 2000:
+// Fixed processing instruction printing inside element content
+// to not escape content. Reported by Mikael Staldal
+// <d9...@d.kth.se>
+// Aug 25, 2000:
+// Added ability to omit comments.
+// Contributed by Anupam Bagchi <ab...@jtcsv.com>
+// Aug 26, 2000:
+// Fixed bug in newline handling when preserving spaces.
+// Contributed by Mike Dusseault <md...@home.com>
+// Aug 29, 2000:
+// Fixed state.unescaped not being set to false when
+// entering element state.
+// Reported by Lowell Vaughn <lv...@agillion.com>
+
+
+package org.apache.jackrabbit.vault.util.xml.serialize;
+
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.Writer;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.Vector;
+
+import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMErrorImpl;
+import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMLocatorImpl;
+import org.apache.jackrabbit.vault.util.xml.xerces.dom.DOMMessageFormatter;
+import org.apache.jackrabbit.vault.util.xml.xerces.util.XMLChar;
+import org.w3c.dom.DOMError;
+import org.w3c.dom.DOMErrorHandler;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.DocumentType;
+import org.w3c.dom.Element;
+import org.w3c.dom.Entity;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.Notation;
+import org.w3c.dom.ls.LSSerializer;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.DTDHandler;
+import org.xml.sax.DocumentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+import org.xml.sax.ext.DeclHandler;
+import org.xml.sax.ext.LexicalHandler;
+
+/**
+ * Base class for a serializer supporting both DOM and SAX pretty
+ * serializing of XML/HTML/XHTML documents. Derived classes perform
+ * the method-specific serializing, this class provides the common
+ * serializing mechanisms.
+ * <p/>
+ * The serializer must be initialized with the proper writer and
+ * output format before it can be used by calling {@link #setOutputCharStream}
+ * or {@link #setOutputByteStream} for the writer and {@link #setOutputFormat}
+ * for the output format.
+ * <p/>
+ * The serializer can be reused any number of times, but cannot
+ * be used concurrently by two threads.
+ * <p/>
+ * If an output stream is used, the encoding is taken from the
+ * output format (defaults to <tt>UTF-8</tt>). If a writer is
+ * used, make sure the writer uses the same encoding (if applies)
+ * as specified in the output format.
+ * <p/>
+ * The serializer supports both DOM and SAX. DOM serializing is done
+ * by calling {@link #serialize(Document)} and SAX serializing is done by firing
+ * SAX events and using the serializer as a document handler.
+ * This also applies to derived class.
+ * <p/>
+ * If an I/O exception occurs while serializing, the serializer
+ * will not throw an exception directly, but only throw it
+ * at the end of serializing (either DOM or SAX's {@link
+ * org.xml.sax.DocumentHandler#endDocument}).
+ * <p/>
+ * For elements that are not specified as whitespace preserving,
+ * the serializer will potentially break long text lines at space
+ * boundaries, indent lines, and serialize elements on separate
+ * lines. Line terminators will be regarded as spaces, and
+ * spaces at beginning of line will be stripped.
+ * <p/>
+ * When indenting, the serializer is capable of detecting seemingly
+ * element content, and serializing these elements indented on separate
+ * lines. An element is serialized indented when it is the first or
+ * last child of an element, or immediate following or preceding
+ * another element.
+ *
+ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
+ * @author <a href="mailto:rahul.srivastava@sun.com">Rahul Srivastava</a>
+ * @author Elena Litani, IBM
+ * @see Serializer
+ * @see LSSerializer
+ */
+public abstract class BaseMarkupSerializer
+ implements ContentHandler, DocumentHandler, LexicalHandler,
+ DTDHandler, DeclHandler, DOMSerializer, Serializer {
+
+ // DOM L3 implementation
+ /**
+ * Bit mask of DOM Level 3 serializer features. The initializer narrows
+ * to -1, i.e. all bits set. {@link #serializeNode} tests bit 3 (CDATA)
+ * and bit 2 (ENTITIES) of this mask.
+ */
+ protected short features = 0xFFFFFFFF;
+ /**
+ * DOM Level 3 error handler (presumably notified of serialization
+ * problems by the error-reporting helpers -- confirm).
+ */
+ protected DOMErrorHandler fDOMErrorHandler;
+ /**
+ * Reusable DOM error object, allocated once per serializer.
+ */
+ protected final DOMErrorImpl fDOMError = new DOMErrorImpl();
+ //protected LSSerializerFilter fDOMFilter;
+
+ /**
+ * Encoding information obtained from the output format in
+ * {@link #prepare}. Used to create the writer when a byte stream was
+ * supplied and to test whether a character is printable.
+ */
+ protected EncodingInfo _encodingInfo;
+
+
+ /**
+ * Holds array of all element states that have been entered.
+ * The array is automatically resized. When leaving an element,
+ * its state is not removed but reused when later returning
+ * to the same nesting level.
+ */
+ private ElementState[] _elementStates;
+
+
+ /**
+ * The index of the next state to place in the array,
+ * or one plus the index of the current state. When zero,
+ * we are in no state.
+ */
+ private int _elementStateCount;
+
+
+ /**
+ * Vector holding comments and PIs that come before the root
+ * element (even after it), see {@link #serializePreRoot}.
+ */
+ private Vector _preRoot;
+
+
+ /**
+ * If the document has been started (header serialized), this
+ * flag is set to true so it's not started twice.
+ */
+ protected boolean _started;
+
+
+ /**
+ * True if the serializer has been prepared. This flag is set
+ * to false when the serializer is reset prior to using it,
+ * and to true after it has been prepared for usage.
+ */
+ private boolean _prepared;
+
+
+ /**
+ * Association between namespace URIs (keys) and prefixes (values).
+ * Accumulated here prior to starting an element and placing this
+ * list in the element state.
+ */
+ protected Map<String, String> _prefixes;
+
+
+ /**
+ * The public identifier of the document type, if known.
+ */
+ protected String _docTypePublicId;
+
+
+ /**
+ * The system identifier of the document type, if known.
+ */
+ protected String _docTypeSystemId;
+
+
+ /**
+ * The output format associated with this serializer. This will never
+ * be a null reference. If no format was passed to the constructor,
+ * the default one for this document type will be used. The format
+ * object is never changed by the serializer.
+ */
+ protected OutputFormat _format;
+
+
+ /**
+ * The printer used for printing text parts.
+ */
+ protected Printer _printer;
+
+
+ /**
+ * True if indenting printer.
+ */
+ protected boolean _indenting;
+
+ /**
+ * Temporary buffer to store character data
+ */
+ protected final StringBuffer fStrBuffer = new StringBuffer(40);
+
+ /**
+ * The underlying writer.
+ */
+ private Writer _writer;
+
+
+ /**
+ * The output stream.
+ */
+ private OutputStream _output;
+
+ /**
+ * Current node that is being processed
+ */
+ protected Node fCurrentNode = null;
+
+ //--------------------------------//
+ // Constructor and initialization //
+ //--------------------------------//
+
+
+ /**
+ * Protected constructor can only be used by derived class.
+ * Must initialize the serializer before serializing any document,
+ * by calling {@link #setOutputCharStream} or {@link #setOutputByteStream}
+ * first
+ */
+ protected BaseMarkupSerializer(OutputFormat format) {
+ int i;
+
+ _elementStates = new ElementState[10];
+ for (i = 0; i < _elementStates.length; ++i)
+ _elementStates[i] = new ElementState();
+ _format = format;
+ }
+
+
+ /**
+ * Prepares the serializer (see {@link #prepare}) and returns this
+ * instance as a SAX 1 document handler.
+ *
+ * @return This serializer
+ * @throws IOException Preparation failed, e.g. neither a writer nor
+ * an output stream has been supplied
+ */
+ public DocumentHandler asDocumentHandler()
+ throws IOException {
+ prepare();
+ return this;
+ }
+
+
+ /**
+ * Prepares the serializer (see {@link #prepare}) and returns this
+ * instance as a SAX 2 content handler.
+ *
+ * @return This serializer
+ * @throws IOException Preparation failed, e.g. neither a writer nor
+ * an output stream has been supplied
+ */
+ public ContentHandler asContentHandler()
+ throws IOException {
+ prepare();
+ return this;
+ }
+
+
+ /**
+ * Prepares the serializer (see {@link #prepare}) and returns this
+ * instance as a DOM serializer.
+ *
+ * @return This serializer
+ * @throws IOException Preparation failed, e.g. neither a writer nor
+ * an output stream has been supplied
+ */
+ public DOMSerializer asDOMSerializer()
+ throws IOException {
+ prepare();
+ return this;
+ }
+
+
+ /**
+ * Directs output to the given byte stream. Any previously set writer is
+ * discarded and the serializer is reset, so the writer is recreated from
+ * this stream the next time the serializer is prepared.
+ *
+ * @param output The byte stream to write to, must not be null
+ * @throws NullPointerException If <tt>output</tt> is null
+ */
+ public void setOutputByteStream(OutputStream output) {
+     if (output == null) {
+         throw new NullPointerException(DOMMessageFormatter.formatMessage(
+                 DOMMessageFormatter.SERIALIZER_DOMAIN,
+                 "ArgumentIsNull", new Object[]{"output"}));
+     }
+     _output = output;
+     _writer = null;
+     reset();
+ }
+
+
+ /**
+ * Directs output to the given writer. Any previously set byte stream is
+ * discarded and the serializer is reset.
+ *
+ * @param writer The writer to write to, must not be null
+ * @throws NullPointerException If <tt>writer</tt> is null
+ */
+ public void setOutputCharStream(Writer writer) {
+     if (writer == null) {
+         throw new NullPointerException(DOMMessageFormatter.formatMessage(
+                 DOMMessageFormatter.SERIALIZER_DOMAIN,
+                 "ArgumentIsNull", new Object[]{"writer"}));
+     }
+     _writer = writer;
+     _output = null;
+     reset();
+ }
+
+
+ /**
+ * Replaces the output format and resets the serializer so the new format
+ * takes effect the next time the serializer is prepared.
+ *
+ * @param format The output format to use, must not be null
+ * @throws NullPointerException If <tt>format</tt> is null
+ */
+ public void setOutputFormat(OutputFormat format) {
+     if (format == null) {
+         throw new NullPointerException(DOMMessageFormatter.formatMessage(
+                 DOMMessageFormatter.SERIALIZER_DOMAIN,
+                 "ArgumentIsNull", new Object[]{"format"}));
+     }
+     _format = format;
+     reset();
+ }
+
+
+ /**
+ * Marks the serializer as unprepared, forgets the current node and
+ * empties the temporary character buffer.
+ *
+ * @return Always <tt>true</tt>
+ * @throws IllegalStateException If more than one element state is
+ * currently entered
+ */
+ public boolean reset() {
+     final boolean insideElement = _elementStateCount > 1;
+     if (insideElement) {
+         throw new IllegalStateException(DOMMessageFormatter.formatMessage(
+                 DOMMessageFormatter.SERIALIZER_DOMAIN, "ResetInMiddle", null));
+     }
+     fStrBuffer.setLength(0);
+     fCurrentNode = null;
+     _prepared = false;
+     return true;
+ }
+
+
+ protected void prepare()
+ throws IOException {
+ if (_prepared)
+ return;
+
+ if (_writer == null && _output == null) {
+ String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN,
+ "NoWriterSupplied", null);
+ throw new IOException(msg);
+ }
+ // If the output stream has been set, use it to construct
+ // the writer. It is possible that the serializer has been
+ // reused with the same output stream and different encoding.
+
+ _encodingInfo = _format.getEncodingInfo();
+
+ if (_output != null) {
+ _writer = _encodingInfo.getWriter(_output);
+ }
+
+ if (_format.getIndenting()) {
+ _indenting = true;
+ _printer = new IndentPrinter(_writer, _format);
+ } else {
+ _indenting = false;
+ _printer = new Printer(_writer, _format);
+ }
+
+ ElementState state;
+
+ _elementStateCount = 0;
+ state = _elementStates[0];
+ state.namespaceURI = null;
+ state.localName = null;
+ state.rawName = null;
+ state.preserveSpace = _format.getPreserveSpace();
+ state.empty = true;
+ state.afterElement = false;
+ state.afterComment = false;
+ state.doCData = state.inCData = false;
+ state.prefixes = null;
+
+ _docTypePublicId = _format.getDoctypePublic();
+ _docTypeSystemId = _format.getDoctypeSystem();
+ _started = false;
+ _prepared = true;
+ }
+
+ //----------------------------------//
+ // DOM document serializing methods //
+ //----------------------------------//
+
+
+ /**
+ * Serializes the DOM element using the previously specified
+ * writer and output format. The serializer is reset and prepared
+ * first; after flushing, an I/O exception recorded by the printer
+ * is rethrown.
+ *
+ * @param elem The element to serialize
+ * @throws IOException An I/O exception occurred while
+ * serializing
+ */
+ public void serialize(Element elem)
+         throws IOException {
+     reset();
+     prepare();
+     serializeNode(elem);
+     _printer.flush();
+
+     final IOException failure = _printer.getException();
+     if (failure != null) {
+         throw failure;
+     }
+ }
+
+
+ /**
+ * Serializes the DOM document fragment using the previously specified
+ * writer and output format. The serializer is reset and prepared
+ * first; after flushing, an I/O exception recorded by the printer
+ * is rethrown.
+ *
+ * @param frag The document fragment to serialize
+ * @throws IOException An I/O exception occurred while
+ * serializing
+ */
+ public void serialize(DocumentFragment frag)
+         throws IOException {
+     reset();
+     prepare();
+     serializeNode(frag);
+     _printer.flush();
+
+     final IOException failure = _printer.getException();
+     if (failure != null) {
+         throw failure;
+     }
+ }
+
+
+ /**
+ * Serializes the DOM document using the previously specified
+ * writer and output format. The serializer is reset and prepared
+ * first; content collected outside the root element is written after
+ * the document nodes, and after flushing, an I/O exception recorded by
+ * the printer is rethrown.
+ *
+ * @param doc The document to serialize
+ * @throws IOException An I/O exception occurred while
+ * serializing
+ */
+ public void serialize(Document doc)
+         throws IOException {
+     reset();
+     prepare();
+     serializeNode(doc);
+     serializePreRoot();
+     _printer.flush();
+
+     final IOException failure = _printer.getException();
+     if (failure != null) {
+         throw failure;
+     }
+ }
+
+ //------------------------------------------//
+ // SAX document handler serializing methods //
+ //------------------------------------------//
+
+
+ /**
+ * SAX start-of-document event. Prepares the serializer for output
+ * (see {@link #prepare}); nothing is printed here.
+ *
+ * @throws SAXException Wraps the I/O exception raised while preparing,
+ * e.g. when no writer or output stream has been supplied
+ */
+ public void startDocument()
+         throws SAXException {
+     try {
+         prepare();
+     } catch (IOException except) {
+         // Wrap the exception itself rather than its string form so the
+         // original cause and stack trace stay available to the caller,
+         // as every other SAX handler in this class does.
+         throw new SAXException(except);
+     }
+     // Nothing to do here. All the magic happens in startDocument(String)
+ }
+
+
+ /**
+ * SAX character data event. Prints <tt>length</tt> characters of
+ * <tt>chars</tt> beginning at index <tt>start</tt>, either inside a
+ * CDATA section (when the current element state requests it) or as
+ * regular text, escaped unless the state is marked unescaped.
+ *
+ * @param chars The buffer holding the characters
+ * @param start The index of the first character to print
+ * @param length The number of characters to print
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void characters(char[] chars, int start, int length)
+         throws SAXException {
+     ElementState state;
+
+     try {
+         state = content();
+
+         // Check if text should be print as CDATA section or unescaped
+         // based on elements listed in the output format (the element
+         // state) or whether we are inside a CDATA section or entity.
+
+         if (state.inCData || state.doCData) {
+             int saveIndent;
+
+             // Print a CDATA section. The text is not escaped, but ']]>'
+             // appearing in the code must be identified and dealt with.
+             // The contents of a text node is considered space preserving.
+             if (!state.inCData) {
+                 _printer.printText("<![CDATA[");
+                 state.inCData = true;
+             }
+             saveIndent = _printer.getNextIndent();
+             _printer.setNextIndent(0);
+             char ch;
+             // The range to print is [start, start + length). Comparing
+             // the index against length alone would drop characters (or
+             // print nothing at all) whenever start is not zero.
+             final int end = start + length;
+             for (int index = start; index < end; ++index) {
+                 ch = chars[index];
+                 if (ch == ']' && index + 2 < end &&
+                         chars[index + 1] == ']' && chars[index + 2] == '>') {
+                     // Split ']]>' across two CDATA sections
+                     _printer.printText("]]]]><![CDATA[>");
+                     index += 2;
+                     continue;
+                 }
+                 if (!XMLChar.isValid(ch)) {
+                     // check if it is surrogate
+                     if (++index < end) {
+                         surrogates(ch, chars[index]);
+                     } else {
+                         fatalError("The character '" + ch + "' is an invalid XML character");
+                     }
+                     continue;
+                 } else {
+                     if ((ch >= ' ' && _encodingInfo.isPrintable(ch) && ch != 0xF7) ||
+                             ch == '\n' || ch == '\r' || ch == '\t') {
+                         _printer.printText(ch);
+                     } else {
+                         // The character is not printable -- split CDATA section
+                         _printer.printText("]]>&#x");
+                         _printer.printText(Integer.toHexString(ch));
+                         _printer.printText(";<![CDATA[");
+                     }
+                 }
+             }
+             _printer.setNextIndent(saveIndent);
+
+         } else {
+
+             int saveIndent;
+
+             if (state.preserveSpace) {
+                 // If preserving space then hold of indentation so no
+                 // excessive spaces are printed at line breaks, escape
+                 // the text content without replacing spaces and print
+                 // the text breaking only at line breaks.
+                 saveIndent = _printer.getNextIndent();
+                 _printer.setNextIndent(0);
+                 printText(chars, start, length, true, state.unescaped);
+                 _printer.setNextIndent(saveIndent);
+             } else {
+                 printText(chars, start, length, false, state.unescaped);
+             }
+         }
+     } catch (IOException except) {
+         throw new SAXException(except);
+     }
+ }
+
+
+ public void ignorableWhitespace(char[] chars, int start, int length)
+ throws SAXException {
+ int i;
+
+ try {
+ content();
+
+ // Print ignorable whitespaces only when indenting, after
+ // all they are indentation. Cancel the indentation to
+ // not indent twice.
+ if (_indenting) {
+ _printer.setThisIndent(0);
+ for (i = start; length-- > 0; ++i)
+ _printer.printText(chars[i]);
+ }
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX processing instruction event. Delegates to
+ * {@link #processingInstructionIO} and converts its I/O exception
+ * into a SAX exception.
+ *
+ * @param target The processing instruction target
+ * @param code The processing instruction data, may be null
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public final void processingInstruction(String target, String code)
+ throws SAXException {
+ try {
+ processingInstructionIO(target, code);
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+ public void processingInstructionIO(String target, String code)
+ throws IOException {
+ int index;
+ ElementState state;
+
+ state = content();
+
+ // Create the processing instruction textual representation.
+ // Make sure we don't have '?>' inside either target or code.
+ index = target.indexOf("?>");
+ if (index >= 0)
+ fStrBuffer.append("<?").append(target.substring(0, index));
+ else
+ fStrBuffer.append("<?").append(target);
+ if (code != null) {
+ fStrBuffer.append(' ');
+ index = code.indexOf("?>");
+ if (index >= 0)
+ fStrBuffer.append(code.substring(0, index));
+ else
+ fStrBuffer.append(code);
+ }
+ fStrBuffer.append("?>");
+
+ // If before the root element (or after it), do not print
+ // the PI directly but place it in the pre-root vector.
+ if (isDocumentState()) {
+ if (_preRoot == null)
+ _preRoot = new Vector();
+ _preRoot.addElement(fStrBuffer.toString());
+ } else {
+ _printer.indent();
+ printText(fStrBuffer.toString(), true, true);
+ _printer.unindent();
+ if (_indenting)
+ state.afterElement = true;
+ }
+
+ fStrBuffer.setLength(0);
+ }
+
+
+ /**
+ * SAX lexical comment event. Converts the character range to a string
+ * and delegates to {@link #comment(String)}.
+ *
+ * @param chars The buffer holding the comment text
+ * @param start The index of the first character
+ * @param length The number of characters
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void comment(char[] chars, int start, int length)
+ throws SAXException {
+ try {
+ comment(new String(chars, start, length));
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * Prints a comment, unless the output format requests that comments
+ * be omitted. The text is cut at the first <tt>--&gt;</tt> it contains.
+ * At document level the comment is queued in the pre-root vector
+ * instead of being printed directly.
+ *
+ * @param text The comment text
+ * @throws IOException An I/O exception occurred while serializing
+ */
+ public void comment(String text)
+ throws IOException {
+ int index;
+ ElementState state;
+
+ if (_format.getOmitComments())
+ return;
+
+ state = content();
+ // Create the processing comment textual representation.
+ // Make sure we don't have '-->' inside the comment.
+ index = text.indexOf("-->");
+ if (index >= 0)
+ fStrBuffer.append("<!--").append(text.substring(0, index)).append("-->");
+ else
+ fStrBuffer.append("<!--").append(text).append("-->");
+
+ // If before the root element (or after it), do not print
+ // the comment directly but place it in the pre-root vector.
+ if (isDocumentState()) {
+ if (_preRoot == null)
+ _preRoot = new Vector();
+ _preRoot.addElement(fStrBuffer.toString());
+ } else {
+ // Indent this element on a new line if the first
+ // content of the parent element or immediately
+ // following an element.
+ if (_indenting && !state.preserveSpace)
+ _printer.breakLine();
+ _printer.indent();
+ printText(fStrBuffer.toString(), true, true);
+ _printer.unindent();
+ if (_indenting)
+ state.afterElement = true;
+ }
+
+ fStrBuffer.setLength(0);
+ state.afterComment = true;
+ // NOTE(review): afterElement may have been set to true above when
+ // indenting, but it is unconditionally cleared here, so that earlier
+ // assignment has no lasting effect -- confirm this is intended.
+ state.afterElement = false;
+ }
+
+
+ /**
+ * SAX lexical event: flags the current element state so that following
+ * character data is printed as a CDATA section.
+ */
+ public void startCDATA() {
+     getElementState().doCData = true;
+ }
+
+
+ /**
+ * SAX lexical event: clears the CDATA request flag on the current
+ * element state.
+ */
+ public void endCDATA() {
+     getElementState().doCData = false;
+ }
+
+
+ /**
+ * Flags the current element state as unescaped; the flag is passed to
+ * the text printing routine by {@link #characters(char[], int, int)}.
+ */
+ public void startNonEscaping() {
+     getElementState().unescaped = true;
+ }
+
+
+ /**
+ * Clears the unescaped flag on the current element state.
+ */
+ public void endNonEscaping() {
+     getElementState().unescaped = false;
+ }
+
+
+ /**
+ * Flags the current element state as space preserving.
+ */
+ public void startPreserving() {
+     getElementState().preserveSpace = true;
+ }
+
+
+ /**
+ * Clears the space preserving flag on the current element state.
+ */
+ public void endPreserving() {
+     getElementState().preserveSpace = false;
+ }
+
+
+ /**
+ * Called at the end of the document to wrap it up.
+ * Prints the content accumulated outside of the root element and
+ * flushes the printer. An I/O exception raised by either step is
+ * wrapped in a SAX exception.
+ * <p/>
+ * NOTE(review): unlike the DOM <tt>serialize</tt> methods, this method
+ * does not check <tt>_printer.getException()</tt> after flushing --
+ * confirm that <tt>flush()</tt> surfaces any stored I/O error.
+ *
+ * @throws SAXException An I/O exception occurred during
+ * serializing
+ */
+ public void endDocument()
+ throws SAXException {
+ try {
+ // Print all the elements accumulated outside of
+ // the root element.
+ serializePreRoot();
+ // Flush the output, this is necessary for buffered output.
+ _printer.flush();
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX lexical event marking the start of an entity. Intentionally
+ * does nothing.
+ *
+ * @param name The entity name (ignored)
+ */
+ public void startEntity(String name) {
+ // ???
+ }
+
+
+ /**
+ * SAX lexical event marking the end of an entity. Intentionally
+ * does nothing.
+ *
+ * @param name The entity name (ignored)
+ */
+ public void endEntity(String name) {
+ // ???
+ }
+
+
+ /**
+ * SAX event supplying a document locator. The locator is not used
+ * by the serializer.
+ *
+ * @param locator The document locator (ignored)
+ */
+ public void setDocumentLocator(Locator locator) {
+ // Nothing to do
+ }
+
+ //-----------------------------------------//
+ // SAX content handler serializing methods //
+ //-----------------------------------------//
+
+
+ /**
+ * SAX skipped entity event. Clears the CDATA request flag, closes any
+ * pending opening tag or CDATA section via {@link #content} and prints
+ * the entity as a reference of the form <tt>&amp;name;</tt>.
+ *
+ * @param name The name of the skipped entity
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void skippedEntity(String name)
+ throws SAXException {
+ try {
+ endCDATA();
+ content();
+ _printer.printText('&');
+ _printer.printText(name);
+ _printer.printText(';');
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX event declaring a namespace prefix. The mapping is recorded in
+ * {@link #_prefixes}, keyed by namespace URI; a null prefix is stored
+ * as the empty string.
+ *
+ * @param prefix The namespace prefix, may be null
+ * @param uri The namespace URI the prefix is mapped to
+ * @throws SAXException Declared by the SAX interface; not thrown by
+ * this implementation
+ */
+ public void startPrefixMapping(String prefix, String uri)
+         throws SAXException {
+     if (_prefixes == null) {
+         // The map is created lazily on the first declaration.
+         _prefixes = new TreeMap<String, String>();
+     }
+     String storedPrefix = prefix;
+     if (storedPrefix == null) {
+         storedPrefix = "";
+     }
+     _prefixes.put(uri, storedPrefix);
+ }
+
+
+ /**
+ * SAX event ending the scope of a namespace prefix. Intentionally
+ * does nothing.
+ *
+ * @param prefix The namespace prefix (ignored)
+ * @throws SAXException Declared by the SAX interface; not thrown by
+ * this implementation
+ */
+ public void endPrefixMapping(String prefix)
+ throws SAXException {
+ }
+
+ //------------------------------------------//
+ // SAX DTD/Decl handler serializing methods //
+ //------------------------------------------//
+
+
+ /**
+ * SAX lexical event marking the start of the DTD. Switches the printer
+ * into DTD mode and records the public and system identifiers of the
+ * document type. The <tt>name</tt> argument is not used here.
+ *
+ * @param name The document type name (unused)
+ * @param publicId The public identifier, may be null
+ * @param systemId The system identifier, may be null
+ * @throws SAXException Wraps an I/O exception raised by the printer
+ */
+ public final void startDTD(String name, String publicId, String systemId)
+ throws SAXException {
+ try {
+ _printer.enterDTD();
+ _docTypePublicId = publicId;
+ _docTypeSystemId = systemId;
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX lexical event marking the end of the DTD. Intentionally does
+ * nothing.
+ */
+ public void endDTD() {
+ // Nothing to do here, all the magic occurs in startDocument(String).
+ }
+
+
+ /**
+ * SAX declaration event: prints an element type declaration of the
+ * form <tt>&lt;!ELEMENT name model&gt;</tt> in DTD mode, followed by a
+ * line break when indenting.
+ *
+ * @param name The element type name
+ * @param model The content model, printed as given
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void elementDecl(String name, String model)
+ throws SAXException {
+ try {
+ _printer.enterDTD();
+ _printer.printText("<!ELEMENT ");
+ _printer.printText(name);
+ _printer.printText(' ');
+ _printer.printText(model);
+ _printer.printText('>');
+ if (_indenting)
+ _printer.breakLine();
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX declaration event: prints an attribute list declaration of the
+ * form <tt>&lt;!ATTLIST eName aName type [valueDefault] ["value"]&gt;</tt>
+ * in DTD mode, followed by a line break when indenting. The default
+ * value is escaped before it is printed.
+ *
+ * @param eName The name of the associated element
+ * @param aName The name of the attribute
+ * @param type The attribute type, printed as given
+ * @param valueDefault The default declaration, omitted when null
+ * @param value The default value, omitted when null
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void attributeDecl(String eName, String aName, String type,
+ String valueDefault, String value)
+ throws SAXException {
+ try {
+ _printer.enterDTD();
+ _printer.printText("<!ATTLIST ");
+ _printer.printText(eName);
+ _printer.printText(' ');
+ _printer.printText(aName);
+ _printer.printText(' ');
+ _printer.printText(type);
+ if (valueDefault != null) {
+ _printer.printText(' ');
+ _printer.printText(valueDefault);
+ }
+ if (value != null) {
+ _printer.printText(" \"");
+ printEscaped(value);
+ _printer.printText('"');
+ }
+ _printer.printText('>');
+ if (_indenting)
+ _printer.breakLine();
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX declaration event: prints an internal entity declaration of the
+ * form <tt>&lt;!ENTITY name "value"&gt;</tt> in DTD mode, followed by a
+ * line break when indenting. The value is escaped before it is printed.
+ *
+ * @param name The entity name
+ * @param value The replacement text of the entity
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void internalEntityDecl(String name, String value)
+ throws SAXException {
+ try {
+ _printer.enterDTD();
+ _printer.printText("<!ENTITY ");
+ _printer.printText(name);
+ _printer.printText(" \"");
+ printEscaped(value);
+ _printer.printText("\">");
+ if (_indenting)
+ _printer.breakLine();
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX declaration event: prints an external parsed entity declaration
+ * by delegating to {@link #unparsedEntityDecl} with a null notation
+ * name. DTD mode is entered here and again by the delegate.
+ *
+ * @param name The entity name
+ * @param publicId The public identifier, may be null
+ * @param systemId The system identifier
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void externalEntityDecl(String name, String publicId, String systemId)
+ throws SAXException {
+ try {
+ _printer.enterDTD();
+ unparsedEntityDecl(name, publicId, systemId, null);
+ } catch (IOException except) {
+ throw new SAXException(except);
+ }
+ }
+
+
+ /**
+ * SAX DTD event: prints an entity declaration with an external
+ * identifier in DTD mode, using the <tt>SYSTEM</tt> form when no public
+ * identifier is given and the <tt>PUBLIC</tt> form otherwise. An
+ * <tt>NDATA</tt> clause is appended when a notation name is given, and
+ * a line break follows when indenting.
+ *
+ * @param name The entity name
+ * @param publicId The public identifier, may be null
+ * @param systemId The system identifier
+ * @param notationName The notation name, may be null
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void unparsedEntityDecl(String name, String publicId,
+         String systemId, String notationName)
+         throws SAXException {
+     try {
+         _printer.enterDTD();
+
+         // Both forms begin with the keyword and the entity name.
+         _printer.printText("<!ENTITY ");
+         _printer.printText(name);
+         if (publicId != null) {
+             _printer.printText(" PUBLIC ");
+             printDoctypeURL(publicId);
+             _printer.printText(' ');
+         } else {
+             _printer.printText(" SYSTEM ");
+         }
+         // ...and both end with the system identifier.
+         printDoctypeURL(systemId);
+
+         if (notationName != null) {
+             _printer.printText(" NDATA ");
+             _printer.printText(notationName);
+         }
+         _printer.printText('>');
+         if (_indenting) {
+             _printer.breakLine();
+         }
+     } catch (IOException except) {
+         throw new SAXException(except);
+     }
+ }
+
+
+ /**
+ * SAX DTD event: prints a notation declaration in DTD mode. With a
+ * public identifier the <tt>PUBLIC</tt> form is used and the system
+ * identifier is appended only when present; otherwise the
+ * <tt>SYSTEM</tt> form is used. A line break follows when indenting.
+ *
+ * @param name The notation name
+ * @param publicId The public identifier, may be null
+ * @param systemId The system identifier, may be null when a public
+ * identifier is given
+ * @throws SAXException Wraps an I/O exception raised while printing
+ */
+ public void notationDecl(String name, String publicId, String systemId)
+         throws SAXException {
+     try {
+         _printer.enterDTD();
+
+         // Both forms begin with the keyword and the notation name.
+         _printer.printText("<!NOTATION ");
+         _printer.printText(name);
+         if (publicId == null) {
+             _printer.printText(" SYSTEM ");
+             printDoctypeURL(systemId);
+         } else {
+             _printer.printText(" PUBLIC ");
+             printDoctypeURL(publicId);
+             if (systemId != null) {
+                 _printer.printText(' ');
+                 printDoctypeURL(systemId);
+             }
+         }
+
+         _printer.printText('>');
+         if (_indenting) {
+             _printer.breakLine();
+         }
+     } catch (IOException except) {
+         throw new SAXException(except);
+     }
+ }
+
+ //------------------------------------------//
+ // Generic node serializing methods methods //
+ //------------------------------------------//
+
+
+ /**
+ * Serialize the DOM node. This method is shared across XML, HTML and XHTML
+ * serializers and the differences are masked out in a separate {@link
+ * #serializeElement}.
+ * <p/>
+ * Text, CDATA and comment nodes with a null value are skipped. Document
+ * and document fragment nodes are serialized by recursing into their
+ * children; node types not listed in the switch are ignored. DOM filter
+ * support is currently disabled, so no node is filtered out.
+ *
+ * @param node The node to serialize
+ * @throws IOException An I/O exception occurred while
+ * serializing
+ * @see #serializeElement
+ */
+ protected void serializeNode(Node node)
+         throws IOException {
+     fCurrentNode = node;
+
+     // Based on the node type call the suitable SAX handler.
+     // Only comments entities and documents which are not
+     // handled by SAX are serialized directly.
+     switch (node.getNodeType()) {
+         case Node.TEXT_NODE: {
+             String text = node.getNodeValue();
+             if (text != null) {
+                 // When indenting, whitespace-only text is dropped unless
+                 // the current element preserves space.
+                 if (!_indenting || getElementState().preserveSpace
+                         || (text.replace('\n', ' ').trim().length() != 0)) {
+                     characters(text);
+                 }
+             }
+             break;
+         }
+
+         case Node.CDATA_SECTION_NODE: {
+             String text = node.getNodeValue();
+             // Guard both branches against a null value, as the text
+             // node case does; characters() expects a non-null string.
+             if (text != null) {
+                 if ((features & (0x1 << 3) /*DOMSerializerImpl.CDATA*/) != 0) {
+                     startCDATA();
+                     characters(text);
+                     endCDATA();
+                 } else {
+                     // transform into a text node
+                     characters(text);
+                 }
+             }
+             break;
+         }
+
+         case Node.COMMENT_NODE: {
+             if (!_format.getOmitComments()) {
+                 String text = node.getNodeValue();
+                 if (text != null) {
+                     comment(text);
+                 }
+             }
+             break;
+         }
+
+         case Node.ENTITY_REFERENCE_NODE: {
+             endCDATA();
+             content();
+
+             if (((features & (0x1 << 2) /*DOMSerializerImpl.ENTITIES*/) != 0)
+                     || (node.getFirstChild() == null)) {
+                 // Print the reference itself.
+                 checkUnboundNamespacePrefixedNode(node);
+
+                 _printer.printText("&");
+                 _printer.printText(node.getNodeName());
+                 _printer.printText(";");
+             } else {
+                 // Expand the reference by serializing its children.
+                 Node child = node.getFirstChild();
+                 while (child != null) {
+                     serializeNode(child);
+                     child = child.getNextSibling();
+                 }
+             }
+             break;
+         }
+
+         case Node.PROCESSING_INSTRUCTION_NODE: {
+             processingInstructionIO(node.getNodeName(), node.getNodeValue());
+             break;
+         }
+
+         case Node.ELEMENT_NODE: {
+             serializeElement((Element) node);
+             break;
+         }
+
+         case Node.DOCUMENT_NODE: {
+             // If there is a document type, use the SAX events to
+             // serialize it.
+             DocumentType docType = ((Document) node).getDoctype();
+             if (docType != null) {
+                 // DOM Level 2 (or higher)
+                 try {
+                     String internal;
+
+                     _printer.enterDTD();
+                     _docTypePublicId = docType.getPublicId();
+                     _docTypeSystemId = docType.getSystemId();
+                     internal = docType.getInternalSubset();
+                     if (internal != null && internal.length() > 0) {
+                         _printer.printText(internal);
+                     }
+                     endDTD();
+                 }
+                 // DOM Level 1 -- does implementation have methods?
+                 catch (NoSuchMethodError nsme) {
+                     Class<?> docTypeClass = docType.getClass();
+
+                     String docTypePublicId = null;
+                     String docTypeSystemId = null;
+                     try {
+                         java.lang.reflect.Method getPublicId = docTypeClass.getMethod("getPublicId");
+                         if (getPublicId.getReturnType().equals(String.class)) {
+                             docTypePublicId = (String) getPublicId.invoke(docType);
+                         }
+                     } catch (Exception ignored) {
+                         // best effort: the identifier stays null
+                     }
+                     try {
+                         java.lang.reflect.Method getSystemId = docTypeClass.getMethod("getSystemId");
+                         if (getSystemId.getReturnType().equals(String.class)) {
+                             docTypeSystemId = (String) getSystemId.invoke(docType);
+                         }
+                     } catch (Exception ignored) {
+                         // best effort: the identifier stays null
+                     }
+                     _printer.enterDTD();
+                     _docTypePublicId = docTypePublicId;
+                     _docTypeSystemId = docTypeSystemId;
+                     endDTD();
+                 }
+             }
+             // !! Fall through
+         }
+         case Node.DOCUMENT_FRAGMENT_NODE: {
+             // By definition this will happen if the node is a document,
+             // document fragment, etc. Just serialize its contents. It will
+             // work well for other nodes that we do not know how to serialize.
+             Node child = node.getFirstChild();
+             while (child != null) {
+                 serializeNode(child);
+                 child = child.getNextSibling();
+             }
+             break;
+         }
+
+         default:
+             break;
+     }
+ }
+
+
+ /**
+ * Must be called by a method about to print any type of content.
+ * If the element was just opened, the opening tag is closed and
+ * will be matched to a closing tag. Returns the current element
+ * state with <tt>empty</tt> and <tt>afterElement</tt> set to false.
+ *
+ * @return The current element state
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ protected ElementState content()
+ throws IOException {
+ ElementState state;
+
+ state = getElementState();
+ if (!isDocumentState()) {
+ // Need to close CData section first
+ if (state.inCData && !state.doCData) {
+ _printer.printText("]]>");
+ state.inCData = false;
+ }
+ // If this is the first content in the element,
+ // change the state to not-empty and close the
+ // opening element tag.
+ if (state.empty) {
+ _printer.printText('>');
+ state.empty = false;
+ }
+ // Except for one content type, all of them
+ // are not last element. That one content
+ // type will take care of itself.
+ state.afterElement = false;
+ // Except for one content type, all of them
+ // are not last comment. That one content
+ // type will take care of itself.
+ state.afterComment = false;
+ }
+ return state;
+ }
+
+
+ /**
+ * Called to print the text contents in the prevailing element format.
+ * Since this method is capable of printing text as CDATA, it is used
+ * for that purpose as well. White space handling is determined by the
+ * current element state. In addition, the output format can dictate
+ * whether the text is printed as CDATA or unescaped.
+ *
+ * @param text The text to print
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ protected void characters(String text)
+ throws IOException {
+ ElementState state;
+
+ state = content();
+ // Check if text should be print as CDATA section or unescaped
+ // based on elements listed in the output format (the element
+ // state) or whether we are inside a CDATA section or entity.
+
+ if (state.inCData || state.doCData) {
+ int saveIndent;
+
+ // Print a CDATA section. The text is not escaped, but ']]>'
+ // appearing in the code must be identified and dealt with.
+ // The contents of a text node is considered space preserving.
+ if (!state.inCData) {
+ _printer.printText("<![CDATA[");
+ state.inCData = true;
+ }
+ saveIndent = _printer.getNextIndent();
+ _printer.setNextIndent(0);
+ printCDATAText(text);
+ _printer.setNextIndent(saveIndent);
+
+ } else {
+
+ int saveIndent;
+
+ if (state.preserveSpace) {
+ // If preserving space then hold of indentation so no
+ // excessive spaces are printed at line breaks, escape
+ // the text content without replacing spaces and print
+ // the text breaking only at line breaks.
+ saveIndent = _printer.getNextIndent();
+ _printer.setNextIndent(0);
+ printText(text, true, state.unescaped);
+ _printer.setNextIndent(saveIndent);
+ } else {
+ printText(text, false, state.unescaped);
+ }
+ }
+ }
+
+
+ /**
+ * Returns the name of a suitable entity reference for this character
+ * value, or null if no such entity exists. Callers in this class, such as
+ * {@link #printEscaped(int)}, wrap a non-null result in <tt>'&amp;'</tt>
+ * and <tt>';'</tt> themselves, so the returned value is expected to be
+ * the bare entity name without those delimiters.
+ *
+ * @param ch Character value
+ * @return Character entity name, or null
+ */
+ protected abstract String getEntityRef(int ch);
+
+
+ /**
+ * Called to serialize the DOM element. The element is serialized based on
+ * the serializer's method (XML, HTML, XHTML); the concrete behavior is
+ * supplied by the subclass implementing this method.
+ *
+ * @param elem The element to serialize
+ * @throws IOException An I/O exception occurred while
+ * serializing
+ */
+ protected abstract void serializeElement(Element elem)
+ throws IOException;
+
+
+ /**
+ * Comments and PIs cannot be serialized before the root element,
+ * because the root element serializes the document type, which
+ * generally comes first. Instead such PIs and comments are
+ * accumulated inside a vector and serialized by calling this
+ * method. Will be called when the root element is serialized
+ * and when the document finished serializing.
+ *
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ protected void serializePreRoot()
+ throws IOException {
+ int i;
+
+ if (_preRoot != null) {
+ for (i = 0; i < _preRoot.size(); ++i) {
+ printText((String) _preRoot.elementAt(i), true, true);
+ if (_indenting)
+ _printer.breakLine();
+ }
+ _preRoot.removeAllElements();
+ }
+ }
+
+ //---------------------------------------------//
+ // Text pretty printing and formatting methods //
+ //---------------------------------------------//
+
+ protected void printCDATAText(String text) throws IOException {
+ int length = text.length();
+ char ch;
+
+ for (int index = 0; index < length; ++index) {
+ ch = text.charAt(index);
+ if (ch == ']'
+ && index + 2 < length
+ && text.charAt(index + 1) == ']'
+ && text.charAt(index + 2) == '>') { // check for ']]>'
+ if (fDOMErrorHandler != null) {
+ // REVISIT: this means that if DOM Error handler is not registered we don't report any
+ // fatal errors and might serialize not wellformed document
+ /*
+ if ((features & DOMSerializerImpl.SPLITCDATA) == 0
+ && (features & DOMSerializerImpl.WELLFORMED) == 0) {
+ // issue fatal error
+ String msg =
+ DOMMessageFormatter.formatMessage(
+ DOMMessageFormatter.SERIALIZER_DOMAIN,
+ "EndingCDATA",
+ null);
+ modifyDOMError(
+ msg,
+ DOMError.SEVERITY_FATAL_ERROR,
+ fCurrentNode);
+ boolean continueProcess =
+ fDOMErrorHandler.handleError(fDOMError);
+ if (!continueProcess) {
+ throw new IOException();
+ }
+ } else*/ {
+ // issue warning
+ String msg =
+ DOMMessageFormatter.formatMessage(
+ DOMMessageFormatter.SERIALIZER_DOMAIN,
+ "SplittingCDATA",
+ null);
+ modifyDOMError(
+ msg,
+ DOMError.SEVERITY_WARNING,
+ fCurrentNode);
+ fDOMErrorHandler.handleError(fDOMError);
+ }
+ }
+ // split CDATA section
+ _printer.printText("]]]]><![CDATA[>");
+ index += 2;
+ continue;
+ }
+
+ if (!XMLChar.isValid(ch)) {
+ // check if it is surrogate
+ if (++index < length) {
+ surrogates(ch, text.charAt(index));
+ } else {
+ fatalError("The character '" + (char) ch + "' is an invalid XML character");
+ }
+ continue;
+ } else {
+ if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7) ||
+ ch == '\n' || ch == '\r' || ch == '\t') {
+ _printer.printText((char) ch);
+ } else {
+
+ // The character is not printable -- split CDATA section
+ _printer.printText("]]>&#x");
+ _printer.printText(Integer.toHexString(ch));
+ _printer.printText(";<![CDATA[");
+ }
+ }
+ }
+ }
+
+
+ protected void surrogates(int high, int low) throws IOException {
+ if (XMLChar.isHighSurrogate(high)) {
+ if (!XMLChar.isLowSurrogate(low)) {
+ //Invalid XML
+ fatalError("The character '" + (char) low + "' is an invalid XML character");
+ } else {
+ int supplemental = XMLChar.supplemental((char) high, (char) low);
+ if (!XMLChar.isValid(supplemental)) {
+ //Invalid XML
+ fatalError("The character '" + (char) supplemental + "' is an invalid XML character");
+ } else {
+ if (content().inCData) {
+ _printer.printText("]]>&#x");
+ _printer.printText(Integer.toHexString(supplemental));
+ _printer.printText(";<![CDATA[");
+ } else {
+ printHex(supplemental);
+ }
+ }
+ }
+ } else {
+ fatalError("The character '" + (char) high + "' is an invalid XML character");
+ }
+
+ }
+
+ /**
+ * Called to print additional text with whitespace handling.
+ * If spaces are preserved, the text is printed as if by calling
+ * {@link #printText(String,boolean,boolean)} with a call to {@link Printer#breakLine}
+ * for each new line. If spaces are not preserved, the text is
+ * broken at space boundaries if longer than the line width;
+ * Multiple spaces are printed as such, but spaces at beginning
+ * of line are removed.
+ *
+ * @param chars The text to print
+ * @param preserveSpace Space preserving flag
+ * @param unescaped Print unescaped
+ */
+ protected void printText(char[] chars, int start, int length,
+ boolean preserveSpace, boolean unescaped)
+ throws IOException {
+ int index;
+ char ch;
+
+ if (preserveSpace) {
+ // Preserving spaces: the text must print exactly as it is,
+ // without breaking when spaces appear in the text and without
+ // consolidating spaces. If a line terminator is used, a line
+ // break will occur.
+ while (length-- > 0) {
+ ch = chars[start];
+ ++start;
+ if (ch == '\n' || ch == '\r' || unescaped)
+ _printer.printText(ch);
+ else
+ printEscaped(ch);
+ }
+ } else {
+ // Not preserving spaces: print one part at a time, and
+ // use spaces between parts to break them into different
+ // lines. Spaces at beginning of line will be stripped
+ // by printing mechanism. Line terminator is treated
+ // no different than other text part.
+ while (length-- > 0) {
+ ch = chars[start];
+ ++start;
+ if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r')
+ _printer.printSpace();
+ else if (unescaped)
+ _printer.printText(ch);
+ else
+ printEscaped(ch);
+ }
+ }
+ }
+
+
+ protected void printText(String text, boolean preserveSpace, boolean unescaped)
+ throws IOException {
+ int index;
+ char ch;
+
+ if (preserveSpace) {
+ // Preserving spaces: the text must print exactly as it is,
+ // without breaking when spaces appear in the text and without
+ // consolidating spaces. If a line terminator is used, a line
+ // break will occur.
+ for (index = 0; index < text.length(); ++index) {
+ ch = text.charAt(index);
+ if (ch == '\n' || ch == '\r' || unescaped)
+ _printer.printText(ch);
+ else
+ printEscaped(ch);
+ }
+ } else {
+ // Not preserving spaces: print one part at a time, and
+ // use spaces between parts to break them into different
+ // lines. Spaces at beginning of line will be stripped
+ // by printing mechanism. Line terminator is treated
+ // no different than other text part.
+ for (index = 0; index < text.length(); ++index) {
+ ch = text.charAt(index);
+ if (ch == ' ' || ch == '\f' || ch == '\t' || ch == '\n' || ch == '\r')
+ _printer.printSpace();
+ else if (unescaped)
+ _printer.printText(ch);
+ else
+ printEscaped(ch);
+ }
+ }
+ }
+
+
+ /**
+ * Print a document type public or system identifier URL.
+ * Encapsulates the URL in double quotes, escapes non-printing
+ * characters and print it equivalent to {@link #printText}.
+ *
+ * @param url The document type url to print
+ */
+ protected void printDoctypeURL(String url)
+ throws IOException {
+ int i;
+
+ _printer.printText('"');
+ for (i = 0; i < url.length(); ++i) {
+ if (url.charAt(i) == '"' || url.charAt(i) < 0x20 || url.charAt(i) > 0x7F) {
+ _printer.printText('%');
+ _printer.printText(Integer.toHexString(url.charAt(i)));
+ } else
+ _printer.printText(url.charAt(i));
+ }
+ _printer.printText('"');
+ }
+
+
+ protected void printEscaped(int ch)
+ throws IOException {
+ String charRef;
+ // If there is a suitable entity reference for this
+ // character, print it. The list of available entity
+ // references is almost but not identical between
+ // XML and HTML.
+ charRef = getEntityRef(ch);
+ if (charRef != null) {
+ _printer.printText('&');
+ _printer.printText(charRef);
+ _printer.printText(';');
+ } else if ((ch >= ' ' && _encodingInfo.isPrintable((char) ch) && ch != 0xF7) ||
+ ch == '\n' || ch == '\r' || ch == '\t') {
+ // Non printables are below ASCII space but not tab or line
+ // terminator, ASCII delete, or above a certain Unicode threshold.
+ if (ch < 0x10000) {
+ _printer.printText((char) ch);
+ } else {
+ _printer.printText((char) (((ch - 0x10000) >> 10) + 0xd800));
+ _printer.printText((char) (((ch - 0x10000) & 0x3ff) + 0xdc00));
+ }
+ } else {
+ printHex(ch);
+ }
+ }
+
+ /**
+  * Prints the value as a hexadecimal character reference of the form
+  * <tt>&amp;#x<i>hex</i>;</tt>.
+  *
+  * @param ch The character value to reference
+  * @throws IOException An I/O exception occurred while serializing
+  */
+ final void printHex(int ch) throws IOException {
+     final String digits = Integer.toHexString(ch);
+     _printer.printText("&#x");
+     _printer.printText(digits);
+     _printer.printText(';');
+ }
+
+
+ /**
+ * Escapes a string so it may be printed as text content or attribute
+ * value. Non printable characters are escaped using character references.
+ * Where the format specifies a deault entity reference, that reference
+ * is used (e.g. <tt>&lt;</tt>).
+ *
+ * @param source The string to escape
+ */
+ protected void printEscaped(String source)
+ throws IOException {
+ for (int i = 0; i < source.length(); ++i) {
+ int ch = source.charAt(i);
+ if ((ch & 0xfc00) == 0xd800 && i + 1 < source.length()) {
+ int lowch = source.charAt(i + 1);
+ if ((lowch & 0xfc00) == 0xdc00) {
+ ch = 0x10000 + ((ch - 0xd800) << 10) + lowch - 0xdc00;
+ i++;
+ }
+ }
+ printEscaped(ch);
+ }
+ }
+
+ //--------------------------------//
+ // Element state handling methods //
+ //--------------------------------//
+
+
+ /**
+ * Return the state of the current element.
+ *
+ * @return Current element state
+ */
+ protected ElementState getElementState() {
+ return _elementStates[_elementStateCount];
+ }
+
+
+ /**
+ * Enter a new element state for the specified element.
+ * Tag name and space preserving is specified, element
+ * state is initially empty.
+ *
+ * @return Current element state, or null
+ */
+ protected ElementState enterElementState(String namespaceURI, String localName,
+ String rawName, boolean preserveSpace) {
+ ElementState state;
+
+ if (_elementStateCount + 1 == _elementStates.length) {
+ ElementState[] newStates;
+
+ // Need to create a larger array of states. This does not happen
+ // often, unless the document is really deep.
+ newStates = new ElementState[_elementStates.length + 10];
+ for (int i = 0; i < _elementStates.length; ++i)
+ newStates[i] = _elementStates[i];
+ for (int i = _elementStates.length; i < newStates.length; ++i)
+ newStates[i] = new ElementState();
+ _elementStates = newStates;
+ }
+
+ ++_elementStateCount;
+ state = _elementStates[_elementStateCount];
+ state.namespaceURI = namespaceURI;
+ state.localName = localName;
+ state.rawName = rawName;
+ state.preserveSpace = preserveSpace;
+ state.empty = true;
+ state.afterElement = false;
+ state.afterComment = false;
+ state.doCData = state.inCData = false;
+ state.unescaped = false;
+ state.prefixes = _prefixes;
+
+ _prefixes = null;
+ return state;
+ }
+
+
+ /**
+ * Leave the current element state and return to the
+ * state of the parent element. If this was the root
+ * element, return to the state of the document.
+ *
+ * @return Previous element state
+ */
+ protected ElementState leaveElementState() {
+ if (_elementStateCount > 0) {
+ /*Corrected by David Blondeau (blondeau@intalio.com)*/
+ _prefixes = null;
+ //_prefixes = _elementStates[ _elementStateCount ].prefixes;
+ --_elementStateCount;
+ return _elementStates[_elementStateCount];
+ } else {
+ String msg = DOMMessageFormatter.formatMessage(DOMMessageFormatter.SERIALIZER_DOMAIN, "Internal", null);
+ throw new IllegalStateException(msg);
+ }
+ }
+
+
+ /**
+ * Returns true if in the state of the document.
+ * Returns true before entering any element and after
+ * leaving the root element.
+ *
+ * @return True if in the state of the document
+ */
+ protected boolean isDocumentState() {
+ return _elementStateCount == 0;
+ }
+
+
+ /**
+ * Returns the namespace prefix for the specified URI.
+ * If the URI has been mapped to a prefix, returns the
+ * prefix, otherwise returns null.
+ *
+ * @param namespaceURI The namespace URI
+ * @return The namespace prefix if known, or null
+ */
+ protected String getPrefix(String namespaceURI) {
+ String prefix;
+
+ if (_prefixes != null) {
+ prefix = (String) _prefixes.get(namespaceURI);
+ if (prefix != null)
+ return prefix;
+ }
+ if (_elementStateCount == 0)
+ return null;
+ else {
+ for (int i = _elementStateCount; i > 0; --i) {
+ if (_elementStates[i].prefixes != null) {
+ prefix = (String) _elementStates[i].prefixes.get(namespaceURI);
+ if (prefix != null)
+ return prefix;
+ }
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Resets the serializer's shared DOM error object and fills it with the
+ * given message and severity and a locator that refers to the given node.
+ * The same error instance is reused on every call, so a previously
+ * returned reference reflects the latest values.
+ *
+ * @param message The error message
+ * @param severity The error severity
+ * @param node The node the error relates to; stored in the locator
+ * @return the shared DOMError, updated in place
+ */
+ protected DOMError modifyDOMError(String message, short severity, Node node) {
+ fDOMError.reset();
+ fDOMError.fMessage = message;
+ fDOMError.fSeverity = severity;
+ // Only the related node is supplied to the locator; the numeric
+ // arguments are -1 and the last argument is null.
+ fDOMError.fLocator = new DOMLocatorImpl(-1, -1, -1, node, null);
+ return fDOMError;
+
+ }
+
+
+ /**
+  * Reports a fatal serialization error. With a DOM error handler
+  * registered the error is passed to the handler and this method returns
+  * normally; without one the message is thrown as an exception.
+  *
+  * @param message The error message
+  * @throws IOException If no DOM error handler is registered
+  */
+ protected void fatalError(String message) throws IOException {
+     if (fDOMErrorHandler == null) {
+         throw new IOException(message);
+     }
+     modifyDOMError(message, DOMError.SEVERITY_FATAL_ERROR, fCurrentNode);
+     fDOMErrorHandler.handleError(fDOMError);
+ }
+
+ /**
+ * DOM level 3:
+ * Check a node to determine if it contains unbound namespace prefixes.
+ * The implementation in this class is empty and performs no check.
+ *
+ * @param node The node to check for unbound namespace prefixes
+ * @throws IOException Declared for implementations that report a problem;
+ * never thrown by this empty implementation
+ */
+ protected void checkUnboundNamespacePrefixedNode(Node node) throws IOException {
+ // Intentionally empty.
+
+ }
+}
Added: jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/DOMSerializer.java
URL: http://svn.apache.org/viewvc/jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/DOMSerializer.java?rev=1512568&view=auto
==============================================================================
--- jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/DOMSerializer.java (added)
+++ jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/DOMSerializer.java Sat Aug 10 05:53:42 2013
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.jackrabbit.vault.util.xml.serialize;
+
+
+import java.io.IOException;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+
+
+/**
+ * Interface for a DOM serializer implementation.
+ *
+ * @author <a href="mailto:Scott_Boag/CAM/Lotus@lotus.com">Scott Boag</a>
+ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
+ * @version $Revision$ $Date$
+ */
+public interface DOMSerializer {
+
+
+ /**
+ * Serialized the DOM element. Throws an exception only if
+ * an I/O exception occured while serializing.
+ *
+ * @param elem The element to serialize
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ public void serialize(Element elem)
+ throws IOException;
+
+
+ /**
+ * Serializes the DOM document. Throws an exception only if
+ * an I/O exception occured while serializing.
+ *
+ * @param doc The document to serialize
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ public void serialize(Document doc)
+ throws IOException;
+
+
+ /**
+ * Serializes the DOM document fragment. Throws an exception
+ * only if an I/O exception occured while serializing.
+ *
+ * @param frag The document fragment to serialize
+ * @throws IOException An I/O exception occured while
+ * serializing
+ */
+ public void serialize(DocumentFragment frag)
+ throws IOException;
+
+
+}
+
+
+
Added: jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/ElementState.java
URL: http://svn.apache.org/viewvc/jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/ElementState.java?rev=1512568&view=auto
==============================================================================
--- jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/ElementState.java (added)
+++ jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/ElementState.java Sat Aug 10 05:53:42 2013
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.jackrabbit.vault.util.xml.serialize;
+
+import java.util.Map;
+
+
+/**
+ * Holds the state of the currently serialized element.
+ *
+ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
+ * @version $Revision$ $Date$
+ * @see BaseMarkupSerializer
+ */
+public class ElementState {
+
+
+ /**
+ * The element's raw tag name (local or prefix:local).
+ */
+ public String rawName;
+
+
+ /**
+ * The element's local tag name.
+ */
+ public String localName;
+
+
+ /**
+ * The element's namespace URI.
+ */
+ public String namespaceURI;
+
+
+ /**
+ * True if element is space preserving.
+ */
+ public boolean preserveSpace;
+
+
+ /**
+ * True if element is empty. Turns false immediately
+ * after serializing the first contents of the element.
+ */
+ public boolean empty;
+
+
+ /**
+ * True if the last serialized node was an element node.
+ */
+ public boolean afterElement;
+
+
+ /**
+ * True if the last serialized node was a comment node.
+ */
+ public boolean afterComment;
+
+
+ /**
+ * True if textual content of current element should be
+ * serialized as CDATA section.
+ */
+ public boolean doCData;
+
+
+ /**
+ * True if textual content of current element should be
+ * serialized as raw characters (unescaped).
+ */
+ public boolean unescaped;
+
+
+ /**
+ * True while inside CData and printing text as CData.
+ */
+ public boolean inCData;
+
+
+ /**
+ * Association between namespace URIs (keys) and prefixes (values).
+ */
+ public Map<String, String> prefixes;
+
+
+}
Added: jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/EncodingInfo.java
URL: http://svn.apache.org/viewvc/jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/EncodingInfo.java?rev=1512568&view=auto
==============================================================================
--- jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/EncodingInfo.java (added)
+++ jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/EncodingInfo.java Sat Aug 10 05:53:42 2013
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.jackrabbit.vault.util.xml.serialize;
+
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import org.apache.jackrabbit.vault.util.xml.xerces.util.EncodingMap;
+
+/**
+ * This class represents an encoding.
+ *
+ * @version $Id$
+ */
+public class EncodingInfo {
+
+ // Method: sun.io.CharToByteConverter.getConverter(java.lang.String)
+ private static java.lang.reflect.Method fgGetConverterMethod = null;
+
+ // Method: sun.io.CharToByteConverter.canConvert(char)
+ private static java.lang.reflect.Method fgCanConvertMethod = null;
+
+ // Flag indicating whether or not sun.io.CharToByteConverter is available.
+ private static boolean fgConvertersAvailable = false;
+
+ // An array to hold the argument for a method of CharToByteConverter.
+ private Object[] fArgsForMethod = null;
+
+ // name of encoding as registered with IANA;
+ // preferably a MIME name, but aliases are fine too.
+ String ianaName;
+ String javaName;
+ int lastPrintable;
+
+ // The charToByteConverter with which we test unusual characters.
+ Object fCharToByteConverter = null;
+
+ // Is the converter null because it can't be instantiated
+ // for some reason (perhaps we're running with insufficient authority as
+ // an applet?
+ boolean fHaveTriedCToB = false;
+ Charset nioCharset = null;
+ CharsetEncoder nioCharEncoder = null;
+
+ /**
+ * Creates new <code>EncodingInfo</code> instance.
+ */
+ public EncodingInfo(String ianaName, String javaName, int lastPrintable) {
+ this.ianaName = ianaName;
+ this.javaName = EncodingMap.getIANA2JavaMapping(ianaName);
+ this.lastPrintable = lastPrintable;
+ try {
+ nioCharset = Charset.forName(this.javaName);
+ if (nioCharset.canEncode())
+ nioCharEncoder = nioCharset.newEncoder();
+ } catch (IllegalCharsetNameException ie) {
+ nioCharset = null;
+ nioCharEncoder = null;
+ } catch (UnsupportedCharsetException ue) {
+ nioCharset = null;
+ nioCharEncoder = null;
+ }
+ }
+
+ /**
+ * Returns a MIME charset name of this encoding.
+ */
+ public String getIANAName() {
+ return this.ianaName;
+ }
+
+ /**
+ * Returns a writer for this encoding based on
+ * an output stream.
+ *
+ * @return A suitable writer
+ * @throws UnsupportedEncodingException There is no convertor
+ * to support this encoding
+ */
+ public Writer getWriter(OutputStream output)
+ throws UnsupportedEncodingException {
+ // this should always be true!
+ if (javaName != null)
+ return new OutputStreamWriter(output, javaName);
+ javaName = EncodingMap.getIANA2JavaMapping(ianaName);
+ if (javaName == null)
+ // use UTF-8 as preferred encoding
+ return new OutputStreamWriter(output, "UTF8");
+ return new OutputStreamWriter(output, javaName);
+ }
+
+ /**
+ * Checks whether the specified character is printable or not
+ * in this encoding.
+ *
+ * @param ch a code point (0-0x10ffff)
+ */
+ public boolean isPrintable(char ch) {
+ if (ch <= this.lastPrintable)
+ return true;
+ if (nioCharEncoder != null)
+ return nioCharEncoder.canEncode(ch);
+
+ //We should not reach here , if we reach due to
+ //charset not supporting encoding then fgConvertersAvailable
+ //should take care of returning false.
+
+ if (fCharToByteConverter == null) {
+ if (fHaveTriedCToB || !fgConvertersAvailable) {
+ // forget it; nothing we can do...
+ return false;
+ }
+ if (fArgsForMethod == null) {
+ fArgsForMethod = new Object[1];
+ }
+ // try and create it:
+ try {
+ fArgsForMethod[0] = javaName;
+ fCharToByteConverter = fgGetConverterMethod.invoke(null, fArgsForMethod);
+ } catch (Exception e) {
+ // don't try it again...
+ fHaveTriedCToB = true;
+ return false;
+ }
+ }
+ try {
+ fArgsForMethod[0] = new Character(ch);
+ return ((Boolean) fgCanConvertMethod.invoke(fCharToByteConverter, fArgsForMethod)).booleanValue();
+ } catch (Exception e) {
+ // obviously can't use this converter; probably some kind of
+ // security restriction
+ fCharToByteConverter = null;
+ fHaveTriedCToB = false;
+ return false;
+ }
+ }
+
+ // is this an encoding name recognized by this JDK?
+ // if not, will throw UnsupportedEncodingException
+ public static void testJavaEncodingName(String name) throws UnsupportedEncodingException {
+ final byte[] bTest = {(byte) 'v', (byte) 'a', (byte) 'l', (byte) 'i', (byte) 'd'};
+ String s = new String(bTest, name);
+ }
+
+ // Attempt to get methods for char to byte
+ // converter on class initialization.
+ static {
+ try {
+ Class clazz = Class.forName("sun.io.CharToByteConverter");
+ fgGetConverterMethod = clazz.getMethod("getConverter", new Class[]{String.class});
+ fgCanConvertMethod = clazz.getMethod("canConvert", new Class[]{Character.TYPE});
+ fgConvertersAvailable = true;
+ }
+ // ClassNotFoundException, NoSuchMethodException or SecurityException
+ // Whatever the case, we cannot use sun.io.CharToByteConverter.
+ catch (Exception exc) {
+ fgGetConverterMethod = null;
+ fgCanConvertMethod = null;
+ fgConvertersAvailable = false;
+ }
+ }
+}
Added: jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/Encodings.java
URL: http://svn.apache.org/viewvc/jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/Encodings.java?rev=1512568&view=auto
==============================================================================
--- jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/Encodings.java (added)
+++ jackrabbit/commons/filevault/trunk/vault-core/src/main/java/org/apache/jackrabbit/vault/util/xml/serialize/Encodings.java Sat Aug 10 05:53:42 2013
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.jackrabbit.vault.util.xml.serialize;
+
+
+import java.io.UnsupportedEncodingException;
+import java.util.Hashtable;
+import java.util.Locale;
+
+import org.apache.jackrabbit.vault.util.xml.xerces.util.EncodingMap;
+
+
+/**
+ * Provides information about encodings. Depends on the Java runtime
+ * to provide writers for the different encodings, but can be used
+ * to override encoding names and provide the last printable character
+ * for each encoding.
+ *
+ * @author <a href="mailto:arkin@intalio.com">Assaf Arkin</a>
+ * @version $Id$
+ */
+public class Encodings {
+
+    /**
+     * The last printable character for unknown encodings.
+     */
+    static final int DEFAULT_LAST_PRINTABLE = 0x7F;
+
+    /**
+     * The last printable character for Unicode-compatible encodings.
+     */
+    static final int LAST_PRINTABLE_UNICODE = 0xffff;
+
+    /**
+     * Unicode-compliant encodings; these can express plane 0.
+     */
+    static final String[] UNICODE_ENCODINGS = {
+        "Unicode", "UnicodeBig", "UnicodeLittle", "GB2312", "UTF8",
+    };
+
+    /**
+     * The default (Java) encoding, used if none is supplied.
+     */
+    static final String DEFAULT_ENCODING = "UTF8";
+
+    // Cache of EncodingInfo objects, keyed by the name that was handed to
+    // EncodingInfo as its Java encoding name.
+    // note that the size of this Hashtable
+    // is bounded by the number of encodings recognized by EncodingMap;
+    // therefore it poses no static mutability risk.
+    static Hashtable _encodings = new Hashtable();
+
+    /**
+     * Returns the {@link EncodingInfo} for an encoding, creating and caching
+     * it on first use.
+     *
+     * @param encoding       a MIME charset name, or null to get the info for
+     *                       {@link #DEFAULT_ENCODING}.
+     * @param allowJavaNames whether a name for which EncodingMap has no
+     *                       IANA-to-Java mapping may be tried as a Java
+     *                       encoding name instead.
+     * @return the cached or newly created encoding information; never null.
+     * @throws UnsupportedEncodingException if the name has no IANA-to-Java
+     *         mapping and either <code>allowJavaNames</code> is false or
+     *         {@link EncodingInfo#testJavaEncodingName(String)} rejects it.
+     */
+    static EncodingInfo getEncodingInfo(String encoding, boolean allowJavaNames) throws UnsupportedEncodingException {
+        if (encoding == null) {
+            EncodingInfo defaultInfo = (EncodingInfo) _encodings.get(DEFAULT_ENCODING);
+            if (defaultInfo == null) {
+                defaultInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(DEFAULT_ENCODING), DEFAULT_ENCODING, LAST_PRINTABLE_UNICODE);
+                _encodings.put(DEFAULT_ENCODING, defaultInfo);
+            }
+            return defaultInfo;
+        }
+
+        // need to convert it to upper case:
+        final String upperName = encoding.toUpperCase(Locale.ENGLISH);
+        final String jName = EncodingMap.getIANA2JavaMapping(upperName);
+
+        if (jName == null) {
+            // No IANA mapping: the only remaining option is that the caller
+            // passed a Java encoding name, and that is allowed.
+            if (!allowJavaNames) {
+                throw new UnsupportedEncodingException(upperName);
+            }
+            // Throws UnsupportedEncodingException if the JDK rejects the name.
+            EncodingInfo.testJavaEncodingName(upperName);
+            EncodingInfo javaInfo = (EncodingInfo) _encodings.get(upperName);
+            if (javaInfo == null) {
+                javaInfo = new EncodingInfo(EncodingMap.getJava2IANAMapping(upperName), upperName, lastPrintableFor(upperName));
+                _encodings.put(upperName, javaInfo);
+            }
+            return javaInfo;
+        }
+
+        EncodingInfo mappedInfo = (EncodingInfo) _encodings.get(jName);
+        if (mappedInfo == null) {
+            // have to create one...
+            mappedInfo = new EncodingInfo(upperName, jName, lastPrintableFor(jName));
+            _encodings.put(jName, mappedInfo);
+        }
+        return mappedInfo;
+    }
+
+    /**
+     * Returns the last printable character for a Java encoding name:
+     * {@link #LAST_PRINTABLE_UNICODE} if the name matches an entry of
+     * {@link #UNICODE_ENCODINGS} ignoring case, otherwise
+     * {@link #DEFAULT_LAST_PRINTABLE}.
+     */
+    private static int lastPrintableFor(String javaName) {
+        for (int i = 0; i < UNICODE_ENCODINGS.length; i++) {
+            if (UNICODE_ENCODINGS[i].equalsIgnoreCase(javaName)) {
+                return LAST_PRINTABLE_UNICODE;
+            }
+        }
+        return DEFAULT_LAST_PRINTABLE;
+    }
+
+    // NOTE(review): presumably the characters that are risky to write in
+    // JIS-family encodings; the consumers of this constant are not visible
+    // here, so confirm against them.
+    static final String JIS_DANGER_CHARS
+            = "\\\u007e\u007f\u00a2\u00a3\u00a5\u00ac"
+            + "\u2014\u2015\u2016\u2026\u203e\u203e\u2225\u222f\u301c"
+            + "\uff3c\uff5e\uffe0\uffe1\uffe2\uffe3";
+
+}