You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/25 11:42:05 UTC

svn commit: r1175353 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/sax/ test/java/org/apache/tika/sax/

Author: jukka
Date: Sun Sep 25 09:42:05 2011
New Revision: 1175353

URL: http://svn.apache.org/viewvc?rev=1175353&view=rev
Log:
TIKA-651: Unescaped attribute value generated

Add simple text and XML serializers

Added:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java   (with props)
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java   (with props)
    tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java   (with props)
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java

Added: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX event handler that writes all character content out to a character
+ * stream. No escaping or other transformations are made on the character
+ * content.
+ *
+ * @since Apache Tika 1.0
+ */
+public class ToTextContentHandler extends DefaultHandler {
+
+    /**
+     * The character stream.
+     */
+    private final Writer writer;
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given writer.
+     *
+     * @param writer writer
+     */
+    public ToTextContentHandler(Writer writer) {
+        this.writer = writer;
+    }
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given output stream using the platform default encoding.
+     *
+     * @param stream output stream
+     */
+    public ToTextContentHandler(OutputStream stream) {
+        this(new OutputStreamWriter(stream));
+    }
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given output stream using the given encoding.
+     *
+     * @param stream output stream
+     * @param encoding output encoding
+     * @throws UnsupportedEncodingException if the encoding is unsupported
+     */
+    public ToTextContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        this(new OutputStreamWriter(stream, encoding));
+    }
+
+    /**
+     * Creates a content handler that writes character events
+     * to an internal string buffer. Use the {@link #toString()}
+     * method to access the collected character content.
+     */
+    public ToTextContentHandler() {
+        this(new StringWriter());
+    }
+
+    /**
+     * Writes the given characters to the given character stream.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            writer.write(ch, start, length);
+        } catch (IOException e) {
+            throw new SAXException(
+                    "Error writing: " + new String(ch, start, length), e);
+        }
+    }
+
+
+    /**
+     * Writes the given ignorable characters to the given character stream.
+     * The default implementation simply forwards the call to the
+     * {@link #characters(char[], int, int)} method.
+     */
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        characters(ch, start, length);
+    }
+
+    /**
+     * Flushes the character stream so that no characters are forgotten
+     * in internal buffers.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+     * @throws SAXException if the stream can not be flushed
+     */
+    @Override
+    public void endDocument() throws SAXException {
+        try {
+            writer.flush();
+        } catch (IOException e) {
+            throw new SAXException("Error flushing character output", e);
+        }
+    }
+
+    /**
+     * Returns the contents of the internal string buffer where
+     * all the received characters have been collected. Only works
+     * when this object was constructed using the empty default
+     * constructor or by passing a {@link StringWriter} to the
+     * other constructor.
+     */
+    @Override
+    public String toString() {
+        return writer.toString();
+    }
+
+}

Propchange: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the XML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and to explicitly include namespace declaration attributes and
+ * corresponding namespace prefixes in element and attribute names.
+ *
+ * @since Apache Tika 1.0
+ */
+public class ToXMLContentHandler extends ToTextContentHandler {
+
+    private static class ElementInfo {
+
+        private final ElementInfo parent;
+
+        private final Map<String, String> namespaces;
+
+        public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
+            this.parent = parent;
+            if (namespaces.isEmpty()) {
+                this.namespaces = Collections.emptyMap();
+            } else {
+                this.namespaces = new HashMap<String, String>(namespaces);
+            }
+        }
+
+        public String getPrefix(String uri) throws SAXException {
+            String prefix = namespaces.get(uri);
+            if (prefix != null) {
+                return prefix;
+            } else if (parent != null) {
+                return parent.getPrefix(uri);
+            } else if (uri == null || uri.length() == 0) {
+                return "";
+            } else {
+                throw new SAXException("Namespace " + uri + " not declared");
+            }
+        }
+
+        public String getQName(String uri, String localName)
+                throws SAXException {
+            String prefix = getPrefix(uri);
+            if (prefix.length() > 0) {
+                return prefix + ":" + localName;
+            } else {
+                return localName;
+            }
+        }
+
+    }
+
+    private final String encoding;
+
+    private boolean inStartElement = false;
+
+    private final Map<String, String> namespaces =
+        new HashMap<String, String>();
+
+    private ElementInfo currentElement;
+
+    /**
+     * Creates an XML serializer that writes to the given byte stream
+     * using the given character encoding.
+     *
+     * @param stream output stream
+     * @param encoding output encoding
+     * @throws UnsupportedEncodingException if the encoding is unsupported
+     */
+    public ToXMLContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        super(stream, encoding);
+        this.encoding = encoding;
+    }
+
+    public ToXMLContentHandler(String encoding) {
+        super();
+        this.encoding = encoding;
+    }
+
+    public ToXMLContentHandler() {
+        super();
+        this.encoding = null;
+    }
+
+    /**
+     * Writes the XML declaration, but only if an output encoding was given.
+     */
+    @Override
+    public void startDocument() throws SAXException {
+        if (encoding != null) {
+            write("<?xml version=\"1.0\" encoding=\"");
+            write(encoding);
+            write("\"?>\n");
+        }
+
+        currentElement = null;
+        namespaces.clear();
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        namespaces.put(uri, prefix);
+    }
+
+    @Override
+    public void startElement(
+            String uri, String localName, String qName, Attributes atts)
+            throws SAXException {
+        lazyCloseStartElement();
+
+        currentElement = new ElementInfo(currentElement, namespaces);
+
+        write('<');
+        write(currentElement.getQName(uri, localName));
+
+        for (int i = 0; i < atts.getLength(); i++) {
+            write(' ');
+            write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
+            write('=');
+            write('"');
+            char[] ch = atts.getValue(i).toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+
+        for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+            write(' ');
+            write("xmlns");
+            String prefix = entry.getValue();
+            if (prefix.length() > 0) {
+                write(':');
+                write(prefix);
+            }
+            write('=');
+            write('"');
+            char[] ch = entry.getKey().toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+        namespaces.clear();
+
+        inStartElement = true;
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (inStartElement) {
+            write(" />");
+            inStartElement = false;
+        } else {
+            write("</");
+            write(qName);
+            write('>');
+        }
+
+        namespaces.clear();
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        lazyCloseStartElement();
+        writeEscaped(ch, start, start + length, false);
+    }
+
+    private void lazyCloseStartElement() throws SAXException {
+        if (inStartElement) {
+            write('>');
+            inStartElement = false;
+        }
+    }
+
+    /**
+     * Writes the given character as-is.
+     *
+     * @param ch character to be written
+     * @throws SAXException if the character could not be written
+     */
+    protected void write(char ch) throws SAXException {
+        super.characters(new char[] { ch }, 0, 1);
+    }
+
+    /**
+     * Writes the given string of characters as-is.
+     *
+     * @param string string of characters to be written
+     * @throws SAXException if the character string could not be written
+     */
+    protected void write(String string) throws SAXException {
+        super.characters(string.toCharArray(), 0, string.length());
+    }
+
+    /**
+     * Writes the given characters as-is followed by the given entity.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param entity entity code
+     * @return next position in the array,
+     *         after the characters plus one entity
+     * @throws SAXException if the characters could not be written
+     */
+    private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
+            throws SAXException {
+        super.characters(ch, from, to - from);
+        write('&');
+        write(entity);
+        write(';');
+        return to + 1;
+    }
+
+    /**
+     * Writes the given characters with XML meta characters escaped.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param attribute whether the characters should be escaped as
+     *                  an attribute value or normal character content
+     * @throws SAXException if the characters could not be written
+     */
+    private void writeEscaped(char[] ch, int from, int to, boolean attribute)
+            throws SAXException {
+        int pos = from;
+        while (pos < to) {
+            if (ch[pos] == '<') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "lt");
+            } else if (ch[pos] == '>') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "gt");
+            } else if (ch[pos] == '&') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "amp");
+            } else if (attribute && ch[pos] == '"') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "quot");
+            } else {
+                pos++;
+            }
+        }
+        super.characters(ch, from, to - from);
+    }
+
+}

Propchange: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
------------------------------------------------------------------------------
    svn:executable = *

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java?rev=1175353&r1=1175352&r2=1175353&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java Sun Sep 25 09:42:05 2011
@@ -16,25 +16,26 @@
  */
 package org.apache.tika.sax;
 
-import java.io.IOException;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
+import java.io.Serializable;
 import java.io.StringWriter;
 import java.io.Writer;
+import java.util.UUID;
 
+import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
 
 /**
- * SAX event handler that writes all character content out to
- * a {@link Writer} character stream.
+ * SAX event handler that writes content up to an optional write
+ * limit out to a character stream or other decorated handler.
  */
-public class WriteOutContentHandler extends DefaultHandler {
+public class WriteOutContentHandler extends ContentHandlerDecorator {
 
     /**
-     * The character stream.
+     * The unique tag attached to write limit exceptions thrown by this handler.
      */
-    private final Writer writer;
+    private final Serializable tag = UUID.randomUUID();
 
     /**
      * The maximum number of characters to write to the character stream.
@@ -47,12 +48,32 @@ public class WriteOutContentHandler exte
      */
     private int writeCount = 0;
 
-    private WriteOutContentHandler(Writer writer, int writeLimit) {
-        this.writer = writer;
+    /**
+     * Creates a content handler that writes content up to the given
+     * write limit to the given content handler.
+     *
+     * @since Apache Tika 1.0
+     * @param handler content handler to be decorated
+     * @param writeLimit write limit
+     */
+    public WriteOutContentHandler(ContentHandler handler, int writeLimit) {
+        super(handler);
         this.writeLimit = writeLimit;
     }
 
     /**
+     * Creates a content handler that writes content up to the given
+     * write limit to the given character stream.
+     *
+     * @since Apache Tika 1.0
+     * @param writer character stream
+     * @param writeLimit write limit
+     */
+    public WriteOutContentHandler(Writer writer, int writeLimit) {
+        this(new ToTextContentHandler(writer), writeLimit);
+    }
+
+    /**
      * Creates a content handler that writes character events to
      * the given writer.
      *
@@ -110,65 +131,22 @@ public class WriteOutContentHandler exte
     @Override
     public void characters(char[] ch, int start, int length)
             throws SAXException {
-        try {
-            if (writeLimit == -1 || writeCount + length <= writeLimit) {
-                writer.write(ch, start, length);
-                writeCount += length;
-            } else {
-                writer.write(ch, start, writeLimit - writeCount);
-                writeCount = writeLimit;
-                throw new WriteLimitReachedException(
-                      "Your document contained more than " + writeLimit + " " +
-                      "characters, and so your requested limit has been " +
-                      "reached. To receive the full text of the document, " +
-                      "increase your limit. " +
-                      "(Text up to the limit is however available)."
-                );
-            }
-        } catch (IOException e) {
-            throw new SAXException("Error writing out character content", e);
-        }
-    }
-
-
-    /**
-     * Writes the given ignorable characters to the given character stream.
-     */
-    @Override
-    public void ignorableWhitespace(char[] ch, int start, int length)
-            throws SAXException {
-        characters(ch, start, length);
-    }
-
-    /**
-     * Flushes the character stream so that no characters are forgotten
-     * in internal buffers.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
-     * @throws SAXException if the stream can not be flushed
-     */
-    @Override
-    public void endDocument() throws SAXException {
-        try {
-            writer.flush();
-        } catch (IOException e) {
-            throw new SAXException("Error flushing character output", e);
+        if (writeLimit == -1 || writeCount + length <= writeLimit) {
+            super.characters(ch, start, length);
+            writeCount += length;
+        } else {
+            super.characters(ch, start, writeLimit - writeCount);
+            writeCount = writeLimit;
+            throw new WriteLimitReachedException(
+                    "Your document contained more than " + writeLimit
+                    + " characters, and so your requested limit has been"
+                    + " reached. To receive the full text of the document,"
+                    + " increase your limit. (Text up to the limit is"
+                    + " however available).", tag);
         }
     }
 
     /**
-     * Returns the contents of the internal string buffer where
-     * all the received characters have been collected. Only works
-     * when this object was constructed using the empty default
-     * constructor or by passing a {@link StringWriter} to the
-     * other constructor.
-     */
-    @Override
-    public String toString() {
-        return writer.toString();
-    }
-
-    /**
      * Checks whether the given exception (or any of it's root causes) was
      * thrown by this handler as a signal of reaching the write limit.
      *
@@ -179,7 +157,7 @@ public class WriteOutContentHandler exte
      */
     public boolean isWriteLimitReached(Throwable t) {
         if (t instanceof WriteLimitReachedException) {
-            return this == ((WriteLimitReachedException) t).getSource();
+            return tag.equals(((WriteLimitReachedException) t).tag);
         } else {
             return t.getCause() != null && isWriteLimitReached(t.getCause());
         }
@@ -188,13 +166,17 @@ public class WriteOutContentHandler exte
     /**
      * The exception used as a signal when the write limit has been reached.
      */
-    private class WriteLimitReachedException extends SAXException {
-        public WriteLimitReachedException(String message) {
-           super(message);
-        }
+    private static class WriteLimitReachedException extends SAXException {
+
+        /** Serial version UID */
+        private static final long serialVersionUID = -1850581945459429943L;
 
-        public WriteOutContentHandler getSource() {
-            return WriteOutContentHandler.this;
+        /** Serializable tag of the handler that caused this exception */
+        private final Serializable tag;
+
+        public WriteLimitReachedException(String message, Serializable tag) {
+           super(message);
+           this.tag = tag;
         }
 
     }

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.AttributesImpl;
+
+public class SerializerTest extends TestCase {
+
+    public void testToTextContentHandler() throws Exception {
+        assertStartDocument("", new ToTextContentHandler());
+        assertCharacters("content", new ToTextContentHandler());
+        assertCharacterEscaping("<&\">", new ToTextContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
+        assertEmptyElement("", new ToTextContentHandler());
+        assertEmptyElementWithAttributes("", new ToTextContentHandler());
+        assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
+        assertElement("content", new ToTextContentHandler());
+        assertElementWithAttributes("content", new ToTextContentHandler());
+    }
+
+    public void testToXMLContentHandler() throws Exception {
+        assertStartDocument("", new ToXMLContentHandler());
+        assertStartDocument(
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
+                new ToXMLContentHandler("UTF-8"));
+        assertCharacters("content", new ToXMLContentHandler());
+        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
+        assertEmptyElement("<p />", new ToXMLContentHandler());
+        assertEmptyElementWithAttributes(
+                "<p class=\"test\" />",
+                new ToXMLContentHandler());
+        assertEmptyElementWithAttributeEscaping(
+                "<p class=\"&lt;&amp;&quot;&gt;\" />",
+                new ToXMLContentHandler());
+        assertElement("<p>content</p>", new ToXMLContentHandler());
+        assertElementWithAttributes(
+                "<p class=\"test\">content</p>",
+                new ToXMLContentHandler());
+    }
+
+    private void assertStartDocument(String expected, ContentHandler handler)
+            throws Exception {
+        handler.startDocument();
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertCharacters(String expected, ContentHandler handler)
+            throws Exception {
+        handler.characters("content".toCharArray(), 0, 7);
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertCharacterEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        handler.characters("<&\">".toCharArray(), 0, 4);
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertIgnorableWhitespace(
+            String expected, ContentHandler handler) throws Exception {
+        handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4);
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertEmptyElement(String expected, ContentHandler handler)
+            throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertEmptyElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "test");
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertEmptyElementWithAttributeEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "<&\">");
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertElement(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        handler.startElement("", "p", "p", attributes);
+        handler.characters("content".toCharArray(), 0, 7);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    private void assertElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "test");
+        handler.startElement("", "p", "p", attributes);
+        handler.characters("content".toCharArray(), 0, 7);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
------------------------------------------------------------------------------
    svn:executable = *