You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/25 11:42:05 UTC
svn commit: r1175353 - in /tika/trunk/tika-core/src:
main/java/org/apache/tika/sax/ test/java/org/apache/tika/sax/
Author: jukka
Date: Sun Sep 25 09:42:05 2011
New Revision: 1175353
URL: http://svn.apache.org/viewvc?rev=1175353&view=rev
Log:
TIKA-651: Unescaped attribute value generated
Add simple text and XML serializers
Added:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java (with props)
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java (with props)
tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (with props)
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
Added: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX event handler that writes all character content out to a character
+ * stream. No escaping or other transformations are made on the character
+ * content.
+ *
+ * @since Apache Tika 1.0
+ */
+public class ToTextContentHandler extends DefaultHandler {
+
+ /**
+ * The character stream.
+ */
+ private final Writer writer;
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given writer.
+ *
+ * @param writer writer
+ */
+ public ToTextContentHandler(Writer writer) {
+ this.writer = writer;
+ }
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given output stream using the platform default encoding.
+ *
+ * @param stream output stream
+ */
+ public ToTextContentHandler(OutputStream stream) {
+ this(new OutputStreamWriter(stream));
+ }
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given output stream using the given encoding.
+ *
+ * @param stream output stream
+ * @param encoding output encoding
+ * @throws UnsupportedEncodingException if the encoding is unsupported
+ */
+ public ToTextContentHandler(OutputStream stream, String encoding)
+ throws UnsupportedEncodingException {
+ this(new OutputStreamWriter(stream, encoding));
+ }
+
+ /**
+ * Creates a content handler that writes character events
+ * to an internal string buffer. Use the {@link #toString()}
+ * method to access the collected character content.
+ */
+ public ToTextContentHandler() {
+ this(new StringWriter());
+ }
+
+ /**
+ * Writes the given characters to the given character stream.
+ */
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ try {
+ writer.write(ch, start, length);
+ } catch (IOException e) {
+ throw new SAXException(
+ "Error writing: " + new String(ch, start, length), e);
+ }
+ }
+
+
+ /**
+ * Writes the given ignorable characters to the given character stream.
+ * The default implementation simply forwards the call to the
+ * {@link #characters(char[], int, int)} method.
+ */
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ characters(ch, start, length);
+ }
+
+ /**
+ * Flushes the character stream so that no characters are forgotten
+ * in internal buffers.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+ * @throws SAXException if the stream can not be flushed
+ */
+ @Override
+ public void endDocument() throws SAXException {
+ try {
+ writer.flush();
+ } catch (IOException e) {
+ throw new SAXException("Error flushing character output", e);
+ }
+ }
+
+ /**
+ * Returns the contents of the internal string buffer where
+ * all the received characters have been collected. Only works
+ * when this object was constructed using the empty default
+ * constructor or by passing a {@link StringWriter} to the
+ * other constructor.
+ */
+ @Override
+ public String toString() {
+ return writer.toString();
+ }
+
+}
Propchange: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the XML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and to explicitly include namespace declaration attributes and
+ * corresponding namespace prefixes in element and attribute names.
+ *
+ * @since Apache Tika 1.0
+ */
+public class ToXMLContentHandler extends ToTextContentHandler {
+
+    /**
+     * Namespace scope of one open element: the prefix mappings declared
+     * on the element itself plus a link to the scope of its parent.
+     */
+    private static class ElementInfo {
+
+        /** Scope of the enclosing element, or <code>null</code> at the root. */
+        private final ElementInfo parent;
+
+        /** Namespace URI to prefix mappings declared on this element. */
+        private final Map<String, String> namespaces;
+
+        /**
+         * @param parent scope of the enclosing element, or <code>null</code>
+         * @param namespaces URI to prefix mappings declared on this element
+         */
+        public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
+            this.parent = parent;
+            if (namespaces.isEmpty()) {
+                this.namespaces = Collections.emptyMap();
+            } else {
+                // Copy, because the caller clears and reuses its own map
+                this.namespaces = new HashMap<String, String>(namespaces);
+            }
+        }
+
+        /**
+         * Looks up the prefix mapped to the given namespace URI in this
+         * scope or, failing that, in the closest enclosing scope.
+         *
+         * @param uri namespace URI, possibly <code>null</code> or empty
+         * @return the mapped prefix, or the empty string when no mapping
+         *         exists and the URI is <code>null</code> or empty
+         * @throws SAXException if a non-empty URI has no mapping in scope
+         */
+        public String getPrefix(String uri) throws SAXException {
+            // Walk the ancestor chain in a loop rather than recursively so
+            // that deeply nested documents can not exhaust the call stack
+            for (ElementInfo info = this; info != null; info = info.parent) {
+                String prefix = info.namespaces.get(uri);
+                if (prefix != null) {
+                    return prefix;
+                }
+            }
+            if (uri == null || uri.length() == 0) {
+                return "";
+            } else {
+                throw new SAXException("Namespace " + uri + " not declared");
+            }
+        }
+
+        /**
+         * Builds the qualified name to serialize for the given name.
+         *
+         * @param uri namespace URI of the name
+         * @param localName local part of the name
+         * @return <code>prefix:localName</code>, or just the local name
+         *         when the namespace is mapped to the empty prefix
+         * @throws SAXException if the namespace is not declared in scope
+         */
+        public String getQName(String uri, String localName)
+                throws SAXException {
+            String prefix = getPrefix(uri);
+            if (prefix.length() > 0) {
+                return prefix + ":" + localName;
+            } else {
+                return localName;
+            }
+        }
+
+    }
+
+    /**
+     * Encoding name written to the XML declaration, or <code>null</code>
+     * when no declaration should be written.
+     */
+    private final String encoding;
+
+    /**
+     * Whether a start tag has been written but not yet terminated. The
+     * tag is left open so that an immediately following end event can
+     * turn it into an empty-element tag.
+     */
+    private boolean inStartElement = false;
+
+    /**
+     * Namespace URI to prefix mappings received since the last element
+     * event; they are declared on the next start tag.
+     */
+    private final Map<String, String> namespaces =
+        new HashMap<String, String>();
+
+    /** Namespace scope of the innermost open element. */
+    private ElementInfo currentElement;
+
+    /**
+     * Creates an XML serializer that writes to the given byte stream
+     * using the given character encoding.
+     *
+     * @param stream output stream
+     * @param encoding output encoding
+     * @throws UnsupportedEncodingException if the encoding is unsupported
+     */
+    public ToXMLContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        super(stream, encoding);
+        this.encoding = encoding;
+    }
+
+    /**
+     * Creates an XML serializer that collects its output in an internal
+     * string buffer and names the given encoding in the XML declaration.
+     *
+     * @param encoding encoding name to put in the XML declaration
+     */
+    public ToXMLContentHandler(String encoding) {
+        super();
+        this.encoding = encoding;
+    }
+
+    /**
+     * Creates an XML serializer that collects its output in an internal
+     * string buffer and writes no XML declaration.
+     */
+    public ToXMLContentHandler() {
+        super();
+        this.encoding = null;
+    }
+
+    /**
+     * Writes the XML declaration, if an encoding was given, and resets
+     * the namespace bookkeeping.
+     */
+    @Override
+    public void startDocument() throws SAXException {
+        if (encoding != null) {
+            write("<?xml version=\"1.0\" encoding=\"");
+            write(encoding);
+            write("\"?>\n");
+        }
+
+        currentElement = null;
+        namespaces.clear();
+    }
+
+    /**
+     * Remembers the mapping so that it can be declared on the next
+     * start tag.
+     */
+    @Override
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        namespaces.put(uri, prefix);
+    }
+
+    /**
+     * Writes the start tag with its attributes and pending namespace
+     * declarations. The closing bracket is deferred until the next event
+     * shows whether the element is empty.
+     */
+    @Override
+    public void startElement(
+            String uri, String localName, String qName, Attributes atts)
+            throws SAXException {
+        lazyCloseStartElement();
+
+        // Enter the scope of the new element
+        currentElement = new ElementInfo(currentElement, namespaces);
+
+        write('<');
+        write(currentElement.getQName(uri, localName));
+
+        for (int i = 0; i < atts.getLength(); i++) {
+            write(' ');
+            write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
+            write('=');
+            write('"');
+            char[] ch = atts.getValue(i).toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+
+        for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+            write(' ');
+            write("xmlns");
+            String prefix = entry.getValue();
+            if (prefix.length() > 0) {
+                write(':');
+                write(prefix);
+            }
+            write('=');
+            write('"');
+            char[] ch = entry.getKey().toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+        namespaces.clear();
+
+        inStartElement = true;
+    }
+
+    /**
+     * Writes the end tag, or completes the pending start tag as an
+     * empty-element tag, and leaves the namespace scope of the element.
+     */
+    @Override
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (inStartElement) {
+            write(" />");
+            inStartElement = false;
+        } else {
+            write("</");
+            write(qName);
+            write('>');
+        }
+
+        namespaces.clear();
+
+        // Return to the scope of the parent element. Without this the
+        // declarations of closed elements would stay visible to later
+        // siblings and the scope chain would grow with every element.
+        // The null check tolerates an unbalanced end event.
+        if (currentElement != null) {
+            currentElement = currentElement.parent;
+        }
+    }
+
+    /**
+     * Writes the given characters as element content with the XML meta
+     * characters escaped.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        lazyCloseStartElement();
+        writeEscaped(ch, start, start + length, false);
+    }
+
+    /**
+     * Terminates the pending start tag, if any.
+     *
+     * @throws SAXException if the closing bracket could not be written
+     */
+    private void lazyCloseStartElement() throws SAXException {
+        if (inStartElement) {
+            write('>');
+            inStartElement = false;
+        }
+    }
+
+    /**
+     * Writes the given character as-is.
+     *
+     * @param ch character to be written
+     * @throws SAXException if the character could not be written
+     */
+    protected void write(char ch) throws SAXException {
+        // Call the superclass directly to bypass the escaping done by
+        // the characters() override in this class
+        super.characters(new char[] { ch }, 0, 1);
+    }
+
+    /**
+     * Writes the given string of characters as-is.
+     *
+     * @param string string of characters to be written
+     * @throws SAXException if the character string could not be written
+     */
+    protected void write(String string) throws SAXException {
+        super.characters(string.toCharArray(), 0, string.length());
+    }
+
+    /**
+     * Writes the given characters as-is followed by the given entity.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param entity entity code
+     * @return next position in the array,
+     *         after the characters plus one entity
+     * @throws SAXException if the characters could not be written
+     */
+    private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
+            throws SAXException {
+        super.characters(ch, from, to - from);
+        write('&');
+        write(entity);
+        write(';');
+        return to + 1;
+    }
+
+    /**
+     * Writes the given characters with XML meta characters escaped.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param attribute whether the characters should be escaped as
+     *                  an attribute value or normal character content
+     * @throws SAXException if the characters could not be written
+     */
+    private void writeEscaped(char[] ch, int from, int to, boolean attribute)
+            throws SAXException {
+        // "from" marks the start of the run of plain characters that has
+        // not been written yet; "pos" scans ahead for the next meta character
+        int pos = from;
+        while (pos < to) {
+            if (ch[pos] == '<') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "lt");
+            } else if (ch[pos] == '>') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "gt");
+            } else if (ch[pos] == '&') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "amp");
+            } else if (attribute && ch[pos] == '"') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "quot");
+            } else {
+                pos++;
+            }
+        }
+        // Flush the trailing run of plain characters
+        super.characters(ch, from, to - from);
+    }
+
+}
Propchange: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
------------------------------------------------------------------------------
svn:executable = *
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java?rev=1175353&r1=1175352&r2=1175353&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/WriteOutContentHandler.java Sun Sep 25 09:42:05 2011
@@ -16,25 +16,26 @@
*/
package org.apache.tika.sax;
-import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.io.Serializable;
import java.io.StringWriter;
import java.io.Writer;
+import java.util.UUID;
+import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
/**
- * SAX event handler that writes all character content out to
- * a {@link Writer} character stream.
+ * SAX event handler that writes content up to an optional write
+ * limit out to a character stream or other decorated handler.
*/
-public class WriteOutContentHandler extends DefaultHandler {
+public class WriteOutContentHandler extends ContentHandlerDecorator {
/**
- * The character stream.
+ * The unique tag associated with exceptions thrown by this handler.
*/
- private final Writer writer;
+ private final Serializable tag = UUID.randomUUID();
/**
* The maximum number of characters to write to the character stream.
@@ -47,12 +48,32 @@ public class WriteOutContentHandler exte
*/
private int writeCount = 0;
- private WriteOutContentHandler(Writer writer, int writeLimit) {
- this.writer = writer;
+ /**
+ * Creates a content handler that writes content up to the given
+ * write limit to the given content handler.
+ *
+ * @since Apache Tika 1.0
+ * @param handler content handler to be decorated
+ * @param writeLimit write limit
+ */
+ public WriteOutContentHandler(ContentHandler handler, int writeLimit) {
+ super(handler);
this.writeLimit = writeLimit;
}
/**
+ * Creates a content handler that writes content up to the given
+ * write limit to the given character stream.
+ *
+ * @since Apache Tika 1.0
+ * @param writer character stream
+ * @param writeLimit write limit
+ */
+ public WriteOutContentHandler(Writer writer, int writeLimit) {
+ this(new ToTextContentHandler(writer), writeLimit);
+ }
+
+ /**
* Creates a content handler that writes character events to
* the given writer.
*
@@ -110,65 +131,22 @@ public class WriteOutContentHandler exte
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
- try {
- if (writeLimit == -1 || writeCount + length <= writeLimit) {
- writer.write(ch, start, length);
- writeCount += length;
- } else {
- writer.write(ch, start, writeLimit - writeCount);
- writeCount = writeLimit;
- throw new WriteLimitReachedException(
- "Your document contained more than " + writeLimit + " " +
- "characters, and so your requested limit has been " +
- "reached. To receive the full text of the document, " +
- "increase your limit. " +
- "(Text up to the limit is however available)."
- );
- }
- } catch (IOException e) {
- throw new SAXException("Error writing out character content", e);
- }
- }
-
-
- /**
- * Writes the given ignorable characters to the given character stream.
- */
- @Override
- public void ignorableWhitespace(char[] ch, int start, int length)
- throws SAXException {
- characters(ch, start, length);
- }
-
- /**
- * Flushes the character stream so that no characters are forgotten
- * in internal buffers.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
- * @throws SAXException if the stream can not be flushed
- */
- @Override
- public void endDocument() throws SAXException {
- try {
- writer.flush();
- } catch (IOException e) {
- throw new SAXException("Error flushing character output", e);
+ if (writeLimit == -1 || writeCount + length <= writeLimit) {
+ super.characters(ch, start, length);
+ writeCount += length;
+ } else {
+ super.characters(ch, start, writeLimit - writeCount);
+ writeCount = writeLimit;
+ throw new WriteLimitReachedException(
+ "Your document contained more than " + writeLimit
+ + " characters, and so your requested limit has been"
+ + " reached. To receive the full text of the document,"
+ + " increase your limit. (Text up to the limit is"
+ + " however available).", tag);
}
}
/**
- * Returns the contents of the internal string buffer where
- * all the received characters have been collected. Only works
- * when this object was constructed using the empty default
- * constructor or by passing a {@link StringWriter} to the
- * other constructor.
- */
- @Override
- public String toString() {
- return writer.toString();
- }
-
- /**
* Checks whether the given exception (or any of its root causes) was
* thrown by this handler as a signal of reaching the write limit.
*
@@ -179,7 +157,7 @@ public class WriteOutContentHandler exte
*/
public boolean isWriteLimitReached(Throwable t) {
if (t instanceof WriteLimitReachedException) {
- return this == ((WriteLimitReachedException) t).getSource();
+ return tag.equals(((WriteLimitReachedException) t).tag);
} else {
return t.getCause() != null && isWriteLimitReached(t.getCause());
}
@@ -188,13 +166,17 @@ public class WriteOutContentHandler exte
/**
* The exception used as a signal when the write limit has been reached.
*/
- private class WriteLimitReachedException extends SAXException {
- public WriteLimitReachedException(String message) {
- super(message);
- }
+ private static class WriteLimitReachedException extends SAXException {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = -1850581945459429943L;
- public WriteOutContentHandler getSource() {
- return WriteOutContentHandler.this;
+ /** Serializable tag of the handler that caused this exception */
+ private final Serializable tag;
+
+ public WriteLimitReachedException(String message, Serializable tag) {
+ super(message);
+ this.tag = tag;
}
}
Added: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java?rev=1175353&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java Sun Sep 25 09:42:05 2011
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import junit.framework.TestCase;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Tests the output of the {@link ToTextContentHandler} and
+ * {@link ToXMLContentHandler} serializers for individual SAX events.
+ */
+public class SerializerTest extends TestCase {
+
+    /**
+     * The text serializer is expected to output character content
+     * verbatim and to ignore all markup events.
+     */
+    public void testToTextContentHandler() throws Exception {
+        assertStartDocument("", new ToTextContentHandler());
+        assertCharacters("content", new ToTextContentHandler());
+        assertCharacterEscaping("<&\">", new ToTextContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
+        assertEmptyElement("", new ToTextContentHandler());
+        assertEmptyElementWithAttributes("", new ToTextContentHandler());
+        assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
+        assertElement("content", new ToTextContentHandler());
+        assertElementWithAttributes("content", new ToTextContentHandler());
+    }
+
+    /**
+     * The XML serializer is expected to output markup and to escape XML
+     * meta characters; the double quote is escaped in attribute values
+     * only.
+     */
+    public void testToXMLContentHandler() throws Exception {
+        assertStartDocument("", new ToXMLContentHandler());
+        assertStartDocument(
+                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
+                new ToXMLContentHandler("UTF-8"));
+        assertCharacters("content", new ToXMLContentHandler());
+        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
+        assertEmptyElement("<p />", new ToXMLContentHandler());
+        assertEmptyElementWithAttributes(
+                "<p class=\"test\" />",
+                new ToXMLContentHandler());
+        assertEmptyElementWithAttributeEscaping(
+                "<p class=\"&lt;&amp;&quot;&gt;\" />",
+                new ToXMLContentHandler());
+        assertElement("<p>content</p>", new ToXMLContentHandler());
+        assertElementWithAttributes(
+                "<p class=\"test\">content</p>",
+                new ToXMLContentHandler());
+    }
+
+    /** Sends a start document event and checks the serialized output. */
+    private void assertStartDocument(String expected, ContentHandler handler)
+            throws Exception {
+        handler.startDocument();
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends plain character content and checks the serialized output. */
+    private void assertCharacters(String expected, ContentHandler handler)
+            throws Exception {
+        handler.characters("content".toCharArray(), 0, 7);
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends XML meta characters as content and checks the output. */
+    private void assertCharacterEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        handler.characters("<&\">".toCharArray(), 0, 4);
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends ignorable whitespace and checks the serialized output. */
+    private void assertIgnorableWhitespace(
+            String expected, ContentHandler handler) throws Exception {
+        handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4);
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends an empty element without attributes and checks the output. */
+    private void assertEmptyElement(String expected, ContentHandler handler)
+            throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends an empty element with one attribute and checks the output. */
+    private void assertEmptyElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "test");
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    /**
+     * Sends an empty element whose attribute value consists of XML meta
+     * characters and checks the serialized output.
+     */
+    private void assertEmptyElementWithAttributeEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "<&\">");
+        handler.startElement("", "p", "p", attributes);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Sends an element with text content and checks the output. */
+    private void assertElement(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        handler.startElement("", "p", "p", attributes);
+        handler.characters("content".toCharArray(), 0, 7);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+    /**
+     * Sends an element with one attribute and text content and checks
+     * the serialized output.
+     */
+    private void assertElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", "class", "class", "CDATA", "test");
+        handler.startElement("", "p", "p", attributes);
+        handler.characters("content".toCharArray(), 0, 7);
+        handler.endElement("", "p", "p");
+        assertEquals(expected, handler.toString());
+    }
+
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
------------------------------------------------------------------------------
svn:executable = *