You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/05/13 15:49:37 UTC
svn commit: r1679211 [2/7] - in /tika/trunk:
tika-app/src/main/java/org/apache/tika/cli/ tika-app/src/main/resources/
tika-app/src/test/java/org/apache/tika/cli/ tika-app/src/test/resources/
tika-batch/src/main/java/org/apache/tika/batch/ tika-batch/sr...
Modified: tika/trunk/tika-batch/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j.properties Wed May 13 13:49:36 2015
@@ -1,22 +1,22 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-log4j.rootLogger=OFF
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
log4j.appender.A1=org.apache.log4j.ConsoleAppender
Modified: tika/trunk/tika-batch/src/test/resources/log4j_process.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j_process.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j_process.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j_process.properties Wed May 13 13:49:36 2015
@@ -1,24 +1,24 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#This is used by the batch process; see log4j.properties for the driver
-
-log4j.rootLogger=OFF
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#This is used by the batch process; see log4j.properties for the driver
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
log4j.appender.A1=org.apache.log4j.ConsoleAppender
Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml Wed May 13 13:49:36 2015
@@ -103,10 +103,10 @@
<outputstream class="FSOutputStreamFactory"
encoding="UTF-8" outputSuffix="xml"/>
- </consumers>
-
- <!-- reporter and interrupter are optional -->
- <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
- reporterStaleThresholdMillis="500000"/>
- <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+ </consumers>
+
+ <!-- reporter and interrupter are optional -->
+ <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+ reporterStaleThresholdMillis="500000"/>
+ <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
</tika-batch-config>
\ No newline at end of file
Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml Wed May 13 13:49:36 2015
@@ -96,10 +96,10 @@
<outputstream class="FSOutputStreamFactory"
encoding="UTF-8" outputSuffix="xml"/>
- </consumers>
-
- <!-- reporter and interrupter are optional -->
- <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
- reporterStaleThresholdMillis="500000"/>
- <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+ </consumers>
+
+ <!-- reporter and interrupter are optional -->
+ <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+ reporterStaleThresholdMillis="500000"/>
+ <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
</tika-batch-config>
\ No newline at end of file
Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml Wed May 13 13:49:36 2015
@@ -102,10 +102,10 @@
<outputstream class="FSOutputStreamFactory"
encoding="UTF-8" outputSuffix="xml"/>
- </consumers>
-
- <!-- reporter and interrupter are optional -->
- <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
- reporterStaleThresholdMillis="500000"/>
- <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+ </consumers>
+
+ <!-- reporter and interrupter are optional -->
+ <reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+ reporterStaleThresholdMillis="500000"/>
+ <interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
</tika-batch-config>
\ No newline at end of file
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java Wed May 13 13:49:36 2015
@@ -1,40 +1,40 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.exception;
-
-/**
- * Exception to be thrown when a document does not allow content extraction.
- * As of this writing, PDF documents are the only type of document that might
- * cause this type of exception.
- */
-public class AccessPermissionException extends TikaException {
- public AccessPermissionException() {
- super("Unable to process: content extraction is not allowed");
- }
-
- public AccessPermissionException(Throwable th) {
- super("Unable to process: content extraction is not allowed", th);
- }
-
- public AccessPermissionException(String info) {
- super(info);
- }
-
- public AccessPermissionException(String info, Throwable th) {
- super(info, th);
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.exception;
+
+/**
+ * Exception to be thrown when a document does not allow content extraction.
+ * As of this writing, PDF documents are the only type of document that might
+ * cause this type of exception.
+ */
+public class AccessPermissionException extends TikaException {
+ public AccessPermissionException() {
+ super("Unable to process: content extraction is not allowed");
+ }
+
+ public AccessPermissionException(Throwable th) {
+ super("Unable to process: content extraction is not allowed", th);
+ }
+
+ public AccessPermissionException(String info) {
+ super(info);
+ }
+
+ public AccessPermissionException(String info, Throwable th) {
+ super(info, th);
+ }
+}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java Wed May 13 13:49:36 2015
@@ -1,71 +1,71 @@
-package org.apache.tika.metadata;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Until we can find a common standard, we'll use these options. They
- * were mostly derived from PDFBox's AccessPermission, but some can
- * apply to other document formats, especially CAN_MODIFY and FILL_IN_FORM.
- */
-public interface AccessPermissions {
-
- final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
-
- /**
- * Can any modifications be made to the document
- */
- Property CAN_MODIFY = Property.externalTextBag(PREFIX+"can_modify");
-
- /**
- * Should content be extracted, generally.
- */
- Property EXTRACT_CONTENT = Property.externalText(PREFIX+"extract_content");
-
- /**
- * Should content be extracted for the purposes
- * of accessibility.
- */
- Property EXTRACT_FOR_ACCESSIBILITY = Property.externalText(PREFIX + "extract_for_accessibility");
-
- /**
- * Can the user insert/rotate/delete pages.
- */
- Property ASSEMBLE_DOCUMENT = Property.externalText(PREFIX+"assemble_document");
-
-
- /**
- * Can the user fill in a form
- */
- Property FILL_IN_FORM = Property.externalText(PREFIX+"fill_in_form");
-
- /**
- * Can the user modify annotations
- */
- Property CAN_MODIFY_ANNOTATIONS = Property.externalText(PREFIX+"modify_annotations");
-
- /**
- * Can the user print the document
- */
- Property CAN_PRINT = Property.externalText(PREFIX+"can_print");
-
- /**
- * Can the user print an image-degraded version of the document.
- */
- Property CAN_PRINT_DEGRADED = Property.externalText(PREFIX+"can_print_degraded");
-
-}
+package org.apache.tika.metadata;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Until we can find a common standard, we'll use these options. They
+ * were mostly derived from PDFBox's AccessPermission, but some can
+ * apply to other document formats, especially CAN_MODIFY and FILL_IN_FORM.
+ */
+public interface AccessPermissions {
+
+ final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+
+ /**
+ * Can any modifications be made to the document
+ */
+ Property CAN_MODIFY = Property.externalTextBag(PREFIX+"can_modify");
+
+ /**
+ * Should content be extracted, generally.
+ */
+ Property EXTRACT_CONTENT = Property.externalText(PREFIX+"extract_content");
+
+ /**
+ * Should content be extracted for the purposes
+ * of accessibility.
+ */
+ Property EXTRACT_FOR_ACCESSIBILITY = Property.externalText(PREFIX + "extract_for_accessibility");
+
+ /**
+ * Can the user insert/rotate/delete pages.
+ */
+ Property ASSEMBLE_DOCUMENT = Property.externalText(PREFIX+"assemble_document");
+
+
+ /**
+ * Can the user fill in a form
+ */
+ Property FILL_IN_FORM = Property.externalText(PREFIX+"fill_in_form");
+
+ /**
+ * Can the user modify annotations
+ */
+ Property CAN_MODIFY_ANNOTATIONS = Property.externalText(PREFIX+"modify_annotations");
+
+ /**
+ * Can the user print the document
+ */
+ Property CAN_PRINT = Property.externalText(PREFIX+"can_print");
+
+ /**
+ * Can the user print an image-degraded version of the document.
+ */
+ Property CAN_PRINT_DEGRADED = Property.externalText(PREFIX+"can_print_degraded");
+
+}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java Wed May 13 13:49:36 2015
@@ -1,70 +1,70 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.xml.sax.SAXException;
-
-/**
- * SAX event handler that serializes the HTML document to a character stream.
- * The incoming SAX events are expected to be well-formed (properly nested,
- * etc.) and valid HTML.
- *
- * @since Apache Tika 0.10
- */
-public class ToHTMLContentHandler extends ToXMLContentHandler {
-
- private static final Set<String> EMPTY_ELEMENTS =
- new HashSet<String>(Arrays.asList(
- "area", "base", "basefont", "br", "col", "frame", "hr",
- "img", "input", "isindex", "link", "meta", "param"));
-
- public ToHTMLContentHandler(OutputStream stream, String encoding)
- throws UnsupportedEncodingException {
- super(stream, encoding);
- }
-
- public ToHTMLContentHandler() {
- super();
- }
-
- @Override
- public void startDocument() throws SAXException {
- }
-
- @Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- if (inStartElement) {
- write('>');
- inStartElement = false;
-
- if (EMPTY_ELEMENTS.contains(localName)) {
- namespaces.clear();
- return;
- }
- }
-
- super.endElement(uri, localName, qName);
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the HTML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and valid HTML.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToHTMLContentHandler extends ToXMLContentHandler {
+
+ private static final Set<String> EMPTY_ELEMENTS =
+ new HashSet<String>(Arrays.asList(
+ "area", "base", "basefont", "br", "col", "frame", "hr",
+ "img", "input", "isindex", "link", "meta", "param"));
+
+ public ToHTMLContentHandler(OutputStream stream, String encoding)
+ throws UnsupportedEncodingException {
+ super(stream, encoding);
+ }
+
+ public ToHTMLContentHandler() {
+ super();
+ }
+
+ @Override
+ public void startDocument() throws SAXException {
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName)
+ throws SAXException {
+ if (inStartElement) {
+ write('>');
+ inStartElement = false;
+
+ if (EMPTY_ELEMENTS.contains(localName)) {
+ namespaces.clear();
+ return;
+ }
+ }
+
+ super.endElement(uri, localName, qName);
+ }
+
+}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java Wed May 13 13:49:36 2015
@@ -1,140 +1,140 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
-import java.io.Writer;
-import java.nio.charset.Charset;
-
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * SAX event handler that writes all character content out to a character
- * stream. No escaping or other transformations are made on the character
- * content.
- *
- * @since Apache Tika 0.10
- */
-public class ToTextContentHandler extends DefaultHandler {
-
- /**
- * The character stream.
- */
- private final Writer writer;
-
- /**
- * Creates a content handler that writes character events to
- * the given writer.
- *
- * @param writer writer
- */
- public ToTextContentHandler(Writer writer) {
- this.writer = writer;
- }
-
- /**
- * Creates a content handler that writes character events to
- * the given output stream using the platform default encoding.
- *
- * @param stream output stream
- */
- public ToTextContentHandler(OutputStream stream) {
- this(new OutputStreamWriter(stream, Charset.defaultCharset()));
- }
-
- /**
- * Creates a content handler that writes character events to
- * the given output stream using the given encoding.
- *
- * @param stream output stream
- * @param encoding output encoding
- * @throws UnsupportedEncodingException if the encoding is unsupported
- */
- public ToTextContentHandler(OutputStream stream, String encoding)
- throws UnsupportedEncodingException {
- this(new OutputStreamWriter(stream, encoding));
- }
-
- /**
- * Creates a content handler that writes character events
- * to an internal string buffer. Use the {@link #toString()}
- * method to access the collected character content.
- */
- public ToTextContentHandler() {
- this(new StringWriter());
- }
-
- /**
- * Writes the given characters to the given character stream.
- */
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- try {
- writer.write(ch, start, length);
- } catch (IOException e) {
- throw new SAXException(
- "Error writing: " + new String(ch, start, length), e);
- }
- }
-
-
- /**
- * Writes the given ignorable characters to the given character stream.
- * The default implementation simply forwards the call to the
- * {@link #characters(char[], int, int)} method.
- */
- @Override
- public void ignorableWhitespace(char[] ch, int start, int length)
- throws SAXException {
- characters(ch, start, length);
- }
-
- /**
- * Flushes the character stream so that no characters are forgotten
- * in internal buffers.
- *
- * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
- * @throws SAXException if the stream can not be flushed
- */
- @Override
- public void endDocument() throws SAXException {
- try {
- writer.flush();
- } catch (IOException e) {
- throw new SAXException("Error flushing character output", e);
- }
- }
-
- /**
- * Returns the contents of the internal string buffer where
- * all the received characters have been collected. Only works
- * when this object was constructed using the empty default
- * constructor or by passing a {@link StringWriter} to the
- * other constructor.
- */
- @Override
- public String toString() {
- return writer.toString();
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX event handler that writes all character content out to a character
+ * stream. No escaping or other transformations are made on the character
+ * content.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToTextContentHandler extends DefaultHandler {
+
+ /**
+ * The character stream.
+ */
+ private final Writer writer;
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given writer.
+ *
+ * @param writer writer
+ */
+ public ToTextContentHandler(Writer writer) {
+ this.writer = writer;
+ }
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given output stream using the platform default encoding.
+ *
+ * @param stream output stream
+ */
+ public ToTextContentHandler(OutputStream stream) {
+ this(new OutputStreamWriter(stream, Charset.defaultCharset()));
+ }
+
+ /**
+ * Creates a content handler that writes character events to
+ * the given output stream using the given encoding.
+ *
+ * @param stream output stream
+ * @param encoding output encoding
+ * @throws UnsupportedEncodingException if the encoding is unsupported
+ */
+ public ToTextContentHandler(OutputStream stream, String encoding)
+ throws UnsupportedEncodingException {
+ this(new OutputStreamWriter(stream, encoding));
+ }
+
+ /**
+ * Creates a content handler that writes character events
+ * to an internal string buffer. Use the {@link #toString()}
+ * method to access the collected character content.
+ */
+ public ToTextContentHandler() {
+ this(new StringWriter());
+ }
+
+ /**
+ * Writes the given characters to the given character stream.
+ */
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ try {
+ writer.write(ch, start, length);
+ } catch (IOException e) {
+ throw new SAXException(
+ "Error writing: " + new String(ch, start, length), e);
+ }
+ }
+
+
+ /**
+ * Writes the given ignorable characters to the given character stream.
+ * The default implementation simply forwards the call to the
+ * {@link #characters(char[], int, int)} method.
+ */
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ characters(ch, start, length);
+ }
+
+ /**
+ * Flushes the character stream so that no characters are forgotten
+ * in internal buffers.
+ *
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+ * @throws SAXException if the stream can not be flushed
+ */
+ @Override
+ public void endDocument() throws SAXException {
+ try {
+ writer.flush();
+ } catch (IOException e) {
+ throw new SAXException("Error flushing character output", e);
+ }
+ }
+
+ /**
+ * Returns the contents of the internal string buffer where
+ * all the received characters have been collected. Only works
+ * when this object was constructed using the empty default
+ * constructor or by passing a {@link StringWriter} to the
+ * other constructor.
+ */
+ @Override
+ public String toString() {
+ return writer.toString();
+ }
+
+}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java Wed May 13 13:49:36 2015
@@ -1,281 +1,281 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-
-/**
- * SAX event handler that serializes the XML document to a character stream.
- * The incoming SAX events are expected to be well-formed (properly nested,
- * etc.) and to explicitly include namespace declaration attributes and
- * corresponding namespace prefixes in element and attribute names.
- *
- * @since Apache Tika 0.10
- */
-public class ToXMLContentHandler extends ToTextContentHandler {
-
- private static class ElementInfo {
-
- private final ElementInfo parent;
-
- private final Map<String, String> namespaces;
-
- public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
- this.parent = parent;
- if (namespaces.isEmpty()) {
- this.namespaces = Collections.emptyMap();
- } else {
- this.namespaces = new HashMap<String, String>(namespaces);
- }
- }
-
- public String getPrefix(String uri) throws SAXException {
- String prefix = namespaces.get(uri);
- if (prefix != null) {
- return prefix;
- } else if (parent != null) {
- return parent.getPrefix(uri);
- } else if (uri == null || uri.length() == 0) {
- return "";
- } else {
- throw new SAXException("Namespace " + uri + " not declared");
- }
- }
-
- public String getQName(String uri, String localName)
- throws SAXException {
- String prefix = getPrefix(uri);
- if (prefix.length() > 0) {
- return prefix + ":" + localName;
- } else {
- return localName;
- }
- }
-
- }
-
- private final String encoding;
-
- protected boolean inStartElement = false;
-
- protected final Map<String, String> namespaces =
- new HashMap<String, String>();
-
- private ElementInfo currentElement;
-
- /**
- * Creates an XML serializer that writes to the given byte stream
- * using the given character encoding.
- *
- * @param stream output stream
- * @param encoding output encoding
- * @throws UnsupportedEncodingException if the encoding is unsupported
- */
- public ToXMLContentHandler(OutputStream stream, String encoding)
- throws UnsupportedEncodingException {
- super(stream, encoding);
- this.encoding = encoding;
- }
-
- public ToXMLContentHandler(String encoding) {
- super();
- this.encoding = encoding;
- }
-
- public ToXMLContentHandler() {
- super();
- this.encoding = null;
- }
-
- /**
- * Writes the XML prefix.
- */
- @Override
- public void startDocument() throws SAXException {
- if (encoding != null) {
- write("<?xml version=\"1.0\" encoding=\"");
- write(encoding);
- write("\"?>\n");
- }
-
- currentElement = null;
- namespaces.clear();
- }
-
- @Override
- public void startPrefixMapping(String prefix, String uri)
- throws SAXException {
- try {
- if (currentElement != null
- && prefix.equals(currentElement.getPrefix(uri))) {
- return;
- }
- } catch (SAXException ignore) {
- }
- namespaces.put(uri, prefix);
- }
-
- @Override
- public void startElement(
- String uri, String localName, String qName, Attributes atts)
- throws SAXException {
- lazyCloseStartElement();
-
- currentElement = new ElementInfo(currentElement, namespaces);
-
- write('<');
- write(currentElement.getQName(uri, localName));
-
- for (int i = 0; i < atts.getLength(); i++) {
- write(' ');
- write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
- write('=');
- write('"');
- char[] ch = atts.getValue(i).toCharArray();
- writeEscaped(ch, 0, ch.length, true);
- write('"');
- }
-
- for (Map.Entry<String, String> entry : namespaces.entrySet()) {
- write(' ');
- write("xmlns");
- String prefix = entry.getValue();
- if (prefix.length() > 0) {
- write(':');
- write(prefix);
- }
- write('=');
- write('"');
- char[] ch = entry.getKey().toCharArray();
- writeEscaped(ch, 0, ch.length, true);
- write('"');
- }
- namespaces.clear();
-
- inStartElement = true;
- }
-
- @Override
- public void endElement(String uri, String localName, String qName)
- throws SAXException {
- if (inStartElement) {
- write(" />");
- inStartElement = false;
- } else {
- write("</");
- write(qName);
- write('>');
- }
-
- namespaces.clear();
-
- // Reset the position in the tree, to avoid endless stack overflow
- // chains (see TIKA-1070)
- currentElement = currentElement.parent;
- }
-
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- lazyCloseStartElement();
- writeEscaped(ch, start, start + length, false);
- }
-
- private void lazyCloseStartElement() throws SAXException {
- if (inStartElement) {
- write('>');
- inStartElement = false;
- }
- }
-
- /**
- * Writes the given character as-is.
- *
- * @param ch character to be written
- * @throws SAXException if the character could not be written
- */
- protected void write(char ch) throws SAXException {
- super.characters(new char[] { ch }, 0, 1);
- }
-
- /**
- * Writes the given string of character as-is.
- *
- * @param string string of character to be written
- * @throws SAXException if the character string could not be written
- */
- protected void write(String string) throws SAXException {
- super.characters(string.toCharArray(), 0, string.length());
- }
-
- /**
- * Writes the given characters as-is followed by the given entity.
- *
- * @param ch character array
- * @param from start position in the array
- * @param to end position in the array
- * @param entity entity code
- * @return next position in the array,
- * after the characters plus one entity
- * @throws SAXException if the characters could not be written
- */
- private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
- throws SAXException {
- super.characters(ch, from, to - from);
- write('&');
- write(entity);
- write(';');
- return to + 1;
- }
-
- /**
- * Writes the given characters with XML meta characters escaped.
- *
- * @param ch character array
- * @param from start position in the array
- * @param to end position in the array
- * @param attribute whether the characters should be escaped as
- * an attribute value or normal character content
- * @throws SAXException if the characters could not be written
- */
- private void writeEscaped(char[] ch, int from, int to, boolean attribute)
- throws SAXException {
- int pos = from;
- while (pos < to) {
- if (ch[pos] == '<') {
- from = pos = writeCharsAndEntity(ch, from, pos, "lt");
- } else if (ch[pos] == '>') {
- from = pos = writeCharsAndEntity(ch, from, pos, "gt");
- } else if (ch[pos] == '&') {
- from = pos = writeCharsAndEntity(ch, from, pos, "amp");
- } else if (attribute && ch[pos] == '"') {
- from = pos = writeCharsAndEntity(ch, from, pos, "quot");
- } else {
- pos++;
- }
- }
- super.characters(ch, from, to - from);
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the XML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and to explicitly include namespace declaration attributes and
+ * corresponding namespace prefixes in element and attribute names.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToXMLContentHandler extends ToTextContentHandler {
+
+ private static class ElementInfo {
+
+ private final ElementInfo parent;
+
+ private final Map<String, String> namespaces;
+
+ public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
+ this.parent = parent;
+ if (namespaces.isEmpty()) {
+ this.namespaces = Collections.emptyMap();
+ } else {
+ this.namespaces = new HashMap<String, String>(namespaces);
+ }
+ }
+
+ public String getPrefix(String uri) throws SAXException {
+ String prefix = namespaces.get(uri);
+ if (prefix != null) {
+ return prefix;
+ } else if (parent != null) {
+ return parent.getPrefix(uri);
+ } else if (uri == null || uri.length() == 0) {
+ return "";
+ } else {
+ throw new SAXException("Namespace " + uri + " not declared");
+ }
+ }
+
+ public String getQName(String uri, String localName)
+ throws SAXException {
+ String prefix = getPrefix(uri);
+ if (prefix.length() > 0) {
+ return prefix + ":" + localName;
+ } else {
+ return localName;
+ }
+ }
+
+ }
+
+ private final String encoding;
+
+ protected boolean inStartElement = false;
+
+ protected final Map<String, String> namespaces =
+ new HashMap<String, String>();
+
+ private ElementInfo currentElement;
+
+ /**
+ * Creates an XML serializer that writes to the given byte stream
+ * using the given character encoding.
+ *
+ * @param stream output stream
+ * @param encoding output encoding
+ * @throws UnsupportedEncodingException if the encoding is unsupported
+ */
+ public ToXMLContentHandler(OutputStream stream, String encoding)
+ throws UnsupportedEncodingException {
+ super(stream, encoding);
+ this.encoding = encoding;
+ }
+
+ public ToXMLContentHandler(String encoding) {
+ super();
+ this.encoding = encoding;
+ }
+
+ public ToXMLContentHandler() {
+ super();
+ this.encoding = null;
+ }
+
+ /**
+ * Writes the XML declaration (only when an encoding was given) and resets the element and namespace state.
+ */
+ @Override
+ public void startDocument() throws SAXException {
+ if (encoding != null) {
+ write("<?xml version=\"1.0\" encoding=\"");
+ write(encoding);
+ write("\"?>\n");
+ }
+
+ currentElement = null;
+ namespaces.clear();
+ }
+
+ @Override
+ public void startPrefixMapping(String prefix, String uri)
+ throws SAXException {
+ try {
+ if (currentElement != null
+ && prefix.equals(currentElement.getPrefix(uri))) {
+ return; // the current element or an ancestor already binds this URI to the same prefix
+ }
+ } catch (SAXException ignore) { // getPrefix throws when the URI is not declared yet: fall through and record it
+ }
+ namespaces.put(uri, prefix); // keyed by URI; startElement writes these out as xmlns attributes
+ }
+
+ @Override
+ public void startElement(
+ String uri, String localName, String qName, Attributes atts)
+ throws SAXException {
+ lazyCloseStartElement();
+
+ currentElement = new ElementInfo(currentElement, namespaces);
+
+ write('<');
+ write(currentElement.getQName(uri, localName));
+
+ for (int i = 0; i < atts.getLength(); i++) {
+ write(' ');
+ write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
+ write('=');
+ write('"');
+ char[] ch = atts.getValue(i).toCharArray();
+ writeEscaped(ch, 0, ch.length, true);
+ write('"');
+ }
+
+ for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+ write(' ');
+ write("xmlns");
+ String prefix = entry.getValue();
+ if (prefix.length() > 0) {
+ write(':');
+ write(prefix);
+ }
+ write('=');
+ write('"');
+ char[] ch = entry.getKey().toCharArray();
+ writeEscaped(ch, 0, ch.length, true);
+ write('"');
+ }
+ namespaces.clear();
+
+ inStartElement = true;
+ }
+
+ @Override
+ public void endElement(String uri, String localName, String qName)
+ throws SAXException {
+ if (inStartElement) {
+ write(" />");
+ inStartElement = false;
+ } else {
+ write("</");
+ write(qName);
+ write('>');
+ }
+
+ namespaces.clear();
+
+ // Reset the position in the tree, to avoid endless stack overflow
+ // chains (see TIKA-1070)
+ currentElement = currentElement.parent;
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ lazyCloseStartElement();
+ writeEscaped(ch, start, start + length, false);
+ }
+
+ private void lazyCloseStartElement() throws SAXException {
+ if (inStartElement) {
+ write('>');
+ inStartElement = false;
+ }
+ }
+
+ /**
+ * Writes the given character as-is.
+ *
+ * @param ch character to be written
+ * @throws SAXException if the character could not be written
+ */
+ protected void write(char ch) throws SAXException {
+ super.characters(new char[] { ch }, 0, 1);
+ }
+
+ /**
+ * Writes the given string of characters as-is.
+ *
+ * @param string string of characters to be written
+ * @throws SAXException if the character string could not be written
+ */
+ protected void write(String string) throws SAXException {
+ super.characters(string.toCharArray(), 0, string.length());
+ }
+
+ /**
+ * Writes the characters in {@code [from, to)} as-is, then the given entity
+ * reference; the character at index {@code to} itself is not written.
+ *
+ * @param ch character array
+ * @param from start position in the array (inclusive)
+ * @param to end position in the array (exclusive)
+ * @param entity entity name, written between an ampersand and a semicolon
+ * @return {@code to + 1}, the position just after the skipped character
+ * @throws SAXException if the characters could not be written
+ */
+ private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
+ throws SAXException {
+ super.characters(ch, from, to - from);
+ write('&');
+ write(entity);
+ write(';');
+ return to + 1;
+ }
+
+ /**
+ * Writes the given characters with XML meta characters escaped.
+ *
+ * @param ch character array
+ * @param from start position in the array (inclusive)
+ * @param to end position in the array (exclusive)
+ * @param attribute whether the characters should be escaped as
+ * an attribute value or normal character content
+ * @throws SAXException if the characters could not be written
+ */
+ private void writeEscaped(char[] ch, int from, int to, boolean attribute)
+ throws SAXException {
+ int pos = from; // from marks the start of the not-yet-written run, pos scans ahead of it
+ while (pos < to) {
+ if (ch[pos] == '<') {
+ from = pos = writeCharsAndEntity(ch, from, pos, "lt");
+ } else if (ch[pos] == '>') {
+ from = pos = writeCharsAndEntity(ch, from, pos, "gt");
+ } else if (ch[pos] == '&') {
+ from = pos = writeCharsAndEntity(ch, from, pos, "amp");
+ } else if (attribute && ch[pos] == '"') { // double quotes are escaped in attribute values only
+ from = pos = writeCharsAndEntity(ch, from, pos, "quot");
+ } else {
+ pos++;
+ }
+ }
+ super.characters(ch, from, to - from); // write the remaining run after the last escaped character
+ }
+
+}
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java Wed May 13 13:49:36 2015
@@ -1,301 +1,301 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.w3c.dom.Document;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * This class enables mocking of parser behavior for use in testing
- * wrappers and drivers of parsers.
- * <p>
- * See resources/test-documents/mock/example.xml in tika-parsers/test for the documentation
- * of all the options for this MockParser.
- * <p>
- * Tests for this class are in tika-parsers.
- * <p>
- * See also {@link org.apache.tika.parser.DummyParser} for another option.
- */
-
-public class MockParser extends AbstractParser {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- Set<MediaType> types = new HashSet<MediaType>();
- MediaType type = MediaType.application("mock+xml");
- types.add(type);
- return types;
- }
-
- @Override
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
- Document doc = null;
- DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
- DocumentBuilder docBuilder = null;
- try {
- docBuilder = fact.newDocumentBuilder();
- doc = docBuilder.parse(stream);
- } catch (ParserConfigurationException e) {
- throw new IOException(e);
- } catch (SAXException e) {
- throw new IOException(e);
- }
- Node root = doc.getDocumentElement();
- NodeList actions = root.getChildNodes();
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
- xhtml.startDocument();
- for (int i = 0; i < actions.getLength(); i++) {
- executeAction(actions.item(i), metadata, xhtml);
- }
- xhtml.endDocument();
- }
-
- private void executeAction(Node action, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException,
- IOException, TikaException {
-
- if (action.getNodeType() != 1) {
- return;
- }
-
- String name = action.getNodeName();
- if ("metadata".equals(name)) {
- metadata(action, metadata);
- } else if("write".equals(name)) {
- write(action, xhtml);
- } else if ("throw".equals(name)) {
- throwIt(action);
- } else if ("hang".equals(name)) {
- hang(action);
- } else if ("oom".equals(name)) {
- kabOOM();
- } else if ("print_out".equals(name) || "print_err".equals(name)){
- print(action, name);
- } else {
- throw new IllegalArgumentException("Didn't recognize mock action: "+name);
- }
- }
-
- private void print(Node action, String name) {
- String content = action.getTextContent();
- if ("print_out".equals(name)) {
- System.out.println(content);
- } else if ("print_err".equals(name)) {
- System.err.println(content);
- } else {
- throw new IllegalArgumentException("must be print_out or print_err");
- }
- }
- private void hang(Node action) {
- boolean interruptible = true;
- boolean heavy = false;
- long millis = -1;
- long pulseMillis = -1;
- NamedNodeMap attrs = action.getAttributes();
- Node iNode = attrs.getNamedItem("interruptible");
- if (iNode != null) {
- interruptible = ("true".equals(iNode.getNodeValue()));
- }
- Node hNode = attrs.getNamedItem("heavy");
- if (hNode != null) {
- heavy = ("true".equals(hNode.getNodeValue()));
- }
-
- Node mNode = attrs.getNamedItem("millis");
- if (mNode == null) {
- throw new RuntimeException("Must specify \"millis\" attribute for hang.");
- }
- String millisString = mNode.getNodeValue();
- try {
- millis = Long.parseLong(millisString);
- } catch (NumberFormatException e) {
- throw new RuntimeException("Value for \"millis\" attribute must be a long.");
- }
-
- if (heavy) {
- Node pNode = attrs.getNamedItem("pulse_millis");
- if (pNode == null) {
- throw new RuntimeException("Must specify attribute \"pulse_millis\" if the hang is \"heavy\"");
- }
- String pulseMillisString = mNode.getNodeValue();
- try {
- pulseMillis = Long.parseLong(pulseMillisString);
- } catch (NumberFormatException e) {
- throw new RuntimeException("Value for \"millis\" attribute must be a long.");
- }
- }
- if (heavy) {
- hangHeavy(millis, pulseMillis, interruptible);
- } else {
- sleep(millis, interruptible);
- }
- }
-
- private void throwIt(Node action) throws IOException,
- SAXException, TikaException {
- NamedNodeMap attrs = action.getAttributes();
- String className = attrs.getNamedItem("class").getNodeValue();
- String msg = action.getTextContent();
- throwIt(className, msg);
- }
-
- private void metadata(Node action, Metadata metadata) {
- NamedNodeMap attrs = action.getAttributes();
- //throws npe unless there is a name
- String name = attrs.getNamedItem("name").getNodeValue();
- String value = action.getTextContent();
- Node actionType = attrs.getNamedItem("action");
- if (actionType == null) {
- metadata.add(name, value);
- } else {
- if ("set".equals(actionType.getNodeValue())) {
- metadata.set(name, value);
- } else {
- metadata.add(name, value);
- }
- }
- }
-
- private void write(Node action, XHTMLContentHandler xhtml) throws SAXException {
- NamedNodeMap attrs = action.getAttributes();
- Node eNode = attrs.getNamedItem("element");
- String elementType = "p";
- if (eNode != null) {
- elementType = eNode.getTextContent();
- }
- String text = action.getTextContent();
- xhtml.startElement(elementType);
- xhtml.characters(text);
- xhtml.endElement(elementType);
- }
-
-
- private void throwIt(String className, String msg) throws IOException,
- SAXException, TikaException {
- Throwable t = null;
- if (msg == null || msg.equals("")) {
- try {
- t = (Throwable) Class.forName(className).newInstance();
- } catch (Exception e) {
- throw new RuntimeException("couldn't create throwable class:"+className, e);
- }
- } else {
- try {
- Class<?> clazz = Class.forName(className);
- Constructor<?> con = clazz.getConstructor(String.class);
- t = (Throwable) con.newInstance(msg);
- } catch (Exception e) {
- throw new RuntimeException("couldn't create throwable class:" + className, e);
- }
- }
- if (t instanceof SAXException) {
- throw (SAXException)t;
- } else if (t instanceof IOException) {
- throw (IOException) t;
- } else if (t instanceof TikaException) {
- throw (TikaException) t;
- } else if (t instanceof Error) {
- throw (Error) t;
- } else if (t instanceof RuntimeException) {
- throw (RuntimeException) t;
- } else {
- //wrap the throwable in a RuntimeException
- throw new RuntimeException(t);
- }
- }
-
- private void kabOOM() {
- List<int[]> ints = new ArrayList<int[]>();
-
- while (true) {
- int[] intArr = new int[32000];
- ints.add(intArr);
- }
- }
-
- private void hangHeavy(long maxMillis, long pulseCheckMillis, boolean interruptible) {
- //do some heavy computation and occasionally check for
- //whether time has exceeded maxMillis (see TIKA-1132 for inspiration)
- //or whether the thread was interrupted
- long start = new Date().getTime();
- int lastChecked = 0;
- while (true) {
- for (int i = 1; i < Integer.MAX_VALUE; i++) {
- for (int j = 1; j < Integer.MAX_VALUE; j++) {
- double div = (double) i / (double) j;
- lastChecked++;
- if (lastChecked > pulseCheckMillis) {
- lastChecked = 0;
- if (interruptible && Thread.currentThread().isInterrupted()) {
- return;
- }
- long elapsed = new Date().getTime()-start;
- if (elapsed > maxMillis) {
- return;
- }
- }
- }
- }
- }
- }
-
- private void sleep(long maxMillis, boolean isInterruptible) {
- long start = new Date().getTime();
- long millisRemaining = maxMillis;
- while (true) {
- try {
- Thread.sleep(millisRemaining);
- } catch (InterruptedException e) {
- if (isInterruptible) {
- return;
- }
- }
- long elapsed = new Date().getTime()-start;
- millisRemaining = maxMillis - elapsed;
- if (millisRemaining <= 0) {
- break;
- }
- }
- }
+package org.apache.tika.parser.mock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This class enables mocking of parser behavior for use in testing
+ * wrappers and drivers of parsers.
+ * <p>
+ * See resources/test-documents/mock/example.xml in tika-parsers/test for the documentation
+ * of all the options for this MockParser.
+ * <p>
+ * Tests for this class are in tika-parsers.
+ * <p>
+ * See also {@link org.apache.tika.parser.DummyParser} for another option.
+ */
+
+public class MockParser extends AbstractParser {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ Set<MediaType> types = new HashSet<MediaType>();
+ MediaType type = MediaType.application("mock+xml");
+ types.add(type);
+ return types;
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+ Document doc = null;
+ DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
+ DocumentBuilder docBuilder = null;
+ try {
+ docBuilder = fact.newDocumentBuilder();
+ doc = docBuilder.parse(stream);
+ } catch (ParserConfigurationException e) {
+ throw new IOException(e);
+ } catch (SAXException e) {
+ throw new IOException(e);
+ }
+ Node root = doc.getDocumentElement();
+ NodeList actions = root.getChildNodes();
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ xhtml.startDocument();
+ for (int i = 0; i < actions.getLength(); i++) {
+ executeAction(actions.item(i), metadata, xhtml);
+ }
+ xhtml.endDocument();
+ }
+
+ private void executeAction(Node action, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException,
+ IOException, TikaException {
+
+ if (action.getNodeType() != 1) {
+ return;
+ }
+
+ String name = action.getNodeName();
+ if ("metadata".equals(name)) {
+ metadata(action, metadata);
+ } else if("write".equals(name)) {
+ write(action, xhtml);
+ } else if ("throw".equals(name)) {
+ throwIt(action);
+ } else if ("hang".equals(name)) {
+ hang(action);
+ } else if ("oom".equals(name)) {
+ kabOOM();
+ } else if ("print_out".equals(name) || "print_err".equals(name)){
+ print(action, name);
+ } else {
+ throw new IllegalArgumentException("Didn't recognize mock action: "+name);
+ }
+ }
+
+ private void print(Node action, String name) {
+ String content = action.getTextContent();
+ if ("print_out".equals(name)) {
+ System.out.println(content);
+ } else if ("print_err".equals(name)) {
+ System.err.println(content);
+ } else {
+ throw new IllegalArgumentException("must be print_out or print_err");
+ }
+ }
+ private void hang(Node action) { // simulates a hung parser: busy loop if "heavy", plain sleep otherwise
+ boolean interruptible = true;
+ boolean heavy = false;
+ long millis = -1;
+ long pulseMillis = -1;
+ NamedNodeMap attrs = action.getAttributes();
+ Node iNode = attrs.getNamedItem("interruptible");
+ if (iNode != null) {
+ interruptible = ("true".equals(iNode.getNodeValue()));
+ }
+ Node hNode = attrs.getNamedItem("heavy");
+ if (hNode != null) {
+ heavy = ("true".equals(hNode.getNodeValue()));
+ }
+ // "millis" is mandatory for every hang
+ Node mNode = attrs.getNamedItem("millis");
+ if (mNode == null) {
+ throw new RuntimeException("Must specify \"millis\" attribute for hang.");
+ }
+ String millisString = mNode.getNodeValue();
+ try {
+ millis = Long.parseLong(millisString);
+ } catch (NumberFormatException e) {
+ throw new RuntimeException("Value for \"millis\" attribute must be a long.", e);
+ }
+ // "pulse_millis" is mandatory for a heavy hang only
+ if (heavy) {
+ Node pNode = attrs.getNamedItem("pulse_millis");
+ if (pNode == null) {
+ throw new RuntimeException("Must specify attribute \"pulse_millis\" if the hang is \"heavy\"");
+ }
+ String pulseMillisString = pNode.getNodeValue(); // was read from mNode, so pulse_millis was silently ignored
+ try {
+ pulseMillis = Long.parseLong(pulseMillisString);
+ } catch (NumberFormatException e) {
+ throw new RuntimeException("Value for \"pulse_millis\" attribute must be a long.", e);
+ }
+ }
+ if (heavy) {
+ hangHeavy(millis, pulseMillis, interruptible);
+ } else {
+ sleep(millis, interruptible);
+ }
+ }
+
+ private void throwIt(Node action) throws IOException,
+ SAXException, TikaException {
+ NamedNodeMap attrs = action.getAttributes();
+ String className = attrs.getNamedItem("class").getNodeValue();
+ String msg = action.getTextContent();
+ throwIt(className, msg);
+ }
+
+ private void metadata(Node action, Metadata metadata) {
+ NamedNodeMap attrs = action.getAttributes();
+ //throws npe unless there is a name
+ String name = attrs.getNamedItem("name").getNodeValue();
+ String value = action.getTextContent();
+ Node actionType = attrs.getNamedItem("action");
+ if (actionType == null) {
+ metadata.add(name, value);
+ } else {
+ if ("set".equals(actionType.getNodeValue())) {
+ metadata.set(name, value);
+ } else {
+ metadata.add(name, value);
+ }
+ }
+ }
+
+ private void write(Node action, XHTMLContentHandler xhtml) throws SAXException {
+ NamedNodeMap attrs = action.getAttributes();
+ Node eNode = attrs.getNamedItem("element");
+ String elementType = "p";
+ if (eNode != null) {
+ elementType = eNode.getTextContent();
+ }
+ String text = action.getTextContent();
+ xhtml.startElement(elementType);
+ xhtml.characters(text);
+ xhtml.endElement(elementType);
+ }
+
+
+ private void throwIt(String className, String msg) throws IOException,
+ SAXException, TikaException {
+ Throwable t = null;
+ if (msg == null || msg.equals("")) {
+ try {
+ t = (Throwable) Class.forName(className).newInstance();
+ } catch (Exception e) {
+ throw new RuntimeException("couldn't create throwable class:"+className, e);
+ }
+ } else {
+ try {
+ Class<?> clazz = Class.forName(className);
+ Constructor<?> con = clazz.getConstructor(String.class);
+ t = (Throwable) con.newInstance(msg);
+ } catch (Exception e) {
+ throw new RuntimeException("couldn't create throwable class:" + className, e);
+ }
+ }
+ if (t instanceof SAXException) {
+ throw (SAXException)t;
+ } else if (t instanceof IOException) {
+ throw (IOException) t;
+ } else if (t instanceof TikaException) {
+ throw (TikaException) t;
+ } else if (t instanceof Error) {
+ throw (Error) t;
+ } else if (t instanceof RuntimeException) {
+ throw (RuntimeException) t;
+ } else {
+ //wrap the throwable in a RuntimeException
+ throw new RuntimeException(t);
+ }
+ }
+
+ private void kabOOM() {
+ List<int[]> ints = new ArrayList<int[]>();
+
+ while (true) {
+ int[] intArr = new int[32000];
+ ints.add(intArr);
+ }
+ }
+
+ private void hangHeavy(long maxMillis, long pulseCheckMillis, boolean interruptible) {
+ //do some heavy computation and occasionally check for
+ //whether time has exceeded maxMillis (see TIKA-1132 for inspiration)
+ //or whether the thread was interrupted
+ long start = new Date().getTime();
+ int lastChecked = 0; // iterations since the last interrupt/elapsed-time check
+ while (true) {
+ for (int i = 1; i < Integer.MAX_VALUE; i++) {
+ for (int j = 1; j < Integer.MAX_VALUE; j++) {
+ double div = (double) i / (double) j; // result is never read; the division is only there as work
+ lastChecked++;
+ if (lastChecked > pulseCheckMillis) { // NOTE(review): despite its name, pulseCheckMillis is compared to an iteration count, not to elapsed time
+ lastChecked = 0;
+ if (interruptible && Thread.currentThread().isInterrupted()) {
+ return;
+ }
+ long elapsed = new Date().getTime()-start;
+ if (elapsed > maxMillis) {
+ return;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ private void sleep(long maxMillis, boolean isInterruptible) { // sleeps until maxMillis have elapsed; returns early on interrupt only if isInterruptible
+ long start = new Date().getTime();
+ long millisRemaining = maxMillis;
+ while (true) {
+ try {
+ Thread.sleep(millisRemaining);
+ } catch (InterruptedException e) {
+ if (isInterruptible) {
+ return; // NOTE(review): the interrupt status is not restored before returning
+ }
+ } // when not interruptible the interrupt is swallowed and the loop sleeps for the time remaining
+ long elapsed = new Date().getTime()-start;
+ millisRemaining = maxMillis - elapsed;
+ if (millisRemaining <= 0) {
+ break;
+ }
+ }
+ }
}
\ No newline at end of file
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java Wed May 13 13:49:36 2015
@@ -1,150 +1,150 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import static org.junit.Assert.assertEquals;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.helpers.AttributesImpl;
-
-public class SerializerTest {
-
- @Test
- public void testToTextContentHandler() throws Exception {
- assertStartDocument("", new ToTextContentHandler());
- assertCharacters("content", new ToTextContentHandler());
- assertCharacterEscaping("<&\">", new ToTextContentHandler());
- assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
- assertEmptyElement("", new ToTextContentHandler());
- assertEmptyElementWithAttributes("", new ToTextContentHandler());
- assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
- assertElement("content", new ToTextContentHandler());
- assertElementWithAttributes("content", new ToTextContentHandler());
- }
-
- @Test
- public void testToXMLContentHandler() throws Exception {
- assertStartDocument("", new ToXMLContentHandler());
- assertStartDocument(
- "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
- new ToXMLContentHandler("UTF-8"));
- assertCharacters("content", new ToXMLContentHandler());
- assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
- assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
- assertEmptyElement("<br />", new ToXMLContentHandler());
- assertEmptyElementWithAttributes(
- "<meta name=\"foo\" value=\"bar\" />",
- new ToXMLContentHandler());
- assertEmptyElementWithAttributeEscaping(
- "<p class=\"&lt;&amp;&quot;&gt;\" />",
- new ToXMLContentHandler());
- assertElement("<p>content</p>", new ToXMLContentHandler());
- assertElementWithAttributes(
- "<p class=\"test\">content</p>",
- new ToXMLContentHandler());
- }
-
- @Test
- public void testToHTMLContentHandler() throws Exception {
- assertStartDocument("", new ToHTMLContentHandler());
- assertCharacters("content", new ToHTMLContentHandler());
- assertCharacterEscaping("&lt;&amp;\"&gt;", new ToHTMLContentHandler());
- assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler());
- assertEmptyElement("<br>", new ToHTMLContentHandler());
- assertEmptyElementWithAttributes(
- "<meta name=\"foo\" value=\"bar\">",
- new ToHTMLContentHandler());
- assertEmptyElementWithAttributeEscaping(
- "<p class=\"&lt;&amp;&quot;&gt;\"></p>",
- new ToHTMLContentHandler());
- assertElement("<p>content</p>", new ToHTMLContentHandler());
- assertElementWithAttributes(
- "<p class=\"test\">content</p>",
- new ToHTMLContentHandler());
- }
-
- private void assertStartDocument(String expected, ContentHandler handler)
- throws Exception {
- handler.startDocument();
- assertEquals(expected, handler.toString());
- }
-
- private void assertCharacters(String expected, ContentHandler handler)
- throws Exception {
- handler.characters("content".toCharArray(), 0, 7);
- assertEquals(expected, handler.toString());
- }
-
- private void assertCharacterEscaping(
- String expected, ContentHandler handler) throws Exception {
- handler.characters("<&\">".toCharArray(), 0, 4);
- assertEquals(expected, handler.toString());
- }
-
- private void assertIgnorableWhitespace(
- String expected, ContentHandler handler) throws Exception {
- handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4);
- assertEquals(expected, handler.toString());
- }
-
- private void assertEmptyElement(String expected, ContentHandler handler)
- throws Exception {
- AttributesImpl attributes = new AttributesImpl();
- handler.startElement("", "br", "br", attributes);
- handler.endElement("", "br", "br");
- assertEquals(expected, handler.toString());
- }
-
- private void assertEmptyElementWithAttributes(
- String expected, ContentHandler handler) throws Exception {
- AttributesImpl attributes = new AttributesImpl();
- attributes.addAttribute("", "name", "name", "CDATA", "foo");
- attributes.addAttribute("", "value", "value", "CDATA", "bar");
- handler.startElement("", "meta", "meta", attributes);
- handler.endElement("", "meta", "meta");
- assertEquals(expected, handler.toString());
- }
-
- private void assertEmptyElementWithAttributeEscaping(
- String expected, ContentHandler handler) throws Exception {
- AttributesImpl attributes = new AttributesImpl();
- attributes.addAttribute("", "class", "class", "CDATA", "<&\">");
- handler.startElement("", "p", "p", attributes);
- handler.endElement("", "p", "p");
- assertEquals(expected, handler.toString());
- }
-
- private void assertElement(
- String expected, ContentHandler handler) throws Exception {
- AttributesImpl attributes = new AttributesImpl();
- handler.startElement("", "p", "p", attributes);
- handler.characters("content".toCharArray(), 0, 7);
- handler.endElement("", "p", "p");
- assertEquals(expected, handler.toString());
- }
-
- private void assertElementWithAttributes(
- String expected, ContentHandler handler) throws Exception {
- AttributesImpl attributes = new AttributesImpl();
- attributes.addAttribute("", "class", "class", "CDATA", "test");
- handler.startElement("", "p", "p", attributes);
- handler.characters("content".toCharArray(), 0, 7);
- handler.endElement("", "p", "p");
- assertEquals(expected, handler.toString());
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.AttributesImpl;
+
+public class SerializerTest {
+
+ @Test
+ public void testToTextContentHandler() throws Exception {
+ assertStartDocument("", new ToTextContentHandler());
+ assertCharacters("content", new ToTextContentHandler());
+ assertCharacterEscaping("<&\">", new ToTextContentHandler());
+ assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
+ assertEmptyElement("", new ToTextContentHandler());
+ assertEmptyElementWithAttributes("", new ToTextContentHandler());
+ assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
+ assertElement("content", new ToTextContentHandler());
+ assertElementWithAttributes("content", new ToTextContentHandler());
+ }
+
+ @Test
+ public void testToXMLContentHandler() throws Exception {
+ assertStartDocument("", new ToXMLContentHandler());
+ assertStartDocument(
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
+ new ToXMLContentHandler("UTF-8"));
+ assertCharacters("content", new ToXMLContentHandler());
+ assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
+ assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
+ assertEmptyElement("<br />", new ToXMLContentHandler());
+ assertEmptyElementWithAttributes(
+ "<meta name=\"foo\" value=\"bar\" />",
+ new ToXMLContentHandler());
+ assertEmptyElementWithAttributeEscaping(
+ "<p class=\"&lt;&amp;&quot;&gt;\" />",
+ new ToXMLContentHandler());
+ assertElement("<p>content</p>", new ToXMLContentHandler());
+ assertElementWithAttributes(
+ "<p class=\"test\">content</p>",
+ new ToXMLContentHandler());
+ }
+
+ @Test
+ public void testToHTMLContentHandler() throws Exception {
+ assertStartDocument("", new ToHTMLContentHandler());
+ assertCharacters("content", new ToHTMLContentHandler());
+ assertCharacterEscaping("&lt;&amp;\"&gt;", new ToHTMLContentHandler());
+ assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler());
+ assertEmptyElement("<br>", new ToHTMLContentHandler());
+ assertEmptyElementWithAttributes(
+ "<meta name=\"foo\" value=\"bar\">",
+ new ToHTMLContentHandler());
+ assertEmptyElementWithAttributeEscaping(
+ "<p class=\"&lt;&amp;&quot;&gt;\"></p>",
+ new ToHTMLContentHandler());
+ assertElement("<p>content</p>", new ToHTMLContentHandler());
+ assertElementWithAttributes(
+ "<p class=\"test\">content</p>",
+ new ToHTMLContentHandler());
+ }
+
+ private void assertStartDocument(String expected, ContentHandler handler)
+ throws Exception {
+ handler.startDocument();
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertCharacters(String expected, ContentHandler handler)
+ throws Exception {
+ handler.characters("content".toCharArray(), 0, 7);
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertCharacterEscaping(
+ String expected, ContentHandler handler) throws Exception {
+ handler.characters("<&\">".toCharArray(), 0, 4);
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertIgnorableWhitespace(
+ String expected, ContentHandler handler) throws Exception {
+ handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4);
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertEmptyElement(String expected, ContentHandler handler)
+ throws Exception {
+ AttributesImpl attributes = new AttributesImpl();
+ handler.startElement("", "br", "br", attributes);
+ handler.endElement("", "br", "br");
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertEmptyElementWithAttributes(
+ String expected, ContentHandler handler) throws Exception {
+ AttributesImpl attributes = new AttributesImpl();
+ attributes.addAttribute("", "name", "name", "CDATA", "foo");
+ attributes.addAttribute("", "value", "value", "CDATA", "bar");
+ handler.startElement("", "meta", "meta", attributes);
+ handler.endElement("", "meta", "meta");
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertEmptyElementWithAttributeEscaping(
+ String expected, ContentHandler handler) throws Exception {
+ AttributesImpl attributes = new AttributesImpl();
+ attributes.addAttribute("", "class", "class", "CDATA", "<&\">");
+ handler.startElement("", "p", "p", attributes);
+ handler.endElement("", "p", "p");
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertElement(
+ String expected, ContentHandler handler) throws Exception {
+ AttributesImpl attributes = new AttributesImpl();
+ handler.startElement("", "p", "p", attributes);
+ handler.characters("content".toCharArray(), 0, 7);
+ handler.endElement("", "p", "p");
+ assertEquals(expected, handler.toString());
+ }
+
+ private void assertElementWithAttributes(
+ String expected, ContentHandler handler) throws Exception {
+ AttributesImpl attributes = new AttributesImpl();
+ attributes.addAttribute("", "class", "class", "CDATA", "test");
+ handler.startElement("", "p", "p", attributes);
+ handler.characters("content".toCharArray(), 0, 7);
+ handler.endElement("", "p", "p");
+ assertEquals(expected, handler.toString());
+ }
+
+}