You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/05/13 15:49:37 UTC

svn commit: r1679211 [2/7] - in /tika/trunk: tika-app/src/main/java/org/apache/tika/cli/ tika-app/src/main/resources/ tika-app/src/test/java/org/apache/tika/cli/ tika-app/src/test/resources/ tika-batch/src/main/java/org/apache/tika/batch/ tika-batch/sr...

Modified: tika/trunk/tika-batch/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j.properties Wed May 13 13:49:36 2015
@@ -1,22 +1,22 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-log4j.rootLogger=OFF
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
 
 log4j.appender.A1=org.apache.log4j.ConsoleAppender
 

Modified: tika/trunk/tika-batch/src/test/resources/log4j_process.properties
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/log4j_process.properties?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/log4j_process.properties (original)
+++ tika/trunk/tika-batch/src/test/resources/log4j_process.properties Wed May 13 13:49:36 2015
@@ -1,24 +1,24 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#This is used by the batch process; see log4j.properties for the driver
-
-log4j.rootLogger=OFF
-
-#for debugging
-#log4j.rootLogger=TRACE,A1
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#This is used by the batch process; see log4j.properties for the driver
+
+log4j.rootLogger=OFF
+
+#for debugging
+#log4j.rootLogger=TRACE,A1
 
 log4j.appender.A1=org.apache.log4j.ConsoleAppender
 

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-MockConsumersBuilder.xml Wed May 13 13:49:36 2015
@@ -103,10 +103,10 @@
 
 		<outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-	</consumers>
-	
-	<!-- reporter and interrupter are optional -->
-	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+	</consumers>
+	
+	<!-- reporter and interrupter are optional -->
+	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-broken.xml Wed May 13 13:49:36 2015
@@ -96,10 +96,10 @@
 
 		<outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-	</consumers>
-	
-	<!-- reporter and interrupter are optional -->
-	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+	</consumers>
+	
+	<!-- reporter and interrupter are optional -->
+	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml (original)
+++ tika/trunk/tika-batch/src/test/resources/tika-batch-config-test.xml Wed May 13 13:49:36 2015
@@ -102,10 +102,10 @@
 
 		<outputstream class="FSOutputStreamFactory"
                 encoding="UTF-8" outputSuffix="xml"/>
-	</consumers>
-	
-	<!-- reporter and interrupter are optional -->
-	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
-              reporterStaleThresholdMillis="500000"/>
-	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
+	</consumers>
+	
+	<!-- reporter and interrupter are optional -->
+	<reporter builderClass="org.apache.tika.batch.builders.SimpleLogReporterBuilder" reporterSleepMillis="1000"
+              reporterStaleThresholdMillis="500000"/>
+	<interrupter builderClass="org.apache.tika.batch.builders.InterrupterBuilder"/>
 </tika-batch-config>
\ No newline at end of file

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/exception/AccessPermissionException.java Wed May 13 13:49:36 2015
@@ -1,40 +1,40 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.exception;
-
-/**
- * Exception to be thrown when a document does not allow content extraction.
- * As of this writing, PDF documents are the only type of document that might
- * cause this type of exception.
- */
-public class AccessPermissionException extends TikaException {
-    public AccessPermissionException() {
-        super("Unable to process: content extraction is not allowed");
-    }
-
-    public AccessPermissionException(Throwable th) {
-        super("Unable to process: content extraction is not allowed", th);
-    }
-
-    public AccessPermissionException(String info) {
-        super(info);
-    }
-
-    public AccessPermissionException(String info, Throwable th) {
-        super(info, th);
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.exception;
+
+/**
+ * Exception to be thrown when a document does not allow content extraction.
+ * As of this writing, PDF documents are the only type of document that might
+ * cause this type of exception.
+ */
+public class AccessPermissionException extends TikaException {
+    public AccessPermissionException() {
+        super("Unable to process: content extraction is not allowed");
+    }
+
+    public AccessPermissionException(Throwable th) {
+        super("Unable to process: content extraction is not allowed", th);
+    }
+
+    public AccessPermissionException(String info) {
+        super(info);
+    }
+
+    public AccessPermissionException(String info, Throwable th) {
+        super(info, th);
+    }
+}

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/metadata/AccessPermissions.java Wed May 13 13:49:36 2015
@@ -1,71 +1,71 @@
-package org.apache.tika.metadata;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Until we can find a common standard, we'll use these options.  They
- * were mostly derived from PDFBox's AccessPermission, but some can
- * apply to other document formats, especially CAN_MODIFY and FILL_IN_FORM.
- */
-public interface AccessPermissions {
-
-    final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
-
-    /**
-     * Can any modifications be made to the document
-     */
-    Property CAN_MODIFY = Property.externalTextBag(PREFIX+"can_modify");
-
-    /**
-     * Should content be extracted, generally.
-     */
-    Property EXTRACT_CONTENT = Property.externalText(PREFIX+"extract_content");
-
-    /**
-     * Should content be extracted for the purposes
-     * of accessibility.
-     */
-    Property EXTRACT_FOR_ACCESSIBILITY = Property.externalText(PREFIX + "extract_for_accessibility");
-
-    /**
-     * Can the user insert/rotate/delete pages.
-     */
-    Property ASSEMBLE_DOCUMENT = Property.externalText(PREFIX+"assemble_document");
-
-
-    /**
-     * Can the user fill in a form
-     */
-    Property FILL_IN_FORM = Property.externalText(PREFIX+"fill_in_form");
-
-    /**
-     * Can the user modify annotations
-     */
-    Property CAN_MODIFY_ANNOTATIONS = Property.externalText(PREFIX+"modify_annotations");
-
-    /**
-     * Can the user print the document
-     */
-    Property CAN_PRINT = Property.externalText(PREFIX+"can_print");
-
-    /**
-     * Can the user print an image-degraded version of the document.
-     */
-    Property CAN_PRINT_DEGRADED = Property.externalText(PREFIX+"can_print_degraded");
-
-}
+package org.apache.tika.metadata;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Until we can find a common standard, we'll use these options.  They
+ * were mostly derived from PDFBox's AccessPermission, but some can
+ * apply to other document formats, especially CAN_MODIFY and FILL_IN_FORM.
+ */
+public interface AccessPermissions {
+
+    final static String PREFIX = "access_permission"+Metadata.NAMESPACE_PREFIX_DELIMITER;
+
+    /**
+     * Can any modifications be made to the document
+     */
+    Property CAN_MODIFY = Property.externalTextBag(PREFIX+"can_modify");
+
+    /**
+     * Should content be extracted, generally.
+     */
+    Property EXTRACT_CONTENT = Property.externalText(PREFIX+"extract_content");
+
+    /**
+     * Should content be extracted for the purposes
+     * of accessibility.
+     */
+    Property EXTRACT_FOR_ACCESSIBILITY = Property.externalText(PREFIX + "extract_for_accessibility");
+
+    /**
+     * Can the user insert/rotate/delete pages.
+     */
+    Property ASSEMBLE_DOCUMENT = Property.externalText(PREFIX+"assemble_document");
+
+
+    /**
+     * Can the user fill in a form
+     */
+    Property FILL_IN_FORM = Property.externalText(PREFIX+"fill_in_form");
+
+    /**
+     * Can the user modify annotations
+     */
+    Property CAN_MODIFY_ANNOTATIONS = Property.externalText(PREFIX+"modify_annotations");
+
+    /**
+     * Can the user print the document
+     */
+    Property CAN_PRINT = Property.externalText(PREFIX+"can_print");
+
+    /**
+     * Can the user print an image-degraded version of the document.
+     */
+    Property CAN_PRINT_DEGRADED = Property.externalText(PREFIX+"can_print_degraded");
+
+}

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToHTMLContentHandler.java Wed May 13 13:49:36 2015
@@ -1,70 +1,70 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.xml.sax.SAXException;
-
-/**
- * SAX event handler that serializes the HTML document to a character stream.
- * The incoming SAX events are expected to be well-formed (properly nested,
- * etc.) and valid HTML.
- *
- * @since Apache Tika 0.10
- */
-public class ToHTMLContentHandler extends ToXMLContentHandler {
-
-    private static final Set<String> EMPTY_ELEMENTS =
-        new HashSet<String>(Arrays.asList(
-            "area", "base", "basefont", "br", "col", "frame", "hr",
-            "img", "input", "isindex", "link", "meta", "param"));
-
-    public ToHTMLContentHandler(OutputStream stream, String encoding)
-            throws UnsupportedEncodingException {
-        super(stream, encoding);
-    }
-
-    public ToHTMLContentHandler() {
-        super();
-    }
-
-    @Override
-    public void startDocument() throws SAXException {
-    }
-
-    @Override
-    public void endElement(String uri, String localName, String qName)
-            throws SAXException {
-        if (inStartElement) {
-            write('>');
-            inStartElement = false;
-
-            if (EMPTY_ELEMENTS.contains(localName)) {
-                namespaces.clear();
-                return;
-            }
-        }
-
-        super.endElement(uri, localName, qName);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the HTML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and valid HTML.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToHTMLContentHandler extends ToXMLContentHandler {
+
+    private static final Set<String> EMPTY_ELEMENTS =
+        new HashSet<String>(Arrays.asList(
+            "area", "base", "basefont", "br", "col", "frame", "hr",
+            "img", "input", "isindex", "link", "meta", "param"));
+
+    public ToHTMLContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        super(stream, encoding);
+    }
+
+    public ToHTMLContentHandler() {
+        super();
+    }
+
+    @Override
+    public void startDocument() throws SAXException {
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (inStartElement) {
+            write('>');
+            inStartElement = false;
+
+            if (EMPTY_ELEMENTS.contains(localName)) {
+                namespaces.clear();
+                return;
+            }
+        }
+
+        super.endElement(uri, localName, qName);
+    }
+
+}

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToTextContentHandler.java Wed May 13 13:49:36 2015
@@ -1,140 +1,140 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.StringWriter;
-import java.io.UnsupportedEncodingException;
-import java.io.Writer;
-import java.nio.charset.Charset;
-
-import org.xml.sax.SAXException;
-import org.xml.sax.helpers.DefaultHandler;
-
-/**
- * SAX event handler that writes all character content out to a character
- * stream. No escaping or other transformations are made on the character
- * content.
- *
- * @since Apache Tika 0.10
- */
-public class ToTextContentHandler extends DefaultHandler {
-
-    /**
-     * The character stream.
-     */
-    private final Writer writer;
-
-    /**
-     * Creates a content handler that writes character events to
-     * the given writer.
-     *
-     * @param writer writer
-     */
-    public ToTextContentHandler(Writer writer) {
-        this.writer = writer;
-    }
-
-    /**
-     * Creates a content handler that writes character events to
-     * the given output stream using the platform default encoding.
-     *
-     * @param stream output stream
-     */
-    public ToTextContentHandler(OutputStream stream) {
-        this(new OutputStreamWriter(stream, Charset.defaultCharset()));
-    }
-
-    /**
-     * Creates a content handler that writes character events to
-     * the given output stream using the given encoding.
-     *
-     * @param stream output stream
-     * @param encoding output encoding
-     * @throws UnsupportedEncodingException if the encoding is unsupported
-     */
-    public ToTextContentHandler(OutputStream stream, String encoding)
-            throws UnsupportedEncodingException {
-        this(new OutputStreamWriter(stream, encoding));
-    }
-
-    /**
-     * Creates a content handler that writes character events
-     * to an internal string buffer. Use the {@link #toString()}
-     * method to access the collected character content.
-     */
-    public ToTextContentHandler() {
-        this(new StringWriter());
-    }
-
-    /**
-     * Writes the given characters to the given character stream.
-     */
-    @Override
-    public void characters(char[] ch, int start, int length)
-            throws SAXException {
-        try {
-            writer.write(ch, start, length);
-        } catch (IOException e) {
-            throw new SAXException(
-                    "Error writing: " + new String(ch, start, length), e);
-        }
-    }
-
-
-    /**
-     * Writes the given ignorable characters to the given character stream.
-     * The default implementation simply forwards the call to the
-     * {@link #characters(char[], int, int)} method.
-     */
-    @Override
-    public void ignorableWhitespace(char[] ch, int start, int length)
-            throws SAXException {
-        characters(ch, start, length);
-    }
-
-    /**
-     * Flushes the character stream so that no characters are forgotten
-     * in internal buffers.
-     *
-     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
-     * @throws SAXException if the stream can not be flushed
-     */
-    @Override
-    public void endDocument() throws SAXException {
-        try {
-            writer.flush();
-        } catch (IOException e) {
-            throw new SAXException("Error flushing character output", e);
-        }
-    }
-
-    /**
-     * Returns the contents of the internal string buffer where
-     * all the received characters have been collected. Only works
-     * when this object was constructed using the empty default
-     * constructor or by passing a {@link StringWriter} to the
-     * other constructor.
-     */
-    @Override
-    public String toString() {
-        return writer.toString();
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.charset.Charset;
+
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * SAX event handler that writes all character content out to a character
+ * stream. No escaping or other transformations are made on the character
+ * content.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToTextContentHandler extends DefaultHandler {
+
+    /**
+     * The character stream.
+     */
+    private final Writer writer;
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given writer.
+     *
+     * @param writer writer
+     */
+    public ToTextContentHandler(Writer writer) {
+        this.writer = writer;
+    }
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given output stream using the platform default encoding.
+     *
+     * @param stream output stream
+     */
+    public ToTextContentHandler(OutputStream stream) {
+        this(new OutputStreamWriter(stream, Charset.defaultCharset()));
+    }
+
+    /**
+     * Creates a content handler that writes character events to
+     * the given output stream using the given encoding.
+     *
+     * @param stream output stream
+     * @param encoding output encoding
+     * @throws UnsupportedEncodingException if the encoding is unsupported
+     */
+    public ToTextContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        this(new OutputStreamWriter(stream, encoding));
+    }
+
+    /**
+     * Creates a content handler that writes character events
+     * to an internal string buffer. Use the {@link #toString()}
+     * method to access the collected character content.
+     */
+    public ToTextContentHandler() {
+        this(new StringWriter());
+    }
+
+    /**
+     * Writes the given characters to the given character stream.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        try {
+            writer.write(ch, start, length);
+        } catch (IOException e) {
+            throw new SAXException(
+                    "Error writing: " + new String(ch, start, length), e);
+        }
+    }
+
+
+    /**
+     * Writes the given ignorable characters to the given character stream.
+     * The default implementation simply forwards the call to the
+     * {@link #characters(char[], int, int)} method.
+     */
+    @Override
+    public void ignorableWhitespace(char[] ch, int start, int length)
+            throws SAXException {
+        characters(ch, start, length);
+    }
+
+    /**
+     * Flushes the character stream so that no characters are forgotten
+     * in internal buffers.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-179">TIKA-179</a>
+     * @throws SAXException if the stream can not be flushed
+     */
+    @Override
+    public void endDocument() throws SAXException {
+        try {
+            writer.flush();
+        } catch (IOException e) {
+            throw new SAXException("Error flushing character output", e);
+        }
+    }
+
+    /**
+     * Returns the contents of the internal string buffer where
+     * all the received characters have been collected. Only works
+     * when this object was constructed using the empty default
+     * constructor or by passing a {@link StringWriter} to the
+     * other constructor.
+     */
+    @Override
+    public String toString() {
+        return writer.toString();
+    }
+
+}

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/sax/ToXMLContentHandler.java Wed May 13 13:49:36 2015
@@ -1,281 +1,281 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-
-import org.xml.sax.Attributes;
-import org.xml.sax.SAXException;
-
-/**
- * SAX event handler that serializes the XML document to a character stream.
- * The incoming SAX events are expected to be well-formed (properly nested,
- * etc.) and to explicitly include namespace declaration attributes and
- * corresponding namespace prefixes in element and attribute names.
- *
- * @since Apache Tika 0.10
- */
-public class ToXMLContentHandler extends ToTextContentHandler {
-
-    private static class ElementInfo {
-
-        private final ElementInfo parent;
-
-        private final Map<String, String> namespaces;
-
-        public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
-            this.parent = parent;
-            if (namespaces.isEmpty()) {
-                this.namespaces = Collections.emptyMap();
-            } else {
-                this.namespaces = new HashMap<String, String>(namespaces);
-            }
-        }
-
-        public String getPrefix(String uri) throws SAXException {
-            String prefix = namespaces.get(uri);
-            if (prefix != null) {
-                return prefix;
-            } else if (parent != null) {
-                return parent.getPrefix(uri);
-            } else if (uri == null || uri.length() == 0) {
-                return "";
-            } else {
-                throw new SAXException("Namespace " + uri + " not declared");
-            }
-        }
-
-        public String getQName(String uri, String localName)
-                throws SAXException {
-            String prefix = getPrefix(uri);
-            if (prefix.length() > 0) {
-                return prefix + ":" + localName;
-            } else {
-                return localName;
-            }
-        }
-
-    }
-
-    private final String encoding;
-
-    protected boolean inStartElement = false;
-
-    protected final Map<String, String> namespaces =
-        new HashMap<String, String>();
-
-    private ElementInfo currentElement;
-
-    /**
-     * Creates an XML serializer that writes to the given byte stream
-     * using the given character encoding.
-     *
-     * @param stream output stream
-     * @param encoding output encoding
-     * @throws UnsupportedEncodingException if the encoding is unsupported
-     */
-    public ToXMLContentHandler(OutputStream stream, String encoding)
-            throws UnsupportedEncodingException {
-        super(stream, encoding);
-        this.encoding = encoding;
-    }
-
-    public ToXMLContentHandler(String encoding) {
-        super();
-        this.encoding = encoding;
-    }
-
-    public ToXMLContentHandler() {
-        super();
-        this.encoding = null;
-    }
-
-    /**
-     * Writes the XML prefix.
-     */
-    @Override
-    public void startDocument() throws SAXException {
-        if (encoding != null) {
-            write("<?xml version=\"1.0\" encoding=\"");
-            write(encoding);
-            write("\"?>\n");
-        }
-
-        currentElement = null;
-        namespaces.clear();
-    }
-
-    @Override
-    public void startPrefixMapping(String prefix, String uri)
-            throws SAXException {
-        try {
-            if (currentElement != null
-                    && prefix.equals(currentElement.getPrefix(uri))) {
-                return;
-            }
-        } catch (SAXException ignore) {
-        }
-        namespaces.put(uri, prefix);
-    }
-
-    @Override
-    public void startElement(
-            String uri, String localName, String qName, Attributes atts)
-            throws SAXException {
-        lazyCloseStartElement();
-
-        currentElement = new ElementInfo(currentElement, namespaces);
-
-        write('<');
-        write(currentElement.getQName(uri, localName));
-
-        for (int i = 0; i < atts.getLength(); i++) {
-            write(' ');
-            write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
-            write('=');
-            write('"');
-            char[] ch = atts.getValue(i).toCharArray();
-            writeEscaped(ch, 0, ch.length, true);
-            write('"');
-        }
-
-        for (Map.Entry<String, String> entry : namespaces.entrySet()) {
-            write(' ');
-            write("xmlns");
-            String prefix = entry.getValue();
-            if (prefix.length() > 0) {
-                write(':');
-                write(prefix);
-            }
-            write('=');
-            write('"');
-            char[] ch = entry.getKey().toCharArray();
-            writeEscaped(ch, 0, ch.length, true);
-            write('"');
-        }
-        namespaces.clear();
-
-        inStartElement = true;
-    }
-
-    @Override
-    public void endElement(String uri, String localName, String qName)
-            throws SAXException {
-        if (inStartElement) {
-            write(" />");
-            inStartElement = false;
-        } else {
-            write("</");
-            write(qName);
-            write('>');
-        }
-
-        namespaces.clear();
-
-        // Reset the position in the tree, to avoid endless stack overflow
-        // chains (see TIKA-1070)
-        currentElement = currentElement.parent;
-    }
-
-    @Override
-    public void characters(char[] ch, int start, int length)
-            throws SAXException {
-        lazyCloseStartElement();
-        writeEscaped(ch, start, start + length, false);
-    }
-
-    private void lazyCloseStartElement() throws SAXException {
-        if (inStartElement) {
-            write('>');
-            inStartElement = false;
-        }
-    }
-
-    /**
-     * Writes the given character as-is.
-     *
-     * @param ch character to be written
-     * @throws SAXException if the character could not be written
-     */
-    protected void write(char ch) throws SAXException {
-        super.characters(new char[] { ch }, 0, 1);
-    }
-
-    /**
-     * Writes the given string of character as-is.
-     *
-     * @param string string of character to be written
-     * @throws SAXException if the character string could not be written
-     */
-    protected void write(String string) throws SAXException {
-        super.characters(string.toCharArray(), 0, string.length());
-    }
-
-    /**
-     * Writes the given characters as-is followed by the given entity.
-     *
-     * @param ch character array
-     * @param from start position in the array
-     * @param to end position in the array
-     * @param entity entity code
-     * @return next position in the array,
-     *         after the characters plus one entity
-     * @throws SAXException if the characters could not be written
-     */
-    private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
-            throws SAXException {
-        super.characters(ch, from, to - from);
-        write('&');
-        write(entity);
-        write(';');
-        return to + 1;
-    }
-
-    /**
-     * Writes the given characters with XML meta characters escaped.
-     *
-     * @param ch character array
-     * @param from start position in the array
-     * @param to end position in the array
-     * @param attribute whether the characters should be escaped as
-     *                  an attribute value or normal character content
-     * @throws SAXException if the characters could not be written
-     */
-    private void writeEscaped(char[] ch, int from, int to, boolean attribute)
-            throws SAXException {
-        int pos = from;
-        while (pos < to) {
-            if (ch[pos] == '<') {
-                from = pos = writeCharsAndEntity(ch, from, pos, "lt");
-            } else if (ch[pos] == '>') {
-                from = pos = writeCharsAndEntity(ch, from, pos, "gt");
-            } else if (ch[pos] == '&') {
-                from = pos = writeCharsAndEntity(ch, from, pos, "amp");
-            } else if (attribute && ch[pos] == '"') {
-                from = pos = writeCharsAndEntity(ch, from, pos, "quot");
-            } else {
-                pos++;
-            }
-        }
-        super.characters(ch, from, to - from);
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+
+/**
+ * SAX event handler that serializes the XML document to a character stream.
+ * The incoming SAX events are expected to be well-formed (properly nested,
+ * etc.) and to explicitly include namespace declaration attributes and
+ * corresponding namespace prefixes in element and attribute names.
+ *
+ * @since Apache Tika 0.10
+ */
+public class ToXMLContentHandler extends ToTextContentHandler {
+
+    private static class ElementInfo {
+
+        private final ElementInfo parent;
+
+        private final Map<String, String> namespaces;
+
+        public ElementInfo(ElementInfo parent, Map<String, String> namespaces) {
+            this.parent = parent;
+            if (namespaces.isEmpty()) {
+                this.namespaces = Collections.emptyMap();
+            } else {
+                this.namespaces = new HashMap<String, String>(namespaces);
+            }
+        }
+
+        public String getPrefix(String uri) throws SAXException {
+            String prefix = namespaces.get(uri);
+            if (prefix != null) {
+                return prefix;
+            } else if (parent != null) {
+                return parent.getPrefix(uri);
+            } else if (uri == null || uri.length() == 0) {
+                return "";
+            } else {
+                throw new SAXException("Namespace " + uri + " not declared");
+            }
+        }
+
+        public String getQName(String uri, String localName)
+                throws SAXException {
+            String prefix = getPrefix(uri);
+            if (prefix.length() > 0) {
+                return prefix + ":" + localName;
+            } else {
+                return localName;
+            }
+        }
+
+    }
+
+    private final String encoding;
+
+    protected boolean inStartElement = false;
+
+    protected final Map<String, String> namespaces =
+        new HashMap<String, String>();
+
+    private ElementInfo currentElement;
+
+    /**
+     * Creates an XML serializer that writes to the given byte stream
+     * using the given character encoding.
+     *
+     * @param stream output stream
+     * @param encoding output encoding
+     * @throws UnsupportedEncodingException if the encoding is unsupported
+     */
+    public ToXMLContentHandler(OutputStream stream, String encoding)
+            throws UnsupportedEncodingException {
+        super(stream, encoding);
+        this.encoding = encoding;
+    }
+
+    public ToXMLContentHandler(String encoding) {
+        super();
+        this.encoding = encoding;
+    }
+
+    public ToXMLContentHandler() {
+        super();
+        this.encoding = null;
+    }
+
+    /**
+     * Writes the XML prefix.
+     */
+    @Override
+    public void startDocument() throws SAXException {
+        if (encoding != null) {
+            write("<?xml version=\"1.0\" encoding=\"");
+            write(encoding);
+            write("\"?>\n");
+        }
+
+        currentElement = null;
+        namespaces.clear();
+    }
+
+    @Override
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        try {
+            if (currentElement != null
+                    && prefix.equals(currentElement.getPrefix(uri))) {
+                return;
+            }
+        } catch (SAXException ignore) {
+        }
+        namespaces.put(uri, prefix);
+    }
+
+    @Override
+    public void startElement(
+            String uri, String localName, String qName, Attributes atts)
+            throws SAXException {
+        lazyCloseStartElement();
+
+        currentElement = new ElementInfo(currentElement, namespaces);
+
+        write('<');
+        write(currentElement.getQName(uri, localName));
+
+        for (int i = 0; i < atts.getLength(); i++) {
+            write(' ');
+            write(currentElement.getQName(atts.getURI(i), atts.getLocalName(i)));
+            write('=');
+            write('"');
+            char[] ch = atts.getValue(i).toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+
+        for (Map.Entry<String, String> entry : namespaces.entrySet()) {
+            write(' ');
+            write("xmlns");
+            String prefix = entry.getValue();
+            if (prefix.length() > 0) {
+                write(':');
+                write(prefix);
+            }
+            write('=');
+            write('"');
+            char[] ch = entry.getKey().toCharArray();
+            writeEscaped(ch, 0, ch.length, true);
+            write('"');
+        }
+        namespaces.clear();
+
+        inStartElement = true;
+    }
+
+    @Override
+    public void endElement(String uri, String localName, String qName)
+            throws SAXException {
+        if (inStartElement) {
+            write(" />");
+            inStartElement = false;
+        } else {
+            write("</");
+            write(qName);
+            write('>');
+        }
+
+        namespaces.clear();
+
+        // Reset the position in the tree, to avoid endless stack overflow
+        // chains (see TIKA-1070)
+        currentElement = currentElement.parent;
+    }
+
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        lazyCloseStartElement();
+        writeEscaped(ch, start, start + length, false);
+    }
+
+    private void lazyCloseStartElement() throws SAXException {
+        if (inStartElement) {
+            write('>');
+            inStartElement = false;
+        }
+    }
+
+    /**
+     * Writes the given character as-is.
+     *
+     * @param ch character to be written
+     * @throws SAXException if the character could not be written
+     */
+    protected void write(char ch) throws SAXException {
+        super.characters(new char[] { ch }, 0, 1);
+    }
+
+    /**
+     * Writes the given string of character as-is.
+     *
+     * @param string string of character to be written
+     * @throws SAXException if the character string could not be written
+     */
+    protected void write(String string) throws SAXException {
+        super.characters(string.toCharArray(), 0, string.length());
+    }
+
+    /**
+     * Writes the given characters as-is followed by the given entity.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param entity entity code
+     * @return next position in the array,
+     *         after the characters plus one entity
+     * @throws SAXException if the characters could not be written
+     */
+    private int writeCharsAndEntity(char[] ch, int from, int to, String entity)
+            throws SAXException {
+        super.characters(ch, from, to - from);
+        write('&');
+        write(entity);
+        write(';');
+        return to + 1;
+    }
+
+    /**
+     * Writes the given characters with XML meta characters escaped.
+     *
+     * @param ch character array
+     * @param from start position in the array
+     * @param to end position in the array
+     * @param attribute whether the characters should be escaped as
+     *                  an attribute value or normal character content
+     * @throws SAXException if the characters could not be written
+     */
+    private void writeEscaped(char[] ch, int from, int to, boolean attribute)
+            throws SAXException {
+        int pos = from;
+        while (pos < to) {
+            if (ch[pos] == '<') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "lt");
+            } else if (ch[pos] == '>') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "gt");
+            } else if (ch[pos] == '&') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "amp");
+            } else if (attribute && ch[pos] == '"') {
+                from = pos = writeCharsAndEntity(ch, from, pos, "quot");
+            } else {
+                pos++;
+            }
+        }
+        super.characters(ch, from, to - from);
+    }
+
+}

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java Wed May 13 13:49:36 2015
@@ -1,301 +1,301 @@
-package org.apache.tika.parser.mock;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.mime.MediaType;
-import org.apache.tika.parser.AbstractParser;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.XHTMLContentHandler;
-import org.w3c.dom.Document;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.SAXException;
-
-/**
- * This class enables mocking of parser behavior for use in testing
- * wrappers and drivers of parsers.
- * <p>
- * See resources/test-documents/mock/example.xml in tika-parsers/test for the documentation
- * of all the options for this MockParser.
- * <p>
- * Tests for this class are in tika-parsers.
- * <p>
- * See also {@link org.apache.tika.parser.DummyParser} for another option.
- */
-
-public class MockParser extends AbstractParser {
-
-    private static final long serialVersionUID = 1L;
-
-    @Override
-    public Set<MediaType> getSupportedTypes(ParseContext context) {
-        Set<MediaType> types = new HashSet<MediaType>();
-        MediaType type = MediaType.application("mock+xml");
-        types.add(type);
-        return types;
-    }
-
-    @Override
-    public void parse(InputStream stream, ContentHandler handler,
-                      Metadata metadata, ParseContext context) throws IOException,
-            SAXException, TikaException {
-        Document doc = null;
-        DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
-        DocumentBuilder docBuilder = null;
-        try {
-            docBuilder = fact.newDocumentBuilder();
-            doc = docBuilder.parse(stream);
-        } catch (ParserConfigurationException e) {
-            throw new IOException(e);
-        } catch (SAXException e) {
-            throw new IOException(e);
-        }
-        Node root = doc.getDocumentElement();
-        NodeList actions = root.getChildNodes();
-        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
-        xhtml.startDocument();
-        for (int i = 0; i < actions.getLength(); i++) {
-            executeAction(actions.item(i), metadata, xhtml);
-        }
-        xhtml.endDocument();
-    }
-
-    private void executeAction(Node action, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException,
-            IOException, TikaException {
-
-        if (action.getNodeType() != 1) {
-            return;
-        }
-
-        String name = action.getNodeName();
-        if ("metadata".equals(name)) {
-            metadata(action, metadata);
-        } else if("write".equals(name)) {
-            write(action, xhtml);
-        } else if ("throw".equals(name)) {
-            throwIt(action);
-        } else if ("hang".equals(name)) {
-            hang(action);
-        } else if ("oom".equals(name)) {
-            kabOOM();
-        } else if ("print_out".equals(name) || "print_err".equals(name)){
-            print(action, name);
-        } else {
-            throw new IllegalArgumentException("Didn't recognize mock action: "+name);
-        }
-    }
-
-    private void print(Node action, String name) {
-        String content = action.getTextContent();
-        if ("print_out".equals(name)) {
-            System.out.println(content);
-        } else if ("print_err".equals(name)) {
-            System.err.println(content);
-        } else {
-            throw new IllegalArgumentException("must be print_out or print_err");
-        }
-    }
-    private void hang(Node action) {
-        boolean interruptible = true;
-        boolean heavy = false;
-        long millis = -1;
-        long pulseMillis = -1;
-        NamedNodeMap attrs = action.getAttributes();
-        Node iNode = attrs.getNamedItem("interruptible");
-        if (iNode != null) {
-            interruptible = ("true".equals(iNode.getNodeValue()));
-        }
-        Node hNode = attrs.getNamedItem("heavy");
-        if (hNode != null) {
-            heavy = ("true".equals(hNode.getNodeValue()));
-        }
-
-        Node mNode = attrs.getNamedItem("millis");
-        if (mNode == null) {
-            throw new RuntimeException("Must specify \"millis\" attribute for hang.");
-        }
-        String millisString = mNode.getNodeValue();
-        try {
-            millis = Long.parseLong(millisString);
-        } catch (NumberFormatException e) {
-            throw new RuntimeException("Value for \"millis\" attribute must be a long.");
-        }
-
-        if (heavy) {
-            Node pNode = attrs.getNamedItem("pulse_millis");
-            if (pNode == null) {
-                throw new RuntimeException("Must specify attribute \"pulse_millis\" if the hang is \"heavy\"");
-            }
-            String pulseMillisString = mNode.getNodeValue();
-            try {
-                pulseMillis = Long.parseLong(pulseMillisString);
-            } catch (NumberFormatException e) {
-                throw new RuntimeException("Value for \"millis\" attribute must be a long.");
-            }
-        }
-        if (heavy) {
-            hangHeavy(millis, pulseMillis, interruptible);
-        } else {
-            sleep(millis, interruptible);
-        }
-    }
-
-    private void throwIt(Node action) throws IOException,
-            SAXException, TikaException {
-        NamedNodeMap attrs = action.getAttributes();
-        String className = attrs.getNamedItem("class").getNodeValue();
-        String msg = action.getTextContent();
-        throwIt(className, msg);
-    }
-
-    private void metadata(Node action, Metadata metadata) {
-        NamedNodeMap attrs = action.getAttributes();
-        //throws npe unless there is a name
-        String name = attrs.getNamedItem("name").getNodeValue();
-        String value = action.getTextContent();
-        Node actionType = attrs.getNamedItem("action");
-        if (actionType == null) {
-            metadata.add(name, value);
-        } else {
-            if ("set".equals(actionType.getNodeValue())) {
-                metadata.set(name, value);
-            } else {
-                metadata.add(name, value);
-            }
-        }
-    }
-
-    private void write(Node action, XHTMLContentHandler xhtml) throws SAXException {
-        NamedNodeMap attrs = action.getAttributes();
-        Node eNode = attrs.getNamedItem("element");
-        String elementType = "p";
-        if (eNode != null) {
-            elementType = eNode.getTextContent();
-        }
-        String text = action.getTextContent();
-        xhtml.startElement(elementType);
-        xhtml.characters(text);
-        xhtml.endElement(elementType);
-    }
-
-
-    private void throwIt(String className, String msg) throws IOException,
-            SAXException, TikaException {
-        Throwable t = null;
-        if (msg == null || msg.equals("")) {
-            try {
-                t = (Throwable) Class.forName(className).newInstance();
-            } catch (Exception e) {
-                throw new RuntimeException("couldn't create throwable class:"+className, e);
-            }
-        } else {
-            try {
-                Class<?> clazz = Class.forName(className);
-                Constructor<?> con = clazz.getConstructor(String.class);
-                t = (Throwable) con.newInstance(msg);
-            } catch (Exception e) {
-                throw new RuntimeException("couldn't create throwable class:" + className, e);
-            }
-        }
-        if (t instanceof SAXException) {
-            throw (SAXException)t;
-        } else if (t instanceof IOException) {
-            throw (IOException) t;
-        } else if (t instanceof TikaException) {
-            throw (TikaException) t;
-        } else if (t instanceof Error) {
-            throw (Error) t;
-        } else if (t instanceof RuntimeException) {
-            throw (RuntimeException) t;
-        } else {
-            //wrap the throwable in a RuntimeException
-            throw new RuntimeException(t);
-        }
-    }
-
-    private void kabOOM() {
-        List<int[]> ints = new ArrayList<int[]>();
-
-        while (true) {
-            int[] intArr = new int[32000];
-            ints.add(intArr);
-        }
-    }
-
-    private void hangHeavy(long maxMillis, long pulseCheckMillis, boolean interruptible) {
-        //do some heavy computation and occasionally check for
-        //whether time has exceeded maxMillis (see TIKA-1132 for inspiration)
-        //or whether the thread was interrupted
-        long start = new Date().getTime();
-        int lastChecked = 0;
-        while (true) {
-            for (int i = 1; i < Integer.MAX_VALUE; i++) {
-                for (int j = 1; j < Integer.MAX_VALUE; j++) {
-                    double div = (double) i / (double) j;
-                    lastChecked++;
-                    if (lastChecked > pulseCheckMillis) {
-                        lastChecked = 0;
-                        if (interruptible && Thread.currentThread().isInterrupted()) {
-                            return;
-                        }
-                        long elapsed = new Date().getTime()-start;
-                        if (elapsed > maxMillis) {
-                            return;
-                        }
-                    }
-                }
-            }
-        }
-    }
-
-    private void sleep(long maxMillis, boolean isInterruptible) {
-        long start = new Date().getTime();
-        long millisRemaining = maxMillis;
-        while (true) {
-            try {
-                Thread.sleep(millisRemaining);
-            } catch (InterruptedException e) {
-                if (isInterruptible) {
-                    return;
-                }
-            }
-            long elapsed = new Date().getTime()-start;
-            millisRemaining = maxMillis - elapsed;
-            if (millisRemaining <= 0) {
-                break;
-            }
-        }
-    }
+package org.apache.tika.parser.mock;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * This class enables mocking of parser behavior for use in testing
+ * wrappers and drivers of parsers.
+ * <p>
+ * See resources/test-documents/mock/example.xml in tika-parsers/test for the documentation
+ * of all the options for this MockParser.
+ * <p>
+ * Tests for this class are in tika-parsers.
+ * <p>
+ * See also {@link org.apache.tika.parser.DummyParser} for another option.
+ */
+
+public class MockParser extends AbstractParser {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * Reports the single media type handled by this mock parser.
+     *
+     * @param context parse context; not consulted
+     * @return a newly created, mutable set holding only
+     *         <code>MediaType.application("mock+xml")</code>
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        Set<MediaType> supported = new HashSet<MediaType>();
+        supported.add(MediaType.application("mock+xml"));
+        return supported;
+    }
+
+    /**
+     * Reads the stream as an XML document and executes every child of the
+     * root element, in document order, as a mock action.
+     * <p>
+     * Failures while creating or running the XML parser, including a
+     * {@link SAXException} raised for the input XML, are rethrown wrapped
+     * in an {@link IOException}.
+     *
+     * @param stream   XML description of the actions to perform
+     * @param handler  receives the XHTML events produced by the actions
+     * @param metadata metadata object handed to the actions
+     * @param context  parse context; not consulted
+     * @throws IOException   if the XML cannot be read or parsed, or if an
+     *                       action throws one
+     * @throws SAXException  if the handler or an action throws one
+     * @throws TikaException if an action throws one
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        Document mockDoc;
+        try {
+            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            mockDoc = builder.parse(stream);
+        } catch (ParserConfigurationException e) {
+            throw new IOException(e);
+        } catch (SAXException e) {
+            throw new IOException(e);
+        }
+
+        NodeList children = mockDoc.getDocumentElement().getChildNodes();
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        int index = 0;
+        while (index < children.getLength()) {
+            executeAction(children.item(index), metadata, xhtml);
+            index++;
+        }
+        xhtml.endDocument();
+    }
+
+    /**
+     * Dispatches one child node of the mock document to the handler that
+     * matches its element name. Nodes that are not elements are ignored.
+     *
+     * @param action   candidate action node
+     * @param metadata metadata passed on to "metadata" actions
+     * @param xhtml    handler passed on to "write" actions
+     * @throws IllegalArgumentException if the element name is not a known action
+     */
+    private void executeAction(Node action, Metadata metadata, XHTMLContentHandler xhtml) throws SAXException,
+            IOException, TikaException {
+
+        if (action.getNodeType() != Node.ELEMENT_NODE) {
+            return;
+        }
+
+        String actionName = action.getNodeName();
+        if ("metadata".equals(actionName)) {
+            metadata(action, metadata);
+            return;
+        }
+        if ("write".equals(actionName)) {
+            write(action, xhtml);
+            return;
+        }
+        if ("throw".equals(actionName)) {
+            throwIt(action);
+            return;
+        }
+        if ("hang".equals(actionName)) {
+            hang(action);
+            return;
+        }
+        if ("oom".equals(actionName)) {
+            kabOOM();
+            return;
+        }
+        if ("print_out".equals(actionName) || "print_err".equals(actionName)) {
+            print(action, actionName);
+            return;
+        }
+        throw new IllegalArgumentException("Didn't recognize mock action: "+actionName);
+    }
+
+    /**
+     * Prints the text content of the action, followed by a line break, to
+     * standard out or standard error.
+     *
+     * @param action node whose text content is printed
+     * @param name   "print_out" for System.out, "print_err" for System.err
+     * @throws IllegalArgumentException if name is neither of the two
+     */
+    private void print(Node action, String name) {
+        String text = action.getTextContent();
+        boolean toStdOut = "print_out".equals(name);
+        if (!toStdOut && !"print_err".equals(name)) {
+            throw new IllegalArgumentException("must be print_out or print_err");
+        }
+        if (toStdOut) {
+            System.out.println(text);
+        } else {
+            System.err.println(text);
+        }
+    }
+    /**
+     * Simulates a hanging parser as configured by the attributes of a
+     * "hang" element:
+     * <ul>
+     * <li>"millis" (required): how long to hang; must parse as a long</li>
+     * <li>"interruptible" (optional, defaults to true): only the literal
+     * "true" enables it</li>
+     * <li>"heavy" (optional, defaults to false): only the literal "true"
+     * selects busy computation instead of sleeping</li>
+     * <li>"pulse_millis" (required when heavy): must parse as a long; handed
+     * to the heavy hang as its check interval</li>
+     * </ul>
+     *
+     * @param action the "hang" element
+     * @throws RuntimeException if a required attribute is missing or is not
+     *                          a long
+     */
+    private void hang(Node action) {
+        boolean interruptible = true;
+        boolean heavy = false;
+        NamedNodeMap attrs = action.getAttributes();
+        Node iNode = attrs.getNamedItem("interruptible");
+        if (iNode != null) {
+            interruptible = ("true".equals(iNode.getNodeValue()));
+        }
+        Node hNode = attrs.getNamedItem("heavy");
+        if (hNode != null) {
+            heavy = ("true".equals(hNode.getNodeValue()));
+        }
+
+        Node mNode = attrs.getNamedItem("millis");
+        if (mNode == null) {
+            throw new RuntimeException("Must specify \"millis\" attribute for hang.");
+        }
+        long millis;
+        try {
+            millis = Long.parseLong(mNode.getNodeValue());
+        } catch (NumberFormatException e) {
+            throw new RuntimeException("Value for \"millis\" attribute must be a long.", e);
+        }
+
+        if (!heavy) {
+            sleep(millis, interruptible);
+            return;
+        }
+
+        Node pNode = attrs.getNamedItem("pulse_millis");
+        if (pNode == null) {
+            throw new RuntimeException("Must specify attribute \"pulse_millis\" if the hang is \"heavy\"");
+        }
+        long pulseMillis;
+        try {
+            //take the interval from "pulse_millis" itself, not from "millis"
+            pulseMillis = Long.parseLong(pNode.getNodeValue());
+        } catch (NumberFormatException e) {
+            throw new RuntimeException("Value for \"pulse_millis\" attribute must be a long.", e);
+        }
+        hangHeavy(millis, pulseMillis, interruptible);
+    }
+
+    /**
+     * Throws the throwable described by a "throw" element: the "class"
+     * attribute names the type and the text content supplies the message.
+     * A missing "class" attribute results in a NullPointerException.
+     *
+     * @param action the "throw" element
+     */
+    private void throwIt(Node action) throws IOException,
+            SAXException, TikaException {
+        String throwableClass = action.getAttributes().getNamedItem("class").getNodeValue();
+        throwIt(throwableClass, action.getTextContent());
+    }
+
+    /**
+     * Applies a "metadata" element: the "name" attribute is the key and the
+     * text content is the value. The value replaces existing values only
+     * when the "action" attribute equals "set"; otherwise it is added.
+     *
+     * @param action   the "metadata" element
+     * @param metadata metadata object to modify
+     */
+    private void metadata(Node action, Metadata metadata) {
+        NamedNodeMap attributes = action.getAttributes();
+        //a missing "name" attribute surfaces as a NullPointerException here
+        String key = attributes.getNamedItem("name").getNodeValue();
+        String content = action.getTextContent();
+        Node mode = attributes.getNamedItem("action");
+        boolean replace = mode != null && "set".equals(mode.getNodeValue());
+        if (replace) {
+            metadata.set(key, content);
+        } else {
+            metadata.add(key, content);
+        }
+    }
+
+    /**
+     * Writes the text content of a "write" element to the handler, wrapped
+     * in the element named by the "element" attribute, or in "p" when that
+     * attribute is absent.
+     *
+     * @param action the "write" element
+     * @param xhtml  handler that receives the start, characters and end events
+     * @throws SAXException if the handler throws one
+     */
+    private void write(Node action, XHTMLContentHandler xhtml) throws SAXException {
+        Node elementAttr = action.getAttributes().getNamedItem("element");
+        String tag = (elementAttr == null) ? "p" : elementAttr.getTextContent();
+        String body = action.getTextContent();
+        xhtml.startElement(tag);
+        xhtml.characters(body);
+        xhtml.endElement(tag);
+    }
+
+
+    /**
+     * Instantiates the named throwable reflectively and throws it.
+     * <p>
+     * With a null or empty message the no-argument constructor is used;
+     * otherwise the single-String constructor is called with the message.
+     * SAXException, IOException, TikaException, Error and RuntimeException
+     * instances are thrown as they are; any other throwable is wrapped in a
+     * RuntimeException.
+     *
+     * @param className fully qualified name of the throwable class
+     * @param msg       message for the throwable; may be null or empty
+     * @throws RuntimeException if the throwable cannot be created
+     */
+    private void throwIt(String className, String msg) throws IOException,
+            SAXException, TikaException {
+        Throwable toThrow;
+        try {
+            Class<?> clazz = Class.forName(className);
+            if (msg == null || msg.equals("")) {
+                toThrow = (Throwable) clazz.newInstance();
+            } else {
+                Constructor<?> con = clazz.getConstructor(String.class);
+                toThrow = (Throwable) con.newInstance(msg);
+            }
+        } catch (Exception e) {
+            throw new RuntimeException("couldn't create throwable class:" + className, e);
+        }
+
+        if (toThrow instanceof SAXException) {
+            throw (SAXException) toThrow;
+        }
+        if (toThrow instanceof IOException) {
+            throw (IOException) toThrow;
+        }
+        if (toThrow instanceof TikaException) {
+            throw (TikaException) toThrow;
+        }
+        if (toThrow instanceof Error) {
+            throw (Error) toThrow;
+        }
+        if (toThrow instanceof RuntimeException) {
+            throw (RuntimeException) toThrow;
+        }
+        //anything else cannot be declared here, so wrap it
+        throw new RuntimeException(toThrow);
+    }
+
+    /**
+     * Allocates int arrays without end and keeps every one of them
+     * reachable; this method never returns normally.
+     */
+    private void kabOOM() {
+        List<int[]> hoard = new ArrayList<int[]>();
+
+        for (;;) {
+            hoard.add(new int[32000]);
+        }
+    }
+
+    /**
+     * Keeps the CPU busy until maxMillis have elapsed or, if allowed, until
+     * the current thread is found to be interrupted
+     * (see TIKA-1132 for inspiration).
+     * <p>
+     * The interrupt flag is only inspected, not cleared.
+     *
+     * @param maxMillis        elapsed time in milliseconds after which to return
+     * @param pulseCheckMillis despite its name, the number of loop iterations
+     *                         between two checks of the clock and of the
+     *                         interrupt flag
+     * @param interruptible    whether an interrupted thread ends the hang early
+     */
+    private void hangHeavy(long maxMillis, long pulseCheckMillis, boolean interruptible) {
+        long start = System.currentTimeMillis();
+        //long rather than int: an int counter can never exceed a threshold of
+        //Integer.MAX_VALUE or more, which would disable both checks for good
+        long sinceLastCheck = 0;
+        while (true) {
+            for (int i = 1; i < Integer.MAX_VALUE; i++) {
+                for (int j = 1; j < Integer.MAX_VALUE; j++) {
+                    //throwaway floating-point work; the result is never used
+                    double div = (double) i / (double) j;
+                    sinceLastCheck++;
+                    if (sinceLastCheck > pulseCheckMillis) {
+                        sinceLastCheck = 0;
+                        if (interruptible && Thread.currentThread().isInterrupted()) {
+                            return;
+                        }
+                        long elapsed = System.currentTimeMillis() - start;
+                        if (elapsed > maxMillis) {
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Sleeps until maxMillis have elapsed. An interrupt ends the sleep early
+     * only when isInterruptible is true; otherwise the interrupt is
+     * swallowed and the sleep resumes for the time still remaining.
+     *
+     * @param maxMillis       total time to sleep, in milliseconds
+     * @param isInterruptible whether an interrupt makes this method return
+     */
+    private void sleep(long maxMillis, boolean isInterruptible) {
+        long start = new Date().getTime();
+        long remaining = maxMillis;
+        do {
+            try {
+                Thread.sleep(remaining);
+            } catch (InterruptedException e) {
+                if (isInterruptible) {
+                    return;
+                }
+            }
+            remaining = maxMillis - (new Date().getTime() - start);
+        } while (remaining > 0);
+    }
 }
\ No newline at end of file

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java?rev=1679211&r1=1679210&r2=1679211&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/sax/SerializerTest.java Wed May 13 13:49:36 2015
@@ -1,150 +1,150 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.sax;
-
-import static org.junit.Assert.assertEquals;
-import org.junit.Test;
-import org.xml.sax.ContentHandler;
-import org.xml.sax.helpers.AttributesImpl;
-
-public class SerializerTest {
-
-    @Test
-    public void testToTextContentHandler() throws Exception {
-        assertStartDocument("", new ToTextContentHandler());
-        assertCharacters("content", new ToTextContentHandler());
-        assertCharacterEscaping("<&\">", new ToTextContentHandler());
-        assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
-        assertEmptyElement("", new ToTextContentHandler());
-        assertEmptyElementWithAttributes("", new ToTextContentHandler());
-        assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
-        assertElement("content", new ToTextContentHandler());
-        assertElementWithAttributes("content", new ToTextContentHandler());
-    }
-
-    @Test
-    public void testToXMLContentHandler() throws Exception {
-        assertStartDocument("", new ToXMLContentHandler());
-        assertStartDocument(
-                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
-                new ToXMLContentHandler("UTF-8"));
-        assertCharacters("content", new ToXMLContentHandler());
-        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
-        assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
-        assertEmptyElement("<br />", new ToXMLContentHandler());
-        assertEmptyElementWithAttributes(
-                "<meta name=\"foo\" value=\"bar\" />",
-                new ToXMLContentHandler());
-        assertEmptyElementWithAttributeEscaping(
-                "<p class=\"&lt;&amp;&quot;&gt;\" />",
-                new ToXMLContentHandler());
-        assertElement("<p>content</p>", new ToXMLContentHandler());
-        assertElementWithAttributes(
-                "<p class=\"test\">content</p>",
-                new ToXMLContentHandler());
-    }
-
-    @Test
-    public void testToHTMLContentHandler() throws Exception {
-        assertStartDocument("", new ToHTMLContentHandler());
-        assertCharacters("content", new ToHTMLContentHandler());
-        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToHTMLContentHandler());
-        assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler());
-        assertEmptyElement("<br>", new ToHTMLContentHandler());
-        assertEmptyElementWithAttributes(
-                "<meta name=\"foo\" value=\"bar\">",
-                new ToHTMLContentHandler());
-        assertEmptyElementWithAttributeEscaping(
-                "<p class=\"&lt;&amp;&quot;&gt;\"></p>",
-                new ToHTMLContentHandler());
-        assertElement("<p>content</p>", new ToHTMLContentHandler());
-        assertElementWithAttributes(
-                "<p class=\"test\">content</p>",
-                new ToHTMLContentHandler());
-    }
-
-    private void assertStartDocument(String expected, ContentHandler handler)
-            throws Exception {
-        handler.startDocument();
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertCharacters(String expected, ContentHandler handler)
-            throws Exception {
-        handler.characters("content".toCharArray(), 0, 7);
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertCharacterEscaping(
-            String expected, ContentHandler handler) throws Exception {
-        handler.characters("<&\">".toCharArray(), 0, 4);
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertIgnorableWhitespace(
-            String expected, ContentHandler handler) throws Exception {
-        handler.ignorableWhitespace(" \t\r\n".toCharArray(), 0, 4);
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertEmptyElement(String expected, ContentHandler handler)
-            throws Exception {
-        AttributesImpl attributes = new AttributesImpl();
-        handler.startElement("", "br", "br", attributes);
-        handler.endElement("", "br", "br");
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertEmptyElementWithAttributes(
-            String expected, ContentHandler handler) throws Exception {
-        AttributesImpl attributes = new AttributesImpl();
-        attributes.addAttribute("", "name", "name", "CDATA", "foo");
-        attributes.addAttribute("", "value", "value", "CDATA", "bar");
-        handler.startElement("", "meta", "meta", attributes);
-        handler.endElement("", "meta", "meta");
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertEmptyElementWithAttributeEscaping(
-            String expected, ContentHandler handler) throws Exception {
-        AttributesImpl attributes = new AttributesImpl();
-        attributes.addAttribute("", "class", "class", "CDATA", "<&\">");
-        handler.startElement("", "p", "p", attributes);
-        handler.endElement("", "p", "p");
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertElement(
-            String expected, ContentHandler handler) throws Exception {
-        AttributesImpl attributes = new AttributesImpl();
-        handler.startElement("", "p", "p", attributes);
-        handler.characters("content".toCharArray(), 0, 7);
-        handler.endElement("", "p", "p");
-        assertEquals(expected, handler.toString());
-    }
-
-    private void assertElementWithAttributes(
-            String expected, ContentHandler handler) throws Exception {
-        AttributesImpl attributes = new AttributesImpl();
-        attributes.addAttribute("", "class", "class", "CDATA", "test");
-        handler.startElement("", "p", "p", attributes);
-        handler.characters("content".toCharArray(), 0, 7);
-        handler.endElement("", "p", "p");
-        assertEquals(expected, handler.toString());
-    }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.sax;
+
+import static org.junit.Assert.assertEquals;
+import org.junit.Test;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Checks the output of the text, XML and HTML serializing content handlers
+ * for a fixed set of SAX events. Every assertion uses a fresh handler and
+ * compares the expected string with the handler's toString() value.
+ */
+public class SerializerTest {
+
+    /** Text output: character data verbatim, no output for markup. */
+    @Test
+    public void testToTextContentHandler() throws Exception {
+        assertStartDocument("", new ToTextContentHandler());
+        assertCharacters("content", new ToTextContentHandler());
+        assertCharacterEscaping("<&\">", new ToTextContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToTextContentHandler());
+        assertEmptyElement("", new ToTextContentHandler());
+        assertEmptyElementWithAttributes("", new ToTextContentHandler());
+        assertEmptyElementWithAttributeEscaping("", new ToTextContentHandler());
+        assertElement("content", new ToTextContentHandler());
+        assertElementWithAttributes("content", new ToTextContentHandler());
+    }
+
+    /** XML output: escaped text, self-closing empty elements, optional declaration. */
+    @Test
+    public void testToXMLContentHandler() throws Exception {
+        assertStartDocument("", new ToXMLContentHandler());
+        assertStartDocument("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n",
+                new ToXMLContentHandler("UTF-8"));
+        assertCharacters("content", new ToXMLContentHandler());
+        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToXMLContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToXMLContentHandler());
+        assertEmptyElement("<br />", new ToXMLContentHandler());
+        assertEmptyElementWithAttributes("<meta name=\"foo\" value=\"bar\" />",
+                new ToXMLContentHandler());
+        assertEmptyElementWithAttributeEscaping("<p class=\"&lt;&amp;&quot;&gt;\" />",
+                new ToXMLContentHandler());
+        assertElement("<p>content</p>", new ToXMLContentHandler());
+        assertElementWithAttributes("<p class=\"test\">content</p>",
+                new ToXMLContentHandler());
+    }
+
+    /** HTML output: escaped text, br and meta written without a closing tag. */
+    @Test
+    public void testToHTMLContentHandler() throws Exception {
+        assertStartDocument("", new ToHTMLContentHandler());
+        assertCharacters("content", new ToHTMLContentHandler());
+        assertCharacterEscaping("&lt;&amp;\"&gt;", new ToHTMLContentHandler());
+        assertIgnorableWhitespace(" \t\r\n", new ToHTMLContentHandler());
+        assertEmptyElement("<br>", new ToHTMLContentHandler());
+        assertEmptyElementWithAttributes("<meta name=\"foo\" value=\"bar\">",
+                new ToHTMLContentHandler());
+        assertEmptyElementWithAttributeEscaping("<p class=\"&lt;&amp;&quot;&gt;\"></p>",
+                new ToHTMLContentHandler());
+        assertElement("<p>content</p>", new ToHTMLContentHandler());
+        assertElementWithAttributes("<p class=\"test\">content</p>",
+                new ToHTMLContentHandler());
+    }
+
+    /** Sends startDocument and checks the serialized result. */
+    private void assertStartDocument(String expected, ContentHandler handler)
+            throws Exception {
+        handler.startDocument();
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends the text "content" and checks the serialized result. */
+    private void assertCharacters(String expected, ContentHandler handler)
+            throws Exception {
+        sendText(handler, "content");
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends the four markup-significant characters as text. */
+    private void assertCharacterEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        sendText(handler, "<&\">");
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends space, tab, carriage return and newline as ignorable whitespace. */
+    private void assertIgnorableWhitespace(
+            String expected, ContentHandler handler) throws Exception {
+        char[] whitespace = " \t\r\n".toCharArray();
+        handler.ignorableWhitespace(whitespace, 0, whitespace.length);
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends an empty br element without attributes. */
+    private void assertEmptyElement(String expected, ContentHandler handler)
+            throws Exception {
+        sendElement(handler, "br", new AttributesImpl(), null);
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends an empty meta element with a name and a value attribute. */
+    private void assertEmptyElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attrs = new AttributesImpl();
+        addCdata(attrs, "name", "foo");
+        addCdata(attrs, "value", "bar");
+        sendElement(handler, "meta", attrs, null);
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends an empty p element whose class attribute needs escaping. */
+    private void assertEmptyElementWithAttributeEscaping(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attrs = new AttributesImpl();
+        addCdata(attrs, "class", "<&\">");
+        sendElement(handler, "p", attrs, null);
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends a p element without attributes around the text "content". */
+    private void assertElement(
+            String expected, ContentHandler handler) throws Exception {
+        sendElement(handler, "p", new AttributesImpl(), "content");
+        assertSerialized(expected, handler);
+    }
+
+    /** Sends a p element with a class attribute around the text "content". */
+    private void assertElementWithAttributes(
+            String expected, ContentHandler handler) throws Exception {
+        AttributesImpl attrs = new AttributesImpl();
+        addCdata(attrs, "class", "test");
+        sendElement(handler, "p", attrs, "content");
+        assertSerialized(expected, handler);
+    }
+
+    /** Compares the expected string with what the handler has serialized. */
+    private static void assertSerialized(String expected, ContentHandler handler) {
+        assertEquals(expected, handler.toString());
+    }
+
+    /** Passes the whole string to the handler as one characters event. */
+    private static void sendText(ContentHandler handler, String text)
+            throws Exception {
+        char[] chars = text.toCharArray();
+        handler.characters(chars, 0, chars.length);
+    }
+
+    /**
+     * Sends the start and end events of an element in the empty namespace,
+     * with one characters event in between when text is not null.
+     */
+    private static void sendElement(ContentHandler handler, String name,
+            AttributesImpl attrs, String text) throws Exception {
+        handler.startElement("", name, name, attrs);
+        if (text != null) {
+            sendText(handler, text);
+        }
+        handler.endElement("", name, name);
+    }
+
+    /** Adds a CDATA attribute in the empty namespace. */
+    private static void addCdata(AttributesImpl attrs, String name, String value) {
+        attrs.addAttribute("", name, name, "CDATA", value);
+    }
+
+}