You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/03/31 06:10:39 UTC

[2/2] tika git commit: TIKA-1915 and TIKA-1706 - Remove POI. Replace with commons-io+tika-core

TIKA-1915 and TIKA-1706 - Remove POI. Replace with commons-io+tika-core

Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/05f4af30
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/05f4af30
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/05f4af30

Branch: refs/heads/2.x
Commit: 05f4af3002f1f376095f6b4810d505ea50d08b3c
Parents: 28dcca9
Author: Bob Paulin <bo...@apache.org>
Authored: Wed Mar 30 23:10:40 2016 -0500
Committer: Bob Paulin <bo...@apache.org>
Committed: Wed Mar 30 23:10:40 2016 -0500

----------------------------------------------------------------------
 .../apache/tika/parser/mock/MockParserTest.java |    2 +-
 tika-core/pom.xml                               |    9 +-
 .../src/main/java/org/apache/tika/Tika.java     |    7 +-
 .../apache/tika/detect/XmlRootExtractor.java    |    2 +-
 .../apache/tika/embedder/ExternalEmbedder.java  |    2 +-
 .../ParsingEmbeddedDocumentExtractor.java       |    2 +-
 .../java/org/apache/tika/fork/ForkClient.java   |    2 +-
 .../apache/tika/io/CloseShieldInputStream.java  |   52 -
 .../org/apache/tika/io/ClosedInputStream.java   |   43 -
 .../org/apache/tika/io/CountingInputStream.java |  182 ---
 .../apache/tika/io/IOExceptionWithCause.java    |   67 -
 .../main/java/org/apache/tika/io/IOUtils.java   | 1186 ------------------
 .../org/apache/tika/io/NullInputStream.java     |  337 -----
 .../org/apache/tika/io/NullOutputStream.java    |   68 -
 .../java/org/apache/tika/io/StringUtil.java     |  121 ++
 .../org/apache/tika/io/TaggedIOException.java   |    2 +
 .../org/apache/tika/parser/NetworkParser.java   |    4 +-
 .../tika/parser/external/ExternalParser.java    |    4 +-
 .../apache/tika/sax/OfflineContentHandler.java  |    2 +-
 .../src/test/java/org/apache/tika/TikaTest.java |    2 +-
 .../org/apache/tika/TypeDetectionBenchmark.java |    2 +-
 .../org/apache/tika/io/TikaInputStreamTest.java |    1 +
 .../tika/sax/SecureContentHandlerTest.java      |    2 +-
 .../tika/langdetect/LanguageDetectorTest.java   |    6 +-
 .../langdetect/OptimaizeLangDetectorTest.java   |    6 +-
 .../tika-parser-cad-bundle/pom.xml              |    2 +-
 .../tika-parser-multimedia-bundle/pom.xml       |    1 -
 .../parser/ner/corenlp/CoreNLPNERecogniser.java |    5 +-
 .../parser/ner/opennlp/OpenNLPNameFinder.java   |    2 +-
 .../tika-parser-cad-module/pom.xml              |    8 +-
 .../org/apache/tika/parser/dwg/DWGParser.java   |    4 +-
 .../org/apache/tika/parser/prt/PRTParser.java   |    2 +-
 .../tika-parser-multimedia-module/pom.xml       |   25 -
 .../org/apache/tika/parser/image/BPGParser.java |    2 +-
 .../parser/image/ImageMetadataExtractor.java    |    2 +-
 .../org/apache/tika/parser/image/PSDParser.java |    2 +-
 36 files changed, 173 insertions(+), 1995 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
index 52af12b..2d785b7 100644
--- a/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
+++ b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
@@ -30,7 +30,7 @@ import java.util.Date;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.Parser;

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/pom.xml
----------------------------------------------------------------------
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index 2c61616..e1261bb 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -36,6 +36,7 @@
   <properties>
     <!-- NOTE: sync codec version with POI -->
     <codec.version>1.10</codec.version>
+    <commons-io.version>2.4</commons-io.version>
   </properties>
 
   <dependencies>
@@ -44,6 +45,11 @@
       <artifactId>commons-codec</artifactId>
       <version>${codec.version}</version>
     </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons-io.version}</version>
+    </dependency>
     <!-- Optional OSGi dependencies, used only when running within OSGi -->
     <dependency>
       <groupId>org.osgi</groupId>
@@ -125,7 +131,8 @@
             <Bundle-Activator>org.apache.tika.config.TikaActivator</Bundle-Activator>
             <Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy>
             <Embed-Dependency>
-              commons-codec
+              commons-codec,
+              commons-io
             </Embed-Dependency>
           </instructions>
         </configuration>

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/Tika.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/Tika.java b/tika-core/src/main/java/org/apache/tika/Tika.java
index c0cf281..7eaac29 100644
--- a/tika-core/src/main/java/org/apache/tika/Tika.java
+++ b/tika-core/src/main/java/org/apache/tika/Tika.java
@@ -22,13 +22,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.net.URL;
+import java.nio.charset.Charset;
 import java.nio.file.Path;
 import java.util.Properties;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.language.translate.Translator;
 import org.apache.tika.metadata.Metadata;
@@ -367,7 +368,7 @@ public class Tika {
      */
     public String translate(InputStream text, String sourceLanguage, String targetLanguage){
         try {
-            return translator.translate(IOUtils.toString(text), sourceLanguage, targetLanguage);
+            return translator.translate(IOUtils.toString(text, Charset.defaultCharset()), sourceLanguage, targetLanguage);
         } catch (Exception e){
             throw new IllegalStateException("Error translating data.", e);
         }
@@ -383,7 +384,7 @@ public class Tika {
      */
     public String translate(InputStream text, String targetLanguage){
         try {
-            return translator.translate(IOUtils.toString(text), targetLanguage);
+            return translator.translate(IOUtils.toString(text, Charset.defaultCharset()), targetLanguage);
         } catch (Exception e){
             throw new IllegalStateException("Error translating data.", e);
         }

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
index 74d994d..5069483 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
@@ -23,7 +23,7 @@ import javax.xml.XMLConstants;
 import javax.xml.namespace.QName;
 import javax.xml.parsers.SAXParserFactory;
 
-import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.tika.sax.OfflineContentHandler;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
index 84dc5da..f59b61f 100644
--- a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
+++ b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
@@ -30,7 +30,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index d67f086..dcae874 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -23,7 +23,7 @@ import java.io.InputStream;
 
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
index 6a1fde9..b41cb22 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
@@ -30,7 +30,7 @@ import java.util.jar.JarOutputStream;
 import java.util.zip.ZipEntry;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
 import org.xml.sax.ContentHandler;
 
 import static java.nio.charset.StandardCharsets.UTF_8;

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java b/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
deleted file mode 100644
index 3033eea..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.InputStream;
-
-/**
- * Proxy stream that prevents the underlying input stream from being closed.
- * <p>
- * This class is typically used in cases where an input stream needs to be
- * passed to a component that wants to explicitly close the stream even if
- * more input would still be available to other components.
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class CloseShieldInputStream extends ProxyInputStream {
-
-    /**
-     * Creates a proxy that shields the given input stream from being
-     * closed.
-     *
-     * @param in underlying input stream
-     */
-    public CloseShieldInputStream(InputStream in) {
-        super(in);
-    }
-
-    /**
-     * Replaces the underlying input stream with a {@link ClosedInputStream}
-     * sentinel. The original input stream will remain open, but this proxy
-     * will appear closed.
-     */
-    @Override
-    public void close() {
-        in = new ClosedInputStream();
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java b/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
deleted file mode 100644
index f7c192c..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.InputStream;
-
-/**
- * Closed input stream. This stream returns -1 to all attempts to read
- * something from the stream.
- * <p>
- * Typically uses of this class include testing for corner cases in methods
- * that accept input streams and acting as a sentinel value instead of a
- * <code>null</code> input stream.
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class ClosedInputStream extends InputStream {
-
-    /**
-     * Returns -1 to indicate that the stream is closed.
-     *
-     * @return always -1
-     */
-    @Override
-    public int read() {
-        return -1;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java b/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
deleted file mode 100644
index ffb4d6b..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A decorating input stream that counts the number of bytes that have passed
- * through the stream so far.
- * <p>
- * A typical use case would be during debugging, to ensure that data is being
- * read as expected.
- *
- * @author Marcelo Liberato
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class CountingInputStream extends ProxyInputStream {
-
-    /** The count of bytes that have passed. */
-    private long count;
-
-    /**
-     * Constructs a new CountingInputStream.
-     *
-     * @param in  the InputStream to delegate to
-     */
-    public CountingInputStream(InputStream in) {
-        super(in);
-    }
-
-    //-----------------------------------------------------------------------
-    /**
-     * Reads a number of bytes into the byte array, keeping count of the
-     * number read.
-     *
-     * @param b  the buffer into which the data is read, not null
-     * @return the total number of bytes read into the buffer, -1 if end of stream
-     * @throws IOException if an I/O error occurs
-     * @see java.io.InputStream#read(byte[]) 
-     */
-    @Override
-    public int read(byte[] b) throws IOException {
-        int found = super.read(b);
-        this.count += (found >= 0) ? found : 0;
-        return found;
-    }
-
-    /**
-     * Reads a number of bytes into the byte array at a specific offset,
-     * keeping count of the number read.
-     *
-     * @param b  the buffer into which the data is read, not null
-     * @param off  the start offset in the buffer
-     * @param len  the maximum number of bytes to read
-     * @return the total number of bytes read into the buffer, -1 if end of stream
-     * @throws IOException if an I/O error occurs
-     * @see java.io.InputStream#read(byte[], int, int)
-     */
-    @Override
-    public int read(byte[] b, int off, int len) throws IOException {
-        int found = super.read(b, off, len);
-        this.count += (found >= 0) ? found : 0;
-        return found;
-    }
-
-    /**
-     * Reads the next byte of data adding to the count of bytes received
-     * if a byte is successfully read. 
-     *
-     * @return the byte read, -1 if end of stream
-     * @throws IOException if an I/O error occurs
-     * @see java.io.InputStream#read()
-     */
-    @Override
-    public int read() throws IOException {
-        int found = super.read();
-        this.count += (found >= 0) ? 1 : 0;
-        return found;
-    }
-
-    /**
-     * Skips the stream over the specified number of bytes, adding the skipped
-     * amount to the count.
-     *
-     * @param length  the number of bytes to skip
-     * @return the actual number of bytes skipped
-     * @throws IOException if an I/O error occurs
-     * @see java.io.InputStream#skip(long)
-     */
-    @Override
-    public long skip(final long length) throws IOException {
-        final long skip = super.skip(length);
-        this.count += skip;
-        return skip;
-    }
-
-    //-----------------------------------------------------------------------
-    /**
-     * The number of bytes that have passed through this stream.
-     * <p>
-     * NOTE: From v1.3 this method throws an ArithmeticException if the
-     * count is greater than can be expressed by an <code>int</code>.
-     * See {@link #getByteCount()} for a method using a <code>long</code>.
-     *
-     * @return the number of bytes accumulated
-     * @throws ArithmeticException if the byte count is too large
-     */
-    public synchronized int getCount() {
-        long result = getByteCount();
-        if (result > Integer.MAX_VALUE) {
-            throw new ArithmeticException("The byte count " + result + " is too large to be converted to an int");
-        }
-        return (int) result;
-    }
-
-    /** 
-     * Set the byte count back to 0. 
-     * <p>
-     * NOTE: From v1.3 this method throws an ArithmeticException if the
-     * count is greater than can be expressed by an <code>int</code>.
-     * See {@link #resetByteCount()} for a method using a <code>long</code>.
-     *
-     * @return the count previous to resetting
-     * @throws ArithmeticException if the byte count is too large
-     */
-    public synchronized int resetCount() {
-        long result = resetByteCount();
-        if (result > Integer.MAX_VALUE) {
-            throw new ArithmeticException("The byte count " + result + " is too large to be converted to an int");
-        }
-        return (int) result;
-    }
-
-    /**
-     * The number of bytes that have passed through this stream.
-     * <p>
-     * NOTE: This method is an alternative for <code>getCount()</code>
-     * and was added because that method returns an integer which will
-     * result in incorrect count for files over 2GB.
-     *
-     * @return the number of bytes accumulated
-     * @since Commons IO 1.3
-     */
-    public synchronized long getByteCount() {
-        return this.count;
-    }
-
-    /** 
-     * Set the byte count back to 0. 
-     * <p>
-     * NOTE: This method is an alternative for <code>resetCount()</code>
-     * and was added because that method returns an integer which will
-     * result in incorrect count for files over 2GB.
-     *
-     * @return the count previous to resetting
-     * @since Commons IO 1.3
-     */
-    public synchronized long resetByteCount() {
-        long tmp = this.count;
-        this.count = 0;
-        return tmp;
-    }
-
-    public String toString() {
-       return "Tika Counting InputStream wrapping " + in.toString(); 
-   }
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java b/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
deleted file mode 100644
index 9abaf41..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.IOException;
-
-/**
- * Subclasses IOException with the {@link Throwable} constructors missing before Java 6. If you are using Java 6,
- * consider this class deprecated and use {@link IOException}.
- * 
- * @author <a href="http://commons.apache.org/io/">Apache Commons IO</a>
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class IOExceptionWithCause extends IOException {
-
-    /**
-     * Defines the serial version UID.
-     */
-    private static final long serialVersionUID = 1L;
-
-    /**
-     * Constructs a new instance with the given message and cause.
-     * <p>
-     * As specified in {@link Throwable}, the message in the given <code>cause</code> is not used in this instance's
-     * message.
-     * </p>
-     * 
-     * @param message
-     *            the message (see {@link #getMessage()})
-     * @param cause
-     *            the cause (see {@link #getCause()}). A <code>null</code> value is allowed.
-     */
-    public IOExceptionWithCause(String message, Throwable cause) {
-        super(message);
-        this.initCause(cause);
-    }
-
-    /**
-     * Constructs a new instance with the given cause.
-     * <p>
-     * The message is set to <code>cause==null ? null : cause.toString()</code>, which by default contains the class
-     * and message of <code>cause</code>. This constructor is useful for call sites that just wrap another throwable.
-     * </p>
-     * 
-     * @param cause
-     *            the cause (see {@link #getCause()}). A <code>null</code> value is allowed.
-     */
-    public IOExceptionWithCause(Throwable cause) {
-        super(cause == null ? null : cause.toString());
-        this.initCause(cause);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/IOUtils.java b/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
deleted file mode 100644
index 11d3bd3..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
+++ /dev/null
@@ -1,1186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.CharArrayWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.nio.channels.Channel;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * General IO stream manipulation utilities.
- * <p>
- * This class provides static utility methods for input/output operations.
- * <ul>
- * <li>closeQuietly - these methods close a stream ignoring nulls and exceptions
- * <li>toXxx/read - these methods read data from a stream
- * <li>write - these methods write data to a stream
- * <li>copy - these methods copy all the data from one stream to another
- * <li>contentEquals - these methods compare the content of two streams
- * </ul>
- * <p>
- * The byte-to-char methods and char-to-byte methods involve a conversion step.
- * Two methods are provided in each case, one that uses the platform default
- * encoding and the other which allows you to specify an encoding. You are
- * encouraged to always specify an encoding because relying on the platform
- * default can lead to unexpected results, for example when moving from
- * development to production.
- * <p>
- * All the methods in this class that read a stream are buffered internally.
- * This means that there is no cause to use a <code>BufferedInputStream</code>
- * or <code>BufferedReader</code>. The default buffer size of 4K has been shown
- * to be efficient in tests.
- * <p>
- * Wherever possible, the methods in this class do <em>not</em> flush or close
- * the stream. This is to avoid making non-portable assumptions about the
- * streams' origin and further use. Thus the caller is still responsible for
- * closing streams after use.
- * <p>
- * Origin of code: Excalibur.
- *
- * @author Peter Donald
- * @author Jeff Turner
- * @author Matthew Hawthorne
- * @author Stephen Colebourne
- * @author Gareth Davis
- * @author Ian Springer
- * @author Niall Pemberton
- * @author Sandy McArthur
- * @since Apache Tika 0.4, copied (partially) from Commons IO 1.4
- */
-public class IOUtils {
-    // TODO Remove this when we've finished TIKA-1706 and TIKA-1710
-    public static final Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
-
-    /**
-     * The default buffer size to use.
-     */
-    private static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
-
-    /**
-     * Instances should NOT be constructed in standard programming.
-     */
-    public IOUtils() {
-        super();
-    }
-
-    //-----------------------------------------------------------------------
-    /**
-     * Unconditionally close an <code>Reader</code>.
-     * <p>
-     * Equivalent to {@link Reader#close()}, except any exceptions will be ignored.
-     * This is typically used in finally blocks.
-     *
-     * @param input  the Reader to close, may be null or already closed
-     */
-    public static void closeQuietly(Reader input) {
-        try {
-            if (input != null) {
-                input.close();
-            }
-        } catch (IOException ioe) {
-            // ignore
-        }
-    }
-
-    /**
-     * Unconditionally close a <code>Channel</code>.
-     * <p>
-     * Equivalent to {@link Channel#close()}, except any exceptions will be ignored.
-     * This is typically used in finally blocks.
-     *
-     * @param channel the Channel to close, may be null or already closed
-     */
-    public static void closeQuietly(Channel channel) {
-        try {
-            if (channel != null) {
-                channel.close();
-            }
-        } catch (IOException ioe) {
-            // ignore
-        }
-    }
-
-    /**
-     * Unconditionally close a <code>Writer</code>.
-     * <p>
-     * Equivalent to {@link Writer#close()}, except any exceptions will be ignored.
-     * This is typically used in finally blocks.
-     *
-     * @param output  the Writer to close, may be null or already closed
-     */
-    public static void closeQuietly(Writer output) {
-        try {
-            if (output != null) {
-                output.close();
-            }
-        } catch (IOException ioe) {
-            // ignore
-        }
-    }
-
-    /**
-     * Unconditionally close an <code>InputStream</code>.
-     * <p>
-     * Equivalent to {@link InputStream#close()}, except any exceptions will be ignored.
-     * This is typically used in finally blocks.
-     *
-     * @param input  the InputStream to close, may be null or already closed
-     */
-    public static void closeQuietly(InputStream input) {
-        try {
-            if (input != null) {
-                input.close();
-            }
-        } catch (IOException ioe) {
-            // ignore
-        }
-    }
-
-    /**
-     * Close an <code>OutputStream</code>, discarding any <code>IOException</code>
-     * raised by the close.
-     * <p>
-     * Equivalent to {@link OutputStream#close()}, except that a null argument is
-     * a no-op and an <code>IOException</code> is caught and ignored. Unchecked
-     * exceptions are not caught. This is typically used in finally blocks.
-     * <p>
-     * Because the exception is discarded, a failure to write pending data
-     * during close goes unreported to the caller.
-     *
-     * @param output  the OutputStream to close, may be null or already closed
-     */
-    public static void closeQuietly(OutputStream output) {
-        try {
-            if (output != null) {
-                output.close();
-            }
-        } catch (IOException ioe) {
-            // Deliberately swallowed: best-effort cleanup must not throw.
-        }
-    }
-
-    // read toByteArray
-    //-----------------------------------------------------------------------
-    /**
-     * Get the contents of an <code>InputStream</code> as a <code>byte[]</code>.
-     * <p>
-     * The stream is read to its end through
-     * {@link #copy(InputStream, OutputStream)} into an in-memory
-     * <code>ByteArrayOutputStream</code>. That copy uses its own buffer, so
-     * there is no need to wrap the input in a <code>BufferedInputStream</code>.
-     * This method does not close the input.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @return a new byte array holding every byte read from the stream
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     */
-    public static byte[] toByteArray(InputStream input) throws IOException {
-        ByteArrayOutputStream output = new ByteArrayOutputStream();
-        copy(input, output);
-        return output.toByteArray();
-    }
-
-    /**
-     * Get the contents of a <code>Reader</code> as a <code>byte[]</code>.
-     * <p>
-     * The characters are encoded by <code>copy(Reader, OutputStream)</code>,
-     * so the charset is whatever that method applies.
-     * NOTE(review): this comment used to promise the platform default encoding,
-     * but the overloads of this class that encode directly pass
-     * <code>UTF_8</code> - confirm which charset the delegate really uses.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @return the requested byte array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     */
-    public static byte[] toByteArray(Reader input) throws IOException {
-        ByteArrayOutputStream output = new ByteArrayOutputStream();
-        copy(input, output);
-        return output.toByteArray();
-    }
-
-    /**
-     * Get the contents of a <code>Reader</code> as a <code>byte[]</code>
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * The encoding is passed straight to
-     * <code>copy(Reader, OutputStream, String)</code>, which performs the
-     * conversion.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @param encoding  the encoding to use; null handling is decided by the
-     * delegate (NOTE(review): presumably the same <code>UTF_8</code> fallback
-     * the other overloads use, not the platform default - confirm)
-     * @return the requested byte array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static byte[] toByteArray(Reader input, String encoding)
-            throws IOException {
-        ByteArrayOutputStream output = new ByteArrayOutputStream();
-        copy(input, output, encoding);
-        return output.toByteArray();
-    }
-
-    /**
-     * Get the contents of a <code>String</code> as a <code>byte[]</code>
-     * encoded with this class's <code>UTF_8</code> charset.
-     * <p>
-     * Note that this is <em>not</em> the same as {@link String#getBytes()},
-     * which uses the platform default encoding.
-     *
-     * @param input  the <code>String</code> to convert
-     * @return the requested byte array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs (never occurs)
-     * @deprecated Use {@link String#getBytes(java.nio.charset.Charset)}
-     */
-    @Deprecated
-    public static byte[] toByteArray(String input) throws IOException {
-        return input.getBytes(UTF_8);
-    }
-
-    // read char[]
-    //-----------------------------------------------------------------------
-    /**
-     * Get the contents of an <code>InputStream</code> as a character array,
-     * decoding the bytes with this class's <code>UTF_8</code> charset.
-     * <p>
-     * Decoding is done by {@link #copy(InputStream, Writer)}, which always
-     * uses <code>UTF_8</code> rather than the platform default encoding.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>. The input is not closed.
-     *
-     * @param is  the <code>InputStream</code> to read from
-     * @return the requested character array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static char[] toCharArray(InputStream is) throws IOException {
-        CharArrayWriter output = new CharArrayWriter();
-        copy(is, output);
-        return output.toCharArray();
-    }
-
-    /**
-     * Get the contents of an <code>InputStream</code> as a character array
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * Decoding is delegated to {@link #copy(InputStream, Writer, String)}.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>. The input is not closed.
-     *
-     * @param is  the <code>InputStream</code> to read from
-     * @param encoding  the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @return the requested character array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static char[] toCharArray(InputStream is, String encoding)
-            throws IOException {
-        CharArrayWriter output = new CharArrayWriter();
-        copy(is, output, encoding);
-        return output.toCharArray();
-    }
-
-    /**
-     * Get the contents of a <code>Reader</code> as a character array.
-     * <p>
-     * The reader is drained through {@link #copy(Reader, Writer)} into a
-     * <code>CharArrayWriter</code>; no charset conversion is involved.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>. The reader is not closed.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @return the requested character array
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static char[] toCharArray(Reader input) throws IOException {
-        CharArrayWriter sw = new CharArrayWriter();
-        copy(input, sw);
-        return sw.toCharArray();
-    }
-
-    // read toString
-    //-----------------------------------------------------------------------
-    /**
-     * Get the contents of an <code>InputStream</code> as a String,
-     * decoding the bytes with this class's <code>UTF_8</code> charset.
-     * <p>
-     * Decoding is done by {@link #copy(InputStream, Writer)}, which always
-     * uses <code>UTF_8</code> rather than the platform default encoding.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>. The input is not closed.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @return the requested String
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     */
-    public static String toString(InputStream input) throws IOException {
-        StringWriter sw = new StringWriter();
-        copy(input, sw);
-        return sw.toString();
-    }
-
-    /**
-     * Get the contents of an <code>InputStream</code> as a String
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * Decoding is delegated to {@link #copy(InputStream, Writer, String)}.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>. The input is not closed.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @param encoding  the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @return the requested String
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     */
-    public static String toString(InputStream input, String encoding)
-            throws IOException {
-        StringWriter sw = new StringWriter();
-        copy(input, sw, encoding);
-        return sw.toString();
-    }
-
-    /**
-     * Get the contents of a <code>Reader</code> as a String.
-     * <p>
-     * The reader is drained through {@link #copy(Reader, Writer)} into a
-     * <code>StringWriter</code>; no charset conversion is involved.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>. The reader is not closed.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @return the requested String
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     */
-    public static String toString(Reader input) throws IOException {
-        StringWriter sw = new StringWriter();
-        copy(input, sw);
-        return sw.toString();
-    }
-
-    /**
-     * Get the contents of a <code>byte[]</code> as a String,
-     * decoding the bytes with this class's <code>UTF_8</code> charset.
-     * <p>
-     * Note that this is <em>not</em> the same as
-     * {@link String#String(byte[])}, which uses the platform default encoding.
-     *
-     * @param input the byte array to read from
-     * @return the requested String
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs (never occurs)
-     * @deprecated Use {@link String#String(byte[], java.nio.charset.Charset)}
-     */
-    @Deprecated
-    public static String toString(byte[] input) throws IOException {
-        return new String(input, UTF_8);
-    }
-
-    /**
-     * Get the contents of a <code>byte[]</code> as a String
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     *
-     * @param input the byte array to read from
-     * @param encoding  the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @return the requested String
-     * @throws NullPointerException if the input is null
-     * @throws IOException if the named encoding is not supported
-     * (<code>UnsupportedEncodingException</code> from
-     * {@link String#String(byte[],String)})
-     * @deprecated Use {@link String#String(byte[],String)}
-     */
-    @Deprecated
-    public static String toString(byte[] input, String encoding)
-            throws IOException {
-        // If no encoding is specified, default to UTF-8.
-        if (encoding == null) {
-            return new String(input, UTF_8);
-        } else {
-            return new String(input, encoding);
-        }
-    }
-
-    // readLines
-    //-----------------------------------------------------------------------
-    /**
-     * Get the contents of an <code>InputStream</code> as a list of Strings,
-     * one entry per line, decoding the bytes with this class's
-     * <code>UTF_8</code> charset (not the platform default encoding).
-     * <p>
-     * The stream is wrapped in an <code>InputStreamReader</code> and handed to
-     * {@link #readLines(Reader)}, which buffers the input internally, so there
-     * is no need to use a <code>BufferedInputStream</code>.
-     *
-     * @param input  the <code>InputStream</code> to read from, not null
-     * @return the list of Strings, never null
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static List<String> readLines(InputStream input) throws IOException {
-        InputStreamReader reader = new InputStreamReader(input, UTF_8);
-        return readLines(reader);
-    }
-
-    /**
-     * Get the contents of an <code>InputStream</code> as a list of Strings,
-     * one entry per line, using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>.
-     *
-     * @param input  the <code>InputStream</code> to read from, not null
-     * @param encoding  the encoding to use; null delegates to
-     * {@link #readLines(InputStream)}, which decodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @return the list of Strings, never null
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static List<String> readLines(InputStream input, String encoding) throws IOException {
-        if (encoding == null) {
-            return readLines(input);
-        } else {
-            InputStreamReader reader = new InputStreamReader(input, encoding);
-            return readLines(reader);
-        }
-    }
-
-    /**
-     * Get the contents of a <code>Reader</code> as a list of Strings,
-     * one entry per line.
-     * <p>
-     * The reader is wrapped in a <code>BufferedReader</code> and consumed with
-     * {@link BufferedReader#readLine()} until it reports end of input, so the
-     * returned strings do not include line terminators and there is no need
-     * for the caller to supply a <code>BufferedReader</code>. The reader is
-     * not closed.
-     *
-     * @param input  the <code>Reader</code> to read from, not null
-     * @return a new, mutable list of the lines in reading order, never null;
-     * empty when the reader has no content
-     * @throws NullPointerException if the input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static List<String> readLines(Reader input) throws IOException {
-        BufferedReader reader = new BufferedReader(input);
-        List<String> list = new ArrayList<String>();
-        String line = reader.readLine();
-        while (line != null) { // readLine() returns null at end of input
-            list.add(line);
-            line = reader.readLine();
-        }
-        return list;
-    }
-
-    //-----------------------------------------------------------------------
-    /**
-     * Convert the specified CharSequence to an input stream, encoded as bytes
-     * with this class's <code>UTF_8</code> charset (not the platform default
-     * encoding).
-     * <p>
-     * The sequence is converted with <code>toString()</code> and passed to
-     * {@link #toInputStream(String)}.
-     *
-     * @param input the CharSequence to convert
-     * @return an input stream
-     * @throws NullPointerException if the input is null
-     * @since IO 2.0
-     */
-    public static InputStream toInputStream(CharSequence input) {
-        return toInputStream(input.toString());
-    }
-
-    /**
-     * Convert the specified CharSequence to an input stream, encoded as bytes
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * The sequence is converted with <code>toString()</code> and passed to
-     * {@link #toInputStream(String, String)}.
-     *
-     * @param input the CharSequence to convert
-     * @param encoding the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if the input is null
-     * @throws IOException if the encoding is invalid
-     * @return an input stream
-     * @since IO 2.0
-     */
-    public static InputStream toInputStream(CharSequence input, String encoding) throws IOException {
-        return toInputStream(input.toString(), encoding);
-    }
-
-    //-----------------------------------------------------------------------
-    /**
-     * Convert the specified string to an input stream, encoded as bytes
-     * with this class's <code>UTF_8</code> charset (not the platform default
-     * encoding).
-     * <p>
-     * The whole string is encoded up front; the returned stream is a
-     * <code>ByteArrayInputStream</code> over those bytes.
-     *
-     * @param input the string to convert
-     * @return an input stream
-     * @throws NullPointerException if the input is null
-     * @since Commons IO 1.1
-     */
-    public static InputStream toInputStream(String input) {
-        byte[] bytes = input.getBytes(UTF_8);
-        return new ByteArrayInputStream(bytes);
-    }
-
-    /**
-     * Convert the specified string to an input stream, encoded as bytes
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * The whole string is encoded up front; the returned stream is a
-     * <code>ByteArrayInputStream</code> over those bytes.
-     *
-     * @param input the string to convert
-     * @param encoding the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if the input is null
-     * @throws IOException if the encoding is invalid
-     * @return an input stream
-     * @since Commons IO 1.1
-     */
-    public static InputStream toInputStream(String input, String encoding) throws IOException {
-        byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes(UTF_8);
-        return new ByteArrayInputStream(bytes);
-    }
-
-    // write byte[]
-    //-----------------------------------------------------------------------
-    /**
-     * Writes bytes from a <code>byte[]</code> to an <code>OutputStream</code>.
-     * <p>
-     * The whole array is written with a single
-     * {@link OutputStream#write(byte[])} call. The stream is neither flushed
-     * nor closed here.
-     *
-     * @param data  the byte array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(byte[] data, OutputStream output)
-            throws IOException {
-        if (data != null) {
-            output.write(data);
-        }
-    }
-
-    /**
-     * Writes bytes from a <code>byte[]</code> to chars on a <code>Writer</code>,
-     * decoding them with this class's <code>UTF_8</code> charset (not the
-     * platform default encoding).
-     * <p>
-     * This method uses {@link String#String(byte[], java.nio.charset.Charset)}.
-     *
-     * @param data  the byte array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(byte[] data, Writer output) throws IOException {
-        if (data != null) {
-            output.write(new String(data, UTF_8));
-        }
-    }
-
-    /**
-     * Writes bytes from a <code>byte[]</code> to chars on a <code>Writer</code>
-     * using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method uses {@link String#String(byte[], String)}.
-     *
-     * @param data  the byte array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @param encoding  the encoding to use; null delegates to
-     * {@link #write(byte[], Writer)}, which decodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static void write(byte[] data, Writer output, String encoding)
-            throws IOException {
-        if (data != null) {
-            if (encoding == null) {
-                write(data, output);
-            } else {
-                output.write(new String(data, encoding));
-            }
-        }
-    }
-
-    // write char[]
-    //-----------------------------------------------------------------------
-    /**
-     * Writes chars from a <code>char[]</code> to a <code>Writer</code>.
-     * <p>
-     * The characters are passed to {@link Writer#write(char[])} unchanged; no
-     * character encoding is involved. The writer is neither flushed nor
-     * closed here.
-     *
-     * @param data  the char array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(char[] data, Writer output) throws IOException {
-        if (data != null) {
-            output.write(data);
-        }
-    }
-
-    /**
-     * Writes chars from a <code>char[]</code> to bytes on an
-     * <code>OutputStream</code>, encoding them with this class's
-     * <code>UTF_8</code> charset (not the platform default encoding).
-     * <p>
-     * This method uses {@link String#String(char[])} and
-     * {@link String#getBytes(java.nio.charset.Charset)}.
-     *
-     * @param data  the char array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(char[] data, OutputStream output)
-            throws IOException {
-        if (data != null) {
-            output.write(new String(data).getBytes(UTF_8));
-        }
-    }
-
-    /**
-     * Writes chars from a <code>char[]</code> to bytes on an
-     * <code>OutputStream</code> using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method uses {@link String#String(char[])} and
-     * {@link String#getBytes(String)}.
-     *
-     * @param data  the char array to write, do not modify during output,
-     * null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @param encoding  the encoding to use; null delegates to
-     * {@link #write(char[], OutputStream)}, which encodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static void write(char[] data, OutputStream output, String encoding)
-            throws IOException {
-        if (data != null) {
-            if (encoding == null) {
-                write(data, output);
-            } else {
-                output.write(new String(data).getBytes(encoding));
-            }
-        }
-    }
-
-    // write CharSequence
-    //-----------------------------------------------------------------------
-    /**
-     * Writes chars from a <code>CharSequence</code> to a <code>Writer</code>.
-     * <p>
-     * The sequence is converted with <code>toString()</code> and handed to
-     * {@link #write(String, Writer)}.
-     *
-     * @param data  the <code>CharSequence</code> to write, null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 2.0
-     */
-    public static void write(CharSequence data, Writer output) throws IOException {
-        if (data != null) {
-            write(data.toString(), output);
-        }
-    }
-
-    /**
-     * Writes chars from a <code>CharSequence</code> to bytes on an
-     * <code>OutputStream</code>, encoding them with this class's
-     * <code>UTF_8</code> charset (not the platform default encoding).
-     * <p>
-     * The sequence is converted with <code>toString()</code> and handed to
-     * {@link #write(String, OutputStream)}, which performs the encoding.
-     *
-     * @param data  the <code>CharSequence</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 2.0
-     */
-    public static void write(CharSequence data, OutputStream output)
-            throws IOException {
-        if (data != null) {
-            write(data.toString(), output);
-        }
-    }
-
-    /**
-     * Writes chars from a <code>CharSequence</code> to bytes on an
-     * <code>OutputStream</code> using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * The sequence is converted with <code>toString()</code> and handed to
-     * {@link #write(String, OutputStream, String)}, which performs the
-     * encoding.
-     *
-     * @param data  the <code>CharSequence</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @param encoding  the encoding to use; null selects this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 2.0
-     */
-    public static void write(CharSequence data, OutputStream output, String encoding)
-            throws IOException {
-        if (data != null) {
-            write(data.toString(), output, encoding);
-        }
-    }
-
-    // write String
-    //-----------------------------------------------------------------------
-    /**
-     * Writes chars from a <code>String</code> to a <code>Writer</code>.
-     * <p>
-     * The string is passed to {@link Writer#write(String)} unchanged; no
-     * character encoding is involved. The writer is neither flushed nor
-     * closed here.
-     *
-     * @param data  the <code>String</code> to write, null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(String data, Writer output) throws IOException {
-        if (data != null) {
-            output.write(data);
-        }
-    }
-
-    /**
-     * Writes chars from a <code>String</code> to bytes on an
-     * <code>OutputStream</code>, encoding them with this class's
-     * <code>UTF_8</code> charset (not the platform default encoding).
-     * <p>
-     * This method uses {@link String#getBytes(java.nio.charset.Charset)}.
-     *
-     * @param data  the <code>String</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void write(String data, OutputStream output)
-            throws IOException {
-        if (data != null) {
-            output.write(data.getBytes(UTF_8));
-        }
-    }
-
-    /**
-     * Writes chars from a <code>String</code> to bytes on an
-     * <code>OutputStream</code> using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method uses {@link String#getBytes(String)}.
-     *
-     * @param data  the <code>String</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @param encoding  the encoding to use; null delegates to
-     * {@link #write(String, OutputStream)}, which encodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static void write(String data, OutputStream output, String encoding)
-            throws IOException {
-        if (data != null) {
-            if (encoding == null) {
-                write(data, output);
-            } else {
-                output.write(data.getBytes(encoding));
-            }
-        }
-    }
-
-    // write StringBuffer
-    //-----------------------------------------------------------------------
-    /**
-     * Writes chars from a <code>StringBuffer</code> to a <code>Writer</code>.
-     * <p>
-     * The buffer is converted with <code>toString()</code> and written as a
-     * single string; no character encoding is involved.
-     *
-     * @param data  the <code>StringBuffer</code> to write, null ignored
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     * @deprecated replaced by write(CharSequence, Writer)
-     */
-    @Deprecated
-    public static void write(StringBuffer data, Writer output)
-            throws IOException {
-        if (data != null) {
-            output.write(data.toString());
-        }
-    }
-
-    /**
-     * Writes chars from a <code>StringBuffer</code> to bytes on an
-     * <code>OutputStream</code>, encoding them with this class's
-     * <code>UTF_8</code> charset (not the platform default encoding).
-     * <p>
-     * This method uses {@link String#getBytes(java.nio.charset.Charset)}.
-     *
-     * @param data  the <code>StringBuffer</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     * @deprecated replaced by write(CharSequence, OutputStream)
-     */
-    @Deprecated
-    public static void write(StringBuffer data, OutputStream output)
-            throws IOException {
-        if (data != null) {
-            output.write(data.toString().getBytes(UTF_8));
-        }
-    }
-
-    /**
-     * Writes chars from a <code>StringBuffer</code> to bytes on an
-     * <code>OutputStream</code> using the specified character encoding.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method uses {@link String#getBytes(String)}.
-     *
-     * @param data  the <code>StringBuffer</code> to write, null ignored
-     * @param output  the <code>OutputStream</code> to write to
-     * @param encoding  the encoding to use; null delegates to the
-     * two-argument overload, which encodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if output is null and data is not null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     * @deprecated replaced by write(CharSequence, OutputStream, String)
-     */
-    @Deprecated
-    public static void write(StringBuffer data, OutputStream output,
-            String encoding) throws IOException {
-        if (data != null) {
-            if (encoding == null) {
-                write(data, output);
-            } else {
-                output.write(data.toString().getBytes(encoding));
-            }
-        }
-    }
-
-    // copy from InputStream
-    //-----------------------------------------------------------------------
-    /**
-     * Copy bytes from an <code>InputStream</code> to an
-     * <code>OutputStream</code>.
-     * <p>
-     * The work is done by {@link #copyLarge(InputStream, OutputStream)}, which
-     * buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>.
-     * <p>
-     * Large streams (over 2GB) are still copied in full, but the method then
-     * returns <code>-1</code> because the correct number of bytes cannot be
-     * represented as an int; no exception is thrown in that case. For large
-     * streams use the <code>copyLarge(InputStream, OutputStream)</code> method.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @param output  the <code>OutputStream</code> to write to
-     * @return the number of bytes copied, or <code>-1</code> if the count
-     * exceeds <code>Integer.MAX_VALUE</code>
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static int copy(InputStream input, OutputStream output) throws IOException {
-        long count = copyLarge(input, output);
-        if (count > Integer.MAX_VALUE) {
-            return -1; // sentinel: the copy succeeded but the count does not fit in an int
-        }
-        return (int) count;
-    }
-
-    /**
-     * Copy bytes from a large (over 2GB) <code>InputStream</code> to an
-     * <code>OutputStream</code>.
-     * <p>
-     * Data is moved through a temporary byte array of
-     * <code>DEFAULT_BUFFER_SIZE</code> bytes until the input reports end of
-     * stream, so there is no need to use a <code>BufferedInputStream</code>.
-     * Neither stream is flushed or closed by this method.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @param output  the <code>OutputStream</code> to write to
-     * @return the number of bytes copied
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.3
-     */
-    public static long copyLarge(InputStream input, OutputStream output)
-            throws IOException {
-        byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
-        long count = 0;
-        int n = 0;
-        while (-1 != (n = input.read(buffer))) { // read() returns -1 at end of stream
-            output.write(buffer, 0, n); // write only the n bytes actually read
-            count += n;
-        }
-        return count;
-    }
-
-    /**
-     * Copy bytes from an <code>InputStream</code> to chars on a
-     * <code>Writer</code>, decoding them with this class's <code>UTF_8</code>
-     * charset (not the platform default encoding).
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>.
-     * <p>
-     * This method uses {@link InputStreamReader} and then delegates to
-     * {@link #copy(Reader, Writer)}.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @param output  the <code>Writer</code> to write to
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void copy(InputStream input, Writer output)
-            throws IOException {
-        InputStreamReader in = new InputStreamReader(input, UTF_8);
-        copy(in, output);
-    }
-
-    /**
-     * Copy bytes from an <code>InputStream</code> to chars on a
-     * <code>Writer</code> using the specified character encoding.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedInputStream</code>.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * This method uses {@link InputStreamReader}.
-     *
-     * @param input  the <code>InputStream</code> to read from
-     * @param output  the <code>Writer</code> to write to
-     * @param encoding  the encoding to use; null delegates to
-     * {@link #copy(InputStream, Writer)}, which decodes with this class's
-     * <code>UTF_8</code> charset (not the platform default)
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs, including an unsupported
-     * encoding name
-     * @since Commons IO 1.1
-     */
-    public static void copy(InputStream input, Writer output, String encoding)
-            throws IOException {
-        if (encoding == null) {
-            copy(input, output);
-        } else {
-            InputStreamReader in = new InputStreamReader(input, encoding);
-            copy(in, output);
-        }
-    }
-
-    // copy from Reader
-    //-----------------------------------------------------------------------
-    /**
-     * Copy chars from a <code>Reader</code> to a <code>Writer</code>.
-     * <p>
-     * The work is done by {@link #copyLarge(Reader, Writer)}, which buffers
-     * the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     * <p>
-     * Large streams (over 2GB) are still copied in full, but the method then
-     * returns <code>-1</code> because the correct number of chars cannot be
-     * represented as an int; no exception is thrown in that case. For large
-     * streams use the <code>copyLarge(Reader, Writer)</code> method.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @param output  the <code>Writer</code> to write to
-     * @return the number of characters copied, or <code>-1</code> if the count
-     * exceeds <code>Integer.MAX_VALUE</code>
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static int copy(Reader input, Writer output) throws IOException {
-        long count = copyLarge(input, output);
-        if (count > Integer.MAX_VALUE) {
-            return -1; // sentinel: the copy succeeded but the count does not fit in an int
-        }
-        return (int) count;
-    }
-
-    /**
-     * Copy chars from a large (over 2GB) <code>Reader</code> to a <code>Writer</code>.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @param output  the <code>Writer</code> to write to
-     * @return the number of characters copied
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.3
-     */
-    public static long copyLarge(Reader input, Writer output) throws IOException {
-        char[] buffer = new char[DEFAULT_BUFFER_SIZE];
-        long count = 0;
-        int n = 0;
-        while (-1 != (n = input.read(buffer))) {
-            output.write(buffer, 0, n);
-            count += n;
-        }
-        return count;
-    }
-
-    /**
-     * Copy chars from a <code>Reader</code> to bytes on an
-     * <code>OutputStream</code> using the default character encoding of the
-     * platform, and calling flush.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     * <p>
-     * Due to the implementation of OutputStreamWriter, this method performs a
-     * flush.
-     * <p>
-     * This method uses {@link OutputStreamWriter}.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @param output  the <code>OutputStream</code> to write to
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void copy(Reader input, OutputStream output)
-            throws IOException {
-        OutputStreamWriter out = new OutputStreamWriter(output, UTF_8);
-        copy(input, out);
-        // XXX Unless anyone is planning on rewriting OutputStreamWriter, we
-        // have to flush here.
-        out.flush();
-    }
-
-    /**
-     * Copy chars from a <code>Reader</code> to bytes on an
-     * <code>OutputStream</code> using the specified character encoding, and
-     * calling flush.
-     * <p>
-     * This method buffers the input internally, so there is no need to use a
-     * <code>BufferedReader</code>.
-     * <p>
-     * Character encoding names can be found at
-     * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
-     * <p>
-     * Due to the implementation of OutputStreamWriter, this method performs a
-     * flush.
-     * <p>
-     * This method uses {@link OutputStreamWriter}.
-     *
-     * @param input  the <code>Reader</code> to read from
-     * @param output  the <code>OutputStream</code> to write to
-     * @param encoding  the encoding to use, null means platform default
-     * @throws NullPointerException if the input or output is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static void copy(Reader input, OutputStream output, String encoding)
-            throws IOException {
-        if (encoding == null) {
-            copy(input, output);
-        } else {
-            OutputStreamWriter out = new OutputStreamWriter(output, encoding);
-            copy(input, out);
-            // XXX Unless anyone is planning on rewriting OutputStreamWriter,
-            // we have to flush here.
-            out.flush();
-        }
-    }
-
-    // content equals
-    //-----------------------------------------------------------------------
-    /**
-     * Compare the contents of two Streams to determine if they are equal or
-     * not.
-     * <p>
-     * This method buffers the input internally using
-     * <code>BufferedInputStream</code> if they are not already buffered.
-     *
-     * @param input1  the first stream
-     * @param input2  the second stream
-     * @return true if the content of the streams are equal or they both don't
-     * exist, false otherwise
-     * @throws NullPointerException if either input is null
-     * @throws IOException if an I/O error occurs
-     */
-    public static boolean contentEquals(InputStream input1, InputStream input2)
-            throws IOException {
-        if (!(input1 instanceof BufferedInputStream)) {
-            input1 = new BufferedInputStream(input1);
-        }
-        if (!(input2 instanceof BufferedInputStream)) {
-            input2 = new BufferedInputStream(input2);
-        }
-
-        int ch = input1.read();
-        while (-1 != ch) {
-            int ch2 = input2.read();
-            if (ch != ch2) {
-                return false;
-            }
-            ch = input1.read();
-        }
-
-        int ch2 = input2.read();
-        return (ch2 == -1);
-    }
-
-    /**
-     * Compare the contents of two Readers to determine if they are equal or
-     * not.
-     * <p>
-     * This method buffers the input internally using
-     * <code>BufferedReader</code> if they are not already buffered.
-     *
-     * @param input1  the first reader
-     * @param input2  the second reader
-     * @return true if the content of the readers are equal or they both don't
-     * exist, false otherwise
-     * @throws NullPointerException if either input is null
-     * @throws IOException if an I/O error occurs
-     * @since Commons IO 1.1
-     */
-    public static boolean contentEquals(Reader input1, Reader input2)
-            throws IOException {
-        if (!(input1 instanceof BufferedReader)) {
-            input1 = new BufferedReader(input1);
-        }
-        if (!(input2 instanceof BufferedReader)) {
-            input2 = new BufferedReader(input2);
-        }
-
-        int ch = input1.read();
-        while (-1 != ch) {
-            int ch2 = input2.read();
-            if (ch != ch2) {
-                return false;
-            }
-            ch = input1.read();
-        }
-
-        int ch2 = input2.read();
-        return (ch2 == -1);
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java b/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
deleted file mode 100644
index 2647825..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A functional, light weight {@link InputStream} that emulates
- * a stream of a specified size.
- * <p>
- * This implementation provides a light weight
- * object for testing with an {@link InputStream}
- * where the contents don't matter.
- * <p>
- * One use case would be for testing the handling of
- * large {@link InputStream} as it can emulate that
- * scenario without the overhead of actually processing
- * large numbers of bytes - significantly speeding up
- * test execution times.
- * <p>
- * This implementation returns zero from the method that
- * reads a byte and leaves the array unchanged in the read
- * methods that are passed a byte array.
- * If alternative data is required the <code>processByte()</code> and
- * <code>processBytes()</code> methods can be implemented to generate
- * data, for example:
- *
- * <pre>
- *  public class TestInputStream extends NullInputStream {
- *      public TestInputStream(int size) {
- *          super(size);
- *      }
- *      protected int processByte() {
- *          return ... // return required value here
- *      }
- *      protected void processBytes(byte[] bytes, int offset, int length) {
- *          for (int i = offset; i < length; i++) {
- *              bytes[i] = ... // set array value here
- *          }
- *      }
- *  }
- * </pre>
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class NullInputStream extends InputStream {
-
-    private final long size;
-    private long position;
-    private long mark = -1;
-    private long readlimit;
-    private boolean eof;
-    private final boolean throwEofException;
-    private final boolean markSupported;
-
-    /**
-     * Create an {@link InputStream} that emulates a specified size
-     * which supports marking and does not throw EOFException.
-     *
-     * @param size The size of the input stream to emulate.
-     */
-    public NullInputStream(long size) {
-       this(size, true, false);
-    }
-
-    /**
-     * Create an {@link InputStream} that emulates a specified
-     * size with option settings.
-     *
-     * @param size The size of the input stream to emulate.
-     * @param markSupported Whether this instance will support
-     * the <code>mark()</code> functionality.
-     * @param throwEofException Whether this implementation
-     * will throw an {@link EOFException} or return -1 when the
-     * end of file is reached.
-     */
-    public NullInputStream(long size, boolean markSupported, boolean throwEofException) {
-       this.size = size;
-       this.markSupported = markSupported;
-       this.throwEofException = throwEofException;
-    }
-
-    /**
-     * Return the current position.
-     *
-     * @return the current position.
-     */
-    public long getPosition() {
-        return position;
-    }
-
-    /**
-     * Return the size this {@link InputStream} emulates.
-     *
-     * @return The size of the input stream to emulate.
-     */
-    public long getSize() {
-        return size;
-    }
-
-    /**
-     * Return the number of bytes that can be read.
-     *
-     * @return The number of bytes that can be read.
-     */
-    @Override
-    public int available() {
-        long avail = size - position;
-        if (avail <= 0) {
-            return 0;
-        } else if (avail > Integer.MAX_VALUE) {
-            return Integer.MAX_VALUE;
-        } else {
-            return (int)avail;
-        }
-    }
-
-    /**
-     * Close this input stream - resets the internal state to
-     * the initial values.
-     *
-     * @throws IOException If an error occurs.
-     */
-    @Override
-    public void close() throws IOException {
-        eof = false;
-        position = 0;
-        mark = -1;
-    }
-
-    /**
-     * Mark the current position.
-     *
-     * @param readlimit The number of bytes before this marked position
-     * is invalid.
-     * @throws UnsupportedOperationException if mark is not supported.
-     */
-    @Override
-    public synchronized void mark(int readlimit) {
-        if (!markSupported) {
-            throw new UnsupportedOperationException("Mark not supported");
-        }
-        mark = position;
-        this.readlimit = readlimit;
-    }
-
-    /**
-     * Indicates whether <i>mark</i> is supported.
-     *
-     * @return Whether <i>mark</i> is supported or not.
-     */
-    @Override
-    public boolean markSupported() {
-        return markSupported;
-    }
-
-    /**
-     * Read a byte.
-     *
-     * @return Either The byte value returned by <code>processByte()</code>
-     * or <code>-1</code> if the end of file has been reached and
-     * <code>throwEofException</code> is set to <code>false</code>.
-     * @throws EOFException if the end of file is reached and
-     * <code>throwEofException</code> is set to <code>true</code>.
-     * @throws IOException if trying to read past the end of file.
-     */
-    @Override
-    public int read() throws IOException {
-        if (eof) {
-            throw new IOException("Read after end of file");
-        }
-        if (position == size) {
-            return doEndOfFile();
-        }
-        position++;
-        return processByte();
-    }
-
-    /**
-     * Read some bytes into the specified array.
-     *
-     * @param bytes The byte array to read into
-     * @return The number of bytes read or <code>-1</code>
-     * if the end of file has been reached and
-     * <code>throwEofException</code> is set to <code>false</code>.
-     * @throws EOFException if the end of file is reached and
-     * <code>throwEofException</code> is set to <code>true</code>.
-     * @throws IOException if trying to read past the end of file.
-     */
-    @Override
-    public int read(byte[] bytes) throws IOException {
-        return read(bytes, 0, bytes.length);
-    }
-
-    /**
-     * Read the specified number bytes into an array.
-     *
-     * @param bytes The byte array to read into.
-     * @param offset The offset to start reading bytes into.
-     * @param length The number of bytes to read.
-     * @return The number of bytes read or <code>-1</code>
-     * if the end of file has been reached and
-     * <code>throwEofException</code> is set to <code>false</code>.
-     * @throws EOFException if the end of file is reached and
-     * <code>throwEofException</code> is set to <code>true</code>.
-     * @throws IOException if trying to read past the end of file.
-     */
-    @Override
-    public int read(byte[] bytes, int offset, int length) throws IOException {
-        if (eof) {
-            throw new IOException("Read after end of file");
-        }
-        if (position == size) {
-            return doEndOfFile();
-        }
-        position += length;
-        int returnLength = length;
-        if (position > size) {
-            returnLength = length - (int)(position - size);
-            position = size;
-        }
-        processBytes(bytes, offset, returnLength);
-        return returnLength;
-    }
-
-    /**
-     * Reset the stream to the point when mark was last called.
-     *
-     * @throws UnsupportedOperationException if mark is not supported.
-     * @throws IOException If no position has been marked
-     * or the read limit has been exceed since the last position was
-     * marked.
-     */
-    @Override
-    public synchronized void reset() throws IOException {
-        if (!markSupported) {
-            throw new UnsupportedOperationException("Mark not supported");
-        }
-        if (mark < 0) {
-            throw new IOException("No position has been marked");
-        }
-        if (position > (mark + readlimit)) {
-            throw new IOException("Marked position [" + mark +
-                    "] is no longer valid - passed the read limit [" +
-                    readlimit + "]");
-        }
-        position = mark;
-        eof = false;
-    }
-
-    /**
-     * Skip a specified number of bytes.
-     *
-     * @param numberOfBytes The number of bytes to skip.
-     * @return The number of bytes skipped or <code>-1</code>
-     * if the end of file has been reached and
-     * <code>throwEofException</code> is set to <code>false</code>.
-     * @throws EOFException if the end of file is reached and
-     * <code>throwEofException</code> is set to <code>true</code>.
-     * @throws IOException if trying to read past the end of file.
-     */
-    @Override
-    public long skip(long numberOfBytes) throws IOException {
-        if (eof) {
-            throw new IOException("Skip after end of file");
-        }
-        if (position == size) {
-            return doEndOfFile();
-        }
-        position += numberOfBytes;
-        long returnLength = numberOfBytes;
-        if (position > size) {
-            returnLength = numberOfBytes - (position - size);
-            position = size;
-        }
-        return returnLength;
-    }
-
-    /**
-     * Return a byte value for the  <code>read()</code> method.
-     * <p>
-     * This implementation returns zero.
-     *
-     * @return This implementation always returns zero.
-     */
-    protected int processByte() {
-        // do nothing - overridable by subclass
-        return 0;
-    }
-
-    /**
-     * Process the bytes for the <code>read(byte[], offset, length)</code>
-     * method.
-     * <p>
-     * This implementation leaves the byte array unchanged.
-     *
-     * @param bytes The byte array
-     * @param offset The offset to start at.
-     * @param length The number of bytes.
-     */
-    protected void processBytes(byte[] bytes, int offset, int length) {
-        // do nothing - overridable by subclass
-    }
-
-    /**
-     * Handle End of File.
-     *
-     * @return <code>-1</code> if <code>throwEofException</code> is
-     * set to <code>false</code>
-     * @throws EOFException if <code>throwEofException</code> is set
-     * to <code>true</code>.
-     */
-    private int doEndOfFile() throws EOFException {
-        eof = true;
-        if (throwEofException) {
-            throw new EOFException();
-        }
-        return -1;
-    }
-
-}

http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java b/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
deleted file mode 100644
index 519bc95..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- * 
- *      http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
- 
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * This OutputStream writes all data to the famous <b>/dev/null</b>.
- * <p>
- * This output stream has no destination (file/socket etc.) and all
- * bytes written to it are ignored and lost.
- * 
- * @author Jeremias Maerki
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class NullOutputStream extends OutputStream {
-    
-    /**
-     * A singleton.
-     */
-    public static final NullOutputStream NULL_OUTPUT_STREAM = new NullOutputStream();
-
-    /**
-     * Does nothing - output to <code>/dev/null</code>.
-     * @param b The bytes to write
-     * @param off The start offset
-     * @param len The number of bytes to write
-     */
-    @Override
-    public void write(byte[] b, int off, int len) {
-        //to /dev/null
-    }
-
-    /**
-     * Does nothing - output to <code>/dev/null</code>.
-     * @param b The byte to write
-     */
-    @Override
-    public void write(int b) {
-        //to /dev/null
-    }
-
-    /**
-     * Does nothing - output to <code>/dev/null</code>.
-     * @param b The bytes to write
-     * @throws IOException never
-     */
-    @Override
-    public void write(byte[] b) throws IOException {
-        //to /dev/null
-    }
-
-}