You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by bo...@apache.org on 2016/03/31 06:10:39 UTC
[2/2] tika git commit: TIKA-1915 and TIKA-1706 - Remove POI. Replace
with commons-io+tika-core
TIKA-1915 and TIKA-1706 - Remove POI. Replace with commons-io+tika-core
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/05f4af30
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/05f4af30
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/05f4af30
Branch: refs/heads/2.x
Commit: 05f4af3002f1f376095f6b4810d505ea50d08b3c
Parents: 28dcca9
Author: Bob Paulin <bo...@apache.org>
Authored: Wed Mar 30 23:10:40 2016 -0500
Committer: Bob Paulin <bo...@apache.org>
Committed: Wed Mar 30 23:10:40 2016 -0500
----------------------------------------------------------------------
.../apache/tika/parser/mock/MockParserTest.java | 2 +-
tika-core/pom.xml | 9 +-
.../src/main/java/org/apache/tika/Tika.java | 7 +-
.../apache/tika/detect/XmlRootExtractor.java | 2 +-
.../apache/tika/embedder/ExternalEmbedder.java | 2 +-
.../ParsingEmbeddedDocumentExtractor.java | 2 +-
.../java/org/apache/tika/fork/ForkClient.java | 2 +-
.../apache/tika/io/CloseShieldInputStream.java | 52 -
.../org/apache/tika/io/ClosedInputStream.java | 43 -
.../org/apache/tika/io/CountingInputStream.java | 182 ---
.../apache/tika/io/IOExceptionWithCause.java | 67 -
.../main/java/org/apache/tika/io/IOUtils.java | 1186 ------------------
.../org/apache/tika/io/NullInputStream.java | 337 -----
.../org/apache/tika/io/NullOutputStream.java | 68 -
.../java/org/apache/tika/io/StringUtil.java | 121 ++
.../org/apache/tika/io/TaggedIOException.java | 2 +
.../org/apache/tika/parser/NetworkParser.java | 4 +-
.../tika/parser/external/ExternalParser.java | 4 +-
.../apache/tika/sax/OfflineContentHandler.java | 2 +-
.../src/test/java/org/apache/tika/TikaTest.java | 2 +-
.../org/apache/tika/TypeDetectionBenchmark.java | 2 +-
.../org/apache/tika/io/TikaInputStreamTest.java | 1 +
.../tika/sax/SecureContentHandlerTest.java | 2 +-
.../tika/langdetect/LanguageDetectorTest.java | 6 +-
.../langdetect/OptimaizeLangDetectorTest.java | 6 +-
.../tika-parser-cad-bundle/pom.xml | 2 +-
.../tika-parser-multimedia-bundle/pom.xml | 1 -
.../parser/ner/corenlp/CoreNLPNERecogniser.java | 5 +-
.../parser/ner/opennlp/OpenNLPNameFinder.java | 2 +-
.../tika-parser-cad-module/pom.xml | 8 +-
.../org/apache/tika/parser/dwg/DWGParser.java | 4 +-
.../org/apache/tika/parser/prt/PRTParser.java | 2 +-
.../tika-parser-multimedia-module/pom.xml | 25 -
.../org/apache/tika/parser/image/BPGParser.java | 2 +-
.../parser/image/ImageMetadataExtractor.java | 2 +-
.../org/apache/tika/parser/image/PSDParser.java | 2 +-
36 files changed, 173 insertions(+), 1995 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
----------------------------------------------------------------------
diff --git a/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
index 52af12b..2d785b7 100644
--- a/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
+++ b/tika-app/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
@@ -30,7 +30,7 @@ import java.util.Date;
import org.apache.tika.TikaTest;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.Parser;
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/pom.xml
----------------------------------------------------------------------
diff --git a/tika-core/pom.xml b/tika-core/pom.xml
index 2c61616..e1261bb 100644
--- a/tika-core/pom.xml
+++ b/tika-core/pom.xml
@@ -36,6 +36,7 @@
<properties>
<!-- NOTE: sync codec version with POI -->
<codec.version>1.10</codec.version>
+ <commons-io.version>2.4</commons-io.version>
</properties>
<dependencies>
@@ -44,6 +45,11 @@
<artifactId>commons-codec</artifactId>
<version>${codec.version}</version>
</dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>${commons-io.version}</version>
+ </dependency>
<!-- Optional OSGi dependencies, used only when running within OSGi -->
<dependency>
<groupId>org.osgi</groupId>
@@ -125,7 +131,8 @@
<Bundle-Activator>org.apache.tika.config.TikaActivator</Bundle-Activator>
<Bundle-ActivationPolicy>lazy</Bundle-ActivationPolicy>
<Embed-Dependency>
- commons-codec
+ commons-codec,
+ commons-io
</Embed-Dependency>
</instructions>
</configuration>
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/Tika.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/Tika.java b/tika-core/src/main/java/org/apache/tika/Tika.java
index c0cf281..7eaac29 100644
--- a/tika-core/src/main/java/org/apache/tika/Tika.java
+++ b/tika-core/src/main/java/org/apache/tika/Tika.java
@@ -22,13 +22,14 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
+import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.Properties;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.language.translate.Translator;
import org.apache.tika.metadata.Metadata;
@@ -367,7 +368,7 @@ public class Tika {
*/
public String translate(InputStream text, String sourceLanguage, String targetLanguage){
try {
- return translator.translate(IOUtils.toString(text), sourceLanguage, targetLanguage);
+ return translator.translate(IOUtils.toString(text, Charset.defaultCharset()), sourceLanguage, targetLanguage);
} catch (Exception e){
throw new IllegalStateException("Error translating data.", e);
}
@@ -383,7 +384,7 @@ public class Tika {
*/
public String translate(InputStream text, String targetLanguage){
try {
- return translator.translate(IOUtils.toString(text), targetLanguage);
+ return translator.translate(IOUtils.toString(text, Charset.defaultCharset()), targetLanguage);
} catch (Exception e){
throw new IllegalStateException("Error translating data.", e);
}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
index 74d994d..5069483 100644
--- a/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/detect/XmlRootExtractor.java
@@ -23,7 +23,7 @@ import javax.xml.XMLConstants;
import javax.xml.namespace.QName;
import javax.xml.parsers.SAXParserFactory;
-import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.sax.OfflineContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
index 84dc5da..f59b61f 100644
--- a/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
+++ b/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
@@ -30,7 +30,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
index d67f086..dcae874 100644
--- a/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
+++ b/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
@@ -23,7 +23,7 @@ import java.io.InputStream;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
index 6a1fde9..b41cb22 100644
--- a/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
+++ b/tika-core/src/main/java/org/apache/tika/fork/ForkClient.java
@@ -30,7 +30,7 @@ import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.IOUtils;
+import org.apache.commons.io.IOUtils;
import org.xml.sax.ContentHandler;
import static java.nio.charset.StandardCharsets.UTF_8;
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java b/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
deleted file mode 100644
index 3033eea..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/CloseShieldInputStream.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.InputStream;
-
-/**
- * Proxy stream that prevents the underlying input stream from being closed.
- * <p>
- * This class is typically used in cases where an input stream needs to be
- * passed to a component that wants to explicitly close the stream even if
- * more input would still be available to other components.
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class CloseShieldInputStream extends ProxyInputStream {
-
- /**
- * Creates a proxy that shields the given input stream from being
- * closed.
- *
- * @param in underlying input stream
- */
- public CloseShieldInputStream(InputStream in) {
- super(in);
- }
-
- /**
- * Replaces the underlying input stream with a {@link ClosedInputStream}
- * sentinel. The original input stream will remain open, but this proxy
- * will appear closed.
- */
- @Override
- public void close() {
- in = new ClosedInputStream();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java b/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
deleted file mode 100644
index f7c192c..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/ClosedInputStream.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.InputStream;
-
-/**
- * Closed input stream. This stream returns -1 to all attempts to read
- * something from the stream.
- * <p>
- * Typically uses of this class include testing for corner cases in methods
- * that accept input streams and acting as a sentinel value instead of a
- * <code>null</code> input stream.
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class ClosedInputStream extends InputStream {
-
- /**
- * Returns -1 to indicate that the stream is closed.
- *
- * @return always -1
- */
- @Override
- public int read() {
- return -1;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java b/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
deleted file mode 100644
index ffb4d6b..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/CountingInputStream.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A decorating input stream that counts the number of bytes that have passed
- * through the stream so far.
- * <p>
- * A typical use case would be during debugging, to ensure that data is being
- * read as expected.
- *
- * @author Marcelo Liberato
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class CountingInputStream extends ProxyInputStream {
-
- /** The count of bytes that have passed. */
- private long count;
-
- /**
- * Constructs a new CountingInputStream.
- *
- * @param in the InputStream to delegate to
- */
- public CountingInputStream(InputStream in) {
- super(in);
- }
-
- //-----------------------------------------------------------------------
- /**
- * Reads a number of bytes into the byte array, keeping count of the
- * number read.
- *
- * @param b the buffer into which the data is read, not null
- * @return the total number of bytes read into the buffer, -1 if end of stream
- * @throws IOException if an I/O error occurs
- * @see java.io.InputStream#read(byte[])
- */
- @Override
- public int read(byte[] b) throws IOException {
- int found = super.read(b);
- this.count += (found >= 0) ? found : 0;
- return found;
- }
-
- /**
- * Reads a number of bytes into the byte array at a specific offset,
- * keeping count of the number read.
- *
- * @param b the buffer into which the data is read, not null
- * @param off the start offset in the buffer
- * @param len the maximum number of bytes to read
- * @return the total number of bytes read into the buffer, -1 if end of stream
- * @throws IOException if an I/O error occurs
- * @see java.io.InputStream#read(byte[], int, int)
- */
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- int found = super.read(b, off, len);
- this.count += (found >= 0) ? found : 0;
- return found;
- }
-
- /**
- * Reads the next byte of data adding to the count of bytes received
- * if a byte is successfully read.
- *
- * @return the byte read, -1 if end of stream
- * @throws IOException if an I/O error occurs
- * @see java.io.InputStream#read()
- */
- @Override
- public int read() throws IOException {
- int found = super.read();
- this.count += (found >= 0) ? 1 : 0;
- return found;
- }
-
- /**
- * Skips the stream over the specified number of bytes, adding the skipped
- * amount to the count.
- *
- * @param length the number of bytes to skip
- * @return the actual number of bytes skipped
- * @throws IOException if an I/O error occurs
- * @see java.io.InputStream#skip(long)
- */
- @Override
- public long skip(final long length) throws IOException {
- final long skip = super.skip(length);
- this.count += skip;
- return skip;
- }
-
- //-----------------------------------------------------------------------
- /**
- * The number of bytes that have passed through this stream.
- * <p>
- * NOTE: From v1.3 this method throws an ArithmeticException if the
- * count is greater than can be expressed by an <code>int</code>.
- * See {@link #getByteCount()} for a method using a <code>long</code>.
- *
- * @return the number of bytes accumulated
- * @throws ArithmeticException if the byte count is too large
- */
- public synchronized int getCount() {
- long result = getByteCount();
- if (result > Integer.MAX_VALUE) {
- throw new ArithmeticException("The byte count " + result + " is too large to be converted to an int");
- }
- return (int) result;
- }
-
- /**
- * Set the byte count back to 0.
- * <p>
- * NOTE: From v1.3 this method throws an ArithmeticException if the
- * count is greater than can be expressed by an <code>int</code>.
- * See {@link #resetByteCount()} for a method using a <code>long</code>.
- *
- * @return the count previous to resetting
- * @throws ArithmeticException if the byte count is too large
- */
- public synchronized int resetCount() {
- long result = resetByteCount();
- if (result > Integer.MAX_VALUE) {
- throw new ArithmeticException("The byte count " + result + " is too large to be converted to an int");
- }
- return (int) result;
- }
-
- /**
- * The number of bytes that have passed through this stream.
- * <p>
- * NOTE: This method is an alternative for <code>getCount()</code>
- * and was added because that method returns an integer which will
- * result in incorrect count for files over 2GB.
- *
- * @return the number of bytes accumulated
- * @since Commons IO 1.3
- */
- public synchronized long getByteCount() {
- return this.count;
- }
-
- /**
- * Set the byte count back to 0.
- * <p>
- * NOTE: This method is an alternative for <code>resetCount()</code>
- * and was added because that method returns an integer which will
- * result in incorrect count for files over 2GB.
- *
- * @return the count previous to resetting
- * @since Commons IO 1.3
- */
- public synchronized long resetByteCount() {
- long tmp = this.count;
- this.count = 0;
- return tmp;
- }
-
- public String toString() {
- return "Tika Counting InputStream wrapping " + in.toString();
- }
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java b/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
deleted file mode 100644
index 9abaf41..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/IOExceptionWithCause.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.IOException;
-
-/**
- * Subclasses IOException with the {@link Throwable} constructors missing before Java 6. If you are using Java 6,
- * consider this class deprecated and use {@link IOException}.
- *
- * @author <a href="http://commons.apache.org/io/">Apache Commons IO</a>
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class IOExceptionWithCause extends IOException {
-
- /**
- * Defines the serial version UID.
- */
- private static final long serialVersionUID = 1L;
-
- /**
- * Constructs a new instance with the given message and cause.
- * <p>
- * As specified in {@link Throwable}, the message in the given <code>cause</code> is not used in this instance's
- * message.
- * </p>
- *
- * @param message
- * the message (see {@link #getMessage()})
- * @param cause
- * the cause (see {@link #getCause()}). A <code>null</code> value is allowed.
- */
- public IOExceptionWithCause(String message, Throwable cause) {
- super(message);
- this.initCause(cause);
- }
-
- /**
- * Constructs a new instance with the given cause.
- * <p>
- * The message is set to <code>cause==null ? null : cause.toString()</code>, which by default contains the class
- * and message of <code>cause</code>. This constructor is useful for call sites that just wrap another throwable.
- * </p>
- *
- * @param cause
- * the cause (see {@link #getCause()}). A <code>null</code> value is allowed.
- */
- public IOExceptionWithCause(Throwable cause) {
- super(cause == null ? null : cause.toString());
- this.initCause(cause);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/IOUtils.java b/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
deleted file mode 100644
index 11d3bd3..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/IOUtils.java
+++ /dev/null
@@ -1,1186 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.CharArrayWriter;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Reader;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.nio.channels.Channel;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * General IO stream manipulation utilities.
- * <p>
- * This class provides static utility methods for input/output operations.
- * <ul>
- * <li>closeQuietly - these methods close a stream ignoring nulls and exceptions
- * <li>toXxx/read - these methods read data from a stream
- * <li>write - these methods write data to a stream
- * <li>copy - these methods copy all the data from one stream to another
- * <li>contentEquals - these methods compare the content of two streams
- * </ul>
- * <p>
- * The byte-to-char methods and char-to-byte methods involve a conversion step.
- * Two methods are provided in each case, one that uses the platform default
- * encoding and the other which allows you to specify an encoding. You are
- * encouraged to always specify an encoding because relying on the platform
- * default can lead to unexpected results, for example when moving from
- * development to production.
- * <p>
- * All the methods in this class that read a stream are buffered internally.
- * This means that there is no cause to use a <code>BufferedInputStream</code>
- * or <code>BufferedReader</code>. The default buffer size of 4K has been shown
- * to be efficient in tests.
- * <p>
- * Wherever possible, the methods in this class do <em>not</em> flush or close
- * the stream. This is to avoid making non-portable assumptions about the
- * streams' origin and further use. Thus the caller is still responsible for
- * closing streams after use.
- * <p>
- * Origin of code: Excalibur.
- *
- * @author Peter Donald
- * @author Jeff Turner
- * @author Matthew Hawthorne
- * @author Stephen Colebourne
- * @author Gareth Davis
- * @author Ian Springer
- * @author Niall Pemberton
- * @author Sandy McArthur
- * @since Apache Tika 0.4, copied (partially) from Commons IO 1.4
- */
-public class IOUtils {
- // TODO Remove this when we've finished TIKA-1706 and TIKA-1710
- public static final Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
-
- /**
- * The default buffer size to use.
- */
- private static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
-
- /**
- * Instances should NOT be constructed in standard programming.
- */
- public IOUtils() {
- super();
- }
-
- //-----------------------------------------------------------------------
- /**
- * Unconditionally close an <code>Reader</code>.
- * <p>
- * Equivalent to {@link Reader#close()}, except any exceptions will be ignored.
- * This is typically used in finally blocks.
- *
- * @param input the Reader to close, may be null or already closed
- */
- public static void closeQuietly(Reader input) {
- try {
- if (input != null) {
- input.close();
- }
- } catch (IOException ioe) {
- // ignore
- }
- }
-
- /**
- * Unconditionally close a <code>Channel</code>.
- * <p>
- * Equivalent to {@link Channel#close()}, except any exceptions will be ignored.
- * This is typically used in finally blocks.
- *
- * @param channel the Channel to close, may be null or already closed
- */
- public static void closeQuietly(Channel channel) {
- try {
- if (channel != null) {
- channel.close();
- }
- } catch (IOException ioe) {
- // ignore
- }
- }
-
- /**
- * Unconditionally close a <code>Writer</code>.
- * <p>
- * Equivalent to {@link Writer#close()}, except any exceptions will be ignored.
- * This is typically used in finally blocks.
- *
- * @param output the Writer to close, may be null or already closed
- */
- public static void closeQuietly(Writer output) {
- try {
- if (output != null) {
- output.close();
- }
- } catch (IOException ioe) {
- // ignore
- }
- }
-
- /**
- * Unconditionally close an <code>InputStream</code>.
- * <p>
- * Equivalent to {@link InputStream#close()}, except any exceptions will be ignored.
- * This is typically used in finally blocks.
- *
- * @param input the InputStream to close, may be null or already closed
- */
- public static void closeQuietly(InputStream input) {
- try {
- if (input != null) {
- input.close();
- }
- } catch (IOException ioe) {
- // ignore
- }
- }
-
- /**
- * Unconditionally close an <code>OutputStream</code>.
- * <p>
- * Equivalent to {@link OutputStream#close()}, except any exceptions will be ignored.
- * This is typically used in finally blocks.
- *
- * @param output the OutputStream to close, may be null or already closed
- */
- public static void closeQuietly(OutputStream output) {
- try {
- if (output != null) {
- output.close();
- }
- } catch (IOException ioe) {
- // ignore
- }
- }
-
- // read toByteArray
- //-----------------------------------------------------------------------
- /**
- * Get the contents of an <code>InputStream</code> as a <code>byte[]</code>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from
- * @return the requested byte array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- */
- public static byte[] toByteArray(InputStream input) throws IOException {
- ByteArrayOutputStream output = new ByteArrayOutputStream();
- copy(input, output);
- return output.toByteArray();
- }
-
- /**
- * Get the contents of a <code>Reader</code> as a <code>byte[]</code>
- * using the default character encoding of the platform.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from
- * @return the requested byte array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- */
- public static byte[] toByteArray(Reader input) throws IOException {
- ByteArrayOutputStream output = new ByteArrayOutputStream();
- copy(input, output);
- return output.toByteArray();
- }
-
- /**
- * Get the contents of a <code>Reader</code> as a <code>byte[]</code>
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from
- * @param encoding the encoding to use, null means platform default
- * @return the requested byte array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static byte[] toByteArray(Reader input, String encoding)
- throws IOException {
- ByteArrayOutputStream output = new ByteArrayOutputStream();
- copy(input, output, encoding);
- return output.toByteArray();
- }
-
- /**
- * Get the contents of a <code>String</code> as a <code>byte[]</code>
- * using the default character encoding of the platform.
- * <p>
- * This is the same as {@link String#getBytes()}.
- *
- * @param input the <code>String</code> to convert
- * @return the requested byte array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs (never occurs)
- * @deprecated Use {@link String#getBytes()}
- */
- @Deprecated
- public static byte[] toByteArray(String input) throws IOException {
- return input.getBytes(UTF_8);
- }
-
- // read char[]
- //-----------------------------------------------------------------------
- /**
- * Get the contents of an <code>InputStream</code> as a character array
- * using the default character encoding of the platform.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param is the <code>InputStream</code> to read from
- * @return the requested character array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static char[] toCharArray(InputStream is) throws IOException {
- CharArrayWriter output = new CharArrayWriter();
- copy(is, output);
- return output.toCharArray();
- }
-
- /**
- * Get the contents of an <code>InputStream</code> as a character array
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param is the <code>InputStream</code> to read from
- * @param encoding the encoding to use, null means platform default
- * @return the requested character array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static char[] toCharArray(InputStream is, String encoding)
- throws IOException {
- CharArrayWriter output = new CharArrayWriter();
- copy(is, output, encoding);
- return output.toCharArray();
- }
-
- /**
- * Get the contents of a <code>Reader</code> as a character array.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from
- * @return the requested character array
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static char[] toCharArray(Reader input) throws IOException {
- CharArrayWriter sw = new CharArrayWriter();
- copy(input, sw);
- return sw.toCharArray();
- }
-
- // read toString
- //-----------------------------------------------------------------------
- /**
- * Get the contents of an <code>InputStream</code> as a String
- * using the default character encoding of the platform.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from
- * @return the requested String
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- */
- public static String toString(InputStream input) throws IOException {
- StringWriter sw = new StringWriter();
- copy(input, sw);
- return sw.toString();
- }
-
- /**
- * Get the contents of an <code>InputStream</code> as a String
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from
- * @param encoding the encoding to use, null means platform default
- * @return the requested String
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- */
- public static String toString(InputStream input, String encoding)
- throws IOException {
- StringWriter sw = new StringWriter();
- copy(input, sw, encoding);
- return sw.toString();
- }
-
- /**
- * Get the contents of a <code>Reader</code> as a String.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from
- * @return the requested String
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- */
- public static String toString(Reader input) throws IOException {
- StringWriter sw = new StringWriter();
- copy(input, sw);
- return sw.toString();
- }
-
- /**
- * Get the contents of a <code>byte[]</code> as a String
- * using the default character encoding of the platform.
- *
- * @param input the byte array to read from
- * @return the requested String
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs (never occurs)
- * @deprecated Use {@link String#String(byte[])}
- */
- @Deprecated
- public static String toString(byte[] input) throws IOException {
- return new String(input, UTF_8);
- }
-
- /**
- * Get the contents of a <code>byte[]</code> as a String
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- *
- * @param input the byte array to read from
- * @param encoding the encoding to use, null means platform default
- * @return the requested String
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs (never occurs)
- * @deprecated Use {@link String#String(byte[],String)}
- */
- @Deprecated
- public static String toString(byte[] input, String encoding)
- throws IOException {
- // If no encoding is specified, default to UTF-8.
- if (encoding == null) {
- return new String(input, UTF_8);
- } else {
- return new String(input, encoding);
- }
- }
-
- // readLines
- //-----------------------------------------------------------------------
- /**
- * Get the contents of an <code>InputStream</code> as a list of Strings,
- * one entry per line, using the default character encoding of the platform.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from, not null
- * @return the list of Strings, never null
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static List<String> readLines(InputStream input) throws IOException {
- InputStreamReader reader = new InputStreamReader(input, UTF_8);
- return readLines(reader);
- }
-
- /**
- * Get the contents of an <code>InputStream</code> as a list of Strings,
- * one entry per line, using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from, not null
- * @param encoding the encoding to use, null means platform default
- * @return the list of Strings, never null
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static List<String> readLines(InputStream input, String encoding) throws IOException {
- if (encoding == null) {
- return readLines(input);
- } else {
- InputStreamReader reader = new InputStreamReader(input, encoding);
- return readLines(reader);
- }
- }
-
- /**
- * Get the contents of a <code>Reader</code> as a list of Strings,
- * one entry per line.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from, not null
- * @return the list of Strings, never null
- * @throws NullPointerException if the input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static List<String> readLines(Reader input) throws IOException {
- BufferedReader reader = new BufferedReader(input);
- List<String> list = new ArrayList<String>();
- String line = reader.readLine();
- while (line != null) {
- list.add(line);
- line = reader.readLine();
- }
- return list;
- }
-
- //-----------------------------------------------------------------------
- /**
- * Convert the specified CharSequence to an input stream, encoded as bytes
- * using the default character encoding of the platform.
- *
- * @param input the CharSequence to convert
- * @return an input stream
- * @since IO 2.0
- */
- public static InputStream toInputStream(CharSequence input) {
- return toInputStream(input.toString());
- }
-
- /**
- * Convert the specified CharSequence to an input stream, encoded as bytes
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- *
- * @param input the CharSequence to convert
- * @param encoding the encoding to use, null means platform default
- * @throws IOException if the encoding is invalid
- * @return an input stream
- * @since IO 2.0
- */
- public static InputStream toInputStream(CharSequence input, String encoding) throws IOException {
- return toInputStream(input.toString(), encoding);
- }
-
- //-----------------------------------------------------------------------
- /**
- * Convert the specified string to an input stream, encoded as bytes
- * using the default character encoding of the platform.
- *
- * @param input the string to convert
- * @return an input stream
- * @since Commons IO 1.1
- */
- public static InputStream toInputStream(String input) {
- byte[] bytes = input.getBytes(UTF_8);
- return new ByteArrayInputStream(bytes);
- }
-
- /**
- * Convert the specified string to an input stream, encoded as bytes
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- *
- * @param input the string to convert
- * @param encoding the encoding to use, null means platform default
- * @throws IOException if the encoding is invalid
- * @return an input stream
- * @since Commons IO 1.1
- */
- public static InputStream toInputStream(String input, String encoding) throws IOException {
- byte[] bytes = encoding != null ? input.getBytes(encoding) : input.getBytes(UTF_8);
- return new ByteArrayInputStream(bytes);
- }
-
- // write byte[]
- //-----------------------------------------------------------------------
- /**
- * Writes bytes from a <code>byte[]</code> to an <code>OutputStream</code>.
- *
- * @param data the byte array to write, do not modify during output,
- * null ignored
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(byte[] data, OutputStream output)
- throws IOException {
- if (data != null) {
- output.write(data);
- }
- }
-
- /**
- * Writes bytes from a <code>byte[]</code> to chars on a <code>Writer</code>
- * using the default character encoding of the platform.
- * <p>
- * This method uses {@link String#String(byte[])}.
- *
- * @param data the byte array to write, do not modify during output,
- * null ignored
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(byte[] data, Writer output) throws IOException {
- if (data != null) {
- output.write(new String(data, UTF_8));
- }
- }
-
- /**
- * Writes bytes from a <code>byte[]</code> to chars on a <code>Writer</code>
- * using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link String#String(byte[], String)}.
- *
- * @param data the byte array to write, do not modify during output,
- * null ignored
- * @param output the <code>Writer</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(byte[] data, Writer output, String encoding)
- throws IOException {
- if (data != null) {
- if (encoding == null) {
- write(data, output);
- } else {
- output.write(new String(data, encoding));
- }
- }
- }
-
- // write char[]
- //-----------------------------------------------------------------------
- /**
- * Writes chars from a <code>char[]</code> to a <code>Writer</code>
- * using the default character encoding of the platform.
- *
- * @param data the char array to write, do not modify during output,
- * null ignored
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(char[] data, Writer output) throws IOException {
- if (data != null) {
- output.write(data);
- }
- }
-
- /**
- * Writes chars from a <code>char[]</code> to bytes on an
- * <code>OutputStream</code>.
- * <p>
- * This method uses {@link String#String(char[])} and
- * {@link String#getBytes()}.
- *
- * @param data the char array to write, do not modify during output,
- * null ignored
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(char[] data, OutputStream output)
- throws IOException {
- if (data != null) {
- output.write(new String(data).getBytes(UTF_8));
- }
- }
-
- /**
- * Writes chars from a <code>char[]</code> to bytes on an
- * <code>OutputStream</code> using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link String#String(char[])} and
- * {@link String#getBytes(String)}.
- *
- * @param data the char array to write, do not modify during output,
- * null ignored
- * @param output the <code>OutputStream</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(char[] data, OutputStream output, String encoding)
- throws IOException {
- if (data != null) {
- if (encoding == null) {
- write(data, output);
- } else {
- output.write(new String(data).getBytes(encoding));
- }
- }
- }
-
- // write CharSequence
- //-----------------------------------------------------------------------
- /**
- * Writes chars from a <code>CharSequence</code> to a <code>Writer</code>.
- *
- * @param data the <code>CharSequence</code> to write, null ignored
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 2.0
- */
- public static void write(CharSequence data, Writer output) throws IOException {
- if (data != null) {
- write(data.toString(), output);
- }
- }
-
- /**
- * Writes chars from a <code>CharSequence</code> to bytes on an
- * <code>OutputStream</code> using the default character encoding of the
- * platform.
- * <p>
- * This method uses {@link String#getBytes()}.
- *
- * @param data the <code>CharSequence</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 2.0
- */
- public static void write(CharSequence data, OutputStream output)
- throws IOException {
- if (data != null) {
- write(data.toString(), output);
- }
- }
-
- /**
- * Writes chars from a <code>CharSequence</code> to bytes on an
- * <code>OutputStream</code> using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link String#getBytes(String)}.
- *
- * @param data the <code>CharSequence</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 2.0
- */
- public static void write(CharSequence data, OutputStream output, String encoding)
- throws IOException {
- if (data != null) {
- write(data.toString(), output, encoding);
- }
- }
-
- // write String
- //-----------------------------------------------------------------------
- /**
- * Writes chars from a <code>String</code> to a <code>Writer</code>.
- *
- * @param data the <code>String</code> to write, null ignored
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(String data, Writer output) throws IOException {
- if (data != null) {
- output.write(data);
- }
- }
-
- /**
- * Writes chars from a <code>String</code> to bytes on an
- * <code>OutputStream</code> using the default character encoding of the
- * platform.
- * <p>
- * This method uses {@link String#getBytes()}.
- *
- * @param data the <code>String</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(String data, OutputStream output)
- throws IOException {
- if (data != null) {
- output.write(data.getBytes(UTF_8));
- }
- }
-
- /**
- * Writes chars from a <code>String</code> to bytes on an
- * <code>OutputStream</code> using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link String#getBytes(String)}.
- *
- * @param data the <code>String</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void write(String data, OutputStream output, String encoding)
- throws IOException {
- if (data != null) {
- if (encoding == null) {
- write(data, output);
- } else {
- output.write(data.getBytes(encoding));
- }
- }
- }
-
- // write StringBuffer
- //-----------------------------------------------------------------------
- /**
- * Writes chars from a <code>StringBuffer</code> to a <code>Writer</code>.
- *
- * @param data the <code>StringBuffer</code> to write, null ignored
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- * @deprecated replaced by write(CharSequence, Writer)
- */
- @Deprecated
- public static void write(StringBuffer data, Writer output)
- throws IOException {
- if (data != null) {
- output.write(data.toString());
- }
- }
-
- /**
- * Writes chars from a <code>StringBuffer</code> to bytes on an
- * <code>OutputStream</code> using the default character encoding of the
- * platform.
- * <p>
- * This method uses {@link String#getBytes()}.
- *
- * @param data the <code>StringBuffer</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- * @deprecated replaced by write(CharSequence, OutputStream)
- */
- @Deprecated
- public static void write(StringBuffer data, OutputStream output)
- throws IOException {
- if (data != null) {
- output.write(data.toString().getBytes(UTF_8));
- }
- }
-
- /**
- * Writes chars from a <code>StringBuffer</code> to bytes on an
- * <code>OutputStream</code> using the specified character encoding.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link String#getBytes(String)}.
- *
- * @param data the <code>StringBuffer</code> to write, null ignored
- * @param output the <code>OutputStream</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- * @deprecated replaced by write(CharSequence, OutputStream, String)
- */
- @Deprecated
- public static void write(StringBuffer data, OutputStream output,
- String encoding) throws IOException {
- if (data != null) {
- if (encoding == null) {
- write(data, output);
- } else {
- output.write(data.toString().getBytes(encoding));
- }
- }
- }
-
- // copy from InputStream
- //-----------------------------------------------------------------------
- /**
- * Copy bytes from an <code>InputStream</code> to an
- * <code>OutputStream</code>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- * <p>
- * Large streams (over 2GB) will return a bytes copied value of
- * <code>-1</code> after the copy has completed since the correct
- * number of bytes cannot be returned as an int. For large streams
- * use the <code>copyLarge(InputStream, OutputStream)</code> method.
- *
- * @param input the <code>InputStream</code> to read from
- * @param output the <code>OutputStream</code> to write to
- * @return the number of bytes copied
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @throws ArithmeticException if the byte count is too large
- * @since Commons IO 1.1
- */
- public static int copy(InputStream input, OutputStream output) throws IOException {
- long count = copyLarge(input, output);
- if (count > Integer.MAX_VALUE) {
- return -1;
- }
- return (int) count;
- }
-
- /**
- * Copy bytes from a large (over 2GB) <code>InputStream</code> to an
- * <code>OutputStream</code>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- *
- * @param input the <code>InputStream</code> to read from
- * @param output the <code>OutputStream</code> to write to
- * @return the number of bytes copied
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.3
- */
- public static long copyLarge(InputStream input, OutputStream output)
- throws IOException {
- byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
- long count = 0;
- int n = 0;
- while (-1 != (n = input.read(buffer))) {
- output.write(buffer, 0, n);
- count += n;
- }
- return count;
- }
-
- /**
- * Copy bytes from an <code>InputStream</code> to chars on a
- * <code>Writer</code> using the default character encoding of the platform.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- * <p>
- * This method uses {@link InputStreamReader}.
- *
- * @param input the <code>InputStream</code> to read from
- * @param output the <code>Writer</code> to write to
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void copy(InputStream input, Writer output)
- throws IOException {
- InputStreamReader in = new InputStreamReader(input, UTF_8);
- copy(in, output);
- }
-
- /**
- * Copy bytes from an <code>InputStream</code> to chars on a
- * <code>Writer</code> using the specified character encoding.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedInputStream</code>.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * This method uses {@link InputStreamReader}.
- *
- * @param input the <code>InputStream</code> to read from
- * @param output the <code>Writer</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void copy(InputStream input, Writer output, String encoding)
- throws IOException {
- if (encoding == null) {
- copy(input, output);
- } else {
- InputStreamReader in = new InputStreamReader(input, encoding);
- copy(in, output);
- }
- }
-
- // copy from Reader
- //-----------------------------------------------------------------------
- /**
- * Copy chars from a <code>Reader</code> to a <code>Writer</code>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- * <p>
- * Large streams (over 2GB) will return a chars copied value of
- * <code>-1</code> after the copy has completed since the correct
- * number of chars cannot be returned as an int. For large streams
- * use the <code>copyLarge(Reader, Writer)</code> method.
- *
- * @param input the <code>Reader</code> to read from
- * @param output the <code>Writer</code> to write to
- * @return the number of characters copied
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @throws ArithmeticException if the character count is too large
- * @since Commons IO 1.1
- */
- public static int copy(Reader input, Writer output) throws IOException {
- long count = copyLarge(input, output);
- if (count > Integer.MAX_VALUE) {
- return -1;
- }
- return (int) count;
- }
-
- /**
- * Copy chars from a large (over 2GB) <code>Reader</code> to a <code>Writer</code>.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- *
- * @param input the <code>Reader</code> to read from
- * @param output the <code>Writer</code> to write to
- * @return the number of characters copied
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.3
- */
- public static long copyLarge(Reader input, Writer output) throws IOException {
- char[] buffer = new char[DEFAULT_BUFFER_SIZE];
- long count = 0;
- int n = 0;
- while (-1 != (n = input.read(buffer))) {
- output.write(buffer, 0, n);
- count += n;
- }
- return count;
- }
-
- /**
- * Copy chars from a <code>Reader</code> to bytes on an
- * <code>OutputStream</code> using the default character encoding of the
- * platform, and calling flush.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- * <p>
- * Due to the implementation of OutputStreamWriter, this method performs a
- * flush.
- * <p>
- * This method uses {@link OutputStreamWriter}.
- *
- * @param input the <code>Reader</code> to read from
- * @param output the <code>OutputStream</code> to write to
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void copy(Reader input, OutputStream output)
- throws IOException {
- OutputStreamWriter out = new OutputStreamWriter(output, UTF_8);
- copy(input, out);
- // XXX Unless anyone is planning on rewriting OutputStreamWriter, we
- // have to flush here.
- out.flush();
- }
-
- /**
- * Copy chars from a <code>Reader</code> to bytes on an
- * <code>OutputStream</code> using the specified character encoding, and
- * calling flush.
- * <p>
- * This method buffers the input internally, so there is no need to use a
- * <code>BufferedReader</code>.
- * <p>
- * Character encoding names can be found at
- * <a href="http://www.iana.org/assignments/character-sets">IANA</a>.
- * <p>
- * Due to the implementation of OutputStreamWriter, this method performs a
- * flush.
- * <p>
- * This method uses {@link OutputStreamWriter}.
- *
- * @param input the <code>Reader</code> to read from
- * @param output the <code>OutputStream</code> to write to
- * @param encoding the encoding to use, null means platform default
- * @throws NullPointerException if the input or output is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static void copy(Reader input, OutputStream output, String encoding)
- throws IOException {
- if (encoding == null) {
- copy(input, output);
- } else {
- OutputStreamWriter out = new OutputStreamWriter(output, encoding);
- copy(input, out);
- // XXX Unless anyone is planning on rewriting OutputStreamWriter,
- // we have to flush here.
- out.flush();
- }
- }
-
- // content equals
- //-----------------------------------------------------------------------
- /**
- * Compare the contents of two Streams to determine if they are equal or
- * not.
- * <p>
- * This method buffers the input internally using
- * <code>BufferedInputStream</code> if they are not already buffered.
- *
- * @param input1 the first stream
- * @param input2 the second stream
- * @return true if the content of the streams are equal or they both don't
- * exist, false otherwise
- * @throws NullPointerException if either input is null
- * @throws IOException if an I/O error occurs
- */
- public static boolean contentEquals(InputStream input1, InputStream input2)
- throws IOException {
- if (!(input1 instanceof BufferedInputStream)) {
- input1 = new BufferedInputStream(input1);
- }
- if (!(input2 instanceof BufferedInputStream)) {
- input2 = new BufferedInputStream(input2);
- }
-
- int ch = input1.read();
- while (-1 != ch) {
- int ch2 = input2.read();
- if (ch != ch2) {
- return false;
- }
- ch = input1.read();
- }
-
- int ch2 = input2.read();
- return (ch2 == -1);
- }
-
- /**
- * Compare the contents of two Readers to determine if they are equal or
- * not.
- * <p>
- * This method buffers the input internally using
- * <code>BufferedReader</code> if they are not already buffered.
- *
- * @param input1 the first reader
- * @param input2 the second reader
- * @return true if the content of the readers are equal or they both don't
- * exist, false otherwise
- * @throws NullPointerException if either input is null
- * @throws IOException if an I/O error occurs
- * @since Commons IO 1.1
- */
- public static boolean contentEquals(Reader input1, Reader input2)
- throws IOException {
- if (!(input1 instanceof BufferedReader)) {
- input1 = new BufferedReader(input1);
- }
- if (!(input2 instanceof BufferedReader)) {
- input2 = new BufferedReader(input2);
- }
-
- int ch = input1.read();
- while (-1 != ch) {
- int ch2 = input2.read();
- if (ch != ch2) {
- return false;
- }
- ch = input1.read();
- }
-
- int ch2 = input2.read();
- return (ch2 == -1);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java b/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
deleted file mode 100644
index 2647825..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/NullInputStream.java
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-
-/**
- * A functional, light weight {@link InputStream} that emulates
- * a stream of a specified size.
- * <p>
- * This implementation provides a light weight
- * object for testing with an {@link InputStream}
- * where the contents don't matter.
- * <p>
- * One use case would be for testing the handling of
- * large {@link InputStream} as it can emulate that
- * scenario without the overhead of actually processing
- * large numbers of bytes - significantly speeding up
- * test execution times.
- * <p>
- * This implementation returns zero from the method that
- * reads a byte and leaves the array unchanged in the read
- * methods that are passed a byte array.
- * If alternative data is required the <code>processByte()</code> and
- * <code>processBytes()</code> methods can be implemented to generate
- * data, for example:
- *
- * <pre>
- * public class TestInputStream extends NullInputStream {
- * public TestInputStream(int size) {
- * super(size);
- * }
- * protected int processByte() {
- * return ... // return required value here
- * }
- * protected void processBytes(byte[] bytes, int offset, int length) {
- * for (int i = offset; i < length; i++) {
- * bytes[i] = ... // set array value here
- * }
- * }
- * }
- * </pre>
- *
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class NullInputStream extends InputStream {
-
- private final long size;
- private long position;
- private long mark = -1;
- private long readlimit;
- private boolean eof;
- private final boolean throwEofException;
- private final boolean markSupported;
-
- /**
- * Create an {@link InputStream} that emulates a specified size
- * which supports marking and does not throw EOFException.
- *
- * @param size The size of the input stream to emulate.
- */
- public NullInputStream(long size) {
- this(size, true, false);
- }
-
- /**
- * Create an {@link InputStream} that emulates a specified
- * size with option settings.
- *
- * @param size The size of the input stream to emulate.
- * @param markSupported Whether this instance will support
- * the <code>mark()</code> functionality.
- * @param throwEofException Whether this implementation
- * will throw an {@link EOFException} or return -1 when the
- * end of file is reached.
- */
- public NullInputStream(long size, boolean markSupported, boolean throwEofException) {
- this.size = size;
- this.markSupported = markSupported;
- this.throwEofException = throwEofException;
- }
-
- /**
- * Return the current position.
- *
- * @return the current position.
- */
- public long getPosition() {
- return position;
- }
-
- /**
- * Return the size this {@link InputStream} emulates.
- *
- * @return The size of the input stream to emulate.
- */
- public long getSize() {
- return size;
- }
-
- /**
- * Return the number of bytes that can be read.
- *
- * @return The number of bytes that can be read.
- */
- @Override
- public int available() {
- long avail = size - position;
- if (avail <= 0) {
- return 0;
- } else if (avail > Integer.MAX_VALUE) {
- return Integer.MAX_VALUE;
- } else {
- return (int)avail;
- }
- }
-
- /**
- * Close this input stream - resets the internal state to
- * the initial values.
- *
- * @throws IOException If an error occurs.
- */
- @Override
- public void close() throws IOException {
- eof = false;
- position = 0;
- mark = -1;
- }
-
- /**
- * Mark the current position.
- *
- * @param readlimit The number of bytes before this marked position
- * is invalid.
- * @throws UnsupportedOperationException if mark is not supported.
- */
- @Override
- public synchronized void mark(int readlimit) {
- if (!markSupported) {
- throw new UnsupportedOperationException("Mark not supported");
- }
- mark = position;
- this.readlimit = readlimit;
- }
-
- /**
- * Indicates whether <i>mark</i> is supported.
- *
- * @return Whether <i>mark</i> is supported or not.
- */
- @Override
- public boolean markSupported() {
- return markSupported;
- }
-
- /**
- * Read a byte.
- *
- * @return Either The byte value returned by <code>processByte()</code>
- * or <code>-1</code> if the end of file has been reached and
- * <code>throwEofException</code> is set to <code>false</code>.
- * @throws EOFException if the end of file is reached and
- * <code>throwEofException</code> is set to <code>true</code>.
- * @throws IOException if trying to read past the end of file.
- */
- @Override
- public int read() throws IOException {
- if (eof) {
- throw new IOException("Read after end of file");
- }
- if (position == size) {
- return doEndOfFile();
- }
- position++;
- return processByte();
- }
-
- /**
- * Read some bytes into the specified array.
- *
- * @param bytes The byte array to read into
- * @return The number of bytes read or <code>-1</code>
- * if the end of file has been reached and
- * <code>throwEofException</code> is set to <code>false</code>.
- * @throws EOFException if the end of file is reached and
- * <code>throwEofException</code> is set to <code>true</code>.
- * @throws IOException if trying to read past the end of file.
- */
- @Override
- public int read(byte[] bytes) throws IOException {
- return read(bytes, 0, bytes.length);
- }
-
- /**
- * Read the specified number bytes into an array.
- *
- * @param bytes The byte array to read into.
- * @param offset The offset to start reading bytes into.
- * @param length The number of bytes to read.
- * @return The number of bytes read or <code>-1</code>
- * if the end of file has been reached and
- * <code>throwEofException</code> is set to <code>false</code>.
- * @throws EOFException if the end of file is reached and
- * <code>throwEofException</code> is set to <code>true</code>.
- * @throws IOException if trying to read past the end of file.
- */
- @Override
- public int read(byte[] bytes, int offset, int length) throws IOException {
- if (eof) {
- throw new IOException("Read after end of file");
- }
- if (position == size) {
- return doEndOfFile();
- }
- position += length;
- int returnLength = length;
- if (position > size) {
- returnLength = length - (int)(position - size);
- position = size;
- }
- processBytes(bytes, offset, returnLength);
- return returnLength;
- }
-
- /**
- * Reset the stream to the point when mark was last called.
- *
- * @throws UnsupportedOperationException if mark is not supported.
- * @throws IOException If no position has been marked
- * or the read limit has been exceed since the last position was
- * marked.
- */
- @Override
- public synchronized void reset() throws IOException {
- if (!markSupported) {
- throw new UnsupportedOperationException("Mark not supported");
- }
- if (mark < 0) {
- throw new IOException("No position has been marked");
- }
- if (position > (mark + readlimit)) {
- throw new IOException("Marked position [" + mark +
- "] is no longer valid - passed the read limit [" +
- readlimit + "]");
- }
- position = mark;
- eof = false;
- }
-
- /**
- * Skip a specified number of bytes.
- *
- * @param numberOfBytes The number of bytes to skip.
- * @return The number of bytes skipped or <code>-1</code>
- * if the end of file has been reached and
- * <code>throwEofException</code> is set to <code>false</code>.
- * @throws EOFException if the end of file is reached and
- * <code>throwEofException</code> is set to <code>true</code>.
- * @throws IOException if trying to read past the end of file.
- */
- @Override
- public long skip(long numberOfBytes) throws IOException {
- if (eof) {
- throw new IOException("Skip after end of file");
- }
- if (position == size) {
- return doEndOfFile();
- }
- position += numberOfBytes;
- long returnLength = numberOfBytes;
- if (position > size) {
- returnLength = numberOfBytes - (position - size);
- position = size;
- }
- return returnLength;
- }
-
- /**
- * Return a byte value for the <code>read()</code> method.
- * <p>
- * This implementation returns zero.
- *
- * @return This implementation always returns zero.
- */
- protected int processByte() {
- // do nothing - overridable by subclass
- return 0;
- }
-
- /**
- * Process the bytes for the <code>read(byte[], offset, length)</code>
- * method.
- * <p>
- * This implementation leaves the byte array unchanged.
- *
- * @param bytes The byte array
- * @param offset The offset to start at.
- * @param length The number of bytes.
- */
- protected void processBytes(byte[] bytes, int offset, int length) {
- // do nothing - overridable by subclass
- }
-
- /**
- * Handle End of File.
- *
- * @return <code>-1</code> if <code>throwEofException</code> is
- * set to <code>false</code>
- * @throws EOFException if <code>throwEofException</code> is set
- * to <code>true</code>.
- */
- private int doEndOfFile() throws EOFException {
- eof = true;
- if (throwEofException) {
- throw new EOFException();
- }
- return -1;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/tika/blob/05f4af30/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
----------------------------------------------------------------------
diff --git a/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java b/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
deleted file mode 100644
index 519bc95..0000000
--- a/tika-core/src/main/java/org/apache/tika/io/NullOutputStream.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.io;
-
-import java.io.IOException;
-import java.io.OutputStream;
-
-/**
- * This OutputStream writes all data to the famous <b>/dev/null</b>.
- * <p>
- * This output stream has no destination (file/socket etc.) and all
- * bytes written to it are ignored and lost.
- *
- * @author Jeremias Maerki
- * @since Apache Tika 0.4, copied from Commons IO 1.4
- */
-public class NullOutputStream extends OutputStream {
-
- /**
- * A singleton.
- */
- public static final NullOutputStream NULL_OUTPUT_STREAM = new NullOutputStream();
-
- /**
- * Does nothing - output to <code>/dev/null</code>.
- * @param b The bytes to write
- * @param off The start offset
- * @param len The number of bytes to write
- */
- @Override
- public void write(byte[] b, int off, int len) {
- //to /dev/null
- }
-
- /**
- * Does nothing - output to <code>/dev/null</code>.
- * @param b The byte to write
- */
- @Override
- public void write(int b) {
- //to /dev/null
- }
-
- /**
- * Does nothing - output to <code>/dev/null</code>.
- * @param b The bytes to write
- * @throws IOException never
- */
- @Override
- public void write(byte[] b) throws IOException {
- //to /dev/null
- }
-
-}