You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/03/03 20:09:32 UTC

[tika] branch main updated: TIKA-3309 Add convenience constructors to RereadableInputStream (#408)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new e47f625  TIKA-3309 Add convenience constructors to RereadableInputStream (#408)
e47f625 is described below

commit e47f625233a056a27bd7e03be0c4a8da98828cb2
Author: Peter Kronenberg <pa...@gmail.com>
AuthorDate: Wed Mar 3 15:09:26 2021 -0500

    TIKA-3309 Add convenience constructors to RereadableInputStream (#408)
    
    Co-authored-by: Peter Kronenberg <pe...@torch.ai>
---
 .../apache/tika/utils/RereadableInputStream.java   | 79 +++++++++++++++++-----
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/utils/RereadableInputStream.java b/tika-core/src/main/java/org/apache/tika/utils/RereadableInputStream.java
index b3a528f..47c2b20 100644
--- a/tika-core/src/main/java/org/apache/tika/utils/RereadableInputStream.java
+++ b/tika-core/src/main/java/org/apache/tika/utils/RereadableInputStream.java
@@ -35,6 +35,11 @@ import java.io.OutputStream;
  */
 public class RereadableInputStream extends InputStream {
 
+    /**
+     * Default value for buffer size = 512 MiB (512 * 1024 * 1024 bytes)
+     */
+    private static final int DEFAULT_MAX_BYTES_IN_MEMORY = 512 * 1024 * 1024;
+
 
     /**
      * Input stream originally passed to the constructor.
@@ -96,14 +101,14 @@ public class RereadableInputStream extends InputStream {
      * then the first time when rewind() is called, only those bytes
      * already read from the original stream will be available from then on.
      */
-    private boolean readToEndOfStreamOnFirstRewind = true;
+    private final boolean readToEndOfStreamOnFirstRewind;
 
 
     /**
      * Specifies whether or not to close the original input stream
      * when close() is called.  Defaults to true.
      */
-    private boolean closeOriginalStreamOnClose = true;
+    private final boolean closeOriginalStreamOnClose;
 
 
     // TODO: At some point it would be better to replace the current approach
@@ -122,28 +127,67 @@ public class RereadableInputStream extends InputStream {
     // The original stream is closed when EOF is reached, or when close()
     // is called, whichever comes first.  Using this approach eliminates
     // the need to specify the flag (though makes implementation more complex).
-    
 
 
     /**
-     * Creates a rereadable input stream.
+     * Creates a rereadable input stream with defaults of 512*1024*1024 bytes (512 MiB) for maxBytesInMemory
+     * and both readToEndOfStreamOnFirstRewind and closeOriginalStreamOnClose set to true.
      *
      * @param inputStream stream containing the source of data
+     */
+    public RereadableInputStream(InputStream inputStream) {
+        this(inputStream, DEFAULT_MAX_BYTES_IN_MEMORY, true, true);
+    }
+
+    /**
+     * Creates a rereadable input stream defaulting to 512*1024*1024 bytes (512 MiB) for maxBytesInMemory.
+     *
+     * @param inputStream                    stream containing the source of data
+     * @param readToEndOfStreamOnFirstRewind specifies whether or not to read to the end of stream on first rewind;
+     *                                       if false, then when rewind() is first called, only those bytes already
+     *                                       read from the original stream will be available from then on
+     * @param closeOriginalStreamOnClose     specifies whether or not to close the original input stream when close() is called
+     */
+    public RereadableInputStream(InputStream inputStream, boolean readToEndOfStreamOnFirstRewind, boolean closeOriginalStreamOnClose) {
+        this(inputStream, DEFAULT_MAX_BYTES_IN_MEMORY, readToEndOfStreamOnFirstRewind, closeOriginalStreamOnClose);
+    }
+
+    /**
+     * Creates a rereadable input stream with both readToEndOfStreamOnFirstRewind
+     * and closeOriginalStreamOnClose set to true.
+     *
+     * @param inputStream      stream containing the source of data
      * @param maxBytesInMemory maximum number of bytes to use to store
-     *     the stream's contents in memory before switching to disk; note that
-     *     the instance will preallocate a byte array whose size is
-     *     maxBytesInMemory.  This byte array will be made available for
-     *     garbage collection (i.e. its reference set to null) when the
-     *     content size exceeds the array's size, when close() is called, or
-     *     when there are no more references to the instance.
+     *                         the stream's contents in memory before switching to disk; note that
+     *                         the instance will preallocate a byte array whose size is
+     *                         maxBytesInMemory.  This byte array will be made available for
+     *                         garbage collection (i.e. its reference set to null) when the
+     *                         content size exceeds the array's size, when close() is called, or
+     *                         when there are no more references to the instance.
+     */
+    public RereadableInputStream(InputStream inputStream, int maxBytesInMemory) {
+        this(inputStream, maxBytesInMemory, true, true);
+    }
+
+    /**
+     * Creates a rereadable input stream.
+     *
+     * @param inputStream                    stream containing the source of data
+     * @param maxBytesInMemory               maximum number of bytes to use to store
+     *                                       the stream's contents in memory before switching to disk; note that
+     *                                       the instance will preallocate a byte array whose size is
+     *                                       maxBytesInMemory.  This byte array will be made available for
+     *                                       garbage collection (i.e. its reference set to null) when the
+     *                                       content size exceeds the array's size, when close() is called, or
+     *                                       when there are no more references to the instance.
      * @param readToEndOfStreamOnFirstRewind Specifies whether or not to
-     *     read to the end of stream on first rewind.  If this is set to false,
-     *     then when rewind() is first called, only those bytes already read
-     *     from the original stream will be available from then on.
+     *                                       read to the end of stream on first rewind.  If this is set to false,
+     *                                       then when rewind() is first called, only those bytes already read
+     *                                       from the original stream will be available from then on.
      */
     public RereadableInputStream(InputStream inputStream, int maxBytesInMemory,
-            boolean readToEndOfStreamOnFirstRewind,
-            boolean closeOriginalStreamOnClose) {
+                                 boolean readToEndOfStreamOnFirstRewind,
+                                 boolean closeOriginalStreamOnClose) {
         this.inputStream = inputStream;
         this.originalInputStream = inputStream;
         this.maxBytesInMemory = maxBytesInMemory;
@@ -170,6 +214,7 @@ public class RereadableInputStream extends InputStream {
 
     /**
      * "Rewinds" the stream to the beginning for rereading.
+     *
      * @throws IOException
      */
     public void rewind() throws IOException {
@@ -177,7 +222,7 @@ public class RereadableInputStream extends InputStream {
         if (firstPass && readToEndOfStreamOnFirstRewind) {
             // Force read to end of stream to fill store with any
             // remaining bytes from original stream.
-            while(read() != -1) {
+            while (read() != -1) {
                 // empty loop
             }
         }
@@ -214,7 +259,7 @@ public class RereadableInputStream extends InputStream {
     /**
      * Closes the input stream and removes the temporary file if one was
      * created.
-     * 
+     *
      * @throws IOException
      */
     public void close() throws IOException {