You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/09/06 22:36:18 UTC

svn commit: r993146 - /tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java

Author: jukka
Date: Mon Sep  6 20:36:18 2010
New Revision: 993146

URL: http://svn.apache.org/viewvc?rev=993146&view=rev
Log:
TIKA-153: Allow passing of files or memory buffers to parsers

Add support for accessing data from a database BLOB.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=993146&r1=993145&r2=993146&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Mon Sep  6 20:36:18 2010
@@ -29,6 +29,8 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLConnection;
+import java.sql.Blob;
+import java.sql.SQLException;
 
 import org.apache.tika.metadata.Metadata;
 
@@ -144,6 +146,61 @@ public class TikaInputStream extends Pro
     }
 
     /**
+     * Creates a TikaInputStream from the given database BLOB.
+     * <p>
+     * Note that the result set containing the BLOB may need to be kept open
+     * until the returned TikaInputStream has been processed and closed.
+     *
+     * @param blob database BLOB
+     * @return a TikaInputStream instance
+     * @throws SQLException if BLOB data can not be accessed
+     */
+    public static TikaInputStream get(Blob blob) throws SQLException {
+        return get(blob, new Metadata()); // delegate with a fresh Metadata the caller holds no reference to
+    }
+
+    /**
+     * Blob size threshold (in bytes, 1 MiB) that limits the largest BLOB
+     * size to be buffered fully in memory by the
+     * {@link #get(Blob, Metadata)} method.
+     */
+    private static final int BLOB_SIZE_THRESHOLD = 1024 * 1024;
+
+    /**
+     * Creates a TikaInputStream from the given database BLOB. The BLOB
+     * length (if available) is stored as input metadata in the given
+     * metadata instance.
+     * <p>
+     * Note that the result set containing the BLOB may need to be kept open
+     * until the returned TikaInputStream has been processed and closed.
+     *
+     * @param blob database BLOB
+     * @param metadata metadata instance
+     * @return a TikaInputStream instance
+     * @throws SQLException if BLOB data can not be accessed
+     */
+    public static TikaInputStream get(Blob blob, Metadata metadata)
+            throws SQLException {
+        long length = -1; // -1 means the BLOB length is unknown
+        try {
+            length = blob.length();
+            metadata.set(Metadata.CONTENT_LENGTH, Long.toString(length));
+        } catch (SQLException ignore) { // best effort: length stays -1 and we stream below
+        }
+
+        // Prefer an in-memory buffer for reasonably sized blobs to reduce
+        // the likelihood of problems caused by long-lived database accesses
+        if (0 <= length && length <= BLOB_SIZE_THRESHOLD) {
+            // the offset in Blob.getBytes() starts at 1
+            return get(blob.getBytes(1, (int) length), metadata); // cast is safe: length <= BLOB_SIZE_THRESHOLD
+        } else { // length unknown or above the threshold: read straight from the BLOB stream
+            return new TikaInputStream(
+                    new BufferedInputStream(blob.getBinaryStream()),
+                    null, length);
+        }
+    }
+
+    /**
      * Creates a TikaInputStream from the resource at the given URI.
      *
      * @param uri resource URI