You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2010/04/28 14:15:12 UTC
svn commit: r938963 -
/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
Author: jukka
Date: Wed Apr 28 12:15:12 2010
New Revision: 938963
URL: http://svn.apache.org/viewvc?rev=938963&view=rev
Log:
TIKA-153: Allow passing of files or memory buffers to parsers
Add a peek() method. Add metadata handling for files.
Modified:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=938963&r1=938962&r2=938963&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Wed Apr 28 12:15:12 2010
@@ -16,6 +16,7 @@
*/
package org.apache.tika.io;
+import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
@@ -48,7 +49,8 @@ public class TikaInputStream extends Pro
if (stream instanceof TikaInputStream) {
return (TikaInputStream) stream;
} else {
- return new TikaInputStream(stream, null, -1);
+ return new TikaInputStream(
+ new BufferedInputStream(stream), null, -1);
}
}
@@ -58,8 +60,17 @@ public class TikaInputStream extends Pro
}
public static TikaInputStream get(File file) throws IOException {
+ return get(file, new Metadata());
+ }
+
+ public static TikaInputStream get(File file, Metadata metadata)
+ throws IOException {
+ metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
+ metadata.set(Metadata.CONTENT_LENGTH, Long.toString(file.length()));
+
return new TikaInputStream(
- new FileInputStream(file), file, file.length());
+ new BufferedInputStream(new FileInputStream(file)),
+ file, file.length());
}
/**
@@ -69,15 +80,20 @@ public class TikaInputStream extends Pro
* @throws IOException
*/
public static TikaInputStream get(URI uri) throws IOException {
+ return get(uri, new Metadata());
+ }
+
+ public static TikaInputStream get(URI uri, Metadata metadata)
+ throws IOException {
// Special handling for file:// URIs
if ("file".equalsIgnoreCase(uri.getScheme())) {
File file = new File(uri);
if (file.isFile()) {
- return get(file);
+ return get(file, metadata);
}
}
- return get(uri.toURL());
+ return get(uri.toURL(), metadata);
}
public static TikaInputStream get(URL url) throws IOException {
@@ -91,7 +107,7 @@ public class TikaInputStream extends Pro
try {
File file = new File(url.toURI());
if (file.isFile()) {
- return get(file);
+ return get(file, metadata);
}
} catch (URISyntaxException e) {
// fall through
@@ -121,7 +137,9 @@ public class TikaInputStream extends Pro
metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(length));
}
- return new TikaInputStream(connection.getInputStream(), null, length);
+ return new TikaInputStream(
+ new BufferedInputStream(connection.getInputStream()),
+ null, length);
}
/**
@@ -146,6 +164,12 @@ public class TikaInputStream extends Pro
*/
private long position = 0;
+ /**
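+ * Wraps the given stream and records its backing file and length.
+ * @param stream <em>buffered</em> stream (must support the mark feature)
+ * @param file backing file, or <code>null</code> if the stream is not file-based
+ * @param length stream length in bytes, or -1 if not known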
+ */
private TikaInputStream(InputStream stream, File file, long length) {
super(stream);
this.file = file;
@@ -153,6 +177,36 @@ public class TikaInputStream extends Pro
this.length = length;
}
+ /**
+ * Fills the given buffer with upcoming bytes from this stream without
+ * advancing the current stream position. The buffer is filled up unless
+ * the end of stream is encountered before that. This method will block
+ * if not enough bytes are immediately available.
+ *
+ * @param buffer byte buffer
+ * @return number of bytes written to the buffer
+ * @throws IOException if the stream can not be read
+ */
+ public int peek(byte[] buffer) throws IOException {
+ int n = 0;
+
+ mark(buffer.length);
+
+ int m = read(buffer);
+ while (m != -1) {
+ n += m;
+ if (n < buffer.length) {
+ m = read(buffer, n, buffer.length - n);
+ } else {
+ m = -1;
+ }
+ }
+
+ reset();
+
+ return n;
+ }
+
public File getFile() throws IOException {
if (file == null) {
if (in == null) {