You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:29:01 UTC
svn commit: r1706056 - in /tika/trunk/tika-core/src:
main/java/org/apache/tika/io/TikaInputStream.java
test/java/org/apache/tika/io/TikaInputStreamTest.java
Author: tallison
Date: Wed Sep 30 15:29:01 2015
New Revision: 1706056
URL: http://svn.apache.org/viewvc?rev=1706056&view=rev
Log:
TIKA-1744 via Yaniv Kunda -- upgrade TikaInputStream to use Path. Thank you, Yaniv.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1706056&r1=1706055&r2=1706056&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Wed Sep 30 15:29:01 2015
@@ -16,21 +16,24 @@
*/
package org.apache.tika.io;
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
+
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import java.sql.Blob;
import java.sql.SQLException;
@@ -193,6 +196,39 @@ public class TikaInputStream extends Tag
}
/**
+ * Creates a TikaInputStream from the file at the given path.
+ * <p>
+ * Note that you must always explicitly close the returned stream to
+ * prevent leaking open file handles.
+ *
+ * @param path input file
+ * @return a TikaInputStream instance
+ * @throws IOException if an I/O error occurs
+ */
+ public static TikaInputStream get(Path path) throws IOException {
+ return get(path, new Metadata());
+ }
+
+ /**
+ * Creates a TikaInputStream from the file at the given path. The file name
+ * and length are stored as input metadata in the given metadata instance.
+ * <p>
+ * Note that you must always explicitly close the returned stream to
+ * prevent leaking open file handles.
+ *
+ * @param path input file
+ * @param metadata metadata instance
+ * @return a TikaInputStream instance
+ * @throws IOException if an I/O error occurs
+ */
+ public static TikaInputStream get(Path path, Metadata metadata)
+ throws IOException {
+ metadata.set(Metadata.RESOURCE_NAME_KEY, path.getFileName().toString());
+ metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
+ return new TikaInputStream(path);
+ }
+
+ /**
* Creates a TikaInputStream from the given file.
* <p>
* Note that you must always explicitly close the returned stream to
@@ -201,6 +237,8 @@ public class TikaInputStream extends Tag
* @param file input file
* @return a TikaInputStream instance
* @throws FileNotFoundException if the file does not exist
+ * @deprecated use {@link #get(Path)}. In Tika 2.0, this will be removed
+ * or modified to throw an IOException.
*/
public static TikaInputStream get(File file) throws FileNotFoundException {
return get(file, new Metadata());
@@ -217,6 +255,9 @@ public class TikaInputStream extends Tag
* @param metadata metadata instance
* @return a TikaInputStream instance
* @throws FileNotFoundException if the file does not exist
+ * or cannot be opened for reading
+ * @deprecated use {@link #get(Path, Metadata)}. In Tika 2.0,
+ * this will be removed or modified to throw an IOException.
*/
public static TikaInputStream get(File file, Metadata metadata)
throws FileNotFoundException {
@@ -314,9 +355,9 @@ public class TikaInputStream extends Tag
throws IOException {
// Special handling for file:// URIs
if ("file".equalsIgnoreCase(uri.getScheme())) {
- File file = new File(uri);
- if (file.isFile()) {
- return get(file, metadata);
+ Path path = Paths.get(uri);
+ if (Files.isRegularFile(path)) {
+ return get(path, metadata);
}
}
@@ -354,9 +395,9 @@ public class TikaInputStream extends Tag
// Special handling for file:// URLs
if ("file".equalsIgnoreCase(url.getProtocol())) {
try {
- File file = new File(url.toURI());
- if (file.isFile()) {
- return get(file, metadata);
+ Path path = Paths.get(url.toURI());
+ if (Files.isRegularFile(path)) {
+ return get(path, metadata);
}
} catch (URISyntaxException e) {
// fall through
@@ -392,13 +433,13 @@ public class TikaInputStream extends Tag
}
/**
- * The file that contains the contents of this stream. This is either
- * the original file passed to the {@link #TikaInputStream(File)}
- * constructor or a temporary file created by a call to the
- * {@link #getFile()} method. If neither has been called, then
- * the value is <code>null</code>.
+ * The path to the file that contains the contents of this stream.
+ * This is either the original file passed to the
+ * {@link #TikaInputStream(Path)} constructor or a temporary file created
+ * by a call to the {@link #getPath()} method. If neither has been called,
+ * then the value is <code>null</code>.
*/
- private File file;
+ private Path path;
/**
* Tracker of temporary resources.
@@ -431,12 +472,27 @@ public class TikaInputStream extends Tag
* Creates a TikaInputStream instance. This private constructor is used
* by the static factory methods based on the available information.
*
+ * @param path the path to the file that contains the stream
+ * @throws IOException if an I/O error occurs
+ */
+ private TikaInputStream(Path path) throws IOException {
+ super(new BufferedInputStream(Files.newInputStream(path)));
+ this.path = path;
+ this.tmp = new TemporaryResources();
+ this.length = Files.size(path);
+ }
+
+ /**
+ * Creates a TikaInputStream instance. This private constructor is used
+ * by the static factory methods based on the available information.
+ *
* @param file the file that contains the stream
* @throws FileNotFoundException if the file does not exist
+ * @deprecated use {@link #TikaInputStream(Path)}
*/
private TikaInputStream(File file) throws FileNotFoundException {
super(new BufferedInputStream(new FileInputStream(file)));
- this.file = file;
+ this.path = file.toPath();
this.tmp = new TemporaryResources();
this.length = file.length();
}
@@ -456,7 +512,7 @@ public class TikaInputStream extends Tag
private TikaInputStream(
InputStream stream, TemporaryResources tmp, long length) {
super(stream);
- this.file = null;
+ this.path = null;
this.tmp = tmp;
this.length = length;
}
@@ -515,22 +571,20 @@ public class TikaInputStream extends Tag
}
public boolean hasFile() {
- return file != null;
+ return path != null;
}
- public File getFile() throws IOException {
- if (file == null) {
+ public Path getPath() throws IOException {
+ if (path == null) {
if (position > 0) {
throw new IOException("Stream is already being read");
} else {
// Spool the entire stream into a temporary file
- file = tmp.createTemporaryFile();
- try (OutputStream out = new FileOutputStream(file)) {
- IOUtils.copy(in, out);
- }
+ path = tmp.createTempFile();
+ Files.copy(in, path, REPLACE_EXISTING);
// Create a new input stream and make sure it'll get closed
- FileInputStream newStream = new FileInputStream(file);
+ InputStream newStream = Files.newInputStream(path);
tmp.addResource(newStream);
// Replace the spooled stream with the new stream in a way
@@ -545,16 +599,21 @@ public class TikaInputStream extends Tag
}
};
- length = file.length();
+ length = Files.size(path);
}
}
- return file;
+ return path;
+ }
+
+ /**
+ * @see #getPath()
+ */
+ public File getFile() throws IOException {
+ return getPath().toFile();
}
public FileChannel getFileChannel() throws IOException {
- FileInputStream fis = new FileInputStream(getFile());
- tmp.addResource(fis);
- FileChannel channel = fis.getChannel();
+ FileChannel channel = FileChannel.open(getPath());
tmp.addResource(channel);
return channel;
}
@@ -566,7 +625,7 @@ public class TikaInputStream extends Tag
/**
* Returns the length (in bytes) of this stream. Note that if the length
* was not available when this stream was instantiated, then this method
- * will use the {@link #getFile()} method to buffer the entire stream to
+ * will use the {@link #getPath()} method to buffer the entire stream to
* a temporary file in order to calculate the stream length. This case
* will only work if the stream has not yet been consumed.
*
@@ -575,7 +634,7 @@ public class TikaInputStream extends Tag
*/
public long getLength() throws IOException {
if (length == -1) {
- length = getFile().length();
+ getPath(); // updates length internally
}
return length;
}
@@ -616,7 +675,7 @@ public class TikaInputStream extends Tag
@Override
public void close() throws IOException {
- file = null;
+ path = null;
mark = -1;
// The close method was explicitly called, so we indeed
@@ -638,7 +697,7 @@ public class TikaInputStream extends Tag
public String toString() {
String str = "TikaInputStream of ";
if (hasFile()) {
- str += file.toString();
+ str += path.toString();
} else {
str += in.toString();
}
Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java?rev=1706056&r1=1706055&r2=1706056&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java Wed Sep 30 15:29:01 2015
@@ -21,15 +21,12 @@ import static org.junit.Assert.assertEqu
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
@@ -38,13 +35,13 @@ public class TikaInputStreamTest {
@Test
public void testFileBased() throws IOException {
- File file = createTempFile("Hello, World!");
- InputStream stream = TikaInputStream.get(file);
+ Path path = createTempFile("Hello, World!");
+ InputStream stream = TikaInputStream.get(path);
assertEquals(
"The file returned by the getFile() method should"
+ " be the file used to instantiate a TikaInputStream",
- file, TikaInputStream.get(stream).getFile());
+ path, TikaInputStream.get(stream).getPath());
assertEquals(
"The contents of the TikaInputStream should equal the"
@@ -54,20 +51,19 @@ public class TikaInputStreamTest {
stream.close();
assertTrue(
"The close() method must not remove the file used to"
- + " instantiate a TikaInputStream",
- file.exists());
+ + " instantiate a TikaInputStream",
+ Files.exists(path));
- file.delete();
+ Files.delete(path);
}
@Test
public void testStreamBased() throws IOException {
- InputStream input =
- new ByteArrayInputStream("Hello, World!".getBytes(UTF_8));
+ InputStream input = IOUtils.toInputStream("Hello, World!", UTF_8.name());
InputStream stream = TikaInputStream.get(input);
- File file = TikaInputStream.get(stream).getFile();
- assertTrue(file != null && file.isFile());
+ Path file = TikaInputStream.get(stream).getPath();
+ assertTrue(file != null && Files.isRegularFile(file));
assertEquals(
"The contents of the file returned by the getFile method"
@@ -83,27 +79,21 @@ public class TikaInputStreamTest {
assertFalse(
"The close() method must remove the temporary file created"
+ " by a TikaInputStream",
- file.exists());
+ Files.exists(file));
}
- private File createTempFile(String data) throws IOException {
- File file = File.createTempFile("tika-", ".tmp");
- try (OutputStream stream = new FileOutputStream(file)) {
- stream.write(data.getBytes(UTF_8));
- }
+ private Path createTempFile(String data) throws IOException {
+ Path file = Files.createTempFile("tika-", ".tmp");
+ Files.write(file, data.getBytes(UTF_8));
return file;
}
- private String readFile(File file) throws IOException {
- try (InputStream stream = new FileInputStream(file)) {
- return readStream(stream);
- }
+ private String readFile(Path file) throws IOException {
+ return new String(Files.readAllBytes(file), UTF_8);
}
private String readStream(InputStream stream) throws IOException {
- ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- IOUtils.copy(stream, buffer);
- return buffer.toString(UTF_8.name());
+ return IOUtils.toString(stream, UTF_8.name());
}
@Test
@@ -113,7 +103,7 @@ public class TikaInputStreamTest {
TikaInputStream.get(url, metadata).close();
assertEquals("test.txt", metadata.get(Metadata.RESOURCE_NAME_KEY));
assertEquals(
- Long.toString(new File(url.toURI()).length()),
+ Long.toString(Files.size(Paths.get(url.toURI()))),
metadata.get(Metadata.CONTENT_LENGTH));
}