You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2015/09/30 17:29:01 UTC

svn commit: r1706056 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/io/TikaInputStream.java test/java/org/apache/tika/io/TikaInputStreamTest.java

Author: tallison
Date: Wed Sep 30 15:29:01 2015
New Revision: 1706056

URL: http://svn.apache.org/viewvc?rev=1706056&view=rev
Log:
TIKA-1744 via Yaniv Kunda -- upgrade TikaInputStream to use Path.  Thank you, Yaniv.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
    tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1706056&r1=1706055&r2=1706056&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Wed Sep 30 15:29:01 2015
@@ -16,21 +16,24 @@
  */
 package org.apache.tika.io;
 
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
+
 import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.Closeable;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLConnection;
 import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.sql.Blob;
 import java.sql.SQLException;
 
@@ -193,6 +196,39 @@ public class TikaInputStream extends Tag
     }
 
     /**
+     * Creates a TikaInputStream from the file at the given path.
+     * <p>
+     * Note that you must always explicitly close the returned stream to
+     * prevent leaking open file handles.
+     *
+     * @param path input file
+     * @return a TikaInputStream instance
+     * @throws IOException if an I/O error occurs
+     */
+    public static TikaInputStream get(Path path) throws IOException {
+        return get(path, new Metadata());
+    }
+
+    /**
+     * Creates a TikaInputStream from the file at the given path. The file name
+     * and length are stored as input metadata in the given metadata instance.
+     * <p>
+     * Note that you must always explicitly close the returned stream to
+     * prevent leaking open file handles.
+     *
+     * @param path input file
+     * @param metadata metadata instance
+     * @return a TikaInputStream instance
+     * @throws IOException if an I/O error occurs
+     */
+    public static TikaInputStream get(Path path, Metadata metadata)
+            throws IOException {
+        metadata.set(Metadata.RESOURCE_NAME_KEY, path.getFileName().toString());
+        metadata.set(Metadata.CONTENT_LENGTH, Long.toString(Files.size(path)));
+        return new TikaInputStream(path);
+    }
+
+    /**
      * Creates a TikaInputStream from the given file.
      * <p>
      * Note that you must always explicitly close the returned stream to
@@ -201,6 +237,8 @@ public class TikaInputStream extends Tag
      * @param file input file
      * @return a TikaInputStream instance
      * @throws FileNotFoundException if the file does not exist
+     * @deprecated use {@link #get(Path)}. In Tika 2.0, this will be removed
+     * or modified to throw an IOException.
      */
     public static TikaInputStream get(File file) throws FileNotFoundException {
         return get(file, new Metadata());
@@ -217,6 +255,9 @@ public class TikaInputStream extends Tag
      * @param metadata metadata instance
      * @return a TikaInputStream instance
      * @throws FileNotFoundException if the file does not exist
+     * or cannot be opened for reading
+     * @deprecated use {@link #get(Path, Metadata)}. In Tika 2.0,
+     * this will be removed or modified to throw an IOException.
      */
     public static TikaInputStream get(File file, Metadata metadata)
             throws FileNotFoundException {
@@ -314,9 +355,9 @@ public class TikaInputStream extends Tag
             throws IOException {
         // Special handling for file:// URIs
         if ("file".equalsIgnoreCase(uri.getScheme())) {
-            File file = new File(uri);
-            if (file.isFile()) {
-                return get(file, metadata);
+            Path path = Paths.get(uri);
+            if (Files.isRegularFile(path)) {
+                return get(path, metadata);
             }
         }
 
@@ -354,9 +395,9 @@ public class TikaInputStream extends Tag
         // Special handling for file:// URLs
         if ("file".equalsIgnoreCase(url.getProtocol())) {
             try {
-                File file = new File(url.toURI());
-                if (file.isFile()) {
-                    return get(file, metadata);
+                Path path = Paths.get(url.toURI());
+                if (Files.isRegularFile(path)) {
+                    return get(path, metadata);
                 }
             } catch (URISyntaxException e) {
                 // fall through
@@ -392,13 +433,13 @@ public class TikaInputStream extends Tag
     }
 
     /**
-     * The file that contains the contents of this stream. This is either
-     * the original file passed to the {@link #TikaInputStream(File)}
-     * constructor or a temporary file created by a call to the
-     * {@link #getFile()} method. If neither has been called, then
-     * the value is <code>null</code>.
+     * The path to the file that contains the contents of this stream.
+     * This is either the original file passed to the
+     * {@link #TikaInputStream(Path)} constructor or a temporary file created
+     * by a call to the {@link #getPath()} method. If neither has been called,
+     * then the value is <code>null</code>.
      */
-    private File file;
+    private Path path;
 
     /**
      * Tracker of temporary resources.
@@ -431,12 +472,27 @@ public class TikaInputStream extends Tag
      * Creates a TikaInputStream instance. This private constructor is used
      * by the static factory methods based on the available information.
      *
+     * @param path the path to the file that contains the stream
+     * @throws IOException if an I/O error occurs
+     */
+    private TikaInputStream(Path path) throws IOException {
+        super(new BufferedInputStream(Files.newInputStream(path)));
+        this.path = path;
+        this.tmp = new TemporaryResources();
+        this.length = Files.size(path);
+    }
+
+    /**
+     * Creates a TikaInputStream instance. This private constructor is used
+     * by the static factory methods based on the available information.
+     *
      * @param file the file that contains the stream
      * @throws FileNotFoundException if the file does not exist
+     * @deprecated use {@link #TikaInputStream(Path)}
      */
     private TikaInputStream(File file) throws FileNotFoundException {
         super(new BufferedInputStream(new FileInputStream(file)));
-        this.file = file;
+        this.path = file.toPath();
         this.tmp = new TemporaryResources();
         this.length = file.length();
     }
@@ -456,7 +512,7 @@ public class TikaInputStream extends Tag
     private TikaInputStream(
             InputStream stream, TemporaryResources tmp, long length) {
         super(stream);
-        this.file = null;
+        this.path = null;
         this.tmp = tmp;
         this.length = length;
     }
@@ -515,22 +571,20 @@ public class TikaInputStream extends Tag
     }
 
     public boolean hasFile() {
-        return file != null;
+        return path != null;
     }
 
-    public File getFile() throws IOException {
-        if (file == null) {
+    public Path getPath() throws IOException {
+        if (path == null) {
             if (position > 0) {
                 throw new IOException("Stream is already being read");
             } else {
                 // Spool the entire stream into a temporary file
-                file = tmp.createTemporaryFile();
-                try (OutputStream out = new FileOutputStream(file)) {
-                    IOUtils.copy(in, out);
-                }
+                path = tmp.createTempFile();
+                Files.copy(in, path, REPLACE_EXISTING);
 
                 // Create a new input stream and make sure it'll get closed
-                FileInputStream newStream = new FileInputStream(file);
+                InputStream newStream = Files.newInputStream(path);
                 tmp.addResource(newStream);
 
                 // Replace the spooled stream with the new stream in a way
@@ -545,16 +599,21 @@ public class TikaInputStream extends Tag
                     }
                 };
 
-                length = file.length();
+                length = Files.size(path);
             }
         }
-        return file;
+        return path;
+    }
+
+    /**
+     * Returns the result of {@link #getPath()} converted to a {@link File}.
+     */
+    public File getFile() throws IOException {
+        return getPath().toFile();
     }
 
     public FileChannel getFileChannel() throws IOException {
-        FileInputStream fis = new FileInputStream(getFile());
-        tmp.addResource(fis);
-        FileChannel channel = fis.getChannel();
+        FileChannel channel = FileChannel.open(getPath());
         tmp.addResource(channel);
         return channel;
     }
@@ -566,7 +625,7 @@ public class TikaInputStream extends Tag
     /**
      * Returns the length (in bytes) of this stream. Note that if the length
      * was not available when this stream was instantiated, then this method
-     * will use the {@link #getFile()} method to buffer the entire stream to
+     * will use the {@link #getPath()} method to buffer the entire stream to
      * a temporary file in order to calculate the stream length. This case
      * will only work if the stream has not yet been consumed.
      *
@@ -575,7 +634,7 @@ public class TikaInputStream extends Tag
      */
     public long getLength() throws IOException {
         if (length == -1) {
-            length = getFile().length();
+            getPath(); // updates length internally
         }
         return length;
     }
@@ -616,7 +675,7 @@ public class TikaInputStream extends Tag
 
     @Override
     public void close() throws IOException {
-        file = null;
+        path = null;
         mark = -1;
 
         // The close method was explicitly called, so we indeed
@@ -638,7 +697,7 @@ public class TikaInputStream extends Tag
     public String toString() {
         String str = "TikaInputStream of ";
         if (hasFile()) {
-            str += file.toString();
+            str += path.toString();
         } else {
             str += in.toString();
         }

Modified: tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java?rev=1706056&r1=1706055&r2=1706056&view=diff
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java (original)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/io/TikaInputStreamTest.java Wed Sep 30 15:29:01 2015
@@ -21,15 +21,12 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 
 import org.apache.tika.metadata.Metadata;
 import org.junit.Test;
@@ -38,13 +35,13 @@ public class TikaInputStreamTest {
 
     @Test
     public void testFileBased() throws IOException {
-        File file = createTempFile("Hello, World!");
-        InputStream stream = TikaInputStream.get(file);
+        Path path = createTempFile("Hello, World!");
+        InputStream stream = TikaInputStream.get(path);
 
         assertEquals(
                 "The file returned by the getFile() method should"
                 + " be the file used to instantiate a TikaInputStream",
-                file, TikaInputStream.get(stream).getFile());
+                path, TikaInputStream.get(stream).getPath());
 
         assertEquals(
                 "The contents of the TikaInputStream should equal the"
@@ -54,20 +51,19 @@ public class TikaInputStreamTest {
         stream.close();
         assertTrue(
                 "The close() method must not remove the file used to"
-                + " instantiate a TikaInputStream",
-                file.exists());
+                        + " instantiate a TikaInputStream",
+                Files.exists(path));
 
-        file.delete();
+        Files.delete(path);
     }
 
     @Test
     public void testStreamBased() throws IOException {
-        InputStream input =
-            new ByteArrayInputStream("Hello, World!".getBytes(UTF_8));
+        InputStream input = IOUtils.toInputStream("Hello, World!", UTF_8.name());
         InputStream stream = TikaInputStream.get(input);
 
-        File file = TikaInputStream.get(stream).getFile();
-        assertTrue(file != null && file.isFile());
+        Path file = TikaInputStream.get(stream).getPath();
+        assertTrue(file != null && Files.isRegularFile(file));
 
         assertEquals(
                 "The contents of the file returned by the getFile method"
@@ -83,27 +79,21 @@ public class TikaInputStreamTest {
         assertFalse(
                 "The close() method must remove the temporary file created"
                 + " by a TikaInputStream",
-                file.exists());
+                Files.exists(file));
     }
 
-    private File createTempFile(String data) throws IOException {
-        File file = File.createTempFile("tika-", ".tmp");
-        try (OutputStream stream = new FileOutputStream(file)) {
-            stream.write(data.getBytes(UTF_8));
-        }
+    private Path createTempFile(String data) throws IOException {
+        Path file = Files.createTempFile("tika-", ".tmp");
+        Files.write(file, data.getBytes(UTF_8));
         return file;
     }
 
-    private String readFile(File file) throws IOException {
-        try (InputStream stream = new FileInputStream(file)) {
-            return readStream(stream);
-        }
+    private String readFile(Path file) throws IOException {
+        return new String(Files.readAllBytes(file), UTF_8);
     }
 
     private String readStream(InputStream stream) throws IOException {
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        IOUtils.copy(stream, buffer);
-        return buffer.toString(UTF_8.name());
+        return IOUtils.toString(stream, UTF_8.name());
     }
 
     @Test
@@ -113,7 +103,7 @@ public class TikaInputStreamTest {
         TikaInputStream.get(url, metadata).close();
         assertEquals("test.txt", metadata.get(Metadata.RESOURCE_NAME_KEY));
         assertEquals(
-                Long.toString(new File(url.toURI()).length()),
+                Long.toString(Files.size(Paths.get(url.toURI()))),
                 metadata.get(Metadata.CONTENT_LENGTH));
     }