You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by rg...@apache.org on 2013/07/18 00:08:17 UTC

svn commit: r1504302 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java

Author: rgauss
Date: Wed Jul 17 22:08:17 2013
New Revision: 1504302

URL: http://svn.apache.org/r1504302
Log:
TIKA-1147: File-Based TikaInputStreams are Deleted by ExternalEmbedder.embed
   - Restructured tests to be able to accept different input streams
   - Added test for passing in a TikaInputStream
   - Changed ExternalEmbedder to close the input stream rather than delete its file

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java?rev=1504302&r1=1504301&r2=1504302&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java Wed Jul 17 22:08:17 2013
@@ -405,8 +405,8 @@ public class ExternalEmbedder implements
                 }
             }
             if (!inputToStdIn) {
-                // Clean up temp input files
-                tikaInputStream.getFile().delete();
+                // Close input file (and delete it if created by TemporaryResources.createTemporaryFile)
+                IOUtils.closeQuietly(tikaInputStream);
             }
             IOUtils.closeQuietly(outputStream);
             IOUtils.closeQuietly(stdErrOutputStream);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java?rev=1504302&r1=1504301&r2=1504302&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java Wed Jul 17 22:08:17 2013
@@ -19,10 +19,13 @@ package org.apache.tika.embedder;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStreamWriter;
+import java.net.URISyntaxException;
+import java.net.URL;
 import java.text.DateFormat;
 import java.text.SimpleDateFormat;
 import java.util.Date;
@@ -33,6 +36,7 @@ import org.apache.tika.embedder.Embedder
 import org.apache.tika.embedder.ExternalEmbedder;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.Property;
 import org.apache.tika.metadata.TikaCoreProperties;
@@ -116,15 +120,46 @@ public class ExternalEmbedderTest extend
     }
 
     /**
-     * Gets the original input stream before metadata has been embedded.
+     * Gets the source input stream through standard Java resource loaders 
+     * before metadata has been embedded.
      *
      * @return a fresh input stream
      */
-    protected InputStream getOriginalInputStream() {
+    protected InputStream getSourceStandardInputStream() {
         return this.getClass().getResourceAsStream(TEST_TXT_PATH);
     }
 
     /**
+     * Gets the source input stream via {@link TikaInputStream}
+     * before metadata has been embedded.
+     *
+     * @return a fresh input stream
+     * @throws FileNotFoundException if the source input file cannot be located
+     */
+    protected InputStream getSourceTikaInputStream() throws FileNotFoundException {
+        return TikaInputStream.get(getSourceInputFile());
+    }
+    
+    /**
+     * Gets the source input file through standard Java resource loaders
+     * before metadata has been embedded.
+     *
+     * @return the source input file
+     * @throws FileNotFoundException if the test resource cannot be found or its URL is not a valid URI
+     */
+    protected File getSourceInputFile() throws FileNotFoundException {
+        URL origUrl = this.getClass().getResource(TEST_TXT_PATH);
+        if (origUrl == null) {
+            throw new FileNotFoundException("could not load " + TEST_TXT_PATH);
+        }
+        try {
+            return new File(origUrl.toURI());
+        } catch (URISyntaxException e) {
+            throw new FileNotFoundException(e.getMessage());
+        }
+    }
+
+    /**
      * Gets the parser to use to verify the result of the embed operation.
      *
      * @return the parser to read embedded metadata
@@ -148,19 +183,25 @@ public class ExternalEmbedderTest extend
      *
      * @param isResultExpectedInOutput whether or not results are expected in command line output
      */
-    protected void embedInTempFile(boolean isResultExpectedInOutput) {
+    protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
+        Embedder embedder = getEmbedder();
+        
+        // TODO Move this check to ExternalEmbedder
+        String os = System.getProperty("os.name", "");
+        if (os.contains("Windows")) {
+            // Skip test on Windows
+            return;
+        }
+        
         Date timestamp = new Date();
         Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
-        Embedder embedder = getEmbedder();
 
         try {
-            // Get the input stream for the test document
-            InputStream origInputStream = getOriginalInputStream();
             File tempOutputFile = tmp.createTemporaryFile();
             FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile);
 
             // Embed the metadata into a copy of the original output stream
-            embedder.embed(metadataToEmbed, origInputStream, tempFileOutputStream, null);
+            embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
 
             ParseContext context = new ParseContext();
             Parser parser = getParser();
@@ -230,12 +271,36 @@ public class ExternalEmbedderTest extend
             fail(e.getMessage());
         }
     }
-
-    public void testEmbed() throws IOException {
-        String os = System.getProperty("os.name", "");
-        if (!os.contains("Windows")) {
-            embedInTempFile(getIsMetadataExpectedInOutput());
+    
+    protected void checkSourceFileExists() {
+        String message = "the original input file was deleted";
+        try {
+            File origInputFile = getSourceInputFile();
+            assertNotNull(message, origInputFile);
+            assertTrue(message, origInputFile.exists());
+        } catch (FileNotFoundException e) {
+            fail(message + ": " + e.getMessage());
         }
     }
 
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
+     * 
+     * @throws IOException
+     */
+    public void testEmbedStandardInputStream() throws IOException {
+        embedInTempFile(getSourceStandardInputStream(), getIsMetadataExpectedInOutput());
+        checkSourceFileExists();
+    }
+    
+    /**
+     * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
+     * 
+     * @throws IOException
+     */
+    public void testEmbedTikaInputStream() throws IOException {
+        embedInTempFile(getSourceTikaInputStream(), getIsMetadataExpectedInOutput());
+        checkSourceFileExists();
+    }
+
 }