You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by rg...@apache.org on 2013/07/18 00:08:17 UTC
svn commit: r1504302 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
Author: rgauss
Date: Wed Jul 17 22:08:17 2013
New Revision: 1504302
URL: http://svn.apache.org/r1504302
Log:
TIKA-1147: File-Based TikaInputStreams are Deleted by ExternalEmbedder.embed
- Restructured tests to be able to accept different input streams
- Added test for passing in a TikaInputStream
- Changed ExternalEmbedder to close the input stream rather than delete its file
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java?rev=1504302&r1=1504301&r2=1504302&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/embedder/ExternalEmbedder.java Wed Jul 17 22:08:17 2013
@@ -405,8 +405,8 @@ public class ExternalEmbedder implements
}
}
if (!inputToStdIn) {
- // Clean up temp input files
- tikaInputStream.getFile().delete();
+ // Close input file (and delete it if created by TemporaryResources.createTemporaryFile)
+ IOUtils.closeQuietly(tikaInputStream);
}
IOUtils.closeQuietly(outputStream);
IOUtils.closeQuietly(stdErrOutputStream);
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java?rev=1504302&r1=1504301&r2=1504302&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/embedder/ExternalEmbedderTest.java Wed Jul 17 22:08:17 2013
@@ -19,10 +19,13 @@ package org.apache.tika.embedder;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
+import java.net.URISyntaxException;
+import java.net.URL;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
@@ -33,6 +36,7 @@ import org.apache.tika.embedder.Embedder
import org.apache.tika.embedder.ExternalEmbedder;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -116,15 +120,46 @@ public class ExternalEmbedderTest extend
}
/**
- * Gets the original input stream before metadata has been embedded.
+ * Gets the source input stream through standard Java resource loaders
+ * before metadata has been embedded.
*
* @return a fresh input stream
*/
- protected InputStream getOriginalInputStream() {
+ protected InputStream getSourceStandardInputStream() {
return this.getClass().getResourceAsStream(TEST_TXT_PATH);
}
/**
+ * Gets the source input stream via {@link TikaInputStream}
+ * before metadata has been embedded.
+ *
+ * @return a fresh input stream
+ * @throws FileNotFoundException
+ */
+ protected InputStream getSourceTikaInputStream() throws FileNotFoundException {
+ return TikaInputStream.get(getSourceInputFile());
+ }
+
+ /**
+ * Gets the source input file through standard Java resource loaders
+ * before metadata has been embedded.
+ *
+ * @return the source input file
+ * @throws FileNotFoundException if the resource cannot be found or its URL cannot be converted to a file URI
+ */
+ protected File getSourceInputFile() throws FileNotFoundException {
+ URL origUrl = this.getClass().getResource(TEST_TXT_PATH);
+ if (origUrl == null) {
+ throw new FileNotFoundException("could not load " + TEST_TXT_PATH);
+ }
+ try {
+ return new File(origUrl.toURI());
+ } catch (URISyntaxException e) {
+ throw new FileNotFoundException(e.getMessage());
+ }
+ }
+
+ /**
* Gets the parser to use to verify the result of the embed operation.
*
* @return the parser to read embedded metadata
@@ -148,19 +183,25 @@ public class ExternalEmbedderTest extend
*
* @param isResultExpectedInOutput whether or not results are expected in command line output
*/
- protected void embedInTempFile(boolean isResultExpectedInOutput) {
+ protected void embedInTempFile(InputStream sourceInputStream, boolean isResultExpectedInOutput) {
+ Embedder embedder = getEmbedder();
+
+ // TODO Move this check to ExternalEmbedder
+ String os = System.getProperty("os.name", "");
+ if (os.contains("Windows")) {
+ // Skip test on Windows
+ return;
+ }
+
Date timestamp = new Date();
Metadata metadataToEmbed = getMetadataToEmbed(timestamp);
- Embedder embedder = getEmbedder();
try {
- // Get the input stream for the test document
- InputStream origInputStream = getOriginalInputStream();
File tempOutputFile = tmp.createTemporaryFile();
FileOutputStream tempFileOutputStream = new FileOutputStream(tempOutputFile);
// Embed the metadata into a copy of the original output stream
- embedder.embed(metadataToEmbed, origInputStream, tempFileOutputStream, null);
+ embedder.embed(metadataToEmbed, sourceInputStream, tempFileOutputStream, null);
ParseContext context = new ParseContext();
Parser parser = getParser();
@@ -230,12 +271,36 @@ public class ExternalEmbedderTest extend
fail(e.getMessage());
}
}
-
- public void testEmbed() throws IOException {
- String os = System.getProperty("os.name", "");
- if (!os.contains("Windows")) {
- embedInTempFile(getIsMetadataExpectedInOutput());
+
+ protected void checkSourceFileExists() {
+ String message = "the original input file was deleted";
+ try {
+ File origInputFile = getSourceInputFile();
+ assertNotNull(message, origInputFile);
+ assertTrue(message, origInputFile.exists());
+ } catch (FileNotFoundException e) {
+ fail(message + ": " + e.getMessage());
}
}
+ /**
+ * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceStandardInputStream()}
+ *
+ * @throws IOException
+ */
+ public void testEmbedStandardInputStream() throws IOException {
+ embedInTempFile(getSourceStandardInputStream(), getIsMetadataExpectedInOutput());
+ checkSourceFileExists();
+ }
+
+ /**
+ * Tests embedding using an input stream obtained via {@link ExternalEmbedderTest#getSourceTikaInputStream()}
+ *
+ * @throws IOException
+ */
+ public void testEmbedTikaInputStream() throws IOException {
+ embedInTempFile(getSourceTikaInputStream(), getIsMetadataExpectedInOutput());
+ checkSourceFileExists();
+ }
+
}