You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/01 16:54:49 UTC
svn commit: r1164099 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java
Author: jukka
Date: Thu Sep 1 14:54:49 2011
New Revision: 1164099
URL: http://svn.apache.org/viewvc?rev=1164099&view=rev
Log:
TIKA-701: Fix problems with TemporaryFiles
More stream handling cleanups
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1164099&r1=1164098&r2=1164099&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java Thu Sep 1 14:54:49 2011
@@ -81,8 +81,6 @@ public class ExternalParser extends Abst
*/
private String[] command = new String[] { "cat" };
- private TemporaryResources tmp = new TemporaryResources();
-
public Set<MediaType> getSupportedTypes(ParseContext context) {
return getSupportedTypes();
}
@@ -134,25 +132,37 @@ public class ExternalParser extends Abst
* has been called to set patterns.
*/
public void parse(
- final InputStream stream, ContentHandler handler,
+ InputStream stream, ContentHandler handler,
Metadata metadata, ParseContext context)
throws IOException, SAXException, TikaException {
XHTMLContentHandler xhtml =
new XHTMLContentHandler(handler, metadata);
-
+
+ TemporaryResources tmp = new TemporaryResources();
+ try {
+ parse(TikaInputStream.get(stream, tmp),
+ xhtml, metadata, tmp);
+ } finally {
+ tmp.dispose();
+ }
+ }
+
+ private void parse(
+ TikaInputStream stream, XHTMLContentHandler xhtml,
+ Metadata metadata, TemporaryResources tmp)
+ throws IOException, SAXException, TikaException {
boolean inputToStdIn = true;
boolean outputFromStdOut = true;
boolean hasPatterns = (metadataPatterns != null && !metadataPatterns.isEmpty());
-
- TikaInputStream tikaStream = TikaInputStream.get(stream);
+
File output = null;
-
+
// Build our command
String[] cmd = new String[command.length];
System.arraycopy(command, 0, cmd, 0, command.length);
for(int i=0; i<cmd.length; i++) {
if(cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
- cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, tikaStream.getFile().toString());
+ cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
inputToStdIn = false;
}
if(cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
@@ -168,7 +178,7 @@ public class ExternalParser extends Abst
} else {
process = Runtime.getRuntime().exec( cmd );
}
-
+
try {
if(inputToStdIn) {
sendInput(process, stream);
@@ -202,12 +212,10 @@ public class ExternalParser extends Abst
} catch (InterruptedException ignore) {
}
}
-
+
// Grab the output if we haven't already
- if(!outputFromStdOut) {
- FileInputStream out = new FileInputStream(output);
- extractOutput(out, xhtml);
- tmp.dispose();
+ if (!outputFromStdOut) {
+ extractOutput(new FileInputStream(output), xhtml);
}
}
@@ -255,8 +263,6 @@ public class ExternalParser extends Abst
try {
IOUtils.copy(stream, stdin);
} catch (IOException e) {
- } finally {
- IOUtils.closeQuietly(stdin);
}
}
}.start();
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java?rev=1164099&r1=1164098&r2=1164099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java Thu Sep 1 14:54:49 2011
@@ -17,7 +17,6 @@
package org.apache.tika.parser.chm;
import java.io.ByteArrayInputStream;
-import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
@@ -27,7 +26,6 @@ import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
@@ -65,25 +63,23 @@ public class TestChmExtractor extends Te
Assert.assertEquals(TestParameters.VP_CHM_ENTITIES_NUMBER, count);
}
- public void testChmParser() {
+ public void testChmParser() throws Exception{
List<String> files = new ArrayList<String>();
files.add("/test-documents/testChm.chm");
files.add("/test-documents/testChm3.chm");
for (String fileName : files) {
+ InputStream stream =
+ TestChmBlockInfo.class.getResourceAsStream(fileName);
try {
- InputStream stream = TikaInputStream.get(TestChmBlockInfo.class
- .getResource(fileName));
CHMDocumentInformation chmDocInfo = CHMDocumentInformation.load(stream);
Metadata md = new Metadata();
String text = chmDocInfo.getText();
chmDocInfo.getCHMDocInformation(md);
assertEquals(TestParameters.VP_CHM_MIME_TYPE, md.toString().trim());
assertTrue(text.length() > 0);
- } catch (IOException e) {
- e.printStackTrace();
- } catch (TikaException e) {
- e.printStackTrace();
+ } finally {
+ stream.close();
}
}
}