You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/01 16:54:49 UTC

svn commit: r1164099 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java

Author: jukka
Date: Thu Sep  1 14:54:49 2011
New Revision: 1164099

URL: http://svn.apache.org/viewvc?rev=1164099&view=rev
Log:
TIKA-701: Fix problems with TemporaryFiles

More stream handling cleanups

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1164099&r1=1164098&r2=1164099&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java Thu Sep  1 14:54:49 2011
@@ -81,8 +81,6 @@ public class ExternalParser extends Abst
      */
     private String[] command = new String[] { "cat" };
 
-    private TemporaryResources tmp = new TemporaryResources();
-
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return getSupportedTypes();
     }
@@ -134,25 +132,37 @@ public class ExternalParser extends Abst
      *  has been called to set patterns.
      */
     public void parse(
-            final InputStream stream, ContentHandler handler,
+            InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         XHTMLContentHandler xhtml =
             new XHTMLContentHandler(handler, metadata);
-        
+
+        TemporaryResources tmp = new TemporaryResources();
+        try {
+            parse(TikaInputStream.get(stream, tmp),
+                    xhtml, metadata, tmp);
+        } finally {
+            tmp.dispose();
+        }
+    }
+
+    private void parse(
+            TikaInputStream stream, XHTMLContentHandler xhtml,
+            Metadata metadata, TemporaryResources tmp)
+            throws IOException, SAXException, TikaException {
         boolean inputToStdIn = true;
         boolean outputFromStdOut = true;
         boolean hasPatterns = (metadataPatterns != null && !metadataPatterns.isEmpty());
-        
-        TikaInputStream tikaStream = TikaInputStream.get(stream);
+
         File output = null;
-        
+
         // Build our command
         String[] cmd = new String[command.length];
         System.arraycopy(command, 0, cmd, 0, command.length);
         for(int i=0; i<cmd.length; i++) {
            if(cmd[i].indexOf(INPUT_FILE_TOKEN) != -1) {
-              cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, tikaStream.getFile().toString());
+              cmd[i] = cmd[i].replace(INPUT_FILE_TOKEN, stream.getFile().getPath());
               inputToStdIn = false;
            }
            if(cmd[i].indexOf(OUTPUT_FILE_TOKEN) != -1) {
@@ -168,7 +178,7 @@ public class ExternalParser extends Abst
         } else {
            process = Runtime.getRuntime().exec( cmd );
         }
-        
+
         try {
             if(inputToStdIn) {
                sendInput(process, stream);
@@ -202,12 +212,10 @@ public class ExternalParser extends Abst
             } catch (InterruptedException ignore) {
             }
         }
-        
+
         // Grab the output if we haven't already
-        if(!outputFromStdOut) {
-           FileInputStream out = new FileInputStream(output);
-           extractOutput(out, xhtml);
-           tmp.dispose();
+        if (!outputFromStdOut) {
+            extractOutput(new FileInputStream(output), xhtml);
         }
     }
 
@@ -255,8 +263,6 @@ public class ExternalParser extends Abst
                 try {
                     IOUtils.copy(stream, stdin);
                 } catch (IOException e) {
-                } finally {
-                    IOUtils.closeQuietly(stdin);
                 }
             }
         }.start();

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java?rev=1164099&r1=1164098&r2=1164099&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/chm/TestChmExtractor.java Thu Sep  1 14:54:49 2011
@@ -17,7 +17,6 @@
 package org.apache.tika.parser.chm;
 
 import java.io.ByteArrayInputStream;
-import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Iterator;
@@ -27,7 +26,6 @@ import junit.framework.Assert;
 import junit.framework.TestCase;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.chm.accessor.ChmDirectoryListingSet;
 import org.apache.tika.parser.chm.accessor.DirectoryListingEntry;
@@ -65,25 +63,23 @@ public class TestChmExtractor extends Te
         Assert.assertEquals(TestParameters.VP_CHM_ENTITIES_NUMBER, count);
     }
 
-    public void testChmParser() {
+    public void testChmParser() throws Exception{
         List<String> files = new ArrayList<String>();
         files.add("/test-documents/testChm.chm");
         files.add("/test-documents/testChm3.chm");
 
         for (String fileName : files) {
+            InputStream stream =
+                    TestChmBlockInfo.class.getResourceAsStream(fileName);
             try {
-                InputStream stream = TikaInputStream.get(TestChmBlockInfo.class
-                        .getResource(fileName));
                 CHMDocumentInformation chmDocInfo = CHMDocumentInformation.load(stream);
                 Metadata md = new Metadata();
                 String text = chmDocInfo.getText();
                 chmDocInfo.getCHMDocInformation(md);
                 assertEquals(TestParameters.VP_CHM_MIME_TYPE, md.toString().trim());
                 assertTrue(text.length() > 0);
-            } catch (IOException e) {
-                e.printStackTrace();
-            } catch (TikaException e) {
-                e.printStackTrace();
+            } finally {
+                stream.close();
             }
         }
     }