You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/01 16:55:14 UTC

svn commit: r1164100 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/io/ tika-parsers/src/main/java/org/apache/tika/parser/font/ tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ tika-parsers/src/main/java/org/apache/tika/parser/pk...

Author: jukka
Date: Thu Sep  1 14:55:13 2011
New Revision: 1164100

URL: http://svn.apache.org/viewvc?rev=1164100&view=rev
Log:
TIKA-701: Fix problems with TemporaryFiles

Add a TikaInputStream.cast() method to simplify conditional code

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Thu Sep  1 14:55:13 2011
@@ -99,6 +99,7 @@ public class TikaInputStream extends Tag
      * is expected to explicitly close the original stream when it's no
      * longer used.
      *
+     * @since Apache Tika 1.0
      * @param stream normal input stream
      * @return a TikaInputStream instance
      */
@@ -154,6 +155,22 @@ public class TikaInputStream extends Tag
     }
 
     /**
+     * Returns the given stream cast to a TikaInputStream, or
+     * <code>null</code> if the stream is not a TikaInputStream.
+     *
+     * @since Apache Tika 1.0
+     * @param stream normal input stream
+     * @return a TikaInputStream instance
+     */
+    public static TikaInputStream cast(InputStream stream) {
+        if (stream instanceof TikaInputStream) {
+            return (TikaInputStream) stream;
+        } else {
+            return null;
+        }
+    }
+
+    /**
      * Creates a TikaInputStream from the given array of bytes.
      * <p>
      * Note that you must always explicitly close the returned stream as in

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Thu Sep  1 14:55:13 2011
@@ -55,9 +55,9 @@ public class TrueTypeParser extends Abst
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         TrueTypeFont font;
-        TikaInputStream tis = TikaInputStream.get(stream);
         TTFParser parser = new TTFParser();
-        if (tis.hasFile()) {
+        TikaInputStream tis = TikaInputStream.cast(stream);
+        if (tis != null && tis.hasFile()) {
             font = parser.parseTTF(tis.getFile());
         } else {
             font = parser.parseTTF(stream);

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Sep  1 14:55:13 2011
@@ -163,17 +163,15 @@ public class OfficeParser extends Abstra
         xhtml.startDocument();
 
         NPOIFSFileSystem filesystem;
-        if(stream instanceof TikaInputStream) {
-            TikaInputStream tstream = (TikaInputStream)stream;
-            if(tstream.getOpenContainer() != null) {
-                filesystem = (NPOIFSFileSystem)tstream.getOpenContainer();
-            } else if(tstream.hasFile()) {
-                filesystem = new NPOIFSFileSystem(tstream.getFileChannel());
-            } else {
-                filesystem = new NPOIFSFileSystem(tstream);
-            }
-        } else {
+        TikaInputStream tstream = TikaInputStream.cast(stream);
+        if (tstream == null) {
             filesystem = new NPOIFSFileSystem(stream);
+        } else if (tstream.getOpenContainer() instanceof NPOIFSFileSystem) {
+            filesystem = (NPOIFSFileSystem) tstream.getOpenContainer();
+        } else if (tstream.hasFile()) {
+            filesystem = new NPOIFSFileSystem(tstream.getFileChannel());
+        } else {
+            filesystem = new NPOIFSFileSystem(tstream);
         }
 
         // Parse summary entries first, to make metadata available early

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Thu Sep  1 14:55:13 2011
@@ -18,7 +18,6 @@ package org.apache.tika.parser.microsoft
 
 import static org.apache.tika.mime.MediaType.application;
 
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.channels.FileChannel;
@@ -29,7 +28,6 @@ import java.util.Set;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.tika.detect.Detector;
-import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -92,10 +90,8 @@ public class POIFSContainerDetector impl
         }
 
         // We can only detect the exact type when given a TikaInputStream
-        if (TikaInputStream.isTikaInputStream(input)) {
-            // No TemporaryResources as this is for sure a TikaInputStream
-            TikaInputStream tis = TikaInputStream.get(input);
-
+        TikaInputStream tis = TikaInputStream.cast(input);
+        if (tis != null) {
             // Look for known top level entry names to detect the document type
             Set<String> names = getTopLevelNames(tis);
             if (names.contains("Workbook")) {

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Sep  1 14:55:13 2011
@@ -16,7 +16,6 @@
  */
 package org.apache.tika.parser.pkg;
 
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 
@@ -30,7 +29,6 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -66,41 +64,29 @@ public class ZipContainerDetector implem
         }
 
         // We can only detect the exact type when given a TikaInputStream
-        if (!TikaInputStream.isTikaInputStream(input)) {
-            return MediaType.APPLICATION_ZIP;
-        }
-
-        TemporaryResources tmp = new TemporaryResources();
-        ZipFile zip = null;
-        try {
-            File file = TikaInputStream.get(input, tmp).getFile();
-            zip = new ZipFile(file);
-
-            MediaType type = detectOpenDocument(zip);
-            if (type == null) {
-                type = detectOfficeOpenXML(zip, TikaInputStream.get(input));
-            }
-            if (type == null) {
-                type = detectIWork(zip);
-            }
-            if (type == null && zip.getEntry("META-INF/MANIFEST.MF") != null) {
-                type = MediaType.application("java-archive");
-            }
-            if (type == null) {
-                type = MediaType.APPLICATION_ZIP;
-            }
-            return type;
-        } catch (IOException e) {
-            return MediaType.APPLICATION_ZIP;
-        } finally {
-            if (zip!=null) {
-                try {
-                    zip.close();
-                } catch (IOException e) {
+        TikaInputStream tis = TikaInputStream.cast(input);
+        if (tis != null) {
+            try {
+                ZipFile zip = new ZipFile(tis.getFile());
+
+                MediaType type = detectOpenDocument(zip);
+                if (type == null) {
+                    type = detectOfficeOpenXML(zip, tis);
                 }
+                if (type == null) {
+                    type = detectIWork(zip);
+                }
+                if (type != null) {
+                    return type;
+                } else if (zip.getEntry("META-INF/MANIFEST.MF") != null) {
+                    return MediaType.application("java-archive");
+                }
+            } catch (IOException ignore) {
             }
-            tmp.close();
         }
+
+        // Fallback: it's still a zip file, we just don't know what kind of one
+        return MediaType.APPLICATION_ZIP;
     }
 
     private MediaType detectOpenDocument(ZipFile zip) {

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java?rev=1164100&r1=1164099&r2=1164100&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/AbstractPOIContainerExtractionTest.java Thu Sep  1 14:55:13 2011
@@ -17,6 +17,7 @@
 package org.apache.tika.parser.microsoft;
 
 import java.io.InputStream;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -50,29 +51,30 @@ public abstract class AbstractPOIContain
 
     protected TrackingHandler process(String filename, ContainerExtractor extractor, boolean recurse) throws Exception {
         TikaInputStream stream = getTestFile(filename);
-        assertEquals(true, extractor.isSupported(stream));
-        
-        // Process it
-        TrackingHandler handler = new TrackingHandler();
-        if(recurse) {
-           extractor.extract(stream, extractor, handler);
-        } else {
-           extractor.extract(stream, null, handler);
+        try {
+            assertEquals(true, extractor.isSupported(stream));
+
+            // Process it
+            TrackingHandler handler = new TrackingHandler();
+            if(recurse) {
+                extractor.extract(stream, extractor, handler);
+            } else {
+                extractor.extract(stream, null, handler);
+            }
+
+            // So they can check what happened
+            return handler;
+        } finally {
+            stream.close();
         }
-        
-        // So they can check what happened
-        return handler;
     }
     
     protected TikaInputStream getTestFile(String filename) throws Exception {
-       InputStream input = AbstractPOIContainerExtractionTest.class.getResourceAsStream(
-             "/test-documents/" + filename);
+        URL input = AbstractPOIContainerExtractionTest.class.getResource(
+               "/test-documents/" + filename);
         assertNotNull(filename + " not found", input);
-        
-        TikaInputStream stream = TikaInputStream.get(input);
-        assertNotNull(stream);
-        
-        return stream;
+
+        return TikaInputStream.get(input);
     }
     
     public static class TrackingHandler implements EmbeddedResourceHandler {