You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/01/19 13:21:42 UTC

svn commit: r1060783 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java

Author: jukka
Date: Wed Jan 19 12:21:42 2011
New Revision: 1060783

URL: http://svn.apache.org/viewvc?rev=1060783&view=rev
Log:
TIKA-567: Temporary file leak in TikaInputStream

Prevent AbstractPOIFSExtractor from leaking temporary files.

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java?rev=1060783&r1=1060782&r2=1060783&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java Wed Jan 19 12:21:42 2011
@@ -16,9 +16,20 @@
  */
 package org.apache.tika.parser.microsoft;
 
-import java.io.*;
-
-import org.apache.poi.poifs.filesystem.*;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.poifs.filesystem.DirectoryEntry;
+import org.apache.poi.poifs.filesystem.DocumentEntry;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.Ole10Native;
+import org.apache.poi.poifs.filesystem.Ole10NativeException;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.util.IOUtils;
 import org.apache.tika.detect.ZipContainerDetector;
 import org.apache.tika.exception.TikaException;
@@ -73,21 +84,24 @@ abstract class AbstractPOIFSExtractor {
     protected void handleEmbededOfficeDoc(
             DirectoryEntry dir, XHTMLContentHandler xhtml)
             throws IOException, SAXException, TikaException {
-       // Is it an embedded OLE2 document, or an embedded OOXML document?
-       try {
-          Entry ooxml = dir.getEntry("Package");
-
-          // It's OOXML
-          TikaInputStream ooxmlStream = TikaInputStream.get(
-                new DocumentInputStream((DocumentEntry)ooxml)
-          );
-          ZipContainerDetector detector = new ZipContainerDetector();
-          MediaType type = detector.detect(ooxmlStream, new Metadata());
-          handleEmbeddedResource(ooxmlStream, null, type.toString(), xhtml, true);
-          return;
-       } catch(FileNotFoundException e) {
-          // It's regular OLE2
-       }
+        // Is it an embedded OLE2 document, or an embedded OOXML document?
+        try {
+            Entry ooxml = dir.getEntry("Package");
+
+            // It's OOXML
+            TikaInputStream stream = TikaInputStream.get(
+                    new DocumentInputStream((DocumentEntry) ooxml));
+            try {
+                ZipContainerDetector detector = new ZipContainerDetector();
+                MediaType type = detector.detect(stream, new Metadata());
+                handleEmbeddedResource(stream, null, type.toString(), xhtml, true);
+                return;
+            } finally {
+                stream.close();
+            }
+        } catch(FileNotFoundException e) {
+            // It's regular OLE2
+        }
 
        // Need to dump the directory out to a new temp file, so
        //  it's stand along