You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/09/01 11:38:05 UTC

svn commit: r1163970 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/extractor/ tika-core/src/main/java/org/apache/tika/io/ tika-core/src/main/java/org/apache/tika/parser/ tika-core/src/main/java/org/apache/tika/parser/external/ tika-parsers/...

Author: jukka
Date: Thu Sep  1 09:38:04 2011
New Revision: 1163970

URL: http://svn.apache.org/viewvc?rev=1163970&view=rev
Log:
TIKA-701: Fix problems with TemporaryFiles

Add a more generic TemporaryResources class that can handle any kind of Closeable resource.

Use the new TemporaryResources class in TikaInputStream to better track all the resources being used.

Update all client classes to use the TemporaryResources class instead of TemporaryFiles.

Fix some problems in how TikaInputStreams were being used.

Added:
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java Thu Sep  1 09:38:04 2011
@@ -25,7 +25,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -106,7 +106,7 @@ public class ParserContainerExtractor im
                 InputStream stream, ContentHandler ignored,
                 Metadata metadata, ParseContext context)
                 throws IOException, SAXException, TikaException {
-            TemporaryFiles tmp = new TemporaryFiles();
+            TemporaryResources tmp = new TemporaryResources();
             try {
                 TikaInputStream tis = TikaInputStream.get(stream, tmp);
 
@@ -121,8 +121,13 @@ public class ParserContainerExtractor im
                     // Use a temporary file to process the stream twice
                     File file = tis.getFile();
 
-                    // Let the handler process the embedded resource 
-                    handler.handle(filename, type, TikaInputStream.get(file));
+                    // Let the handler process the embedded resource
+                    InputStream input = TikaInputStream.get(file);
+                    try {
+                        handler.handle(filename, type, input);
+                    } finally {
+                        input.close();
+                    }
 
                     // Recurse
                     extractor.extract(tis, extractor, handler);

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java Thu Sep  1 09:38:04 2011
@@ -25,7 +25,7 @@ import java.io.InputStream;
 
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.CloseShieldInputStream;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.DelegatingParser;
@@ -90,7 +90,7 @@ public class ParsingEmbeddedDocumentExtr
         }
 
         // Use the delegate parser to parse this entry
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             DELEGATING_PARSER.parse(
                     TikaInputStream.get(new CloseShieldInputStream(stream), tmp),
@@ -99,7 +99,7 @@ public class ParsingEmbeddedDocumentExtr
         } catch (TikaException e) {
             // Could not parse the entry, just skip the content
         } finally {
-            tmp.dispose();
+            tmp.close();
         }
 
         if(outputHtml) {

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java Thu Sep  1 09:38:04 2011
@@ -16,24 +16,19 @@
  */
 package org.apache.tika.io;
 
-import java.io.File;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
-public class TemporaryFiles {
-
-    private final List<File> files = new ArrayList<File>();
-
-    public File createTemporaryFile() throws IOException {
-        File file = File.createTempFile("apache-tika-", ".tmp");
-        files.add(file);
-        return file;
-    }
+/**
+ * @deprecated Use the {@link TemporaryResources} class instead
+ */
+public class TemporaryFiles extends TemporaryResources {
 
+    @Override
     public void dispose() {
-        for (File file : files) {
-            file.delete();
+        try {
+            close();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
         }
     }
 

Added: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java?rev=1163970&view=auto
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java (added)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java Thu Sep  1 09:38:04 2011
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.io;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.tika.exception.TikaException;
+
+/**
+ * Utility class for tracking and ultimately closing or otherwise disposing
+ * a collection of temporary resources.
+ * <p>
+ * Note that this class is not thread-safe.
+ *
+ * @since Apache Tika 1.0
+ */
+public class TemporaryResources implements Closeable {
+
+    /**
+     * Tracked resources in LIFO order.
+     */
+    private final LinkedList<Closeable> resources = new LinkedList<Closeable>();
+
+    /**
+     * Directory for temporary files, <code>null</code> for the system default.
+     */
+    private File tmp = null;
+
+    /**
+     * Sets the directory to be used for the temporary files created by
+     * the {@link #createTemporaryFile()} method.
+     *
+     * @param tmp temporary file directory,
+     *            or <code>null</code> for the system default
+     */
+    public void setTemporaryFileDirectory(File tmp) {
+        this.tmp = tmp;
+    }
+
+    /**
+     * Creates and returns a temporary file that will automatically be
+     * deleted when the {@link #close()} method is called.
+     *
+     * @return the newly created temporary file
+     * @throws IOException if the temporary file could not be created
+     */
+    public File createTemporaryFile() throws IOException {
+        final File file = File.createTempFile("apache-tika-", ".tmp", tmp);
+        addResource(new Closeable() {
+            public void close() throws IOException {
+                if (!file.delete()) {
+                    throw new IOException(
+                            "Could not delete temporary file "
+                            + file.getPath());
+                }
+            }
+        });
+        return file;
+    }
+
+    /**
+     * Adds a new resource to the set of tracked resources that will all be
+     * closed when the {@link #close()} method is called.
+     *
+     * @param resource resource to be tracked
+     */
+    public void addResource(Closeable resource) {
+        resources.addFirst(resource);
+    }
+
+    /**
+     * Returns the latest of the tracked resources that implements or
+     * extends the given interface or class.
+     *
+     * @param klass interface or class
+     * @return matching resource, or <code>null</code> if not found
+     */
+    @SuppressWarnings("unchecked")
+    public <T extends Closeable> T getResource(Class<T> klass) {
+        for (Closeable resource : resources) {
+            if (klass.isAssignableFrom(resource.getClass())) {
+                return (T) resource;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Closes all tracked resources. The resources are closed in reverse order
+     * from how they were added.
+     * <p>
+     * Any thrown exceptions from managed resources are collected and
+     * then re-thrown only once all the resources have been closed.
+     *
+     * @throws IOException if one or more of the tracked resources
+     *                     could not be closed
+     */
+    public void close() throws IOException {
+        // Release all resources and keep track of any exceptions
+        List<IOException> exceptions = new LinkedList<IOException>();
+        for (Closeable resource : resources) {
+            try {
+                resource.close();
+            } catch (IOException e) {
+                exceptions.add(e);
+            }
+        }
+        resources.clear();
+
+        // Throw any exceptions that were captured from above
+        if (!exceptions.isEmpty()) {
+            if (exceptions.size() == 1) {
+                throw exceptions.get(0);
+            } else {
+                throw new IOExceptionWithCause(
+                        "Multiple IOExceptions" + exceptions,
+                        exceptions.get(0));
+            }
+        }
+    }
+
+    /**
+     * Calls the {@link #close()} method and wraps the potential
+     * {@link IOException} into a {@link TikaException} for convenience
+     * when used within Tika.
+     *
+     * @throws TikaException if one or more of the tracked resources
+     *                       could not be closed
+     */
+    public void dispose() throws TikaException {
+        try {
+            close();
+        } catch (IOException e) {
+            throw new TikaException("Failed to close temporary resources", e);
+        }
+    }
+
+}

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java Thu Sep  1 09:38:04 2011
@@ -30,6 +30,7 @@ import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLConnection;
+import java.nio.channels.FileChannel;
 import java.sql.Blob;
 import java.sql.SQLException;
 
@@ -84,34 +85,54 @@ public class TikaInputStream extends Tag
      * when you <em>don't</em> explicitly close the returned stream. The
      * recommended access pattern is:
      * <pre>
-     * TemporaryFiles tmp = new TemporaryFiles();
+     * TemporaryResources tmp = new TemporaryResources();
      * try {
      *     TikaInputStream stream = TikaInputStream.get(..., tmp);
      *     // process stream but don't close it
      * } finally {
-     *     tmp.dispose();
+     *     tmp.close();
      * }
      * </pre>
+     * <p>
+     * The given stream instance will <em>not</em> be closed when the
+     * {@link TemporaryResources#close()} method is called. The caller
+     * is expected to explicitly close the original stream when it's no
+     * longer used.
      *
      * @param stream normal input stream
      * @return a TikaInputStream instance
      */
-    public static TikaInputStream get(InputStream stream, TemporaryFiles tmp) {
+    public static TikaInputStream get(
+            InputStream stream, TemporaryResources tmp) {
         if (stream instanceof TikaInputStream) {
             return (TikaInputStream) stream;
         } else {
+            // Make sure that the stream is buffered and that it
+            // (properly) supports the mark feature
+            if (!(stream instanceof BufferedInputStream)
+                    && !(stream instanceof ByteArrayInputStream)) {
+                stream = new BufferedInputStream(stream);
+            }
             return new TikaInputStream(stream, tmp, -1);
         }
     }
 
     /**
+     * @deprecated Use the {@link #get(InputStream, TemporaryResources)} instead
+     */
+    public static TikaInputStream get(InputStream stream, TemporaryFiles tmp) {
+        return get(stream, (TemporaryResources) tmp);
+    }
+
+    /**
      * Casts or wraps the given stream to a TikaInputStream instance.
      * This method can be used to access the functionality of this class
      * even when given just a normal input stream instance.
      * <p>
-     * Use this method instead of the {@link #get(InputStream, TemporaryFiles)}
-     * alternative when you <em>do</em> explicitly close the returned stream.
-     * The recommended access pattern is:
+     * Use this method instead of the
+     * {@link #get(InputStream, TemporaryResources)} alternative when you
+     * <em>do</em> explicitly close the returned stream. The recommended
+     * access pattern is:
      * <pre>
      * TikaInputStream stream = TikaInputStream.get(...);
      * try {
@@ -120,12 +141,16 @@ public class TikaInputStream extends Tag
      *     stream.close();
      * }
      * </pre>
+     * <p>
+     * The given stream instance will be closed along with any other resources
+     * associated with the returned TikaInputStream instance when the
+     * {@link #close()} method is called.
      *
      * @param stream normal input stream
      * @return a TikaInputStream instance
      */
     public static TikaInputStream get(InputStream stream) {
-        return get(stream, new TemporaryFiles());
+        return get(stream, new TemporaryResources());
     }
 
     /**
@@ -156,7 +181,8 @@ public class TikaInputStream extends Tag
     public static TikaInputStream get(byte[] data, Metadata metadata) {
         metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(data.length));
         return new TikaInputStream(
-                new ByteArrayInputStream(data), new TemporaryFiles(), data.length);
+                new ByteArrayInputStream(data),
+                new TemporaryResources(), data.length);
     }
 
     /**
@@ -247,7 +273,7 @@ public class TikaInputStream extends Tag
         } else {
             return new TikaInputStream(
                     new BufferedInputStream(blob.getBinaryStream()),
-                    null, length);
+                    new TemporaryResources(), length);
         }
     }
 
@@ -355,25 +381,7 @@ public class TikaInputStream extends Tag
 
         return new TikaInputStream(
                 new BufferedInputStream(connection.getInputStream()),
-                new TemporaryFiles(), length);
-    }
-
-    /**
-     * Makes sure that a stream is buffered and correctly supports the
-     * mark feature by wrapping the given stream to a
-     * {@link BufferedInputStream} if needed.
-     *
-     * @param stream original stream
-     * @return buffered stream that supports the mark feature
-     */
-    private static InputStream withBufferingAndMarkSupport(InputStream stream) {
-        if (stream instanceof ByteArrayInputStream) {
-            return stream;
-        } else if (stream instanceof BufferedInputStream) {
-            return stream;
-        } else {
-            return new BufferedInputStream(stream);
-        }
+                new TemporaryResources(), length);
     }
 
     /**
@@ -386,9 +394,9 @@ public class TikaInputStream extends Tag
     private File file;
 
     /**
-     * Temporary file provider.
+     * Tracker of temporary resources.
      */
-    private final TemporaryFiles tmp;
+    private final TemporaryResources tmp;
 
     /**
      * Total length of the stream, or -1 if unknown.
@@ -422,20 +430,25 @@ public class TikaInputStream extends Tag
     private TikaInputStream(File file) throws FileNotFoundException {
         super(new BufferedInputStream(new FileInputStream(file)));
         this.file = file;
-        this.tmp = new TemporaryFiles();
+        this.tmp = new TemporaryResources();
         this.length = file.length();
     }
 
     /**
      * Creates a TikaInputStream instance. This private constructor is used
      * by the static factory methods based on the available information.
+     * <p>
+     * The given stream needs to be included in the given temporary resource
+     * collection if the caller wants it also to get closed when the
+     * {@link #close()} method is invoked.
      *
      * @param stream <em>buffered</em> stream (must support the mark feature)
+     * @param tmp tracker for temporary resources associated with this stream
      * @param length total length of the stream, or -1 if unknown
      */
     private TikaInputStream(
-            InputStream stream, TemporaryFiles tmp, long length) {
-        super(withBufferingAndMarkSupport(stream));
+            InputStream stream, TemporaryResources tmp, long length) {
+        super(stream);
         this.file = null;
         this.tmp = tmp;
         this.length = length;
@@ -489,6 +502,9 @@ public class TikaInputStream extends Tag
      */
     public void setOpenContainer(Object container) {
         openContainer = container;
+        if (container instanceof Closeable) {
+            tmp.addResource((Closeable) container);
+        }
     }
 
     public boolean hasFile() {
@@ -497,11 +513,10 @@ public class TikaInputStream extends Tag
 
     public File getFile() throws IOException {
         if (file == null) {
-            if (in == null) {
-                throw new IOException("Stream has already been read");
-            } else if (position > 0) {
+            if (position > 0) {
                 throw new IOException("Stream is already being read");
             } else {
+                // Spool the entire stream into a temporary file
                 file = tmp.createTemporaryFile();
                 OutputStream out = new FileOutputStream(file);
                 try {
@@ -509,15 +524,37 @@ public class TikaInputStream extends Tag
                 } finally {
                     out.close();
                 }
-                in.close();
-                // Re-point the stream at the file now we have it
-                in = new BufferedInputStream(new FileInputStream(file));
+
+                // Create a new input stream and make sure it'll get closed
+                FileInputStream newStream = new FileInputStream(file);
+                tmp.addResource(newStream);
+
+                // Replace the spooled stream with the new stream in a way
+                // that still ends up closing the old stream if or when the
+                // close() method is called. The closing of the new stream
+                // is already being handled as noted above.
+                final InputStream oldStream = in;
+                in = new BufferedInputStream(newStream) {
+                    @Override
+                    public void close() throws IOException {
+                        oldStream.close();
+                    }
+                };
+
                 length = file.length();
             }
         }
         return file;
     }
 
+    public FileChannel getFileChannel() throws IOException {
+        FileInputStream fis = new FileInputStream(getFile());
+        tmp.addResource(fis);
+        FileChannel channel = fis.getChannel();
+        tmp.addResource(channel);
+        return channel;
+    }
+
     public boolean hasLength() {
         return length != -1;
     }
@@ -549,46 +586,10 @@ public class TikaInputStream extends Tag
     }
 
     @Override
-    public int available() throws IOException {
-        if (in == null) {
-            return 0;
-        } else {
-            return super.available();
-        }
-    }
-
-    @Override
     public long skip(long ln) throws IOException {
-        if (in == null) {
-            return 0;
-        } else {
-            long n = super.skip(ln);
-            position += n;
-            return n;
-        }
-    }
-
-    @Override
-    public int read() throws IOException {
-        if (in == null) {
-            return -1;
-        } else {
-            return super.read();
-        }
-    }
-
-    @Override
-    public int read(byte[] bts, int off, int len) throws IOException {
-        if (in == null) {
-            return -1;
-        } else {
-            return super.read(bts, off, len);
-        }
-    }
-
-    @Override
-    public int read(byte[] bts) throws IOException {
-        return read(bts, 0, bts.length);
+        long n = super.skip(ln);
+        position += n;
+        return n;
     }
 
     @Override
@@ -611,33 +612,22 @@ public class TikaInputStream extends Tag
 
     @Override
     public void close() throws IOException {
-        if (in != null) {
-            in.close();
-            in = null;
-        }
-        if (openContainer != null) {
-           if (openContainer instanceof Closeable) {
-              ((Closeable)openContainer).close();
-           }
-           openContainer = null;
-        }
         file = null;
-        tmp.dispose();
-    }
+        mark = -1;
 
-    @Override
-    protected void beforeRead(int n) throws IOException {
-        if (in == null) {
-            throw new IOException("End of the stream reached");
-        }
+        // The close method was explicitly called, so we indeed
+        // are expected to close the input stream. Handle that
+        // by adding that stream as a resource to be tracked before
+        // closing all of them. This way also possible exceptions from
+        // the close() calls get managed properly.
+        tmp.addResource(in);
+        tmp.close();
     }
 
     @Override
-    protected void afterRead(int n) throws IOException {
+    protected void afterRead(int n) {
         if (n != -1) {
             position += n;
-        } else if (mark == -1) {
-            close();
         }
     }
 

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Thu Sep  1 09:38:04 2011
@@ -23,7 +23,7 @@ import org.apache.tika.config.TikaConfig
 import org.apache.tika.detect.DefaultDetector;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -114,7 +114,7 @@ public class AutoDetectParser extends Co
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
 

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Thu Sep  1 09:38:04 2011
@@ -27,7 +27,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -234,7 +234,7 @@ public class CompositeParser extends Abs
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         Parser parser = getParser(metadata);
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             TikaInputStream taggedStream = TikaInputStream.get(stream, tmp);
             TaggedContentHandler taggedHandler = new TaggedContentHandler(handler);

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java Thu Sep  1 09:38:04 2011
@@ -30,7 +30,7 @@ import java.util.Set;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.CloseShieldInputStream;
 import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -64,7 +64,7 @@ public class NetworkParser extends Abstr
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
             parse(tis, handler, metadata, context);

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java Thu Sep  1 09:38:04 2011
@@ -34,7 +34,7 @@ import java.util.regex.Pattern;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.IOUtils;
 import org.apache.tika.io.NullOutputStream;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -80,9 +80,8 @@ public class ExternalParser extends Abst
      * @see Runtime#exec(String[])
      */
     private String[] command = new String[] { "cat" };
-    
-    private TemporaryFiles tmp = new TemporaryFiles();
-    
+
+    private TemporaryResources tmp = new TemporaryResources();
 
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return getSupportedTypes();

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java Thu Sep  1 09:38:04 2011
@@ -16,14 +16,13 @@
  */
 package org.apache.tika.parser.jpeg;
 
-import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Collections;
 import java.util.Set;
 
 import org.apache.tika.exception.TikaException;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -48,7 +47,7 @@ public class JpegParser extends Abstract
             InputStream stream, ContentHandler handler,
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             TikaInputStream tis = TikaInputStream.get(stream, tmp);
             new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile());

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Thu Sep  1 09:38:04 2011
@@ -165,13 +165,13 @@ public class OfficeParser extends Abstra
         NPOIFSFileSystem filesystem;
         if(stream instanceof TikaInputStream) {
             TikaInputStream tstream = (TikaInputStream)stream;
-        	   if(tstream.getOpenContainer() != null) {
-        	      filesystem = (NPOIFSFileSystem)tstream.getOpenContainer();
-        	   } else if(tstream.hasFile()) {
-        	      filesystem = new NPOIFSFileSystem(tstream.getFile());
-        	   } else {
-        	    filesystem = new NPOIFSFileSystem(tstream);
-        	   }
+            if(tstream.getOpenContainer() != null) {
+                filesystem = (NPOIFSFileSystem)tstream.getOpenContainer();
+            } else if(tstream.hasFile()) {
+                filesystem = new NPOIFSFileSystem(tstream.getFileChannel());
+            } else {
+                filesystem = new NPOIFSFileSystem(tstream);
+            }
         } else {
             filesystem = new NPOIFSFileSystem(stream);
         }

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Thu Sep  1 09:38:04 2011
@@ -21,6 +21,7 @@ import static org.apache.tika.mime.Media
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.channels.FileChannel;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
@@ -28,7 +29,7 @@ import java.util.Set;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.tika.detect.Detector;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -92,44 +93,41 @@ public class POIFSContainerDetector impl
 
         // We can only detect the exact type when given a TikaInputStream
         if (TikaInputStream.isTikaInputStream(input)) {
-            TemporaryFiles tmp = new TemporaryFiles();
-            try {
-                // Look for known top level entry names to detect the document type
-                Set<String> names =
-                    getTopLevelNames(TikaInputStream.get(input, tmp));
-                if (names.contains("Workbook")) {
-                    return XLS;
-                } else if (names.contains("EncryptedPackage")) {
-                    return OLE;
-                } else if (names.contains("WordDocument")) {
-                    return DOC;
-                } else if (names.contains("Quill")) {
-                    return PUB;
-                } else if (names.contains("PowerPoint Document")) {
-                    return PPT;
-                } else if (names.contains("VisioDocument")) {
-                    return VSD;
-                } else if (names.contains("CONTENTS")) {
-                    return WPS;
-                } else if (names.contains("\u0001Ole10Native")) {
-                    return OLE;
-                } else if (names.contains("PerfectOffice_MAIN")) {
-                    if (names.contains("SlideShow")) {
-                        return MediaType.application("x-corelpresentations"); // .shw
-                    } else if (names.contains("PerfectOffice_OBJECTS")) {
-                        return MediaType.application("x-quattro-pro"); // .wb?
-                    }
-                } else if (names.contains("NativeContent_MAIN")) {
-                    return MediaType.application("x-quattro-pro"); // .qpw
-                } else {
-                    for (String name : names) {
-                        if (name.startsWith("__substg1.0_")) {
-                            return MSG;
-                        }
+            // No TemporaryResources needed: the input is known to be a TikaInputStream
+            TikaInputStream tis = TikaInputStream.get(input);
+
+            // Look for known top level entry names to detect the document type
+            Set<String> names = getTopLevelNames(tis);
+            if (names.contains("Workbook")) {
+                return XLS;
+            } else if (names.contains("EncryptedPackage")) {
+                return OLE;
+            } else if (names.contains("WordDocument")) {
+                return DOC;
+            } else if (names.contains("Quill")) {
+                return PUB;
+            } else if (names.contains("PowerPoint Document")) {
+                return PPT;
+            } else if (names.contains("VisioDocument")) {
+                return VSD;
+            } else if (names.contains("CONTENTS")) {
+                return WPS;
+            } else if (names.contains("\u0001Ole10Native")) {
+                return OLE;
+            } else if (names.contains("PerfectOffice_MAIN")) {
+                if (names.contains("SlideShow")) {
+                    return MediaType.application("x-corelpresentations"); // .shw
+                } else if (names.contains("PerfectOffice_OBJECTS")) {
+                    return MediaType.application("x-quattro-pro"); // .wb?
+                }
+            } else if (names.contains("NativeContent_MAIN")) {
+                return MediaType.application("x-quattro-pro"); // .qpw
+            } else {
+                for (String name : names) {
+                    if (name.startsWith("__substg1.0_")) {
+                        return MSG;
                     }
                 }
-            } finally {
-                tmp.dispose();
             }
         }
 
@@ -141,10 +139,10 @@ public class POIFSContainerDetector impl
             throws IOException {
         // Force the document stream to a (possibly temporary) file
         // so we don't modify the current position of the stream
-        File file = stream.getFile();
+        FileChannel channel = stream.getFileChannel();
 
         try {
-            NPOIFSFileSystem fs = new NPOIFSFileSystem(file);
+            NPOIFSFileSystem fs = new NPOIFSFileSystem(channel);
 
             // Optimize a possible later parsing process by keeping
             // a reference to the already opened POI file system

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Thu Sep  1 09:38:04 2011
@@ -30,7 +30,7 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.io.IOUtils;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -70,7 +70,7 @@ public class ZipContainerDetector implem
             return MediaType.APPLICATION_ZIP;
         }
 
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         ZipFile zip = null;
         try {
             File file = TikaInputStream.get(input, tmp).getFile();
@@ -99,7 +99,7 @@ public class ZipContainerDetector implem
                 } catch (IOException e) {
                 }
             }
-            tmp.dispose();
+            tmp.close();
         }
     }
 

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Thu Sep  1 09:38:04 2011
@@ -16,9 +16,32 @@
  */
 package org.apache.tika.parser.rtf;
 
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.swing.text.AttributeSet;
+import javax.swing.text.BadLocationException;
+import javax.swing.text.DefaultStyledDocument;
+import javax.swing.text.Document;
+import javax.swing.text.StyleContext;
+import javax.swing.text.rtf.RTFEditorKit;
+
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TaggedInputStream;
-import org.apache.tika.io.TemporaryFiles;
+import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -28,13 +51,6 @@ import org.apache.tika.sax.XHTMLContentH
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
-import javax.swing.text.*;
-import javax.swing.text.rtf.RTFEditorKit;
-import java.io.*;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
 /**
  * RTF parser
  */
@@ -106,7 +122,7 @@ public class RTFParser extends AbstractP
             Metadata metadata, ParseContext context)
             throws IOException, SAXException, TikaException {
         TaggedInputStream tagged = new TaggedInputStream(stream);
-        TemporaryFiles tmp = new TemporaryFiles();
+        TemporaryResources tmp = new TemporaryResources();
         try {
             File tempFile = tmp.createTemporaryFile();
             createUnicodeRtfTempFile(tempFile, stream);