You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/05/24 00:07:15 UTC

svn commit: r778043 - in /lucene/tika/trunk: ./ tika-core/src/main/java/org/apache/tika/io/ tika-core/src/main/java/org/apache/tika/parser/

Author: jukka
Date: Sat May 23 22:07:14 2009
New Revision: 778043

URL: http://svn.apache.org/viewvc?rev=778043&view=rev
Log:
TIKA-198: Better distinction between IOException and TikaException

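Added protective code to CompositeParser: RuntimeExceptions and IOExceptions that do not originate from the given document stream are now wrapped in TikaExceptions.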

Embedded required but unreleased classes from Commons IO. We can replace them with a proper dependency once Commons IO 2.0 is out.

Added:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java
      - copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java
      - copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java
      - copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java
Modified:
    lucene/tika/trunk/CHANGES.txt
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java

Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=778043&r1=778042&r2=778043&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sat May 23 22:07:14 2009
@@ -35,6 +35,11 @@
   * Charset detection functionality from the ICU4J library was inlined
     in Tika to avoid the dependency to the large ICU4J jar. (TIKA-229)
 
+  * Composite parsers like the AutoDetectParser now make sure that any
+    RuntimeExceptions or IOExceptions unrelated to the given document
+    stream are converted to TikaExceptions before being passed to the
+    client. (TIKA-198)
+
 Release 0.3 - 03/09/2009
 ------------------------
 

Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java Sat May 23 22:07:14 2009
@@ -14,7 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.commons.io.input;
+package org.apache.tika.io;
 
 import java.io.FilterInputStream;
 import java.io.IOException;
@@ -176,7 +176,6 @@
      * handling. The default behaviour is to re-throw the exception.
      * @param e The IOException thrown
      * @throws IOException if an I/O error occurs
-     * @since Commons IO 2.0
      */
     protected void handleIOException(IOException e) throws IOException {
         throw e;

Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java Sat May 23 22:07:14 2009
@@ -14,16 +14,16 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.commons.io;
+package org.apache.tika.io;
 
 import java.io.IOException;
 
+import org.apache.commons.io.IOExceptionWithCause;
+
 /**
  * An {@link IOException} wrapper that tags the wrapped exception with
  * a given object reference. Both the tag and the wrapped original exception
  * can be used to determine further processing when this exception is caught.
- *
- * @since Commons IO 1.5
  */
 public class TaggedIOException extends IOExceptionWithCause {
 

Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java Sat May 23 22:07:14 2009
@@ -14,13 +14,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.commons.io.input;
+package org.apache.tika.io;
 
-import java.io.IOException;
+ import java.io.IOException;
 import java.io.InputStream;
 
-import org.apache.commons.io.TaggedIOException;
-
 /**
  * An input stream decorator that tags potential exceptions so that the
  * stream that caused the exception can easily be identified. This is
@@ -57,7 +55,6 @@
  * </pre>
  *
  * @see TaggedIOException
- * @since Commons IO 1.5
  */
 public class TaggedInputStream extends ProxyInputStream {
 

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=778043&r1=778042&r2=778043&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Sat May 23 22:07:14 2009
@@ -22,6 +22,7 @@
 import java.util.Map;
 
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TaggedInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -100,12 +101,26 @@
     }
 
     /**
-     * Delegates the call to the matching component parser.
+     * Delegates the call to the matching component parser. Potential
+     * {@link RuntimeException}s and {@link IOException}s unrelated to the
+     * given input stream are automatically wrapped into
+     * {@link TikaException}s to better honor the {@link Parser} contract.
      */
     public void parse(
             InputStream stream, ContentHandler handler, Metadata metadata)
             throws IOException, SAXException, TikaException {
-        getParser(metadata).parse(stream, handler, metadata);
+        TaggedInputStream tagged = new TaggedInputStream(stream);
+        try {
+            getParser(metadata).parse(tagged, handler, metadata);
+        } catch (RuntimeException e) {
+            throw new TikaException("Unexpected parse error", e);
+        } catch (IOException e) {
+            tagged.throwIfCauseOf(e);
+
+            // The IOException was caused by the parser instead of the stream,
+            // convert the exception to a TikaException
+            throw new TikaException("Parse error", e);
+        }
     }
 
 }