You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/05/24 00:07:15 UTC
svn commit: r778043 - in /lucene/tika/trunk: ./
tika-core/src/main/java/org/apache/tika/io/
tika-core/src/main/java/org/apache/tika/parser/
Author: jukka
Date: Sat May 23 22:07:14 2009
New Revision: 778043
URL: http://svn.apache.org/viewvc?rev=778043&view=rev
Log:
TIKA-198: Better distinction between IOException and TikaException
Add protective code to CompositeParser.
Embed the required but not-yet-released classes from Commons IO. We can replace them with a proper dependency once Commons IO 2.0 is out.
Added:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java
- copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java
- copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java
- copied, changed from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java
Modified:
lucene/tika/trunk/CHANGES.txt
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
Modified: lucene/tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/CHANGES.txt?rev=778043&r1=778042&r2=778043&view=diff
==============================================================================
--- lucene/tika/trunk/CHANGES.txt (original)
+++ lucene/tika/trunk/CHANGES.txt Sat May 23 22:07:14 2009
@@ -35,6 +35,11 @@
* Charset detection functionality from the ICU4J library was inlined
in Tika to avoid the dependency to the large ICU4J jar. (TIKA-229)
+ * Composite parsers like the AutoDetectParser now make sure that any
+ RuntimeExceptions or IOExceptions unrelated to the given document
+ stream are converted to TikaExceptions before being passed to the
+ client. (TIKA-198)
+
Release 0.3 - 03/09/2009
------------------------
Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/ProxyInputStream.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/ProxyInputStream.java Sat May 23 22:07:14 2009
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.commons.io.input;
+package org.apache.tika.io;
import java.io.FilterInputStream;
import java.io.IOException;
@@ -176,7 +176,6 @@
* handling. The default behaviour is to re-throw the exception.
* @param e The IOException thrown
* @throws IOException if an I/O error occurs
- * @since Commons IO 2.0
*/
protected void handleIOException(IOException e) throws IOException {
throw e;
Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/TaggedIOException.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedIOException.java Sat May 23 22:07:14 2009
@@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.commons.io;
+package org.apache.tika.io;
import java.io.IOException;
+import org.apache.commons.io.IOExceptionWithCause;
+
/**
* An {@link IOException} wrapper that tags the wrapped exception with
* a given object reference. Both the tag and the wrapped original exception
* can be used to determine further processing when this exception is caught.
- *
- * @since Commons IO 1.5
*/
public class TaggedIOException extends IOExceptionWithCause {
Copied: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java (from r778027, commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java)
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java?p2=lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java&p1=commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java&r1=778027&r2=778043&rev=778043&view=diff
==============================================================================
--- commons/proper/io/trunk/src/java/org/apache/commons/io/input/TaggedInputStream.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TaggedInputStream.java Sat May 23 22:07:14 2009
@@ -14,13 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.commons.io.input;
+package org.apache.tika.io;
-import java.io.IOException;
+ import java.io.IOException;
import java.io.InputStream;
-import org.apache.commons.io.TaggedIOException;
-
/**
* An input stream decorator that tags potential exceptions so that the
* stream that caused the exception can easily be identified. This is
@@ -57,7 +55,6 @@
* </pre>
*
* @see TaggedIOException
- * @since Commons IO 1.5
*/
public class TaggedInputStream extends ProxyInputStream {
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=778043&r1=778042&r2=778043&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java Sat May 23 22:07:14 2009
@@ -22,6 +22,7 @@
import java.util.Map;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TaggedInputStream;
import org.apache.tika.metadata.Metadata;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -100,12 +101,26 @@
}
/**
- * Delegates the call to the matching component parser.
+ * Delegates the call to the matching component parser. Potential
+ * {@link RuntimeException}s and {@link IOException}s unrelated to the
+ * given input stream are automatically wrapped into
+ * {@link TikaException}s to better honor the {@link Parser} contract.
*/
public void parse(
InputStream stream, ContentHandler handler, Metadata metadata)
throws IOException, SAXException, TikaException {
- getParser(metadata).parse(stream, handler, metadata);
+ TaggedInputStream tagged = new TaggedInputStream(stream);
+ try {
+ getParser(metadata).parse(tagged, handler, metadata);
+ } catch (RuntimeException e) {
+ throw new TikaException("Unexpected parse error", e);
+ } catch (IOException e) {
+ tagged.throwIfCauseOf(e);
+
+ // The IOException was caused by the parser instead of the stream,
+ // convert the exception to a TikaException
+ throw new TikaException("Parse error", e);
+ }
}
}