You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2012/12/13 09:47:26 UTC

svn commit: r1421141 - in /tika/trunk: tika-core/src/main/java/org/apache/tika/config/ tika-core/src/main/java/org/apache/tika/detect/ tika-parsers/src/main/java/org/apache/tika/parser/html/ tika-parsers/src/main/java/org/apache/tika/parser/txt/

Author: jukka
Date: Thu Dec 13 08:47:25 2012
New Revision: 1421141

URL: http://svn.apache.org/viewvc?rev=1421141&view=rev
Log:
TIKA-1041: Tika 1.2 universalcharset errors

Catch and ignore any NoClassDefFoundErrors thrown during encoding detection.
This way we can degrade more gracefully when a dependency is not present.

Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Thu Dec 13 08:47:25 2012
@@ -141,6 +141,16 @@ public class ServiceLoader {
     }
 
     /**
+     * Returns the load error handler used by this loader.
+     *
+     * @return load error handler
+     * @since Apache Tika 1.3
+     */
+    public LoadErrorHandler getLoadErrorHandler() {
+        return handler;
+    }
+
+    /**
      * Returns an input stream for reading the specified resource from the
      * configured class loader.
      *

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java Thu Dec 13 08:47:25 2012
@@ -24,6 +24,7 @@ import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.List;
 
+import org.apache.tika.config.LoadErrorHandler;
 import org.apache.tika.config.ServiceLoader;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
@@ -44,13 +45,18 @@ public class AutoDetectReader extends Bu
 
     private static Charset detect(
             InputStream input, Metadata metadata,
-            List<EncodingDetector> detectors)
+            List<EncodingDetector> detectors, LoadErrorHandler handler)
             throws IOException, TikaException {
         // Ask all given detectors for the character encoding
         for (EncodingDetector detector : detectors) {
-            Charset charset = detector.detect(input, metadata);
-            if (charset != null) {
-                return charset;
+            try {
+                Charset charset = detector.detect(input, metadata);
+                if (charset != null) {
+                    return charset;
+                }
+            } catch (NoClassDefFoundError e) {
+                // TIKA-1041: Detector dependencies not present.
+                handler.handleLoadError(detector.getClass().getName(), e);
             }
         }
 
@@ -87,16 +93,17 @@ public class AutoDetectReader extends Bu
 
     private AutoDetectReader(
             BufferedInputStream stream, Metadata metadata,
-            List<EncodingDetector> detectors)
+            List<EncodingDetector> detectors, LoadErrorHandler handler)
             throws IOException, TikaException {
-        this(stream, detect(stream, metadata, detectors));
+        this(stream, detect(stream, metadata, detectors, handler));
     }
 
     public AutoDetectReader(
             InputStream stream, Metadata metadata,
             ServiceLoader loader) throws IOException, TikaException {
         this(new BufferedInputStream(stream), metadata,
-                loader.loadServiceProviders(EncodingDetector.class));
+                loader.loadServiceProviders(EncodingDetector.class),
+                loader.getLoadErrorHandler());
     }
 
     public AutoDetectReader(InputStream stream, Metadata metadata)

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Thu Dec 13 08:47:25 2012
@@ -72,7 +72,8 @@ public class HtmlParser extends Abstract
             throws IOException, SAXException, TikaException {
         // Automatically detect the character encoding
         AutoDetectReader reader = new AutoDetectReader(
-                new CloseShieldInputStream(stream), metadata, LOADER);
+                new CloseShieldInputStream(stream), metadata,
+                context.get(ServiceLoader.class, LOADER));
         try {
             Charset charset = reader.getCharset();
             String previous = metadata.get(Metadata.CONTENT_TYPE);

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Thu Dec 13 08:47:25 2012
@@ -68,7 +68,8 @@ public class TXTParser extends AbstractP
             throws IOException, SAXException, TikaException {
         // Automatically detect the character encoding
         AutoDetectReader reader = new AutoDetectReader(
-                new CloseShieldInputStream(stream), metadata, LOADER);
+                new CloseShieldInputStream(stream), metadata,
+                context.get(ServiceLoader.class, LOADER));
         try {
             Charset charset = reader.getCharset();
             MediaType type = new MediaType(MediaType.TEXT_PLAIN, charset);