You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2012/12/13 09:47:26 UTC
svn commit: r1421141 - in /tika/trunk:
tika-core/src/main/java/org/apache/tika/config/
tika-core/src/main/java/org/apache/tika/detect/
tika-parsers/src/main/java/org/apache/tika/parser/html/
tika-parsers/src/main/java/org/apache/tika/parser/txt/
Author: jukka
Date: Thu Dec 13 08:47:25 2012
New Revision: 1421141
URL: http://svn.apache.org/viewvc?rev=1421141&view=rev
Log:
TIKA-1041: Tika 1.2 universalcharset errors
Catch and ignore any NoClassDefFoundErrors thrown during encoding detection.
This way we can degrade more gracefully when a dependency is not present.
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Thu Dec 13 08:47:25 2012
@@ -141,6 +141,16 @@ public class ServiceLoader {
}
/**
+ * Returns the load error handler used by this loader.
+ *
+ * @return load error handler
+ * @since Apache Tika 1.3
+ */
+ public LoadErrorHandler getLoadErrorHandler() {
+ return handler;
+ }
+
+ /**
* Returns an input stream for reading the specified resource from the
* configured class loader.
*
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/detect/AutoDetectReader.java Thu Dec 13 08:47:25 2012
@@ -24,6 +24,7 @@ import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.List;
+import org.apache.tika.config.LoadErrorHandler;
import org.apache.tika.config.ServiceLoader;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
@@ -44,13 +45,18 @@ public class AutoDetectReader extends Bu
private static Charset detect(
InputStream input, Metadata metadata,
- List<EncodingDetector> detectors)
+ List<EncodingDetector> detectors, LoadErrorHandler handler)
throws IOException, TikaException {
// Ask all given detectors for the character encoding
for (EncodingDetector detector : detectors) {
- Charset charset = detector.detect(input, metadata);
- if (charset != null) {
- return charset;
+ try {
+ Charset charset = detector.detect(input, metadata);
+ if (charset != null) {
+ return charset;
+ }
+ } catch (NoClassDefFoundError e) {
+ // TIKA-1041: Detector dependencies not present.
+ handler.handleLoadError(detector.getClass().getName(), e);
}
}
@@ -87,16 +93,17 @@ public class AutoDetectReader extends Bu
private AutoDetectReader(
BufferedInputStream stream, Metadata metadata,
- List<EncodingDetector> detectors)
+ List<EncodingDetector> detectors, LoadErrorHandler handler)
throws IOException, TikaException {
- this(stream, detect(stream, metadata, detectors));
+ this(stream, detect(stream, metadata, detectors, handler));
}
public AutoDetectReader(
InputStream stream, Metadata metadata,
ServiceLoader loader) throws IOException, TikaException {
this(new BufferedInputStream(stream), metadata,
- loader.loadServiceProviders(EncodingDetector.class));
+ loader.loadServiceProviders(EncodingDetector.class),
+ loader.getLoadErrorHandler());
}
public AutoDetectReader(InputStream stream, Metadata metadata)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Thu Dec 13 08:47:25 2012
@@ -72,7 +72,8 @@ public class HtmlParser extends Abstract
throws IOException, SAXException, TikaException {
// Automatically detect the character encoding
AutoDetectReader reader = new AutoDetectReader(
- new CloseShieldInputStream(stream), metadata, LOADER);
+ new CloseShieldInputStream(stream), metadata,
+ context.get(ServiceLoader.class, LOADER));
try {
Charset charset = reader.getCharset();
String previous = metadata.get(Metadata.CONTENT_TYPE);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=1421141&r1=1421140&r2=1421141&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Thu Dec 13 08:47:25 2012
@@ -68,7 +68,8 @@ public class TXTParser extends AbstractP
throws IOException, SAXException, TikaException {
// Automatically detect the character encoding
AutoDetectReader reader = new AutoDetectReader(
- new CloseShieldInputStream(stream), metadata, LOADER);
+ new CloseShieldInputStream(stream), metadata,
+ context.get(ServiceLoader.class, LOADER));
try {
Charset charset = reader.getCharset();
MediaType type = new MediaType(MediaType.TEXT_PLAIN, charset);