You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/11/13 04:17:33 UTC

svn commit: r835720 - /lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java

Author: jukka
Date: Fri Nov 13 03:17:31 2009
New Revision: 835720

URL: http://svn.apache.org/viewvc?rev=835720&view=rev
Log:
TIKA-320: Allow disabling language detection in AutoDetectParser

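Remove the automatic language detection from AutoDetectParser entirely. Clients that want this functionality can add language detection on top of the Parser interface themselves, for example by combining their own ContentHandler with a ProfilingHandler through a TeeContentHandler, as the removed code did.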

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=835720&r1=835719&r2=835720&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Fri Nov 13 03:17:31 2009
@@ -23,14 +23,12 @@
 import java.util.Map;
 
 import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.CountingInputStream;
-import org.apache.tika.language.ProfilingHandler;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.sax.SecureContentHandler;
-import org.apache.tika.sax.TeeContentHandler;
-import org.apache.tika.detect.Detector;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
@@ -100,14 +98,9 @@
         CountingInputStream count = new CountingInputStream(stream);
         SecureContentHandler secure = new SecureContentHandler(handler, count);
 
-        // Automatic language detection
-        ContentHandler profiler = new ProfilingHandler(metadata);
-
         // Parse the document
         try {
-            super.parse(
-                    count, new TeeContentHandler(secure, profiler),
-                    metadata, context);
+            super.parse(count, secure, metadata, context);
         } catch (SAXException e) {
             // Convert zip bomb exceptions to TikaExceptions
             secure.throwIfCauseOf(e);