You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/07/12 23:19:29 UTC

svn commit: r793410 - /lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java

Author: jukka
Date: Sun Jul 12 21:19:27 2009
New Revision: 793410

URL: http://svn.apache.org/viewvc?rev=793410&view=rev
Log:
TIKA-258: AutoDetectParser does not allow the use of an alternative MIME detector

Changes based on the patch contributed by Maxim Valyanskiy.

Modified:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=793410&r1=793409&r2=793410&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Sun Jul 12 21:19:27 2009
@@ -25,14 +25,18 @@
 import org.apache.tika.io.CountingInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.sax.SecureContentHandler;
+import org.apache.tika.detect.Detector;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 public class AutoDetectParser extends CompositeParser {
 
-    private MimeTypes types;
+    /**
+     * The type detector used by this parser to auto-detect the type
+     * of a document.
+     */
+    private Detector detector; // always set in the constructor
 
     /**
      * Creates an auto-detecting parser instance using the default Tika
@@ -53,15 +57,31 @@
 
     public void setConfig(TikaConfig config) {
         setParsers(config.getParsers());
-        setMimeTypes(config.getMimeRepository());
+        setDetector(config.getMimeRepository());
     }
 
-    public MimeTypes getMimeTypes() {
-        return types;
+    /**
+     * Returns the type detector used by this parser to auto-detect the type
+     * of a document.
+     *
+     * @return type detector
+     * @since Apache Tika 0.4
+     */
+    public Detector getDetector() {
+        return detector;
     }
 
-    public void setMimeTypes(MimeTypes types) {
-        this.types = types;
+    /**
+     * Sets the type detector used by this parser to auto-detect the type
+     * of a document. Note that calling the {@link #setConfig(TikaConfig)}
+     * method afterwards replaces this detector with the MIME repository
+     * of the given configuration.
+     *
+     * @param detector type detector
+     * @since Apache Tika 0.4
+     */
+    public void setDetector(Detector detector) {
+        this.detector = detector;
     }
 
     public void parse(
@@ -73,7 +93,7 @@
         }
 
         // Automatically detect the MIME type of the document 
-        MediaType type = types.detect(stream, metadata);
+        MediaType type = detector.detect(stream, metadata);
         metadata.set(Metadata.CONTENT_TYPE, type.toString());
 
         // TIKA-216: Zip bomb prevention