You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2010/09/14 18:57:22 UTC

svn commit: r996984 - in /tika/trunk/tika-core/src: main/java/org/apache/tika/config/ main/java/org/apache/tika/mime/ main/java/org/apache/tika/parser/ test/java/org/apache/tika/parser/

Author: kkrugler
Date: Tue Sep 14 16:57:22 2010
New Revision: 996984

URL: http://svn.apache.org/viewvc?rev=996984&view=rev
Log:
TIKA-514: Provide constructor for AutoDetectParser that has explicit list of supported parsers

Added:
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
    tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java   (with props)
Modified:
    tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Tue Sep 14 16:57:22 2010
@@ -100,7 +100,7 @@ public class TikaConfig {
         if (mtr != null && mtr.hasAttribute("resource")) {
             mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
         } else {
-            mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+            mimeTypes = MimeTypes.getDefaultMimeTypes();
         }
 
         NodeList nodes = element.getElementsByTagName("parser");
@@ -173,7 +173,8 @@ public class TikaConfig {
                 }
             }
         }
-        mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+        
+        mimeTypes = MimeTypes.getDefaultMimeTypes();
     }
 
     /**

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Tue Sep 14 16:57:22 2010
@@ -587,5 +587,22 @@ public final class MimeTypes implements 
 
         return type;
     }
+    
+    /**
+     * Get the default MimeTypes
+     * 
+     * @return MimeTypes
+     * @throws RuntimeException if "tika-mimetypes.xml" cannot be loaded; wraps
+     *         the underlying MimeTypeException or IOException
+     */
+    public static MimeTypes getDefaultMimeTypes() {
+        try {
+            return MimeTypesFactory.create("tika-mimetypes.xml");
+        } catch (MimeTypeException e) {
+            throw new RuntimeException("Unable to read default mimetypes", e);
+        } catch (IOException e) {
+            throw new RuntimeException("Unable to read default mimetypes", e);
+        }
+    }
 
 }

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Tue Sep 14 16:57:22 2010
@@ -19,6 +19,8 @@ package org.apache.tika.parser;
 import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
@@ -27,6 +29,7 @@ import org.apache.tika.io.CountingInputS
 import org.apache.tika.io.TikaInputStream;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.sax.SecureContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -57,6 +60,34 @@ public class AutoDetectParser extends Co
         setDetector(detector);
     }
 
+    /**
+     * Creates an auto-detecting parser instance using the specified set of parsers.
+     * This allows one to create a Tika configuration where only a subset of the
+     * available parsers have their 3rd party jars included, as otherwise the
+     * use of the default TikaConfig will throw various "ClassNotFound" exceptions.
+     * 
+     * @param parsers parsers to use; each is registered for every media type
+     *        it reports from getSupportedTypes
+     */
+    public AutoDetectParser(Parser...parsers) {
+        this(MimeTypes.getDefaultMimeTypes(), parsers);
+    }
+    
+    public AutoDetectParser(Detector detector, Parser...parsers) {
+        setDetector(detector);
+        setMediaTypeRegistry(MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry());
+        
+        Map<MediaType, Parser> map = new HashMap<MediaType, Parser>();
+        for (Parser parser : parsers) {
+            ParseContext context = new ParseContext();
+            for (MediaType type : parser.getSupportedTypes(context)) {
+                map.put(type, parser);
+            }
+        }
+        
+        setParsers(map);
+    }
+    
     public AutoDetectParser(TikaConfig config) {
         setConfig(config);
     }

Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=996984&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Tue Sep 14 16:57:22 2010
@@ -0,0 +1,67 @@
+package org.apache.tika.parser;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class AutoDetectParserTest extends TestCase {
+
+    private static final MediaType MY_MEDIA_TYPE = new MediaType("application", "x-myparser");
+    
+    @SuppressWarnings("serial")
+    private static class MyDetector implements Detector {
+
+        public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+            return MY_MEDIA_TYPE;
+        }
+    }
+    
+    @SuppressWarnings("serial")
+    private static class MyParser implements Parser {
+
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            Set<MediaType> supportedTypes = new HashSet<MediaType>();
+            supportedTypes.add(MY_MEDIA_TYPE);
+            return supportedTypes;
+        }
+
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+                throws IOException, SAXException, TikaException {
+            metadata.add("MyParser", "value");
+        }
+
+        public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException,
+                SAXException, TikaException {
+            parse(stream, handler, metadata, new ParseContext());
+        }
+    }
+    
+    
+    /**
+     * Test case for TIKA-514. Provide constructor for AutoDetectParser that has explicit
+     * list of supported parsers.
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-514">TIKA-514</a>
+     */
+    public void testSpecificParserList() throws Exception {
+        AutoDetectParser parser = new AutoDetectParser(new MyDetector(), new MyParser());
+        
+        InputStream is = new ByteArrayInputStream("test".getBytes());
+        Metadata metadata = new Metadata();
+        parser.parse(is, new BodyContentHandler(), metadata, new ParseContext());
+        
+        Assert.assertEquals("value", metadata.get("MyParser"));
+    }
+}

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain