You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by kk...@apache.org on 2010/09/14 18:57:22 UTC
svn commit: r996984 - in /tika/trunk/tika-core/src:
main/java/org/apache/tika/config/ main/java/org/apache/tika/mime/
main/java/org/apache/tika/parser/ test/java/org/apache/tika/parser/
Author: kkrugler
Date: Tue Sep 14 16:57:22 2010
New Revision: 996984
URL: http://svn.apache.org/viewvc?rev=996984&view=rev
Log:
TIKA-514: Provide constructor for AutoDetectParser that has explicit list of supported parsers
Added:
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (with props)
Modified:
tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java Tue Sep 14 16:57:22 2010
@@ -100,7 +100,7 @@ public class TikaConfig {
if (mtr != null && mtr.hasAttribute("resource")) {
mimeTypes = MimeTypesFactory.create(mtr.getAttribute("resource"));
} else {
- mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+ mimeTypes = MimeTypes.getDefaultMimeTypes();
}
NodeList nodes = element.getElementsByTagName("parser");
@@ -173,7 +173,8 @@ public class TikaConfig {
}
}
}
- mimeTypes = MimeTypesFactory.create("tika-mimetypes.xml");
+
+ mimeTypes = MimeTypes.getDefaultMimeTypes();
}
/**
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/mime/MimeTypes.java Tue Sep 14 16:57:22 2010
@@ -587,5 +587,22 @@ public final class MimeTypes implements
return type;
}
+
+ /**
+ * Get the default MimeTypes, created from tika-mimetypes.xml.
+ *
+ * @return MimeTypes
+ * @throws RuntimeException if the default mimetypes cannot be read; wraps
+ * the underlying MimeTypeException or IOException as its cause
+ */
+ public static MimeTypes getDefaultMimeTypes() {
+ try {
+ return MimeTypesFactory.create("tika-mimetypes.xml");
+ } catch (MimeTypeException e) {
+ throw new RuntimeException("Unable to read default mimetypes", e);
+ } catch (IOException e) {
+ throw new RuntimeException("Unable to read default mimetypes", e);
+ }
+ }
}
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=996984&r1=996983&r2=996984&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java Tue Sep 14 16:57:22 2010
@@ -19,6 +19,8 @@ package org.apache.tika.parser;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
@@ -27,6 +29,7 @@ import org.apache.tika.io.CountingInputS
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
import org.apache.tika.sax.SecureContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -57,6 +60,34 @@ public class AutoDetectParser extends Co
setDetector(detector);
}
+ /**
+ * Creates an auto-detecting parser instance using the specified set of parsers.
+ * This allows one to create a Tika configuration where only a subset of the
+ * available parsers have their 3rd party jars included, as otherwise the
+ * use of the default TikaConfig will throw various "ClassNotFound" exceptions.
+ * The default MimeTypes are passed on as the detector argument.
+ *
+ * @param parsers the parsers to make available through this instance
+ */
+ public AutoDetectParser(Parser...parsers) {
+ this(MimeTypes.getDefaultMimeTypes(), parsers);
+ }
+
+ public AutoDetectParser(Detector detector, Parser...parsers) {
+ setDetector(detector);
+ setMediaTypeRegistry(MimeTypes.getDefaultMimeTypes().getMediaTypeRegistry());
+
+ Map<MediaType, Parser> map = new HashMap<MediaType, Parser>();
+ for (Parser parser : parsers) {
+ ParseContext context = new ParseContext();
+ for (MediaType type : parser.getSupportedTypes(context)) {
+ map.put(type, parser);
+ }
+ }
+
+ setParsers(map);
+ }
+
public AutoDetectParser(TikaConfig config) {
setConfig(config);
}
Added: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java?rev=996984&view=auto
==============================================================================
--- tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java (added)
+++ tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java Tue Sep 14 16:57:22 2010
@@ -0,0 +1,67 @@
+package org.apache.tika.parser;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import junit.framework.Assert;
+import junit.framework.TestCase;
+
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+public class AutoDetectParserTest extends TestCase {
+
+ private static final MediaType MY_MEDIA_TYPE = new MediaType("application", "x-myparser");
+
+ @SuppressWarnings("serial")
+ private static class MyDetector implements Detector {
+
+ public MediaType detect(InputStream input, Metadata metadata) throws IOException {
+ return MY_MEDIA_TYPE;
+ }
+ }
+
+ @SuppressWarnings("serial")
+ private static class MyParser implements Parser {
+
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ Set<MediaType> supportedTypes = new HashSet<MediaType>();
+ supportedTypes.add(MY_MEDIA_TYPE);
+ return supportedTypes;
+ }
+
+ public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+ throws IOException, SAXException, TikaException {
+ metadata.add("MyParser", "value");
+ }
+
+ public void parse(InputStream stream, ContentHandler handler, Metadata metadata) throws IOException,
+ SAXException, TikaException {
+ parse(stream, handler, metadata, new ParseContext());
+ }
+ }
+
+
+ /**
+ * Test case for TIKA-514. Provide constructor for AutoDetectParser that has explicit
+ * list of supported parsers.
+ * @see <a href="https://issues.apache.org/jira/browse/TIKA-514">TIKA-514</a>
+ */
+ public void testSpecificParserList() throws Exception {
+ AutoDetectParser parser = new AutoDetectParser(new MyDetector(), new MyParser());
+
+ InputStream is = new ByteArrayInputStream("test".getBytes());
+ Metadata metadata = new Metadata();
+ parser.parse(is, new BodyContentHandler(), metadata, new ParseContext());
+
+ Assert.assertEquals("value", metadata.get("MyParser"));
+ }
+}
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: tika/trunk/tika-core/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain