You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2010/11/23 15:17:06 UTC
svn commit: r1038125 -
/jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml
Author: jukka
Date: Tue Nov 23 14:17:05 2010
New Revision: 1038125
URL: http://svn.apache.org/viewvc?rev=1038125&view=rev
Log:
JCR-2642: JackrabbitParser and Tika parser
Use the new DefaultParser class in Tika 0.8 to automatically pick up new parser plugins while still allowing our custom tika-config.xml file to disable extraction from selected file formats.
Modified:
jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml
Modified: jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml?rev=1038125&r1=1038124&r2=1038125&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml (original)
+++ jackrabbit/trunk/jackrabbit-core/src/main/resources/org/apache/jackrabbit/core/query/lucene/tika-config.xml Tue Nov 23 14:17:05 2010
@@ -19,129 +19,29 @@
<properties>
- <mimeTypeRepository resource="/org/apache/tika/mime/tika-mimetypes.xml" magic="false"/>
-
<parsers>
- <parser name="parse-dcxml" class="org.apache.tika.parser.xml.DcXMLParser">
- <mime>application/xml</mime>
- <mime>image/svg+xml</mime>
- </parser>
-
- <parser name="parse-office" class="org.apache.tika.parser.microsoft.OfficeParser">
- <mime>application/x-tika-msoffice</mime>
- <mime>application/msword</mime>
- <mime>application/vnd.ms-excel</mime>
- <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
- <mime>application/vnd.ms-powerpoint</mime>
- <mime>application/vnd.visio</mime>
- <mime>application/vnd.ms-outlook</mime>
- </parser>
-
- <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
- <mime>application/x-tika-ooxml</mime>
- <mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
- <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
- <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
- <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
- <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
- <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
- <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
- <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
- <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
- <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
- <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
- <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
- <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
- <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
- <mime>application/vnd.ms-word.document.macroenabled.12</mime>
- <mime>application/vnd.ms-word.template.macroenabled.12</mime>
- </parser>
-
- <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">
- <mime>text/html</mime>
- <mime>application/xhtml+xml</mime>
- <mime>application/vnd.wap.xhtml+xml</mime>
- <mime>application/x-asp</mime>
- </parser>
-
- <parser mame="parse-rtf" class="org.apache.tika.parser.rtf.RTFParser">
- <mime>application/rtf</mime>
- </parser>
-
- <parser name="parse-pdf" class="org.apache.tika.parser.pdf.PDFParser">
- <mime>application/pdf</mime>
- </parser>
-
- <parser name="parse-txt" class="org.apache.tika.parser.txt.TXTParser">
- <mime>text/plain</mime>
- </parser>
-
- <parser name="parse-openoffice" class="org.apache.tika.parser.opendocument.OpenOfficeParser">
- <mime>application/vnd.sun.xml.writer</mime>
- <mime>application/vnd.oasis.opendocument.text</mime>
- <mime>application/vnd.oasis.opendocument.graphics</mime>
- <mime>application/vnd.oasis.opendocument.presentation</mime>
- <mime>application/vnd.oasis.opendocument.spreadsheet</mime>
- <mime>application/vnd.oasis.opendocument.chart</mime>
- <mime>application/vnd.oasis.opendocument.image</mime>
- <mime>application/vnd.oasis.opendocument.formula</mime>
- <mime>application/vnd.oasis.opendocument.text-master</mime>
- <mime>application/vnd.oasis.opendocument.text-web</mime>
- <mime>application/vnd.oasis.opendocument.text-template</mime>
- <mime>application/vnd.oasis.opendocument.graphics-template</mime>
- <mime>application/vnd.oasis.opendocument.presentation-template</mime>
- <mime>application/vnd.oasis.opendocument.spreadsheet-template</mime>
- <mime>application/vnd.oasis.opendocument.chart-template</mime>
- <mime>application/vnd.oasis.opendocument.image-template</mime>
- <mime>application/vnd.oasis.opendocument.formula-template</mime>
- <mime>application/x-vnd.oasis.opendocument.text</mime>
- <mime>application/x-vnd.oasis.opendocument.graphics</mime>
- <mime>application/x-vnd.oasis.opendocument.presentation</mime>
- <mime>application/x-vnd.oasis.opendocument.spreadsheet</mime>
- <mime>application/x-vnd.oasis.opendocument.chart</mime>
- <mime>application/x-vnd.oasis.opendocument.image</mime>
- <mime>application/x-vnd.oasis.opendocument.formula</mime>
- <mime>application/x-vnd.oasis.opendocument.text-master</mime>
- <mime>application/x-vnd.oasis.opendocument.text-web</mime>
- <mime>application/x-vnd.oasis.opendocument.text-template</mime>
- <mime>application/x-vnd.oasis.opendocument.graphics-template</mime>
- <mime>application/x-vnd.oasis.opendocument.presentation-template</mime>
- <mime>application/x-vnd.oasis.opendocument.spreadsheet-template</mime>
- <mime>application/x-vnd.oasis.opendocument.chart-template</mime>
- <mime>application/x-vnd.oasis.opendocument.image-template</mime>
- <mime>application/x-vnd.oasis.opendocument.formula-template</mime>
- </parser>
-
- <parser name="parse-class" class="org.apache.tika.parser.asm.ClassParser">
- <mime>application/java-vm</mime>
- </parser>
-
- <parser name="parse-mp3" class="org.apache.tika.parser.mp3.Mp3Parser">
- <mime>audio/mpeg</mime>
- </parser>
-
- <parser name="parse-midi" class="org.apache.tika.parser.audio.MidiParser">
- <mime>application/x-midi</mime>
- <mime>audio/midi</mime>
- </parser>
-
- <parser name="parse-audio" class="org.apache.tika.parser.audio.AudioParser">
- <mime>audio/basic</mime>
- <mime>audio/x-wav</mime>
- <mime>audio/x-aiff</mime>
- </parser>
-
- <parser name="parse-mbox" class="org.apache.tika.parser.mbox.MboxParser">
- <mime>application/mbox</mime>
- </parser>
-
- <parser name="parse-epub" class="org.apache.tika.parser.epub.EpubParser">
- <mime>application/epub+zip</mime>
- </parser>
+ <parser class="org.apache.tika.parser.DefaultParser"/>
- <parser name="parse-flv" class="org.apache.tika.parser.video.FLVParser">
- <mime>video/x-flv</mime>
+ <parser class="org.apache.tika.parser.EmptyParser">
+ <!-- Disable package extraction as it's too resource-intensive -->
+ <mime>application/x-archive</mime>
+ <mime>application/x-bzip</mime>
+ <mime>application/x-bzip2</mime>
+ <mime>application/x-cpio</mime>
+ <mime>application/x-gtar</mime>
+ <mime>application/x-gzip</mime>
+ <mime>application/x-tar</mime>
+ <mime>application/zip</mime>
+ <!-- Disable image extraction as there's no text to be found -->
+ <mime>image/bmp</mime>
+ <mime>image/gif</mime>
+ <mime>image/jpeg</mime>
+ <mime>image/png</mime>
+ <mime>image/vnd.wap.wbmp</mime>
+ <mime>image/x-icon</mime>
+ <mime>image/x-psd</mime>
+ <mime>image/x-xcf</mime>
</parser>
</parsers>