You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2009/08/24 11:59:59 UTC
svn commit: r807139 - in /jackrabbit/trunk/jackrabbit-text-extractors: pom.xml src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml
Author: jukka
Date: Mon Aug 24 09:59:58 2009
New Revision: 807139
URL: http://svn.apache.org/viewvc?rev=807139&view=rev
Log:
JCR-1878: Use Apache Tika for text extraction
Upgrade to Tika version 0.4
Modified:
jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml
Modified: jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/pom.xml?rev=807139&r1=807138&r2=807139&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/pom.xml (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/pom.xml Mon Aug 24 09:59:58 2009
@@ -63,9 +63,8 @@
<dependencies>
<dependency>
<groupId>org.apache.tika</groupId>
- <artifactId>tika</artifactId>
- <version>0.3</version>
- <classifier>jdk14</classifier>
+ <artifactId>tika-parsers</artifactId>
+ <version>0.4</version>
<exclusions>
<exclusion>
<groupId>bouncycastle</groupId>
Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml?rev=807139&r1=807138&r2=807139&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml Mon Aug 24 09:59:58 2009
@@ -32,16 +32,30 @@
<mime>application/x-tika-msoffice</mime>
<mime>application/msword</mime>
<mime>application/vnd.ms-excel</mime>
+ <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
<mime>application/vnd.ms-powerpoint</mime>
<mime>application/vnd.visio</mime>
<mime>application/vnd.ms-outlook</mime>
</parser>
<parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
+ <mime>application/x-tika-ooxml</mime>
<mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
<mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
+ <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
+ <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
+ <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
+ <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
<mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
+ <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
+ <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
+ <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
+ <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
+ <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
<mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
+ <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
+ <mime>application/vnd.ms-word.document.macroenabled.12</mime>
+ <mime>application/vnd.ms-word.template.macroenabled.12</mime>
</parser>
<parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">