You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by ju...@apache.org on 2009/08/24 11:59:59 UTC

svn commit: r807139 - in /jackrabbit/trunk/jackrabbit-text-extractors: pom.xml src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml

Author: jukka
Date: Mon Aug 24 09:59:58 2009
New Revision: 807139

URL: http://svn.apache.org/viewvc?rev=807139&view=rev
Log:
JCR-1878: Use Apache Tika for text extraction

Upgrade to Tika version 0.4

Modified:
    jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
    jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml

Modified: jackrabbit/trunk/jackrabbit-text-extractors/pom.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/pom.xml?rev=807139&r1=807138&r2=807139&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/pom.xml (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/pom.xml Mon Aug 24 09:59:58 2009
@@ -63,9 +63,8 @@
   <dependencies>
     <dependency>
       <groupId>org.apache.tika</groupId>
-      <artifactId>tika</artifactId>
-      <version>0.3</version>
-      <classifier>jdk14</classifier>
+      <artifactId>tika-parsers</artifactId>
+      <version>0.4</version>
       <exclusions>
         <exclusion>
           <groupId>bouncycastle</groupId>

Modified: jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml
URL: http://svn.apache.org/viewvc/jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml?rev=807139&r1=807138&r2=807139&view=diff
==============================================================================
--- jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml (original)
+++ jackrabbit/trunk/jackrabbit-text-extractors/src/main/resources/org/apache/jackrabbit/extractor/tika-config.xml Mon Aug 24 09:59:58 2009
@@ -32,16 +32,30 @@
       <mime>application/x-tika-msoffice</mime>
       <mime>application/msword</mime>
       <mime>application/vnd.ms-excel</mime>
+      <mime>application/vnd.ms-excel.sheet.binary.macroenabled.12</mime>
       <mime>application/vnd.ms-powerpoint</mime>
       <mime>application/vnd.visio</mime>
       <mime>application/vnd.ms-outlook</mime>
     </parser>
 
     <parser name="parse-ooxml" class="org.apache.tika.parser.microsoft.ooxml.OOXMLParser">
+      <mime>application/x-tika-ooxml</mime>
       <mime>application/vnd.openxmlformats-package.core-properties+xml</mime>
       <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.sheet</mime>
+      <mime>application/vnd.openxmlformats-officedocument.spreadsheetml.template</mime>
+      <mime>application/vnd.ms-excel.sheet.macroenabled.12</mime>
+      <mime>application/vnd.ms-excel.template.macroenabled.12</mime>
+      <mime>application/vnd.ms-excel.addin.macroenabled.12</mime>
       <mime>application/vnd.openxmlformats-officedocument.presentationml.presentation</mime>
+      <mime>application/vnd.openxmlformats-officedocument.presentationml.template</mime>
+      <mime>application/vnd.openxmlformats-officedocument.presentationml.slideshow</mime>
+      <mime>application/vnd.ms-powerpoint.presentation.macroenabled.12</mime>
+      <mime>application/vnd.ms-powerpoint.slideshow.macroenabled.12</mime>
+      <mime>application/vnd.ms-powerpoint.addin.macroenabled.12</mime>
       <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.document</mime>
+      <mime>application/vnd.openxmlformats-officedocument.wordprocessingml.template</mime>
+      <mime>application/vnd.ms-word.document.macroenabled.12</mime>
+      <mime>application/vnd.ms-word.template.macroenabled.12</mime>
     </parser>
 
     <parser name="parse-html" class="org.apache.tika.parser.html.HtmlParser">