You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/16 14:50:05 UTC

svn commit: r1632289 - in /manifoldcf/trunk: ./ connectors/tika/ framework/ framework/buildfiles/ framework/core/ framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/

Author: kwright
Date: Thu Oct 16 12:50:05 2014
New Revision: 1632289

URL: http://svn.apache.org/r1632289
Log:
Fix for CONNECTORS-1074.

Modified:
    manifoldcf/trunk/   (props changed)
    manifoldcf/trunk/CHANGES.txt
    manifoldcf/trunk/connectors/tika/build.xml
    manifoldcf/trunk/connectors/tika/pom.xml
    manifoldcf/trunk/framework/build.xml
    manifoldcf/trunk/framework/buildfiles/connector-build.xml
    manifoldcf/trunk/framework/core/pom.xml
    manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
    manifoldcf/trunk/pom.xml

Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1074:r1632228-1632283

Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Oct 16 12:50:05 2014
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 2.0-dev =====================
 
+CONNECTORS-1074: Use Tika to map extensions to mime types.
+(Shinichiro Abe, Karl Wright)
+
 CONNECTORS-1072: Remove 1.x-related import fallback code, since
 2.0 is not backwards compatible with 1.x.
 (Karl Wright)

Modified: manifoldcf/trunk/connectors/tika/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/build.xml (original)
+++ manifoldcf/trunk/connectors/tika/build.xml Thu Oct 16 12:50:05 2014
@@ -36,7 +36,6 @@
             <include name="jackson-core*.jar"/>
             <include name="jackson-databind*.jar"/>
             <include name="jackson-annotations*.jar"/>
-            <include name="tika-core*.jar"/>
             <include name="tika-parsers*.jar"/>
             <include name="tagsoup*.jar"/>
             <include name="poi*.jar"/>
@@ -80,7 +79,6 @@
                 <include name="jackson-core*.jar"/>
                 <include name="jackson-databind*.jar"/>
                 <include name="jackson-annotations*.jar"/>
-                <include name="tika-core*.jar"/>
                 <include name="tika-parsers*.jar"/>
                 <include name="tagsoup*.jar"/>
                 <include name="poi*.jar"/>

Modified: manifoldcf/trunk/connectors/tika/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/pom.xml (original)
+++ manifoldcf/trunk/connectors/tika/pom.xml Thu Oct 16 12:50:05 2014
@@ -213,13 +213,8 @@
     </dependency>
     <dependency>
 	  <groupId>org.apache.tika</groupId>
-	  <artifactId>tika-core</artifactId>
-	  <version>1.6</version>
-    </dependency>
-    <dependency>
-	  <groupId>org.apache.tika</groupId>
 	  <artifactId>tika-parsers</artifactId>
-	  <version>1.6</version>
+	  <version>${tika.version}</version>
     </dependency>
     
     <!-- Testing dependencies -->

Modified: manifoldcf/trunk/framework/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/build.xml (original)
+++ manifoldcf/trunk/framework/build.xml Thu Oct 16 12:50:05 2014
@@ -31,6 +31,9 @@
     
     <path id="framework-classpath">
         <fileset dir="../lib">
+            <include name="tika-core*.jar"/>
+        </fileset>
+        <fileset dir="../lib">
             <include name="castor*.jar"/>
             <include name="geronimo-javamail_1.4_spec*.jar"/>
             <include name="commons-discovery*.jar"/>
@@ -330,6 +333,9 @@
         <mkdir dir="build/webapp/authority-service/WEB-INF/lib"/>
         <copy todir="build/webapp/authority-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -381,6 +387,9 @@
         <mkdir dir="build/webapp/authority-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/authority-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -437,6 +446,9 @@
         <mkdir dir="build/webapp/api-service/WEB-INF/lib"/>
         <copy todir="build/webapp/api-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -489,6 +501,9 @@
         <mkdir dir="build/webapp/api-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/api-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -546,6 +561,9 @@
         <mkdir dir="build/webapp/crawler-ui/WEB-INF/lib"/>
         <copy todir="build/webapp/crawler-ui/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -606,6 +624,9 @@
         <mkdir dir="build/webapp/crawler-ui-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/crawler-ui-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -673,6 +694,9 @@
         <mkdir dir="build/webapp/combined-service/WEB-INF/lib"/>
         <copy todir="build/webapp/combined-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -735,6 +759,9 @@
         <mkdir dir="build/webapp/combined-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/combined-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -866,6 +893,9 @@
         <mkdir dir="dist/lib"/>
         <copy todir="dist/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -1116,6 +1146,7 @@
     
     <path id="process-lib">
         <fileset dir="../lib">
+            <include name="tika-core*.jar"/>
             <include name="commons-lang*.jar"/>
             <include name="commons-codec*.jar"/>
             <include name="commons-collections*.jar"/>

Modified: manifoldcf/trunk/framework/buildfiles/connector-build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/buildfiles/connector-build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/buildfiles/connector-build.xml (original)
+++ manifoldcf/trunk/framework/buildfiles/connector-build.xml Thu Oct 16 12:50:05 2014
@@ -140,6 +140,9 @@
             <include name="velocity*.jar"/>
         </fileset>
         <fileset dir="${mcf-dist}/lib">
+            <include name="tika-core*.jar"/>
+        </fileset>
+        <fileset dir="${mcf-dist}/lib">
             <include name="axis*.jar"/>
             <include name="saaj*.jar"/>
             <include name="wsdl4j*.jar"/>

Modified: manifoldcf/trunk/framework/core/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/pom.xml (original)
+++ manifoldcf/trunk/framework/core/pom.xml Thu Oct 16 12:50:05 2014
@@ -110,6 +110,11 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${tika.version}</version>
+    </dependency>
+    <dependency>
       <groupId>javax.activation</groupId>
       <artifactId>activation</artifactId>
       <version>${activation.version}</version>

Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java Thu Oct 16 12:50:05 2014
@@ -20,43 +20,16 @@
 package org.apache.manifoldcf.core.extmimemap;
 
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
-import java.util.*;
+import org.apache.tika.Tika;
 
 /** Map file extension to a mime type.
 */
 public class ExtensionMimeMap
 {
-  protected final static Map<String,String> mimeMap;
-  static {
-    mimeMap = new HashMap<String,String>();
-    mimeMap.put("xml", "text/xml");
-    mimeMap.put("csv", "text/csv");
-    mimeMap.put("json", "application/json");
-    mimeMap.put("pdf", "application/pdf");
-    mimeMap.put("rtf", "text/rtf");
-    mimeMap.put("html", "text/html");
-    mimeMap.put("htm", "text/html");
-    mimeMap.put("doc", "application/msword");
-    mimeMap.put("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-    mimeMap.put("ppt", "application/vnd.ms-powerpoint");
-    mimeMap.put("pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-    mimeMap.put("xls", "application/vnd.ms-excel");
-    mimeMap.put("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-    mimeMap.put("odt", "application/vnd.oasis.opendocument.text");
-    mimeMap.put("ott", "application/vnd.oasis.opendocument.text");
-    mimeMap.put("odp", "application/vnd.oasis.opendocument.presentation");
-    mimeMap.put("otp", "application/vnd.oasis.opendocument.presentation");
-    mimeMap.put("ods", "application/vnd.oasis.opendocument.spreadsheet");
-    mimeMap.put("ots", "application/vnd.oasis.opendocument.spreadsheet");
-    mimeMap.put("txt", "text/plain");
-    mimeMap.put("log", "text/plain");
-    mimeMap.put("aspx", "text/html");
-  }
-
   /** Map extension to mime type */
   public static String mapToMimeType(String extension)
   {
-    return mimeMap.get(extension.toLowerCase(java.util.Locale.ROOT));
+    return new Tika().detect("x."+extension);
   }
   
 }

Modified: manifoldcf/trunk/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/pom.xml (original)
+++ manifoldcf/trunk/pom.xml Thu Oct 16 12:50:05 2014
@@ -79,6 +79,7 @@
     <zookeeper.version>3.4.6</zookeeper.version>
     <xmlbeans.version>2.6.0</xmlbeans.version>
     <poi.version>3.11-beta2</poi.version>
+    <tika.version>1.6</tika.version>
   </properties>
 
   <modules>