You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/16 14:54:57 UTC

svn commit: r1632291 - in /manifoldcf/branches/dev_1x: ./ connectors/tika/ framework/ framework/buildfiles/ framework/core/ framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/

Author: kwright
Date: Thu Oct 16 12:54:56 2014
New Revision: 1632291

URL: http://svn.apache.org/r1632291
Log:
Pull up fix for CONNECTORS-1074 from trunk.

Modified:
    manifoldcf/branches/dev_1x/   (props changed)
    manifoldcf/branches/dev_1x/CHANGES.txt
    manifoldcf/branches/dev_1x/connectors/tika/build.xml
    manifoldcf/branches/dev_1x/connectors/tika/pom.xml
    manifoldcf/branches/dev_1x/framework/   (props changed)
    manifoldcf/branches/dev_1x/framework/build.xml
    manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml
    manifoldcf/branches/dev_1x/framework/core/pom.xml
    manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
    manifoldcf/branches/dev_1x/pom.xml

Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
  Merged /manifoldcf/branches/CONNECTORS-1074:r1632228-1632283
  Merged /manifoldcf/trunk:r1632289

Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Thu Oct 16 12:54:56 2014
@@ -3,6 +3,9 @@ $Id$
 
 ======================= 1.8-dev =====================
 
+CONNECTORS-1074: Use Tika to map extensions to mime types.
+(Shinichiro Abe, Karl Wright)
+
 CONNECTORS-1075: Provide a means of obfuscating passwords in
 properties.xml and global-properties.xml.  Also include an obfuscation
 utility in the distribution, for generating obfuscated passwords.

Modified: manifoldcf/branches/dev_1x/connectors/tika/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/tika/build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/tika/build.xml (original)
+++ manifoldcf/branches/dev_1x/connectors/tika/build.xml Thu Oct 16 12:54:56 2014
@@ -36,7 +36,6 @@
             <include name="jackson-core*.jar"/>
             <include name="jackson-databind*.jar"/>
             <include name="jackson-annotations*.jar"/>
-            <include name="tika-core*.jar"/>
             <include name="tika-parsers*.jar"/>
             <include name="tagsoup*.jar"/>
             <include name="poi*.jar"/>
@@ -80,7 +79,6 @@
                 <include name="jackson-core*.jar"/>
                 <include name="jackson-databind*.jar"/>
                 <include name="jackson-annotations*.jar"/>
-                <include name="tika-core*.jar"/>
                 <include name="tika-parsers*.jar"/>
                 <include name="tagsoup*.jar"/>
                 <include name="poi*.jar"/>

Modified: manifoldcf/branches/dev_1x/connectors/tika/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/tika/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/tika/pom.xml (original)
+++ manifoldcf/branches/dev_1x/connectors/tika/pom.xml Thu Oct 16 12:54:56 2014
@@ -213,13 +213,8 @@
     </dependency>
     <dependency>
 	  <groupId>org.apache.tika</groupId>
-	  <artifactId>tika-core</artifactId>
-	  <version>1.5</version>
-    </dependency>
-    <dependency>
-	  <groupId>org.apache.tika</groupId>
 	  <artifactId>tika-parsers</artifactId>
-	  <version>1.5</version>
+	  <version>${tika.version}</version>
     </dependency>
     
     <!-- Testing dependencies -->

Propchange: manifoldcf/branches/dev_1x/framework/
------------------------------------------------------------------------------
  Merged /manifoldcf/trunk/framework:r1632289

Modified: manifoldcf/branches/dev_1x/framework/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/build.xml (original)
+++ manifoldcf/branches/dev_1x/framework/build.xml Thu Oct 16 12:54:56 2014
@@ -32,6 +32,9 @@
     
     <path id="framework-classpath">
         <fileset dir="../lib">
+            <include name="tika-core*.jar"/>
+        </fileset>
+        <fileset dir="../lib">
             <include name="castor*.jar"/>
             <include name="geronimo-javamail_1.4_spec*.jar"/>
             <include name="commons-discovery*.jar"/>
@@ -332,6 +335,9 @@
         <mkdir dir="build/webapp/authority-service/WEB-INF/lib"/>
         <copy todir="build/webapp/authority-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -384,6 +390,9 @@
         <mkdir dir="build/webapp/authority-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/authority-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -441,6 +450,9 @@
         <mkdir dir="build/webapp/api-service/WEB-INF/lib"/>
         <copy todir="build/webapp/api-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -494,6 +506,9 @@
         <mkdir dir="build/webapp/api-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/api-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -552,6 +567,9 @@
         <mkdir dir="build/webapp/crawler-ui/WEB-INF/lib"/>
         <copy todir="build/webapp/crawler-ui/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -613,6 +631,9 @@
         <mkdir dir="build/webapp/crawler-ui-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/crawler-ui-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -681,6 +702,9 @@
         <mkdir dir="build/webapp/combined-service/WEB-INF/lib"/>
         <copy todir="build/webapp/combined-service/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -744,6 +768,9 @@
         <mkdir dir="build/webapp/combined-service-proprietary/WEB-INF/lib"/>
         <copy todir="build/webapp/combined-service-proprietary/WEB-INF/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -876,6 +903,9 @@
         <mkdir dir="dist/lib"/>
         <copy todir="dist/lib">
             <fileset dir="../lib">
+                <include name="tika-core*.jar"/>
+            </fileset>
+            <fileset dir="../lib">
                 <include name="castor*.jar"/>
                 <include name="geronimo-javamail_1.4_spec*.jar"/>
                 <include name="commons-discovery*.jar"/>
@@ -1128,6 +1158,7 @@
     
     <path id="process-lib">
         <fileset dir="../lib">
+            <include name="tika-core*.jar"/>
             <include name="commons-lang*.jar"/>
             <include name="commons-codec*.jar"/>
             <include name="commons-collections*.jar"/>

Modified: manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml (original)
+++ manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml Thu Oct 16 12:54:56 2014
@@ -141,6 +141,9 @@
             <include name="velocity*.jar"/>
         </fileset>
         <fileset dir="${mcf-dist}/lib">
+            <include name="tika-core*.jar"/>
+        </fileset>
+        <fileset dir="${mcf-dist}/lib">
             <include name="axis*.jar"/>
             <include name="saaj*.jar"/>
             <include name="wsdl4j*.jar"/>

Modified: manifoldcf/branches/dev_1x/framework/core/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/core/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/core/pom.xml (original)
+++ manifoldcf/branches/dev_1x/framework/core/pom.xml Thu Oct 16 12:54:56 2014
@@ -110,6 +110,11 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>${tika.version}</version>
+    </dependency>
+    <dependency>
       <groupId>javax.activation</groupId>
       <artifactId>activation</artifactId>
       <version>${activation.version}</version>

Modified: manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java (original)
+++ manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java Thu Oct 16 12:54:56 2014
@@ -20,43 +20,16 @@
 package org.apache.manifoldcf.core.extmimemap;
 
 import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
-import java.util.*;
+import org.apache.tika.Tika;
 
 /** Map file extension to a mime type.
 */
 public class ExtensionMimeMap
 {
-  protected final static Map<String,String> mimeMap;
-  static {
-    mimeMap = new HashMap<String,String>();
-    mimeMap.put("xml", "text/xml");
-    mimeMap.put("csv", "text/csv");
-    mimeMap.put("json", "application/json");
-    mimeMap.put("pdf", "application/pdf");
-    mimeMap.put("rtf", "text/rtf");
-    mimeMap.put("html", "text/html");
-    mimeMap.put("htm", "text/html");
-    mimeMap.put("doc", "application/msword");
-    mimeMap.put("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
-    mimeMap.put("ppt", "application/vnd.ms-powerpoint");
-    mimeMap.put("pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
-    mimeMap.put("xls", "application/vnd.ms-excel");
-    mimeMap.put("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
-    mimeMap.put("odt", "application/vnd.oasis.opendocument.text");
-    mimeMap.put("ott", "application/vnd.oasis.opendocument.text");
-    mimeMap.put("odp", "application/vnd.oasis.opendocument.presentation");
-    mimeMap.put("otp", "application/vnd.oasis.opendocument.presentation");
-    mimeMap.put("ods", "application/vnd.oasis.opendocument.spreadsheet");
-    mimeMap.put("ots", "application/vnd.oasis.opendocument.spreadsheet");
-    mimeMap.put("txt", "text/plain");
-    mimeMap.put("log", "text/plain");
-    mimeMap.put("aspx", "text/html");
-  }
-
   /** Map extension to mime type */
   public static String mapToMimeType(String extension)
   {
-    return mimeMap.get(extension.toLowerCase(java.util.Locale.ROOT));
+    return new Tika().detect("x."+extension);
   }
   
 }

Modified: manifoldcf/branches/dev_1x/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/pom.xml (original)
+++ manifoldcf/branches/dev_1x/pom.xml Thu Oct 16 12:54:56 2014
@@ -79,6 +79,7 @@
     <zookeeper.version>3.4.6</zookeeper.version>
     <xmlbeans.version>2.6.0</xmlbeans.version>
     <poi.version>3.11-beta2</poi.version>
+    <tika.version>1.6</tika.version>
   </properties>
 
   <modules>