You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/16 14:50:05 UTC
svn commit: r1632289 - in /manifoldcf/trunk: ./ connectors/tika/ framework/
framework/buildfiles/ framework/core/
framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/
Author: kwright
Date: Thu Oct 16 12:50:05 2014
New Revision: 1632289
URL: http://svn.apache.org/r1632289
Log:
Fix for CONNECTORS-1074.
Modified:
manifoldcf/trunk/ (props changed)
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/tika/build.xml
manifoldcf/trunk/connectors/tika/pom.xml
manifoldcf/trunk/framework/build.xml
manifoldcf/trunk/framework/buildfiles/connector-build.xml
manifoldcf/trunk/framework/core/pom.xml
manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
manifoldcf/trunk/pom.xml
Propchange: manifoldcf/trunk/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-1074:r1632228-1632283
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Thu Oct 16 12:50:05 2014
@@ -3,6 +3,9 @@ $Id$
======================= 2.0-dev =====================
+CONNECTORS-1074: Use Tika to map extensions to mime types.
+(Shinichiro Abe, Karl Wright)
+
CONNECTORS-1072: Remove 1.x-related import fallback code, since
2.0 is not backwards compatible with 1.x.
(Karl Wright)
Modified: manifoldcf/trunk/connectors/tika/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/build.xml (original)
+++ manifoldcf/trunk/connectors/tika/build.xml Thu Oct 16 12:50:05 2014
@@ -36,7 +36,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
<include name="tika-parsers*.jar"/>
<include name="tagsoup*.jar"/>
<include name="poi*.jar"/>
@@ -80,7 +79,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
<include name="tika-parsers*.jar"/>
<include name="tagsoup*.jar"/>
<include name="poi*.jar"/>
Modified: manifoldcf/trunk/connectors/tika/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/tika/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/tika/pom.xml (original)
+++ manifoldcf/trunk/connectors/tika/pom.xml Thu Oct 16 12:50:05 2014
@@ -213,13 +213,8 @@
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>1.6</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
- <version>1.6</version>
+ <version>${tika.version}</version>
</dependency>
<!-- Testing dependencies -->
Modified: manifoldcf/trunk/framework/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/build.xml (original)
+++ manifoldcf/trunk/framework/build.xml Thu Oct 16 12:50:05 2014
@@ -31,6 +31,9 @@
<path id="framework-classpath">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -330,6 +333,9 @@
<mkdir dir="build/webapp/authority-service/WEB-INF/lib"/>
<copy todir="build/webapp/authority-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -381,6 +387,9 @@
<mkdir dir="build/webapp/authority-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/authority-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -437,6 +446,9 @@
<mkdir dir="build/webapp/api-service/WEB-INF/lib"/>
<copy todir="build/webapp/api-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -489,6 +501,9 @@
<mkdir dir="build/webapp/api-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/api-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -546,6 +561,9 @@
<mkdir dir="build/webapp/crawler-ui/WEB-INF/lib"/>
<copy todir="build/webapp/crawler-ui/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -606,6 +624,9 @@
<mkdir dir="build/webapp/crawler-ui-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/crawler-ui-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -673,6 +694,9 @@
<mkdir dir="build/webapp/combined-service/WEB-INF/lib"/>
<copy todir="build/webapp/combined-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -735,6 +759,9 @@
<mkdir dir="build/webapp/combined-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/combined-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -866,6 +893,9 @@
<mkdir dir="dist/lib"/>
<copy todir="dist/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -1116,6 +1146,7 @@
<path id="process-lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
<include name="commons-lang*.jar"/>
<include name="commons-codec*.jar"/>
<include name="commons-collections*.jar"/>
Modified: manifoldcf/trunk/framework/buildfiles/connector-build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/buildfiles/connector-build.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/buildfiles/connector-build.xml (original)
+++ manifoldcf/trunk/framework/buildfiles/connector-build.xml Thu Oct 16 12:50:05 2014
@@ -140,6 +140,9 @@
<include name="velocity*.jar"/>
</fileset>
<fileset dir="${mcf-dist}/lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="${mcf-dist}/lib">
<include name="axis*.jar"/>
<include name="saaj*.jar"/>
<include name="wsdl4j*.jar"/>
Modified: manifoldcf/trunk/framework/core/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/pom.xml (original)
+++ manifoldcf/trunk/framework/core/pom.xml Thu Oct 16 12:50:05 2014
@@ -110,6 +110,11 @@
</exclusions>
</dependency>
<dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${tika.version}</version>
+ </dependency>
+ <dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
<version>${activation.version}</version>
Modified: manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java (original)
+++ manifoldcf/trunk/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java Thu Oct 16 12:50:05 2014
@@ -20,43 +20,16 @@
package org.apache.manifoldcf.core.extmimemap;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
-import java.util.*;
+import org.apache.tika.Tika;
/** Map file extension to a mime type.
*/
public class ExtensionMimeMap
{
- protected final static Map<String,String> mimeMap;
- static {
- mimeMap = new HashMap<String,String>();
- mimeMap.put("xml", "text/xml");
- mimeMap.put("csv", "text/csv");
- mimeMap.put("json", "application/json");
- mimeMap.put("pdf", "application/pdf");
- mimeMap.put("rtf", "text/rtf");
- mimeMap.put("html", "text/html");
- mimeMap.put("htm", "text/html");
- mimeMap.put("doc", "application/msword");
- mimeMap.put("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
- mimeMap.put("ppt", "application/vnd.ms-powerpoint");
- mimeMap.put("pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
- mimeMap.put("xls", "application/vnd.ms-excel");
- mimeMap.put("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- mimeMap.put("odt", "application/vnd.oasis.opendocument.text");
- mimeMap.put("ott", "application/vnd.oasis.opendocument.text");
- mimeMap.put("odp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("otp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("ods", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("ots", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("txt", "text/plain");
- mimeMap.put("log", "text/plain");
- mimeMap.put("aspx", "text/html");
- }
-
/** Map extension to mime type */
public static String mapToMimeType(String extension)
{
- return mimeMap.get(extension.toLowerCase(java.util.Locale.ROOT));
+ return new Tika().detect("x."+extension);
}
}
Modified: manifoldcf/trunk/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/pom.xml?rev=1632289&r1=1632288&r2=1632289&view=diff
==============================================================================
--- manifoldcf/trunk/pom.xml (original)
+++ manifoldcf/trunk/pom.xml Thu Oct 16 12:50:05 2014
@@ -79,6 +79,7 @@
<zookeeper.version>3.4.6</zookeeper.version>
<xmlbeans.version>2.6.0</xmlbeans.version>
<poi.version>3.11-beta2</poi.version>
+ <tika.version>1.6</tika.version>
</properties>
<modules>