You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2014/10/16 14:54:57 UTC
svn commit: r1632291 - in /manifoldcf/branches/dev_1x: ./ connectors/tika/
framework/ framework/buildfiles/ framework/core/
framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/
Author: kwright
Date: Thu Oct 16 12:54:56 2014
New Revision: 1632291
URL: http://svn.apache.org/r1632291
Log:
Pull up fix for CONNECTORS-1074 from trunk.
Modified:
manifoldcf/branches/dev_1x/ (props changed)
manifoldcf/branches/dev_1x/CHANGES.txt
manifoldcf/branches/dev_1x/connectors/tika/build.xml
manifoldcf/branches/dev_1x/connectors/tika/pom.xml
manifoldcf/branches/dev_1x/framework/ (props changed)
manifoldcf/branches/dev_1x/framework/build.xml
manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml
manifoldcf/branches/dev_1x/framework/core/pom.xml
manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
manifoldcf/branches/dev_1x/pom.xml
Propchange: manifoldcf/branches/dev_1x/
------------------------------------------------------------------------------
Merged /manifoldcf/branches/CONNECTORS-1074:r1632228-1632283
Merged /manifoldcf/trunk:r1632289
Modified: manifoldcf/branches/dev_1x/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/CHANGES.txt?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/CHANGES.txt (original)
+++ manifoldcf/branches/dev_1x/CHANGES.txt Thu Oct 16 12:54:56 2014
@@ -3,6 +3,9 @@ $Id$
======================= 1.8-dev =====================
+CONNECTORS-1074: Use Tika to map extensions to mime types.
+(Shinichiro Abe, Karl Wright)
+
CONNECTORS-1075: Provide a means of obfuscating passwords in
properties.xml and global-properties.xml. Also include an obfuscation
utility in the distribution, for generating obfuscated passwords.
Modified: manifoldcf/branches/dev_1x/connectors/tika/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/tika/build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/tika/build.xml (original)
+++ manifoldcf/branches/dev_1x/connectors/tika/build.xml Thu Oct 16 12:54:56 2014
@@ -36,7 +36,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
<include name="tika-parsers*.jar"/>
<include name="tagsoup*.jar"/>
<include name="poi*.jar"/>
@@ -80,7 +79,6 @@
<include name="jackson-core*.jar"/>
<include name="jackson-databind*.jar"/>
<include name="jackson-annotations*.jar"/>
- <include name="tika-core*.jar"/>
<include name="tika-parsers*.jar"/>
<include name="tagsoup*.jar"/>
<include name="poi*.jar"/>
Modified: manifoldcf/branches/dev_1x/connectors/tika/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/connectors/tika/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/connectors/tika/pom.xml (original)
+++ manifoldcf/branches/dev_1x/connectors/tika/pom.xml Thu Oct 16 12:54:56 2014
@@ -213,13 +213,8 @@
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
- <artifactId>tika-core</artifactId>
- <version>1.5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
- <version>1.5</version>
+ <version>${tika.version}</version>
</dependency>
<!-- Testing dependencies -->
Propchange: manifoldcf/branches/dev_1x/framework/
------------------------------------------------------------------------------
Merged /manifoldcf/trunk/framework:r1632289
Modified: manifoldcf/branches/dev_1x/framework/build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/build.xml (original)
+++ manifoldcf/branches/dev_1x/framework/build.xml Thu Oct 16 12:54:56 2014
@@ -32,6 +32,9 @@
<path id="framework-classpath">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -332,6 +335,9 @@
<mkdir dir="build/webapp/authority-service/WEB-INF/lib"/>
<copy todir="build/webapp/authority-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -384,6 +390,9 @@
<mkdir dir="build/webapp/authority-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/authority-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -441,6 +450,9 @@
<mkdir dir="build/webapp/api-service/WEB-INF/lib"/>
<copy todir="build/webapp/api-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -494,6 +506,9 @@
<mkdir dir="build/webapp/api-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/api-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -552,6 +567,9 @@
<mkdir dir="build/webapp/crawler-ui/WEB-INF/lib"/>
<copy todir="build/webapp/crawler-ui/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -613,6 +631,9 @@
<mkdir dir="build/webapp/crawler-ui-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/crawler-ui-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -681,6 +702,9 @@
<mkdir dir="build/webapp/combined-service/WEB-INF/lib"/>
<copy todir="build/webapp/combined-service/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -744,6 +768,9 @@
<mkdir dir="build/webapp/combined-service-proprietary/WEB-INF/lib"/>
<copy todir="build/webapp/combined-service-proprietary/WEB-INF/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -876,6 +903,9 @@
<mkdir dir="dist/lib"/>
<copy todir="dist/lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="../lib">
<include name="castor*.jar"/>
<include name="geronimo-javamail_1.4_spec*.jar"/>
<include name="commons-discovery*.jar"/>
@@ -1128,6 +1158,7 @@
<path id="process-lib">
<fileset dir="../lib">
+ <include name="tika-core*.jar"/>
<include name="commons-lang*.jar"/>
<include name="commons-codec*.jar"/>
<include name="commons-collections*.jar"/>
Modified: manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml (original)
+++ manifoldcf/branches/dev_1x/framework/buildfiles/connector-build.xml Thu Oct 16 12:54:56 2014
@@ -141,6 +141,9 @@
<include name="velocity*.jar"/>
</fileset>
<fileset dir="${mcf-dist}/lib">
+ <include name="tika-core*.jar"/>
+ </fileset>
+ <fileset dir="${mcf-dist}/lib">
<include name="axis*.jar"/>
<include name="saaj*.jar"/>
<include name="wsdl4j*.jar"/>
Modified: manifoldcf/branches/dev_1x/framework/core/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/core/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/core/pom.xml (original)
+++ manifoldcf/branches/dev_1x/framework/core/pom.xml Thu Oct 16 12:54:56 2014
@@ -110,6 +110,11 @@
</exclusions>
</dependency>
<dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${tika.version}</version>
+ </dependency>
+ <dependency>
<groupId>javax.activation</groupId>
<artifactId>activation</artifactId>
<version>${activation.version}</version>
Modified: manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java (original)
+++ manifoldcf/branches/dev_1x/framework/core/src/main/java/org/apache/manifoldcf/core/extmimemap/ExtensionMimeMap.java Thu Oct 16 12:54:56 2014
@@ -20,43 +20,16 @@
package org.apache.manifoldcf.core.extmimemap;
import org.apache.manifoldcf.core.interfaces.ManifoldCFException;
-import java.util.*;
+import org.apache.tika.Tika;
/** Map file extension to a mime type.
*/
public class ExtensionMimeMap
{
- protected final static Map<String,String> mimeMap;
- static {
- mimeMap = new HashMap<String,String>();
- mimeMap.put("xml", "text/xml");
- mimeMap.put("csv", "text/csv");
- mimeMap.put("json", "application/json");
- mimeMap.put("pdf", "application/pdf");
- mimeMap.put("rtf", "text/rtf");
- mimeMap.put("html", "text/html");
- mimeMap.put("htm", "text/html");
- mimeMap.put("doc", "application/msword");
- mimeMap.put("docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
- mimeMap.put("ppt", "application/vnd.ms-powerpoint");
- mimeMap.put("pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation");
- mimeMap.put("xls", "application/vnd.ms-excel");
- mimeMap.put("xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
- mimeMap.put("odt", "application/vnd.oasis.opendocument.text");
- mimeMap.put("ott", "application/vnd.oasis.opendocument.text");
- mimeMap.put("odp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("otp", "application/vnd.oasis.opendocument.presentation");
- mimeMap.put("ods", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("ots", "application/vnd.oasis.opendocument.spreadsheet");
- mimeMap.put("txt", "text/plain");
- mimeMap.put("log", "text/plain");
- mimeMap.put("aspx", "text/html");
- }
-
/** Map extension to mime type */
public static String mapToMimeType(String extension)
{
- return mimeMap.get(extension.toLowerCase(java.util.Locale.ROOT));
+ return new Tika().detect("x."+extension);
}
}
Modified: manifoldcf/branches/dev_1x/pom.xml
URL: http://svn.apache.org/viewvc/manifoldcf/branches/dev_1x/pom.xml?rev=1632291&r1=1632290&r2=1632291&view=diff
==============================================================================
--- manifoldcf/branches/dev_1x/pom.xml (original)
+++ manifoldcf/branches/dev_1x/pom.xml Thu Oct 16 12:54:56 2014
@@ -79,6 +79,7 @@
<zookeeper.version>3.4.6</zookeeper.version>
<xmlbeans.version>2.6.0</xmlbeans.version>
<poi.version>3.11-beta2</poi.version>
+ <tika.version>1.6</tika.version>
</properties>
<modules>