You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2014/09/07 22:54:54 UTC
svn commit: r1623227 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/licenses/ solr/ solr/contrib/ solr/contrib/extraction/
solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/
solr/licenses/
Author: uschindler
Date: Sun Sep 7 20:54:53 2014
New Revision: 1623227
URL: http://svn.apache.org/r1623227
Log:
Merged revision(s) 1623225 from lucene/dev/trunk:
SOLR-6488: Upgrade Solr Cell to TIKA 1.6
Added:
lucene/dev/branches/branch_4x/lucene/licenses/commons-compress-1.8.1.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/lucene/licenses/commons-compress-1.8.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/aspectjrt-1.8.0.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/aspectjrt-1.8.0.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/commons-compress-1.8.1.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/commons-compress-1.8.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/fontbox-1.8.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/fontbox-1.8.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/isoparser-1.0.2.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/isoparser-1.0.2.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/java-libpst-0.8.1.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/java-libpst-0.8.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/java-libpst-LICENSE-ASL.txt
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/java-libpst-LICENSE-ASL.txt
lucene/dev/branches/branch_4x/solr/licenses/java-libpst-NOTICE.txt
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/java-libpst-NOTICE.txt
lucene/dev/branches/branch_4x/solr/licenses/jempbox-1.8.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/jempbox-1.8.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/jmatio-1.0.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/jmatio-1.0.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/jmatio-LICENSE-BSD.txt
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/jmatio-LICENSE-BSD.txt
lucene/dev/branches/branch_4x/solr/licenses/jmatio-NOTICE.txt
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/jmatio-NOTICE.txt
lucene/dev/branches/branch_4x/solr/licenses/pdfbox-1.8.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/pdfbox-1.8.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-3.11-beta2.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/poi-3.11-beta2.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-3.11-beta2.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/poi-ooxml-3.11-beta2.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-schemas-3.11-beta2.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/poi-ooxml-schemas-3.11-beta2.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-scratchpad-3.11-beta2.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/poi-scratchpad-3.11-beta2.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/rome-1.0.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/rome-1.0.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-core-1.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/tika-core-1.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-parsers-1.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/tika-parsers-1.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-xmp-1.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/tika-xmp-1.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-core-0.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/vorbis-java-core-0.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-tika-0.6.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/vorbis-java-tika-0.6.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/xz-1.5.jar.sha1
- copied unchanged from r1623225, lucene/dev/trunk/solr/licenses/xz-1.5.jar.sha1
Removed:
lucene/dev/branches/branch_4x/lucene/licenses/commons-compress-1.7.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/aspectjrt-1.6.11.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/commons-compress-1.7.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/fontbox-1.8.4.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/isoparser-1.0-RC-1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/jempbox-1.8.4.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/netcdf-4.2-min.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/netcdf-LICENSE-MIT.txt
lucene/dev/branches/branch_4x/solr/licenses/netcdf-NOTICE.txt
lucene/dev/branches/branch_4x/solr/licenses/pdfbox-1.8.4.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-3.10.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-3.10.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-ooxml-schemas-3.10.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/poi-scratchpad-3.10.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/rome-0.9.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-core-1.5.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-parsers-1.5.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/tika-xmp-1.5.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-core-0.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/vorbis-java-tika-0.1.jar.sha1
lucene/dev/branches/branch_4x/solr/licenses/xz-1.4.jar.sha1
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/ivy-versions.properties (contents, props changed)
lucene/dev/branches/branch_4x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/NOTICE.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/extraction/ivy.xml
lucene/dev/branches/branch_4x/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
lucene/dev/branches/branch_4x/solr/licenses/ (props changed)
Modified: lucene/dev/branches/branch_4x/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/ivy-versions.properties?rev=1623227&r1=1623226&r2=1623227&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/branch_4x/lucene/ivy-versions.properties Sun Sep 7 20:54:53 2014
@@ -34,8 +34,9 @@ com.google.inject.guice.version = 3.0
/com.google.protobuf/protobuf-java = 2.5.0
/com.googlecode.concurrentlinkedhashmap/concurrentlinkedhashmap-lru = 1.2
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
-/com.googlecode.mp4parser/isoparser = 1.0-RC-1
+/com.googlecode.mp4parser/isoparser = 1.0.2
/com.ibm.icu/icu4j = 53.1
+/com.pff/java-libpst = 0.8.1
/com.spatial4j/spatial4j = 0.4.1
com.sun.jersey.version = 1.9
@@ -64,7 +65,6 @@ com.sun.jersey.version = 1.9
/commons-logging/commons-logging = 1.1.3
/de.l3s.boilerpipe/boilerpipe = 1.1.0
/dom4j/dom4j = 1.6.1
-/edu.ucar/netcdf = 4.2-min
/hsqldb/hsqldb = 1.8.0.10
/io.netty/netty = 3.7.0.Final
/jakarta-regexp/jakarta-regexp = 1.4
@@ -81,11 +81,12 @@ com.sun.jersey.version = 1.9
/net.arnx/jsonic = 1.2.7
/net.sf.saxon/Saxon-HE = 9.5.1-4
/net.sourceforge.argparse4j/argparse4j = 0.4.3
+/net.sourceforge.jmatio/jmatio = 1.0
/net.sourceforge.nekohtml/nekohtml = 1.9.17
/org.antlr/antlr-runtime = 3.5
/org.apache.ant/ant = 1.8.2
/org.apache.avro/avro = 1.7.5
-/org.apache.commons/commons-compress = 1.7
+/org.apache.commons/commons-compress = 1.8.1
/org.apache.derby/derby = 10.9.1.0
org.apache.hadoop.version = 2.2.0
@@ -125,18 +126,18 @@ org.apache.james.apache.mime4j.version =
/org.apache.mahout/mahout-math = 0.6
/org.apache.mrunit/mrunit = 1.0.0
-org.apache.pdfbox.version = 1.8.4
+org.apache.pdfbox.version = 1.8.6
/org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
/org.apache.pdfbox/jempbox = ${org.apache.pdfbox.version}
/org.apache.pdfbox/pdfbox = ${org.apache.pdfbox.version}
-org.apache.poi.version = 3.10.1
+org.apache.poi.version = 3.11-beta2
/org.apache.poi/poi = ${org.apache.poi.version}
/org.apache.poi/poi-ooxml = ${org.apache.poi.version}
/org.apache.poi/poi-ooxml-schemas = ${org.apache.poi.version}
/org.apache.poi/poi-scratchpad = ${org.apache.poi.version}
-org.apache.tika.version = 1.5
+org.apache.tika.version = 1.6
/org.apache.tika/tika-core = ${org.apache.tika.version}
/org.apache.tika/tika-parsers = ${org.apache.tika.version}
/org.apache.tika/tika-xmp = ${org.apache.tika.version}
@@ -152,7 +153,7 @@ org.apache.uima.version = 2.3.1
/org.apache.velocity/velocity-tools = 2.0
/org.apache.xmlbeans/xmlbeans = 2.6.0
/org.apache.zookeeper/zookeeper = 3.4.6
-/org.aspectj/aspectjrt = 1.6.11
+/org.aspectj/aspectjrt = 1.8.0
org.bouncycastle.version = 1.45
/org.bouncycastle/bcmail-jdk15 = ${org.bouncycastle.version}
@@ -191,7 +192,7 @@ org.eclipse.jetty.version = 8.1.10.v2013
/org.eclipse.jetty/jetty-webapp = ${org.eclipse.jetty.version}
/org.eclipse.jetty/jetty-xml = ${org.eclipse.jetty.version}
-org.gagravarr.vorbis.java.version = 0.1
+org.gagravarr.vorbis.java.version = 0.6
/org.gagravarr/vorbis-java-core = ${org.gagravarr.vorbis.java.version}
/org.gagravarr/vorbis-java-tika = ${org.gagravarr.vorbis.java.version}
@@ -230,7 +231,7 @@ org.slf4j.version = 1.7.6
/org.slf4j/slf4j-api = ${org.slf4j.version}
/org.slf4j/slf4j-log4j12 = ${org.slf4j.version}
-/org.tukaani/xz = 1.4
+/org.tukaani/xz = 1.5
/org.xerial.snappy/snappy-java = 1.0.5
-/rome/rome = 0.9
+/rome/rome = 1.0
/xerces/xercesImpl = 2.9.1
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1623227&r1=1623226&r2=1623227&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Sun Sep 7 20:54:53 2014
@@ -23,7 +23,7 @@ Consult the LUCENE_CHANGES.txt file for
Versions of Major Components
---------------------
-Apache Tika 1.5 (with upgraded Apache POI 3.10.1)
+Apache Tika 1.6
Carrot2 3.9.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
@@ -115,6 +115,12 @@ Other Changes
* SOLR-5322: core discovery can fail w/NPE and no explanation if a non-readable directory exists
(Said Chavkin, Erick Erickson)
+* SOLR-6488: Update to Apache Tika 1.6. This adds support for parsing Outlook PST and Matlab
+ MAT files. Parsing for NetCDF files was removed because of license issues; if you need support
+ for this format, download the parser JAR yourself and add it to the contrib/extraction/lib folder:
+ http://www.unidata.ucar.edu/software/thredds/current/netcdf-java/
+ (Uwe Schindler)
+
================== 4.10.0 =================
Modified: lucene/dev/branches/branch_4x/solr/NOTICE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/NOTICE.txt?rev=1623227&r1=1623226&r2=1623227&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/NOTICE.txt (original)
+++ lucene/dev/branches/branch_4x/solr/NOTICE.txt Sun Sep 7 20:54:53 2014
@@ -73,6 +73,9 @@ License: Common Development and Distribu
The HdfsDirectory and BlockDirectory were derived from
the Apache Blur incubating project and are Apache License 2.0.
+ASM (Java bytecode manipulation and analysis framework): http://asm.ow2.org/
+Copyright (c) 2000-2005 INRIA, France Telecom
+
=========================================================================
== Apache Lucene Notice ==
=========================================================================
@@ -336,8 +339,6 @@ Copyright (c) 2003-2005, www.fontbox.org
Copyright (c) 1995-2005 International Business Machines Corporation and others
-Copyright (c) 2000-2005 INRIA, France Telecom
-
Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
Copyright 2004 Sun Microsystems, Inc. (Rome JAR)
@@ -350,6 +351,12 @@ Copyright 2012 Kohei Taketa juniversalch
Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html)
+java-libpst is a pure Java library for reading Outlook PST and OST files.
+https://github.com/rjohnsondev/java-libpst
+
+JMatIO is a Java library to read, write, and manipulate Matlab binary MAT-files.
+http://www.sourceforge.net/projects/jmatio
+
=========================================================================
== Language Detection Notices ==
=========================================================================
Modified: lucene/dev/branches/branch_4x/solr/contrib/extraction/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/extraction/ivy.xml?rev=1623227&r1=1623226&r2=1623227&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/extraction/ivy.xml (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/extraction/ivy.xml Sun Sep 7 20:54:53 2014
@@ -29,10 +29,10 @@
<dependency org="org.apache.tika" name="tika-xmp" rev="${/org.apache.tika/tika-xmp}" conf="compile->*"/>
<!-- Tika dependencies - see http://tika.apache.org/1.3/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones
- (except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
+ (except slf4j-api, commons-codec, commons-logging, commons-httpclient, geronimo-stax-api_1.0_spec, jcip-annotations, xml-apis, asm)
+ WARNING: Don't add netcdf / unidataCommon (partially LGPL code) -->
<dependency org="org.gagravarr" name="vorbis-java-tika" rev="${/org.gagravarr/vorbis-java-tika}" conf="compile->*"/>
<dependency org="org.gagravarr" name="vorbis-java-core" rev="${/org.gagravarr/vorbis-java-core}" conf="compile->*"/>
- <dependency org="edu.ucar" name="netcdf" rev="${/edu.ucar/netcdf}" conf="compile->*"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="${/org.apache.james/apache-mime4j-core}" conf="compile->*"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="${/org.apache.james/apache-mime4j-dom}" conf="compile->*"/>
<dependency org="org.apache.commons" name="commons-compress" rev="${/org.apache.commons/commons-compress}" conf="compile->*"/>
@@ -46,7 +46,6 @@
<dependency org="org.apache.poi" name="poi-ooxml" rev="${/org.apache.poi/poi-ooxml}" conf="compile->*"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="${/org.apache.poi/poi-ooxml-schemas}" conf="compile->*"/>
<dependency org="org.apache.xmlbeans" name="xmlbeans" rev="${/org.apache.xmlbeans/xmlbeans}" conf="compile->*"/>
- <dependency org="dom4j" name="dom4j" rev="${/dom4j/dom4j}" conf="compile->*"/>
<dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="${/org.ccil.cowan.tagsoup/tagsoup}" conf="compile->*"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="${/com.googlecode.mp4parser/isoparser}" conf="compile->*"/>
<dependency org="org.aspectj" name="aspectjrt" rev="${/org.aspectj/aspectjrt}" conf="compile->*"/>
@@ -58,6 +57,8 @@
<dependency org="org.tukaani" name="xz" rev="${/org.tukaani/xz}" conf="compile->*"/>
<dependency org="com.adobe.xmp" name="xmpcore" rev="${/com.adobe.xmp/xmpcore}" conf="compile->*"/>
<dependency org="com.uwyn" name="jhighlight" rev="${/com.uwyn/jhighlight}" conf="compile->*"/>
+ <dependency org="com.pff" name="java-libpst" rev="${/com.pff/java-libpst}" conf="compile->*"/>
+ <dependency org="net.sourceforge.jmatio" name="jmatio" rev="${/net.sourceforge.jmatio/jmatio}" conf="compile->*"/>
<!-- Other ExtractingRequestHandler dependencies -->
<dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" conf="compile->*"/>
Modified: lucene/dev/branches/branch_4x/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java?rev=1623227&r1=1623226&r2=1623227&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/ExtractingRequestHandlerTest.java Sun Sep 7 20:54:53 2014
@@ -264,6 +264,7 @@ public class ExtractingRequestHandlerTes
"fmap.language", "extractedLanguage",
"literal.extractionLiteral", "one",
"literal.extractionLiteral", "two",
+ "fmap.X-Parsed-By", "ignored_parser",
"fmap.Last-Modified", "extractedDate"
);
// TODO: original author did not specify why an exception should be thrown... how to fix?
@@ -279,6 +280,7 @@ public class ExtractingRequestHandlerTes
"literal.id", "three",
"fmap.language", "extractedLanguage",
"literal.extractionLiteral", "one",
+ "fmap.X-Parsed-By", "ignored_parser",
"fmap.Last-Modified", "extractedDate"
);
assertU(commit());
@@ -297,6 +299,7 @@ public class ExtractingRequestHandlerTes
"fmap.Author", "extractedAuthor",
"literal.id", "one",
"fmap.language", "extractedLanguage",
+ "fmap.X-Parsed-By", "ignored_parser",
"fmap.content", "extractedContent",
ExtractingParams.STREAM_TYPE, "text/plain"
);
@@ -316,6 +319,7 @@ public class ExtractingRequestHandlerTes
"fmap.Author", "extractedAuthor",
"literal.id", "one",
"fmap.language", "extractedLanguage",
+ "fmap.X-Parsed-By", "ignored_parser",
"fmap.content", "extractedContent",
ExtractingParams.RESOURCE_NAME, "extraction/version_control.txt"
);