You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/07/27 20:48:59 UTC
svn commit: r798253 - in /lucene/solr/trunk/contrib/extraction: ./ lib/
src/test/java/org/apache/solr/handler/
Author: gsingers
Date: Mon Jul 27 18:48:58 2009
New Revision: 798253
URL: http://svn.apache.org/viewvc?rev=798253&view=rev
Log:
SOLR-1310: Upgrade to Tika 0.4
Added:
lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar (with props)
lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar (with props)
Removed:
lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-132.jar
lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-132.jar
lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.0.4.jar
lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0-dev.jar
lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta5.jar
lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta5.jar
lucene/solr/trunk/contrib/extraction/lib/tika-0.3.jar
Modified:
lucene/solr/trunk/contrib/extraction/CHANGES.txt
lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar
lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
Modified: lucene/solr/trunk/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/CHANGES.txt?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/extraction/CHANGES.txt Mon Jul 27 18:48:58 2009
@@ -28,4 +28,9 @@
3. SOLR-1075: Upgrade to Tika 0.3. See http://www.apache.org/dist/lucene/tika/CHANGES-0.3.txt (gsingers)
-4. SOLR-1128: Added metadata output to "extract only" option. (gsingers)
\ No newline at end of file
+4. SOLR-1128: Added metadata output to "extract only" option. (gsingers)
+
+5. SOLR-1310: Upgrade to Tika 0.4. Note that there are now some differences in language detection.
+ See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c
+ for discussion on language detection.
+ See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt. (gsingers)
\ No newline at end of file
Added: lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
Binary files - no diff available.
Added: lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Mon Jul 27 18:48:58 2009
@@ -79,8 +79,9 @@
"literal.id","simple2",
"uprefix", "t_",
"lowernames", "true",
- "captureAttr", "true", "map.a","t_href",
- "map.content_language", "abcxyz", // test that lowernames is applied before mapping, and uprefix is applied after mapping
+ "captureAttr", "true",
+ "map.a","t_href",
+ "map.content_type", "abcxyz", // test that lowernames is applied before mapping, and uprefix is applied after mapping
"commit", "true" // test immediate commit
);
@@ -88,7 +89,7 @@
// assertQ(req("q","id:simple2","indent","true"), "//*[@numFound='0']");
// test both lowernames and unknown field mapping
- assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']");
+ //assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']");
assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
assertQ(req("+id:simple2 +t_abcxyz:[* TO *]"), "//*[@numFound='1']");
@@ -98,7 +99,6 @@
"uprefix", "t_",
"lowernames", "true",
"captureAttr", "true", "map.a","t_href",
- "map.content_language", "abcxyz",
"commit", "true"
,"boost.t_href", "100.0"
@@ -106,6 +106,7 @@
assertQ(req("t_href:http"), "//*[@numFound='2']");
assertQ(req("t_href:http"), "//doc[1]/str[.='simple3']");
+ assertQ(req("+id:simple3 +t_content_type:[* TO *]"), "//*[@numFound='1']");//test lowercase and then uprefix
// test capture
loadLocal("simple.html",