You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2009/07/27 20:48:59 UTC

svn commit: r798253 - in /lucene/solr/trunk/contrib/extraction: ./ lib/ src/test/java/org/apache/solr/handler/

Author: gsingers
Date: Mon Jul 27 18:48:58 2009
New Revision: 798253

URL: http://svn.apache.org/viewvc?rev=798253&view=rev
Log:
SOLR-1310: Upgrade to Tika 0.4

Added:
    lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar   (with props)
    lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar   (with props)
Removed:
    lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-132.jar
    lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-132.jar
    lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.0.4.jar
    lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0-dev.jar
    lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta5.jar
    lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta5.jar
    lucene/solr/trunk/contrib/extraction/lib/tika-0.3.jar
Modified:
    lucene/solr/trunk/contrib/extraction/CHANGES.txt
    lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar
    lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java

Modified: lucene/solr/trunk/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/CHANGES.txt?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/extraction/CHANGES.txt Mon Jul 27 18:48:58 2009
@@ -28,4 +28,9 @@
 
 3. SOLR-1075: Upgrade to Tika 0.3.  See http://www.apache.org/dist/lucene/tika/CHANGES-0.3.txt (gsingers)
 
-4. SOLR-1128: Added metadata output to "extract only" option.  (gsingers)
\ No newline at end of file
+4. SOLR-1128: Added metadata output to "extract only" option.  (gsingers)
+
+5. SOLR-1310: Upgrade to Tika 0.4. Note there are some differences in detecting languages now.
+    See http://www.lucidimagination.com/search/document/d6f1899a85b2a45c/vote_apache_tika_0_4_release_candidate_2#d6f1899a85b2a45c
+    for discussion on language detection.
+    See http://www.apache.org/dist/lucene/tika/CHANGES-0.4.txt (gsingers)
\ No newline at end of file

Added: lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/bcmail-jdk14-136.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/bcprov-jdk14-136.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/commons-compress-1.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/commons-logging-1.1.1.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/fontbox-0.1.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/geronimo-stax-api_1.0_spec-1.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/jempbox-0.2.0.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/pdfbox-0.7.3.jar?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
Binary files - no diff available.

Added: lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-3.5-beta6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-ooxml-3.5-beta6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/poi-scratchpad-3.5-beta6.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/tika-core-0.4.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar?rev=798253&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/lib/tika-parsers-0.4.jar
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=798253&r1=798252&r2=798253&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Mon Jul 27 18:48:58 2009
@@ -79,8 +79,9 @@
       "literal.id","simple2",
       "uprefix", "t_",
       "lowernames", "true",
-      "captureAttr", "true",  "map.a","t_href",
-      "map.content_language", "abcxyz",  // test that lowernames is applied before mapping, and uprefix is applied after mapping
+      "captureAttr", "true",
+      "map.a","t_href",
+      "map.content_type", "abcxyz",  // test that lowernames is applied before mapping, and uprefix is applied after mapping
       "commit", "true"  // test immediate commit
     );
 
@@ -88,7 +89,7 @@
     // assertQ(req("q","id:simple2","indent","true"), "//*[@numFound='0']");
 
     // test both lowernames and unknown field mapping
-    assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']");
+    //assertQ(req("+id:simple2 +t_content_type:[* TO *]"), "//*[@numFound='1']");
     assertQ(req("+id:simple2 +t_href:[* TO *]"), "//*[@numFound='1']");
     assertQ(req("+id:simple2 +t_abcxyz:[* TO *]"), "//*[@numFound='1']");
 
@@ -98,7 +99,6 @@
       "uprefix", "t_",
       "lowernames", "true",
       "captureAttr", "true",  "map.a","t_href",
-      "map.content_language", "abcxyz",
       "commit", "true"
 
       ,"boost.t_href", "100.0"
@@ -106,6 +106,7 @@
 
     assertQ(req("t_href:http"), "//*[@numFound='2']");
     assertQ(req("t_href:http"), "//doc[1]/str[.='simple3']");
+    assertQ(req("+id:simple3 +t_content_type:[* TO *]"), "//*[@numFound='1']");//test lowercase and then uprefix
 
     // test capture
      loadLocal("simple.html",