You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2010/03/10 17:18:09 UTC
svn commit: r921425 - in /lucene/solr/trunk/contrib/extraction: CHANGES.txt
lib/icu4j-4_2_1.jar
src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
src/test/resources/arabic.pdf
Author: gsingers
Date: Wed Mar 10 16:18:09 2010
New Revision: 921425
URL: http://svn.apache.org/viewvc?rev=921425&view=rev
Log:
SOLR-1318: Added ICU4J to extraction and test for Arabic
Added:
lucene/solr/trunk/contrib/extraction/lib/icu4j-4_2_1.jar (with props)
lucene/solr/trunk/contrib/extraction/src/test/resources/arabic.pdf (with props)
Modified:
lucene/solr/trunk/contrib/extraction/CHANGES.txt
lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
Modified: lucene/solr/trunk/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/CHANGES.txt?rev=921425&r1=921424&r2=921425&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/CHANGES.txt (original)
+++ lucene/solr/trunk/contrib/extraction/CHANGES.txt Wed Mar 10 16:18:09 2010
@@ -29,6 +29,7 @@ $Id:$
* SOLR-1738: Upgrade to Tika 0.6 (gsingers)
+* SOLR-1813: Add ICU4j to libs and add tests for Arabic extraction (Robert Muir via gsingers)
================== Release 1.4.0 ==================
Added: lucene/solr/trunk/contrib/extraction/lib/icu4j-4_2_1.jar
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/lib/icu4j-4_2_1.jar?rev=921425&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/lib/icu4j-4_2_1.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java?rev=921425&r1=921424&r2=921425&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java (original)
+++ lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java Wed Mar 10 16:18:09 2010
@@ -322,6 +322,22 @@ public class ExtractingRequestHandlerTes
assertTrue(val + " is not equal to " + "linkNews", val.equals("linkNews") == true);//there are two <a> tags, and they get collapesd
}
+ /** test arabic PDF extraction is functional */
+ public void testArabicPDF() throws Exception {
+ ExtractingRequestHandler handler = (ExtractingRequestHandler)
+ h.getCore().getRequestHandler("/update/extract");
+ assertTrue("handler is null and it shouldn't be", handler != null);
+
+ loadLocal("arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
+ "fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
+ "fmap.Author", "extractedAuthor",
+ "fmap.content", "wdf_nocase",
+ "literal.id", "one",
+ "fmap.Last-Modified", "extractedDate");
+ assertQ(req("wdf_nocase:السلم"), "//result[@numFound=0]");
+ assertU(commit());
+ assertQ(req("wdf_nocase:السلم"), "//result[@numFound=1]");
+ }
SolrQueryResponse loadLocal(String filename, String... args) throws Exception {
LocalSolrQueryRequest req = (LocalSolrQueryRequest) req(args);
Added: lucene/solr/trunk/contrib/extraction/src/test/resources/arabic.pdf
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/arabic.pdf?rev=921425&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/arabic.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf