You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/02/10 12:28:12 UTC

svn commit: r1658681 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/contrib/ solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/ solr/contrib/dataimporthandler-extras/src/test-files/dihextras/ solr/contrib/dataimporth...

Author: noble
Date: Tue Feb 10 11:28:11 2015
New Revision: 1658681

URL: http://svn.apache.org/r1658681
Log:
SOLR-7076: TikaEntityProcessor should have support for onError=skip

Added:
    lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
      - copied unchanged from r1658664, lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
    lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Tue Feb 10 11:28:11 2015
@@ -13,7 +13,7 @@ See http://lucene.apache.org/solr for mo
 * SOLR-6902: Use JUnit rules instead of inheritance with distributed Solr 
   tests to allow for multiple tests without the same class.
   (Ramkumar Aiyengar, Erick Erickson, Mike McCandless)
-  
+
 ==================  5.1.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
@@ -87,6 +87,9 @@ Other Changes
 * SOLR-6500: Refactor FileFetcher in SnapPuller, add debug logging. 
   (Ramkumar Aiyengar via Mark Miller)
 
+* SOLR-7076: In DIH, TikaEntityProcessor should have support for onError=skip
+  (Noble Paul)
+
 ==================  5.0.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

Modified: lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Tue Feb 10 11:28:11 2015
@@ -140,6 +140,10 @@ public class TikaEntityProcessor extends
         }
         tikaParser.parse(is, contentHandler, metadata , context);
     } catch (Exception e) {
+      if(SKIP.equals(onError)) {
+        throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
+            "Document skipped :" + e.getMessage());
+      }
       wrapAndThrow(SEVERE, e, "Unable to read content");
     }
     IOUtils.closeQuietly(is);

Modified: lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java Tue Feb 10 11:28:11 2015
@@ -49,6 +49,19 @@ public class TestTikaEntityProcessor ext
   "  </document>" +
   "</dataConfig>";
 
+  private String skipOnErrConf =
+      "<dataConfig>" +
+          "  <dataSource type=\"BinFileDataSource\"/>" +
+          "  <document>" +
+          "    <entity name=\"Tika\" onError=\"skip\"  processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
+          "<field column=\"content\" name=\"text\"/>" +
+          " </entity>" +
+          " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
+          "      <field column=\"text\"/>" +
+          "</entity>" +
+          "  </document>" +
+          "</dataConfig>";
+
   private String[] tests = {
       "//*[@numFound='1']"
       ,"//str[@name='author'][.='Grant Ingersoll']"
@@ -86,6 +99,12 @@ public class TestTikaEntityProcessor ext
   }
 
   @Test
+  public void testSkip() throws Exception {
+    runFullImport(skipOnErrConf);
+    assertQ(req("*:*"), "//*[@numFound='1']");
+  }
+
+  @Test
   public void testTikaHTMLMapperEmpty() throws Exception {
     runFullImport(getConfigHTML(null));
     assertQ(req("*:*"), testsHTMLDefault);