You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/02/10 11:49:11 UTC

svn commit: r1658664 - in /lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src: java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java test-files/dihextras/bad.doc test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java

Author: noble
Date: Tue Feb 10 10:49:11 2015
New Revision: 1658664

URL: http://svn.apache.org/r1658664
Log:
SOLR-7076: TikaEntityProcessor should have support for onError=skip

Added:
    lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc   (with props)
Modified:
    lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
    lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1658664&r1=1658663&r2=1658664&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Tue Feb 10 10:49:11 2015
@@ -140,6 +140,10 @@ public class TikaEntityProcessor extends
         }
         tikaParser.parse(is, contentHandler, metadata , context);
     } catch (Exception e) {
+      if(SKIP.equals(onError)) {
+        throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
+            "Document skipped :" + e.getMessage());
+      }
       wrapAndThrow(SEVERE, e, "Unable to read content");
     }
     IOUtils.closeQuietly(is);

Added: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc?rev=1658664&view=auto
==============================================================================
Binary file - no diff available.

Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java?rev=1658664&r1=1658663&r2=1658664&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java Tue Feb 10 10:49:11 2015
@@ -49,6 +49,19 @@ public class TestTikaEntityProcessor ext
   "  </document>" +
   "</dataConfig>";
 
+  private String skipOnErrConf =
+      "<dataConfig>" +
+          "  <dataSource type=\"BinFileDataSource\"/>" +
+          "  <document>" +
+          "    <entity name=\"Tika\" onError=\"skip\"  processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
+          "<field column=\"content\" name=\"text\"/>" +
+          " </entity>" +
+          " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
+          "      <field column=\"text\"/>" +
+          "</entity>" +
+          "  </document>" +
+          "</dataConfig>";
+
   private String[] tests = {
       "//*[@numFound='1']"
       ,"//str[@name='author'][.='Grant Ingersoll']"
@@ -86,6 +99,12 @@ public class TestTikaEntityProcessor ext
   }
 
   @Test
+  public void testSkip() throws Exception {
+    runFullImport(skipOnErrConf);
+    assertQ(req("*:*"), "//*[@numFound='1']");
+  }
+
+  @Test
   public void testTikaHTMLMapperEmpty() throws Exception {
     runFullImport(getConfigHTML(null));
     assertQ(req("*:*"), testsHTMLDefault);