You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this copy.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/02/10 11:49:11 UTC
svn commit: r1658664 - in
/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src:
java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
test-files/dihextras/bad.doc
test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
Author: noble
Date: Tue Feb 10 10:49:11 2015
New Revision: 1658664
URL: http://svn.apache.org/r1658664
Log:
SOLR-7076: TikaEntityProcessor should have support for onError=skip
Added:
lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc (with props)
Modified:
lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1658664&r1=1658663&r2=1658664&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Tue Feb 10 10:49:11 2015
@@ -140,6 +140,10 @@ public class TikaEntityProcessor extends
}
tikaParser.parse(is, contentHandler, metadata , context);
} catch (Exception e) {
+ if(SKIP.equals(onError)) {
+ throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
+ "Document skipped :" + e.getMessage());
+ }
wrapAndThrow(SEVERE, e, "Unable to read content");
}
IOUtils.closeQuietly(is);
Added: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc?rev=1658664&view=auto
==============================================================================
Binary file - no diff available.
Modified: lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java?rev=1658664&r1=1658663&r2=1658664&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java Tue Feb 10 10:49:11 2015
@@ -49,6 +49,19 @@ public class TestTikaEntityProcessor ext
" </document>" +
"</dataConfig>";
+ private String skipOnErrConf =
+ "<dataConfig>" +
+ " <dataSource type=\"BinFileDataSource\"/>" +
+ " <document>" +
+ " <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
+ "<field column=\"content\" name=\"text\"/>" +
+ " </entity>" +
+ " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
+ " <field column=\"text\"/>" +
+ "</entity>" +
+ " </document>" +
+ "</dataConfig>";
+
private String[] tests = {
"//*[@numFound='1']"
,"//str[@name='author'][.='Grant Ingersoll']"
@@ -86,6 +99,12 @@ public class TestTikaEntityProcessor ext
}
@Test
+ public void testSkip() throws Exception {
+ runFullImport(skipOnErrConf);
+ assertQ(req("*:*"), "//*[@numFound='1']");
+ }
+
+ @Test
public void testTikaHTMLMapperEmpty() throws Exception {
runFullImport(getConfigHTML(null));
assertQ(req("*:*"), testsHTMLDefault);