You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2015/02/10 12:28:12 UTC
svn commit: r1658681 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/contrib/
solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/
solr/contrib/dataimporthandler-extras/src/test-files/dihextras/
solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/
Author: noble
Date: Tue Feb 10 11:28:11 2015
New Revision: 1658681
URL: http://svn.apache.org/r1658681
Log:
SOLR-7076: TikaEntityProcessor should have support for onError=skip
Added:
lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
- copied unchanged from r1658664, lucene/dev/trunk/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/bad.doc
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/contrib/ (props changed)
lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Tue Feb 10 11:28:11 2015
@@ -13,7 +13,7 @@ See http://lucene.apache.org/solr for mo
* SOLR-6902: Use JUnit rules instead of inheritance with distributed Solr
tests to allow for multiple tests without the same class.
(Ramkumar Aiyengar, Erick Erickson, Mike McCandless)
-
+
================== 5.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
@@ -87,6 +87,9 @@ Other Changes
* SOLR-6500: Refactor FileFetcher in SnapPuller, add debug logging.
(Ramkumar Aiyengar via Mark Miller)
+* SOLR-7076: In DIH, TikaEntityProcessor should have support for onError=skip
+ (Noble Paul)
+
================== 5.0.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Modified: lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/java/org/apache/solr/handler/dataimport/TikaEntityProcessor.java Tue Feb 10 11:28:11 2015
@@ -140,6 +140,10 @@ public class TikaEntityProcessor extends
}
tikaParser.parse(is, contentHandler, metadata , context);
} catch (Exception e) {
+ if(SKIP.equals(onError)) {
+ throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW,
+ "Document skipped :" + e.getMessage());
+ }
wrapAndThrow(SEVERE, e, "Unable to read content");
}
IOUtils.closeQuietly(is);
Modified: lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java?rev=1658681&r1=1658680&r2=1658681&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/contrib/dataimporthandler-extras/src/test/org/apache/solr/handler/dataimport/TestTikaEntityProcessor.java Tue Feb 10 11:28:11 2015
@@ -49,6 +49,19 @@ public class TestTikaEntityProcessor ext
" </document>" +
"</dataConfig>";
+ private String skipOnErrConf =
+ "<dataConfig>" +
+ " <dataSource type=\"BinFileDataSource\"/>" +
+ " <document>" +
+ " <entity name=\"Tika\" onError=\"skip\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/bad.doc").getAbsolutePath() + "\" >" +
+ "<field column=\"content\" name=\"text\"/>" +
+ " </entity>" +
+ " <entity name=\"Tika\" processor=\"TikaEntityProcessor\" url=\"" + getFile("dihextras/solr-word.pdf").getAbsolutePath() + "\" >" +
+ " <field column=\"text\"/>" +
+ "</entity>" +
+ " </document>" +
+ "</dataConfig>";
+
private String[] tests = {
"//*[@numFound='1']"
,"//str[@name='author'][.='Grant Ingersoll']"
@@ -86,6 +99,12 @@ public class TestTikaEntityProcessor ext
}
@Test
+ public void testSkip() throws Exception {
+ runFullImport(skipOnErrConf);
+ assertQ(req("*:*"), "//*[@numFound='1']");
+ }
+
+ @Test
public void testTikaHTMLMapperEmpty() throws Exception {
runFullImport(getConfigHTML(null));
assertQ(req("*:*"), testsHTMLDefault);