You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2011/05/13 17:12:54 UTC
svn commit: r1102785 - in /lucene/dev/trunk/solr/contrib/uima: ./
src/main/java/org/apache/solr/uima/processor/
src/test/java/org/apache/solr/uima/processor/
src/test/java/org/apache/solr/uima/processor/an/ src/test/resources/
src/test/resources/solr-uima/conf/
Author: koji
Date: Fri May 13 15:12:53 2011
New Revision: 1102785
URL: http://svn.apache.org/viewvc?rev=1102785&view=rev
Log:
SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.
Added:
lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java
lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml
lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml
Modified:
lucene/dev/trunk/solr/contrib/uima/CHANGES.txt
lucene/dev/trunk/solr/contrib/uima/README.txt
lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml
Modified: lucene/dev/trunk/solr/contrib/uima/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/CHANGES.txt?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/uima/CHANGES.txt Fri May 13 15:12:53 2011
@@ -33,6 +33,9 @@ New Features
* SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
+* SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.
+ (Tommaso Teofili, koji)
+
Test Cases:
----------------------
Modified: lucene/dev/trunk/solr/contrib/uima/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/README.txt?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/README.txt (original)
+++ lucene/dev/trunk/solr/contrib/uima/README.txt Fri May 13 15:12:53 2011
@@ -30,6 +30,13 @@ To start using Solr UIMA Metadata Extrac
<str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
</lst>
<str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
+ <!-- Set to true if you want to continue indexing even if text processing fails.
+ Default is false. In that case, Solr throws a RuntimeException and
+ none of the documents in your session are indexed. -->
+ <bool name="ignoreErrors">true</bool>
+ <!-- This is optional. It is used for logging when text processing fails.
+ Usually, this is set to the uniqueKey field name. -->
+ <str name="logField">id</str>
<lst name="analyzeFields">
<bool name="merge">false</bool>
<arr name="fields">
Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java Fri May 13 15:12:53 2011
@@ -36,14 +36,20 @@ public class SolrUIMAConfiguration {
private Map<String, Object> runtimeParameters;
+ private boolean ignoreErrors;
+
+ private String logField;
+
public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
- Map<String, Object> runtimeParameters) {
+ Map<String, Object> runtimeParameters, boolean ignoreErrors, String logField) {
this.aePath = aePath;
this.fieldsToAnalyze = fieldsToAnalyze;
this.fieldsMerging = fieldsMerging;
this.runtimeParameters = runtimeParameters;
this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
+ this.ignoreErrors = ignoreErrors;
+ this.logField = logField;
}
public String[] getFieldsToAnalyze() {
@@ -65,6 +71,14 @@ public class SolrUIMAConfiguration {
public Map<String, Object> getRuntimeParameters() {
return runtimeParameters;
}
+
+ public boolean isIgnoreErrors() {
+ return ignoreErrors;
+ }
+
+ public String getLogField(){
+ return logField;
+ }
static final class MapField {
Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java Fri May 13 15:12:53 2011
@@ -40,7 +40,8 @@ public class SolrUIMAConfigurationReader
public SolrUIMAConfiguration readSolrUIMAConfiguration() {
return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
- readTypesFeaturesFieldsMapping(), readAEOverridingParameters());
+ readTypesFeaturesFieldsMapping(), readAEOverridingParameters(), readIgnoreErrors(),
+ readLogField());
}
private String readAEPath() {
@@ -105,4 +106,12 @@ public class SolrUIMAConfigurationReader
return runtimeParameters;
}
+ private boolean readIgnoreErrors() {
+ Object ignoreErrors = args.get("ignoreErrors");
+ return ignoreErrors == null ? false : (Boolean)ignoreErrors;
+ }
+
+ private String readLogField() {
+ return (String)args.get("logField");
+ }
}
Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java Fri May 13 15:12:53 2011
@@ -20,7 +20,9 @@ package org.apache.solr.uima.processor;
import java.io.IOException;
import java.util.Map;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.SolrCore;
import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
import org.apache.solr.uima.processor.ae.AEProvider;
@@ -58,12 +60,15 @@ public class UIMAUpdateRequestProcessor
@Override
public void processAdd(AddUpdateCommand cmd) throws IOException {
+ String text = null;
try {
/* get Solr document */
SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
/* get the fields to analyze */
- for (String text : getTextsToAnalyze(solrInputDocument)) {
+ String[] texts = getTextsToAnalyze(solrInputDocument);
+ for (int i = 0; i < texts.length; i++) {
+ text = texts[i];
if (text != null && !"".equals(text)) {
/* process the text value */
JCas jcas = processText(text);
@@ -79,7 +84,21 @@ public class UIMAUpdateRequestProcessor
}
}
} catch (UIMAException e) {
- throw new RuntimeException(e);
+ String logField = solrUIMAConfiguration.getLogField();
+ String optionalFieldInfo = logField == null ? "." :
+ new StringBuilder(". ").append(logField).append("=")
+ .append((String)cmd.getSolrInputDocument().getField(logField).getValue())
+ .append(", ").toString();
+ if (solrUIMAConfiguration.isIgnoreErrors())
+ log.warn(new StringBuilder("skip the text processing due to ")
+ .append(e.getLocalizedMessage()).append(optionalFieldInfo)
+ .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString());
+ else{
+ throw new SolrException(ErrorCode.SERVER_ERROR,
+ new StringBuilder("processing error: ")
+ .append(e.getLocalizedMessage()).append(optionalFieldInfo)
+ .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString(), e);
+ }
}
super.processAdd(cmd);
}
Modified: lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java Fri May 13 15:12:53 2011
@@ -93,7 +93,7 @@ public class UIMAUpdateRequestProcessorT
@Test
public void testProcessing() throws Exception {
- addDoc(adoc(
+ addDoc("uima", adoc(
"id",
"2312312321312",
"text",
@@ -111,13 +111,13 @@ public class UIMAUpdateRequestProcessorT
@Test
public void testTwoUpdates() throws Exception {
- addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
+ addDoc("uima", adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
+ "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
+ "Hemisphere!"));
assertU(commit());
assertQ(req("sentence:*"), "//*[@numFound='1']");
- addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
+ addDoc("uima", adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
+ "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
+ " attendee-driven, facilitated by members of the Apache community and will "
+ "focus on the Apache..."));
@@ -128,9 +128,41 @@ public class UIMAUpdateRequestProcessorT
assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
}
- private void addDoc(String doc) throws Exception {
+ @Test
+ public void testErrorHandling() throws Exception {
+
+ try{
+ addDoc("uima-not-ignoreErrors", adoc(
+ "id",
+ "2312312321312",
+ "text",
+ "SpellCheckComponent got improvement related to recent Lucene changes. \n "
+ + "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+ + "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+ + " attached if you need it, but it is also committed to trunk and 3_x branch."
+ + " Last Lucene European Conference has been held in Prague."));
+ fail("exception shouldn't be ignored");
+ }
+ catch(RuntimeException expected){}
+ assertU(commit());
+ assertQ(req("*:*"), "//*[@numFound='0']");
+
+ addDoc("uima-ignoreErrors", adoc(
+ "id",
+ "2312312321312",
+ "text",
+ "SpellCheckComponent got improvement related to recent Lucene changes. \n "
+ + "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+ + "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+ + " attached if you need it, but it is also committed to trunk and 3_x branch."
+ + " Last Lucene European Conference has been held in Prague."));
+ assertU(commit());
+ assertQ(req("*:*"), "//*[@numFound='1']");
+ }
+
+ private void addDoc(String chain, String doc) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();
- params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uima" });
+ params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
};
Added: lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java Fri May 13 15:12:53 2011
@@ -0,0 +1,31 @@
+package org.apache.solr.uima.processor.an;
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class DummyExceptionAnnotator extends JCasAnnotator_ImplBase{
+
+ @Override
+ public void process(JCas jcas) throws AnalysisEngineProcessException {
+ throw new AnalysisEngineProcessException();
+ }
+
+}
Added: lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml Fri May 13 15:12:53 2011
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.solr.uima.processor.an.DummyExceptionAnnotator</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>DummyExceptionAEDescriptor</name>
+ <description/>
+ <version>1.0</version>
+ <vendor>ASF</vendor>
+ <configurationParameters/>
+ <configurationParameterSettings/>
+ <typeSystemDescription/>
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities/>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Added: lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml Fri May 13 15:12:53 2011
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive>
+ <delegateAnalysisEngineSpecifiers>
+ <delegateAnalysisEngine key="DummyExceptionAEDescriptor">
+ <import location="DummyExceptionAEDescriptor.xml"/>
+ </delegateAnalysisEngine>
+ </delegateAnalysisEngineSpecifiers>
+ <analysisEngineMetaData>
+ <name>TestExceptionAE</name>
+ <description/>
+ <version>1.0</version>
+ <vendor/>
+ <configurationParameters/>
+ <configurationParameterSettings/>
+ <flowConstraints>
+ <fixedFlow>
+ <node>DummyExceptionAEDescriptor</node>
+ </fixedFlow>
+ </flowConstraints>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Modified: lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml Fri May 13 15:12:53 2011
@@ -1003,7 +1003,6 @@
</lst>
</lst>
</processor>
- <processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
@@ -1037,6 +1036,48 @@
</processor>
</updateRequestProcessorChain>
+ <updateRequestProcessorChain name="uima-not-ignoreErrors">
+ <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
+ <lst name="uimaConfig">
+ <lst name="runtimeParameters">
+ <int name="ngramsize">3</int>
+ </lst>
+ <str name="analysisEngine">/TestExceptionAE.xml</str>
+ <bool name="ignoreErrors">false</bool>
+ <lst name="analyzeFields">
+ <bool name="merge">false</bool>
+ <arr name="fields">
+ <str>text</str>
+ </arr>
+ </lst>
+ <lst name="fieldMappings"/>
+ </lst>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="uima-ignoreErrors">
+ <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
+ <lst name="uimaConfig">
+ <lst name="runtimeParameters">
+ <int name="ngramsize">3</int>
+ </lst>
+ <str name="analysisEngine">/TestExceptionAE.xml</str>
+ <bool name="ignoreErrors">true</bool>
+ <!-- This is optional. It is used for logging when text processing fails. Usually, this is set to the uniqueKey field name. -->
+ <str name="logField">id</str>
+ <lst name="analyzeFields">
+ <bool name="merge">false</bool>
+ <arr name="fields">
+ <str>text</str>
+ </arr>
+ </lst>
+ <lst name="fieldMappings"/>
+ </lst>
+ </processor>
+ <processor class="solr.RunUpdateProcessorFactory" />
+ </updateRequestProcessorChain>
+
<!--
queryResponseWriter plugins... query responses will be written using
the writer specified by the 'wt' request parameter matching the name