You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2011/05/13 17:12:54 UTC

svn commit: r1102785 - in /lucene/dev/trunk/solr/contrib/uima: ./ src/main/java/org/apache/solr/uima/processor/ src/test/java/org/apache/solr/uima/processor/ src/test/java/org/apache/solr/uima/processor/an/ src/test/resources/ src/test/resources/solr-u...

Author: koji
Date: Fri May 13 15:12:53 2011
New Revision: 1102785

URL: http://svn.apache.org/viewvc?rev=1102785&view=rev
Log:
SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.

Added:
    lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java
    lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml
    lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml
Modified:
    lucene/dev/trunk/solr/contrib/uima/CHANGES.txt
    lucene/dev/trunk/solr/contrib/uima/README.txt
    lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
    lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
    lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
    lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
    lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml

Modified: lucene/dev/trunk/solr/contrib/uima/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/CHANGES.txt?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/uima/CHANGES.txt Fri May 13 15:12:53 2011
@@ -33,6 +33,9 @@ New Features
 
 * SOLR-2503: extend mapping function to map feature value to dynamicField. (koji)
 
+* SOLR-2512: add ignoreErrors flag so that users can ignore exceptions in AE.
+  (Tommaso Teofili, koji)
+
 Test Cases:
 ----------------------
 

Modified: lucene/dev/trunk/solr/contrib/uima/README.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/README.txt?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/README.txt (original)
+++ lucene/dev/trunk/solr/contrib/uima/README.txt Fri May 13 15:12:53 2011
@@ -30,6 +30,13 @@ To start using Solr UIMA Metadata Extrac
           <str name="oc_licenseID">VALID_OPENCALAIS_KEY</str>
         </lst>
         <str name="analysisEngine">/org/apache/uima/desc/OverridingParamsExtServicesAE.xml</str>
+        <!-- Set to true if you want to continue indexing even if text processing fails.
+             Default is false. That is, Solr throws RuntimeException and
+             never indexed documents entirely in your session. -->
+        <bool name="ignoreErrors">true</bool>
+        <!-- This is optional. It is used for logging when text processing fails.
+             Usually, set uniqueKey field name -->
+        <str name="logField">id</str>
         <lst name="analyzeFields">
           <bool name="merge">false</bool>
           <arr name="fields">

Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfiguration.java Fri May 13 15:12:53 2011
@@ -36,14 +36,20 @@ public class SolrUIMAConfiguration {
 
   private Map<String, Object> runtimeParameters;
 
+  private boolean ignoreErrors;
+  
+  private String logField;
+
   public SolrUIMAConfiguration(String aePath, String[] fieldsToAnalyze, boolean fieldsMerging,
           Map<String, Map<String, MapField>> typesFeaturesFieldsMapping,
-          Map<String, Object> runtimeParameters) {
+          Map<String, Object> runtimeParameters, boolean ignoreErrors, String logField) {
     this.aePath = aePath;
     this.fieldsToAnalyze = fieldsToAnalyze;
     this.fieldsMerging = fieldsMerging;
     this.runtimeParameters = runtimeParameters;
     this.typesFeaturesFieldsMapping = typesFeaturesFieldsMapping;
+    this.ignoreErrors = ignoreErrors;
+    this.logField = logField;
   }
 
   public String[] getFieldsToAnalyze() {
@@ -65,6 +71,14 @@ public class SolrUIMAConfiguration {
   public Map<String, Object> getRuntimeParameters() {
     return runtimeParameters;
   }
+
+  public boolean isIgnoreErrors() {
+    return ignoreErrors;
+  }
+  
+  public String getLogField(){
+    return logField;
+  }
   
   static final class MapField {
     

Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/SolrUIMAConfigurationReader.java Fri May 13 15:12:53 2011
@@ -40,7 +40,8 @@ public class SolrUIMAConfigurationReader
 
   public SolrUIMAConfiguration readSolrUIMAConfiguration() {
     return new SolrUIMAConfiguration(readAEPath(), readFieldsToAnalyze(), readFieldsMerging(),
-            readTypesFeaturesFieldsMapping(), readAEOverridingParameters());
+            readTypesFeaturesFieldsMapping(), readAEOverridingParameters(), readIgnoreErrors(),
+            readLogField());
   }
 
   private String readAEPath() {
@@ -105,4 +106,12 @@ public class SolrUIMAConfigurationReader
     return runtimeParameters;
   }
 
+  private boolean readIgnoreErrors() {
+    Object ignoreErrors = args.get("ignoreErrors");
+    return ignoreErrors == null ? false : (Boolean)ignoreErrors;
+  }
+
+  private String readLogField() {
+    return (String)args.get("logField");
+  }
 }

Modified: lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/main/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessor.java Fri May 13 15:12:53 2011
@@ -20,7 +20,9 @@ package org.apache.solr.uima.processor;
 import java.io.IOException;
 import java.util.Map;
 
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.uima.processor.SolrUIMAConfiguration.MapField;
 import org.apache.solr.uima.processor.ae.AEProvider;
@@ -58,12 +60,15 @@ public class UIMAUpdateRequestProcessor 
 
   @Override
   public void processAdd(AddUpdateCommand cmd) throws IOException {
+    String text = null;
     try {
       /* get Solr document */
       SolrInputDocument solrInputDocument = cmd.getSolrInputDocument();
 
       /* get the fields to analyze */
-      for (String text : getTextsToAnalyze(solrInputDocument)) {
+      String[] texts = getTextsToAnalyze(solrInputDocument);
+      for (int i = 0; i < texts.length; i++) {
+        text = texts[i];
         if (text != null && !"".equals(text)) {
           /* process the text value */
           JCas jcas = processText(text);
@@ -79,7 +84,21 @@ public class UIMAUpdateRequestProcessor 
         }
       }
     } catch (UIMAException e) {
-      throw new RuntimeException(e);
+      String logField = solrUIMAConfiguration.getLogField();
+      String optionalFieldInfo = logField == null ? "." :
+        new StringBuilder(". ").append(logField).append("=")
+        .append((String)cmd.getSolrInputDocument().getField(logField).getValue())
+        .append(", ").toString();
+      if (solrUIMAConfiguration.isIgnoreErrors())
+        log.warn(new StringBuilder("skip the text processing due to ")
+          .append(e.getLocalizedMessage()).append(optionalFieldInfo)
+          .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString());
+      else{
+        throw new SolrException(ErrorCode.SERVER_ERROR,
+            new StringBuilder("processing error: ")
+              .append(e.getLocalizedMessage()).append(optionalFieldInfo)
+              .append(" text=\"").append(text.substring(0, 100)).append("...\"").toString(), e);
+      }
     }
     super.processAdd(cmd);
   }

Modified: lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorTest.java Fri May 13 15:12:53 2011
@@ -93,7 +93,7 @@ public class UIMAUpdateRequestProcessorT
   @Test
   public void testProcessing() throws Exception {
 
-    addDoc(adoc(
+    addDoc("uima", adoc(
             "id",
             "2312312321312",
             "text",
@@ -111,13 +111,13 @@ public class UIMAUpdateRequestProcessorT
   @Test
   public void testTwoUpdates() throws Exception {
 
-    addDoc(adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
+    addDoc("uima", adoc("id", "1", "text", "The Apache Software Foundation is happy to announce "
             + "BarCampApache Sydney, Australia, the first ASF-backed event in the Southern "
             + "Hemisphere!"));
     assertU(commit());
     assertQ(req("sentence:*"), "//*[@numFound='1']");
 
-    addDoc(adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
+    addDoc("uima", adoc("id", "2", "text", "Taking place 11th December 2010 at the University "
             + "of Sydney's Darlington Centre, the BarCampApache \"unconference\" will be"
             + " attendee-driven, facilitated by members of the Apache community and will "
             + "focus on the Apache..."));
@@ -128,9 +128,41 @@ public class UIMAUpdateRequestProcessorT
     assertQ(req("ORGANIZATION_sm:Apache"), "//*[@numFound='2']");
   }
 
-  private void addDoc(String doc) throws Exception {
+  @Test
+  public void testErrorHandling() throws Exception {
+
+    try{
+      addDoc("uima-not-ignoreErrors", adoc(
+            "id",
+            "2312312321312",
+            "text",
+            "SpellCheckComponent got improvement related to recent Lucene changes. \n  "
+                    + "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+                    + "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+                    + " attached if you need it, but it is also committed to trunk and 3_x branch."
+                    + " Last Lucene European Conference has been held in Prague."));
+      fail("exception shouldn't be ignored");
+    }
+    catch(RuntimeException expected){}
+    assertU(commit());
+    assertQ(req("*:*"), "//*[@numFound='0']");
+
+    addDoc("uima-ignoreErrors", adoc(
+            "id",
+            "2312312321312",
+            "text",
+            "SpellCheckComponent got improvement related to recent Lucene changes. \n  "
+                    + "Add support for specifying Spelling SuggestWord Comparator to Lucene spell "
+                    + "checkers for SpellCheckComponent. Issue SOLR-2053 is already fixed, patch is"
+                    + " attached if you need it, but it is also committed to trunk and 3_x branch."
+                    + " Last Lucene European Conference has been held in Prague."));
+    assertU(commit());
+    assertQ(req("*:*"), "//*[@numFound='1']");
+  }
+
+  private void addDoc(String chain, String doc) throws Exception {
     Map<String, String[]> params = new HashMap<String, String[]>();
-    params.put(UpdateParams.UPDATE_CHAIN, new String[] { "uima" });
+    params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });
     MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
     SolrQueryRequestBase req = new SolrQueryRequestBase(h.getCore(), (SolrParams) mmparams) {
     };

Added: lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/java/org/apache/solr/uima/processor/an/DummyExceptionAnnotator.java Fri May 13 15:12:53 2011
@@ -0,0 +1,31 @@
+package org.apache.solr.uima.processor.an;
+
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class DummyExceptionAnnotator extends JCasAnnotator_ImplBase{
+
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    throw new AnalysisEngineProcessException();
+  }
+
+}

Added: lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/DummyExceptionAEDescriptor.xml Fri May 13 15:12:53 2011
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.solr.uima.processor.an.DummyExceptionAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DummyExceptionAEDescriptor</name>
+    <description/>
+    <version>1.0</version>
+    <vendor>ASF</vendor>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription/>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Added: lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml?rev=1102785&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml (added)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/TestExceptionAE.xml Fri May 13 15:12:53 2011
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>false</primitive>
+  <delegateAnalysisEngineSpecifiers>
+    <delegateAnalysisEngine key="DummyExceptionAEDescriptor">
+      <import location="DummyExceptionAEDescriptor.xml"/>
+    </delegateAnalysisEngine>
+  </delegateAnalysisEngineSpecifiers>
+  <analysisEngineMetaData>
+    <name>TestExceptionAE</name>
+    <description/>
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <flowConstraints>
+      <fixedFlow>
+        <node>DummyExceptionAEDescriptor</node>
+      </fixedFlow>
+    </flowConstraints>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Modified: lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml?rev=1102785&r1=1102784&r2=1102785&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/contrib/uima/src/test/resources/solr-uima/conf/solrconfig.xml Fri May 13 15:12:53 2011
@@ -1003,7 +1003,6 @@
         </lst>
       </lst>
     </processor>
-    <processor class="solr.LogUpdateProcessorFactory" />
     <processor class="solr.RunUpdateProcessorFactory" />
   </updateRequestProcessorChain>
 
@@ -1037,6 +1036,48 @@
     </processor>
   </updateRequestProcessorChain>
 
+  <updateRequestProcessorChain name="uima-not-ignoreErrors">
+    <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
+      <lst name="uimaConfig">
+        <lst name="runtimeParameters">
+          <int name="ngramsize">3</int>
+        </lst>
+        <str name="analysisEngine">/TestExceptionAE.xml</str>
+        <bool name="ignoreErrors">false</bool>
+        <lst name="analyzeFields">
+          <bool name="merge">false</bool>
+          <arr name="fields">
+            <str>text</str>
+          </arr>
+        </lst>
+        <lst name="fieldMappings"/>
+      </lst>
+    </processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="uima-ignoreErrors">
+    <processor class="org.apache.solr.uima.processor.UIMAUpdateRequestProcessorFactory">
+      <lst name="uimaConfig">
+        <lst name="runtimeParameters">
+          <int name="ngramsize">3</int>
+        </lst>
+        <str name="analysisEngine">/TestExceptionAE.xml</str>
+        <bool name="ignoreErrors">true</bool>
+        <!-- This is optional. It is used for logging when text processing fails. Usually, set uniqueKey field name -->
+        <str name="logField">id</str>
+        <lst name="analyzeFields">
+          <bool name="merge">false</bool>
+          <arr name="fields">
+            <str>text</str>
+          </arr>
+        </lst>
+        <lst name="fieldMappings"/>
+      </lst>
+    </processor>
+    <processor class="solr.RunUpdateProcessorFactory" />
+  </updateRequestProcessorChain>
+
   <!--
     queryResponseWriter plugins... query responses will be written using
     the writer specified by the 'wt' request parameter matching the name