You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2020/10/13 14:58:02 UTC

[uima-ruta] 01/01: UIMA-6271: Ruta: option to validate internal indexing in RutaEngine

This is an automated email from the ASF dual-hosted git repository.

pkluegl pushed a commit to branch UIMA-6271-validate-internal-ruta-indexing
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git

commit 26674c9034cff974abe91acf6287f7cc3dcce31c
Author: Peter Klügl <pe...@averbis.com>
AuthorDate: Tue Oct 13 16:57:33 2020 +0200

    UIMA-6271: Ruta: option to validate internal indexing in RutaEngine
    
    - added config param
    - added utils method with tests
    - added mention in docs
---
 ruta-core/pom.xml                                  |  18 ++++
 .../java/org/apache/uima/ruta/RutaBasicUtils.java  | 116 +++++++++++++++++++++
 .../org/apache/uima/ruta/engine/RutaEngine.java    |  20 ++++
 .../org/apache/uima/ruta/RutaBasicUtilsTest.java   | 116 +++++++++++++++++++++
 ruta-docbook/src/docbook/tools.ruta.overview.xml   |  18 ++++
 ruta-parent/pom.xml                                |   2 +-
 6 files changed, 289 insertions(+), 1 deletion(-)

diff --git a/ruta-core/pom.xml b/ruta-core/pom.xml
index 76072d2..cc4e9b9 100644
--- a/ruta-core/pom.xml
+++ b/ruta-core/pom.xml
@@ -150,6 +150,24 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    
+    <dependency>
+      <groupId>org.apache.uima</groupId>
+      <artifactId>uimafit-junit</artifactId>
+      <version>${uimafit-version}</version>
+      <scope>test</scope>
+      <!-- Exclude aop stuff, which is not needed by uimafit and only introduces a non-asl license -->
+      <exclusions>
+        <exclusion>
+          <groupId>org.springframework</groupId>
+          <artifactId>spring-aop</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>aopalliance</groupId>
+          <artifactId>aopalliance</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
     <dependency>
       <groupId>org.slf4j</groupId>
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
index 5eb2841..f371d49 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
@@ -18,14 +18,22 @@
  */
 package org.apache.uima.ruta;
 
+import java.util.Collection;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.fit.util.CasUtil;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.ruta.type.RutaBasic;
 
 /**
@@ -129,4 +137,112 @@ public class RutaBasicUtils {
     return true;
   }
 
+  /**
+   * Validates the internal indexing, i.e., the information stored in the RutaBasics, and throws
+   * an exception if an invalid state is discovered.
+   * 
+   * @param jcas
+   *          the JCas that should be validated
+   * @param ignoreTypeNames
+   *          the names of types (and their subtypes) that are not validated, may be null
+   * @throws AnalysisEngineProcessException
+   *           if some problem was detected
+   */
+  public static void validateInternalIndexing(JCas jcas, Collection<String> ignoreTypeNames)
+          throws AnalysisEngineProcessException {
+    // Lookup tables from a text offset to the RutaBasic that begins/ends exactly there.
+    Map<Integer, RutaBasic> beginMap = new LinkedHashMap<>();
+    Map<Integer, RutaBasic> endMap = new LinkedHashMap<>();
+
+    Collection<RutaBasic> basics = JCasUtil.select(jcas, RutaBasic.class);
+
+    if (basics.isEmpty()) {
+      throw new AnalysisEngineProcessException(
+              new IllegalStateException("No RutaBasics available!"));
+    }
+    for (RutaBasic rutaBasic : basics) {
+
+      int begin = rutaBasic.getBegin();
+      int end = rutaBasic.getEnd();
+      // Two basics sharing a begin or an end offset would make the lookup ambiguous.
+      if (beginMap.get(begin) != null || endMap.get(end) != null) {
+        throw new AnalysisEngineProcessException(new IllegalStateException(
+                "RutaBasic must be disjunct! Problem at offset " + begin));
+      }
+
+      beginMap.put(begin, rutaBasic);
+      endMap.put(end, rutaBasic);
+    }
+
+    for (Annotation annotation : JCasUtil.select(jcas, Annotation.class)) {
+
+      Type type = annotation.getType();
+      if (ignoreType(type, ignoreTypeNames, jcas)) {
+        continue;
+      }
+
+      int begin = annotation.getBegin();
+      int end = annotation.getEnd();
+      // Every validated annotation must start and stop exactly at RutaBasic boundaries.
+      RutaBasic beginBasic = beginMap.get(begin);
+      RutaBasic endBasic = endMap.get(end);
+      if (beginBasic == null) {
+        throw new AnalysisEngineProcessException(new IllegalStateException(
+                "No RutaBasic for begin of annotation at offset " + begin));
+      }
+      if (endBasic == null) {
+        throw new AnalysisEngineProcessException(
+                new IllegalStateException("No RutaBasic for end of annotation at offset " + end));
+      }
+      // The boundary basics must list this very annotation as begin/end anchor of its type.
+      Collection<AnnotationFS> beginAnchors = beginBasic.getBeginAnchors(type);
+      if (beginAnchors == null || !beginAnchors.contains(annotation)) {
+        throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+                + type.getName() + "' not registered as begin at offset " + begin));
+      }
+      Collection<AnnotationFS> endAnchors = endBasic.getEndAnchors(type);
+      if (endAnchors == null || !endAnchors.contains(annotation)) {
+        throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+                + type.getName() + "' not registered as end at offset " + end));
+      }
+      // Each covered basic must know that it lies within an annotation of this type.
+      List<RutaBasic> coveredBasics = JCasUtil.selectCovered(RutaBasic.class, annotation);
+      for (RutaBasic coveredBasic : coveredBasics) {
+        if (!coveredBasic.isPartOf(type)) {
+          throw new AnalysisEngineProcessException(
+                  new IllegalStateException("Annotation of type '" + type.getName()
+                          + "' not registered as partof at offset [" + begin + "," + end + "]"));
+        }
+      }
+    }
+  }
+
+  private static boolean ignoreType(Type type, Collection<String> ignoreTypeNames, JCas jcas) {
+    // Without a type there is nothing to compare against, so the annotation is not ignored.
+    if (type == null) {
+      return false;
+    }
+    // RutaBasic annotations are always exempt, compared by their fully qualified name.
+    String basicTypeName = RutaBasic.class.getName();
+    if (StringUtils.equals(type.getName(), basicTypeName)) {
+      return true;
+    }
+    // No collection given: no further types are exempt.
+    if (ignoreTypeNames == null) {
+      return false;
+    }
+
+    TypeSystem types = jcas.getTypeSystem();
+    for (String candidateName : ignoreTypeNames) {
+      // Names that the type system cannot resolve are skipped; a resolved type exempts
+      // itself and everything it subsumes.
+      Type candidate = types.getType(candidateName);
+      if (candidate != null && types.subsumes(candidate, type)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
 }
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
index d676528..093e35a 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
@@ -62,6 +62,7 @@ import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.ruta.FilterManager;
 import org.apache.uima.ruta.ReindexUpdateMode;
+import org.apache.uima.ruta.RutaBasicUtils;
 import org.apache.uima.ruta.RutaConstants;
 import org.apache.uima.ruta.RutaEnvironment;
 import org.apache.uima.ruta.RutaIndexingConfiguration;
@@ -529,6 +530,17 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
   private ReindexUpdateMode reindexUpdateMode;
 
   /**
+   * Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+   * and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+   * Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+   * are ignored. Default value is false.
+   */
+  public static final String PARAM_VALIDATE_INTERNAL_INDEXING = "validateInternalIndexing";
+
+  @ConfigurationParameter(name = PARAM_VALIDATE_INTERNAL_INDEXING, mandatory = true, defaultValue = "false")
+  private boolean validateInternalIndexing;
+
+  /**
    * This parameter determines positions as invisible if the internal indexing of the corresponding
    * RutaBasic annotation is empty.
    */
@@ -663,6 +675,14 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
     stream.setGreedyRule(greedyRule);
     stream.setMaxRuleMatches(maxRuleMatches);
     stream.setMaxRuleElementMatches(maxRuleElementMatches);
+
+    if (validateInternalIndexing) {
+      Collection<String> ignoreTypeNames = new ArrayList<>();
+      ignoreTypeNames.addAll(Arrays.asList(indexSkipTypes));
+      ignoreTypeNames.addAll(Arrays.asList(reindexSkipTypes));
+      RutaBasicUtils.validateInternalIndexing(jcas, ignoreTypeNames);
+    }
+
     try {
       script.apply(stream, crowd);
     } catch (Throwable e) {
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
new file mode 100644
index 0000000..89e11e2
--- /dev/null
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.ruta;
+
+import java.util.Arrays;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.fit.testing.junit.ManagedJCas;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.type.CW;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class RutaBasicUtilsTest {
+  // NOTE(review): the ManagedJCas rule presumably supplies a clean JCas per test - confirm.
+  public @Rule ManagedJCas managedJCas = new ManagedJCas();
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnNoBasics() throws AnalysisEngineProcessException {
+    // Nothing is added to the CAS here, so validation finds no RutaBasic and must fail.
+    RutaBasicUtils.validateInternalIndexing(managedJCas.get(), null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnDuplicateBasics() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new RutaBasic(jcas, 0, 1).addToIndexes(); // same span as the basic above
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingBasicAtBegin() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 1, 2).addToIndexes();
+    new CW(jcas, 0, 2).addToIndexes(); // begins at offset 0, where no RutaBasic begins
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingBasicAtEnd() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new CW(jcas, 0, 2).addToIndexes(); // ends at offset 2, where no RutaBasic ends
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingAnnotationAtBegin() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addEnd(cw, cw.getType()); // only the end is registered, the begin is left out
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingAnnotationAtEnd() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addBegin(cw, cw.getType()); // only the begin is registered, the end is left out
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingPartof() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addBegin(cw, cw.getType());
+    rb.addEnd(cw, cw.getType()); // begin and end are set, but no partof is registered
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+
+  @Test
+  public void testIgnoreTypeNames() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new CW(jcas, 0, 1).addToIndexes(); // not registered in the basic; expected to be skipped
+    RutaBasicUtils.validateInternalIndexing(jcas, Arrays.asList(CAS.TYPE_NAME_ANNOTATION));
+  }
+
+  @Test
+  public void testAllGood() throws Exception {
+    JCas jcas = managedJCas.get();
+    jcas.setDocumentText("This is 1 TEST.");
+    Ruta.apply(jcas.getCas(), "CW{-> TruePositive};"); // presumably builds the indexing itself
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+}
diff --git a/ruta-docbook/src/docbook/tools.ruta.overview.xml b/ruta-docbook/src/docbook/tools.ruta.overview.xml
index 3107a97..1bce6ab 100644
--- a/ruta-docbook/src/docbook/tools.ruta.overview.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.overview.xml
@@ -927,6 +927,14 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
                 </row>
                 <row>
                   <entry>
+                    <link linkend='ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing'>validateInternalIndexing</link>
+                  </entry>
+                  <entry>Option to validate the internal indexing.
+                  </entry>
+                  <entry>Single Boolean</entry>
+                </row>
+                <row>
+                  <entry>
                     <link linkend='ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible'>emptyIsInvisible</link>
                   </entry>
                   <entry>Option to define empty text positions as invisible.
@@ -1285,6 +1293,16 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
            Default value is ADDITIVE.
           </para>
         </section>
+        <section id="ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing">
+          <title>validateInternalIndexing</title>
+          <para>
+            Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+            and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+            Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+            are ignored. Default value is false.
+          </para>
+        </section>
+
         <section id="ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible">
           <title>emptyIsInvisible</title>
           <para>
diff --git a/ruta-parent/pom.xml b/ruta-parent/pom.xml
index 0b0c451..5f70f13 100644
--- a/ruta-parent/pom.xml
+++ b/ruta-parent/pom.xml
@@ -131,7 +131,7 @@
       Creative Commons Attribution 3.0 License.
     </postNoticeText>
     <uimaVersion>2.10.4</uimaVersion>
-    <uimafit-version>2.4.0</uimafit-version>
+    <uimafit-version>2.5.1-SNAPSHOT</uimafit-version>
     <spring-version>4.3.22.RELEASE</spring-version>
     <!--
       BACKWARD_COMPATIBLE_IMPLEMENTER - patch version (=.=.+)