You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2020/10/13 14:58:01 UTC

[uima-ruta] branch UIMA-6271-validate-internal-ruta-indexing created (now 26674c9)

This is an automated email from the ASF dual-hosted git repository.

pkluegl pushed a change to branch UIMA-6271-validate-internal-ruta-indexing
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git.


      at 26674c9  UIMA-6271: Ruta: option to validate internal indexing in RutaEngine

This branch includes the following new commits:

     new 26674c9  UIMA-6271: Ruta: option to validate internal indexing in RutaEngine

The 1 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[uima-ruta] 01/01: UIMA-6271: Ruta: option to validate internal indexing in RutaEngine

Posted by pk...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

pkluegl pushed a commit to branch UIMA-6271-validate-internal-ruta-indexing
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git

commit 26674c9034cff974abe91acf6287f7cc3dcce31c
Author: Peter Klügl <pe...@averbis.com>
AuthorDate: Tue Oct 13 16:57:33 2020 +0200

    UIMA-6271: Ruta: option to validate internal indexing in RutaEngine
    
    - added config param
    - added utils method with tests
    - added mention in docs
---
 ruta-core/pom.xml                                  |  18 ++++
 .../java/org/apache/uima/ruta/RutaBasicUtils.java  | 116 +++++++++++++++++++++
 .../org/apache/uima/ruta/engine/RutaEngine.java    |  20 ++++
 .../org/apache/uima/ruta/RutaBasicUtilsTest.java   | 116 +++++++++++++++++++++
 ruta-docbook/src/docbook/tools.ruta.overview.xml   |  18 ++++
 ruta-parent/pom.xml                                |   2 +-
 6 files changed, 289 insertions(+), 1 deletion(-)

diff --git a/ruta-core/pom.xml b/ruta-core/pom.xml
index 76072d2..cc4e9b9 100644
--- a/ruta-core/pom.xml
+++ b/ruta-core/pom.xml
@@ -150,6 +150,24 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    
+    <dependency>
+      <groupId>org.apache.uima</groupId>
+      <artifactId>uimafit-junit</artifactId>
+      <version>${uimafit-version}</version>
+      <scope>test</scope>
+      <!-- Exclude aop stuff, which is not needed by uimafit and only introduces a non-ASL license -->
+      <exclusions>
+        <exclusion>
+          <groupId>org.springframework</groupId>
+          <artifactId>spring-aop</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>aopalliance</groupId>
+          <artifactId>aopalliance</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
     <dependency>
       <groupId>org.slf4j</groupId>
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
index 5eb2841..f371d49 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
@@ -18,14 +18,22 @@
  */
 package org.apache.uima.ruta;
 
+import java.util.Collection;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.fit.util.CasUtil;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.ruta.type.RutaBasic;
 
 /**
@@ -129,4 +137,112 @@ public class RutaBasicUtils {
     return true;
   }
 
+  /**
+   * Validates the internal indexing, i.e. the information stored in the RutaBasics, and throws an
+   * exception as soon as an invalid state is discovered.
+   * 
+   * @param jcas
+   *          the JCas that should be validated
+   * @param ignoreTypeNames
+   *          the names of types that should not be validated, may be null
+   * @throws AnalysisEngineProcessException
+   *           if some problem was detected
+   */
+  public static void validateInternalIndexing(JCas jcas, Collection<String> ignoreTypeNames)
+          throws AnalysisEngineProcessException {
+
+    Map<Integer, RutaBasic> beginMap = new LinkedHashMap<>();
+    Map<Integer, RutaBasic> endMap = new LinkedHashMap<>();
+    // Each begin offset and each end offset may be used by at most one RutaBasic.
+    Collection<RutaBasic> basics = JCasUtil.select(jcas, RutaBasic.class);
+
+    if (basics.isEmpty()) {
+      throw new AnalysisEngineProcessException(
+              new IllegalStateException("No RutaBasics available!"));
+    }
+    for (RutaBasic rutaBasic : basics) {
+
+      int begin = rutaBasic.getBegin();
+      int end = rutaBasic.getEnd();
+
+      if (beginMap.get(begin) != null || endMap.get(end) != null) {
+        throw new AnalysisEngineProcessException(new IllegalStateException(
+                "RutaBasic must be disjunct! Problem at offset " + begin));
+      }
+
+      beginMap.put(begin, rutaBasic);
+      endMap.put(end, rutaBasic);
+    }
+    // Every annotation that is not ignored must be anchored in the RutaBasics at its boundaries.
+    for (Annotation annotation : JCasUtil.select(jcas, Annotation.class)) {
+
+      Type type = annotation.getType();
+      if (ignoreType(type, ignoreTypeNames, jcas)) {
+        continue;
+      }
+
+      int begin = annotation.getBegin();
+      int end = annotation.getEnd();
+
+      RutaBasic beginBasic = beginMap.get(begin);
+      RutaBasic endBasic = endMap.get(end);
+      if (beginBasic == null) {
+        throw new AnalysisEngineProcessException(new IllegalStateException(
+                "No RutaBasic for begin of annotation at offset " + begin));
+      }
+      if (endBasic == null) {
+        throw new AnalysisEngineProcessException(
+                new IllegalStateException("No RutaBasic for end of annotation at offset " + end));
+      }
+
+      Collection<AnnotationFS> beginAnchors = beginBasic.getBeginAnchors(type);
+      if (beginAnchors == null || !beginAnchors.contains(annotation)) {
+        throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+                + type.getName() + "' not registered as begin at offset " + begin));
+      }
+      Collection<AnnotationFS> endAnchors = endBasic.getEndAnchors(type);
+      if (endAnchors == null || !endAnchors.contains(annotation)) {
+        throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+                + type.getName() + "' not registered as end at offset " + end));
+      }
+      // Each RutaBasic covered by the annotation must be flagged as part of the annotation's type.
+      List<RutaBasic> coveredBasics = JCasUtil.selectCovered(RutaBasic.class, annotation);
+      for (RutaBasic coveredBasic : coveredBasics) {
+        if (!coveredBasic.isPartOf(type)) {
+          throw new AnalysisEngineProcessException(
+                  new IllegalStateException("Annotation of type '" + type.getName()
+                          + "' not registered as partof at offset [" + begin + "," + end + "]"));
+        }
+      }
+    }
+  }
+
+  private static boolean ignoreType(Type type, Collection<String> ignoreTypeNames, JCas jcas) {
+    // Decides whether annotations of the given type are skipped by the validation.
+    if (type == null) {
+      return false;
+    }
+    // RutaBasic annotations themselves are always skipped.
+    if (StringUtils.equals(type.getName(), RutaBasic.class.getName())) {
+      return true;
+    }
+    // Without a list of ignored type names, no further type is skipped.
+    if (ignoreTypeNames == null) {
+      return false;
+    }
+    // Names that cannot be resolved in the type system of the JCas are silently passed over.
+    TypeSystem typeSystem = jcas.getTypeSystem();
+
+    for (String typeName : ignoreTypeNames) {
+      Type ignoreType = typeSystem.getType(typeName);
+      if (ignoreType == null) {
+        continue;
+      }
+      if (typeSystem.subsumes(ignoreType, type)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
 }
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
index d676528..093e35a 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
@@ -62,6 +62,7 @@ import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.ruta.FilterManager;
 import org.apache.uima.ruta.ReindexUpdateMode;
+import org.apache.uima.ruta.RutaBasicUtils;
 import org.apache.uima.ruta.RutaConstants;
 import org.apache.uima.ruta.RutaEnvironment;
 import org.apache.uima.ruta.RutaIndexingConfiguration;
@@ -529,6 +530,17 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
   private ReindexUpdateMode reindexUpdateMode;
 
   /**
+   * Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+   * and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+   * Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+   * are ignored. Default value is false.
+   */
+  public static final String PARAM_VALIDATE_INTERNAL_INDEXING = "validateInternalIndexing";
+
+  @ConfigurationParameter(name = PARAM_VALIDATE_INTERNAL_INDEXING, mandatory = true, defaultValue = "false")
+  private boolean validateInternalIndexing;
+
+  /**
    * This parameter determines positions as invisible if the internal indexing of the corresponding
    * RutaBasic annotation is empty.
    */
@@ -663,6 +675,14 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
     stream.setGreedyRule(greedyRule);
     stream.setMaxRuleMatches(maxRuleMatches);
     stream.setMaxRuleElementMatches(maxRuleElementMatches);
+
+    if (validateInternalIndexing) {
+      Collection<String> ignoreTypeNames = new ArrayList<>();
+      ignoreTypeNames.addAll(Arrays.asList(indexSkipTypes));
+      ignoreTypeNames.addAll(Arrays.asList(reindexSkipTypes));
+      RutaBasicUtils.validateInternalIndexing(jcas, ignoreTypeNames);
+    }
+
     try {
       script.apply(stream, crowd);
     } catch (Throwable e) {
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
new file mode 100644
index 0000000..89e11e2
--- /dev/null
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.ruta;
+
+import java.util.Arrays;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.fit.testing.junit.ManagedJCas;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.type.CW;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class RutaBasicUtilsTest {
+  // JUnit rule that supplies the JCas used by every test below.
+  public @Rule ManagedJCas managedJCas = new ManagedJCas();
+  // Validating a JCas to which no RutaBasic was added is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnNoBasics() throws AnalysisEngineProcessException {
+
+    RutaBasicUtils.validateInternalIndexing(managedJCas.get(), null);
+  }
+  // Two RutaBasics with the same offsets are expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnDuplicateBasics() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // A CW beginning at offset 0 without a RutaBasic beginning there is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingBasicAtBegin() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 1, 2).addToIndexes();
+    new CW(jcas, 0, 2).addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // A CW ending at offset 2 without a RutaBasic ending there is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingBasicAtEnd() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new CW(jcas, 0, 2).addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // The CW is registered only as end anchor, so the missing begin anchor is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingAnnotationAtBegin() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addEnd(cw, cw.getType());
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // The CW is registered only as begin anchor, so the missing end anchor is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingAnnotationAtEnd() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addBegin(cw, cw.getType());
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // Begin and end anchors are registered but partof is missing, which is expected to fail.
+  @Test(expected = AnalysisEngineProcessException.class)
+  public void testBreakOnMissingPartof() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    CW cw = new CW(jcas, 0, 1);
+    cw.addToIndexes();
+    RutaBasic rb = new RutaBasic(jcas, 0, 1);
+    rb.addBegin(cw, cw.getType());
+    rb.addEnd(cw, cw.getType());
+    rb.addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+  // The unregistered CW is expected to pass because CAS.TYPE_NAME_ANNOTATION is on the ignore list.
+  @Test
+  public void testIgnoreTypeNames() throws AnalysisEngineProcessException {
+    JCas jcas = managedJCas.get();
+    new RutaBasic(jcas, 0, 1).addToIndexes();
+    new CW(jcas, 0, 1).addToIndexes();
+    RutaBasicUtils.validateInternalIndexing(jcas, Arrays.asList(CAS.TYPE_NAME_ANNOTATION));
+  }
+  // The state left behind by a regular Ruta script run is expected to validate without errors.
+  @Test
+  public void testAllGood() throws Exception {
+    JCas jcas = managedJCas.get();
+    jcas.setDocumentText("This is 1 TEST.");
+    Ruta.apply(jcas.getCas(), "CW{-> TruePositive};");
+    RutaBasicUtils.validateInternalIndexing(jcas, null);
+  }
+}
diff --git a/ruta-docbook/src/docbook/tools.ruta.overview.xml b/ruta-docbook/src/docbook/tools.ruta.overview.xml
index 3107a97..1bce6ab 100644
--- a/ruta-docbook/src/docbook/tools.ruta.overview.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.overview.xml
@@ -927,6 +927,14 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
                 </row>
                 <row>
                   <entry>
+                    <link linkend='ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing'>validateInternalIndexing</link>
+                  </entry>
+                  <entry>Option to validate the internal indexing.
+                  </entry>
+                  <entry>Single Boolean</entry>
+                </row>
+                <row>
+                  <entry>
                     <link linkend='ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible'>emptyIsInvisible</link>
                   </entry>
                   <entry>Option to define empty text positions as invisible.
@@ -1285,6 +1293,16 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
            Default value is ADDITIVE.
           </para>
         </section>
+        <section id="ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing">
+          <title>validateInternalIndexing</title>
+          <para>
+            Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+            and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+            Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+            are ignored. Default value is false.
+          </para>
+        </section>
+
         <section id="ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible">
           <title>emptyIsInvisible</title>
           <para>
diff --git a/ruta-parent/pom.xml b/ruta-parent/pom.xml
index 0b0c451..5f70f13 100644
--- a/ruta-parent/pom.xml
+++ b/ruta-parent/pom.xml
@@ -131,7 +131,7 @@
       Creative Commons Attribution 3.0 License.
     </postNoticeText>
     <uimaVersion>2.10.4</uimaVersion>
-    <uimafit-version>2.4.0</uimafit-version>
+    <uimafit-version>2.5.1-SNAPSHOT</uimafit-version>
     <spring-version>4.3.22.RELEASE</spring-version>
     <!--
       BACKWARD_COMPATIBLE_IMPLEMENTER - patch version (=.=.+)