You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2020/10/13 14:58:02 UTC
[uima-ruta] 01/01: UIMA-6271: Ruta: option to validate internal
indexing in RutaEngine
This is an automated email from the ASF dual-hosted git repository.
pkluegl pushed a commit to branch UIMA-6271-validate-internal-ruta-indexing
in repository https://gitbox.apache.org/repos/asf/uima-ruta.git
commit 26674c9034cff974abe91acf6287f7cc3dcce31c
Author: Peter Klügl <pe...@averbis.com>
AuthorDate: Tue Oct 13 16:57:33 2020 +0200
UIMA-6271: Ruta: option to validate internal indexing in RutaEngine
- added config param
- added utils method with tests
- added mention in docs
---
ruta-core/pom.xml | 18 ++++
.../java/org/apache/uima/ruta/RutaBasicUtils.java | 116 +++++++++++++++++++++
.../org/apache/uima/ruta/engine/RutaEngine.java | 20 ++++
.../org/apache/uima/ruta/RutaBasicUtilsTest.java | 116 +++++++++++++++++++++
ruta-docbook/src/docbook/tools.ruta.overview.xml | 18 ++++
ruta-parent/pom.xml | 2 +-
6 files changed, 289 insertions(+), 1 deletion(-)
diff --git a/ruta-core/pom.xml b/ruta-core/pom.xml
index 76072d2..cc4e9b9 100644
--- a/ruta-core/pom.xml
+++ b/ruta-core/pom.xml
@@ -150,6 +150,24 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.uima</groupId>
+ <artifactId>uimafit-junit</artifactId>
+ <version>${uimafit-version}</version>
+ <scope>test</scope>
+ <!-- Exclude aop stuff, which is not needed by uimafit and only introduces a non-asl license -->
+ <exclusions>
+ <exclusion>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-aop</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>aopalliance</groupId>
+ <artifactId>aopalliance</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.slf4j</groupId>
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
index 5eb2841..f371d49 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/RutaBasicUtils.java
@@ -18,14 +18,22 @@
*/
package org.apache.uima.ruta;
+import java.util.Collection;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.util.CasUtil;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.ruta.type.RutaBasic;
/**
@@ -129,4 +137,112 @@ public class RutaBasicUtils {
return true;
}
+ /**
+ * Validates the internal indexing, i.e. the information stored in the RutaBasics, and throws an
+ * exception if an invalid state is discovered.
+ *
+ * @param jcas
+ * the JCas that should be validated
+ * @param ignoreTypeNames
+ * the names of types that should not be validated
+ * @throws AnalysisEngineProcessException
+ * if some problem was detected
+ */
+ public static void validateInternalIndexing(JCas jcas, Collection<String> ignoreTypeNames)
+ throws AnalysisEngineProcessException {
+ // Offset lookups: each begin/end offset maps to the RutaBasic starting/ending there.
+ Map<Integer, RutaBasic> beginMap = new LinkedHashMap<>();
+ Map<Integer, RutaBasic> endMap = new LinkedHashMap<>();
+
+ Collection<RutaBasic> basics = JCasUtil.select(jcas, RutaBasic.class);
+
+ if (basics.isEmpty()) {
+ throw new AnalysisEngineProcessException(
+ new IllegalStateException("No RutaBasics available!"));
+ }
+ for (RutaBasic rutaBasic : basics) {
+
+ int begin = rutaBasic.getBegin();
+ int end = rutaBasic.getEnd();
+ // A begin or end offset seen twice means two RutaBasics share a boundary.
+ if (beginMap.get(begin) != null || endMap.get(end) != null) {
+ throw new AnalysisEngineProcessException(new IllegalStateException(
+ "RutaBasic must be disjunct! Problem at offset " + begin));
+ }
+
+ beginMap.put(begin, rutaBasic);
+ endMap.put(end, rutaBasic);
+ }
+
+ for (Annotation annotation : JCasUtil.select(jcas, Annotation.class)) {
+
+ Type type = annotation.getType();
+ if (ignoreType(type, ignoreTypeNames, jcas)) {
+ continue;
+ }
+
+ int begin = annotation.getBegin();
+ int end = annotation.getEnd();
+ // Every checked annotation must start and end exactly on RutaBasic boundaries.
+ RutaBasic beginBasic = beginMap.get(begin);
+ RutaBasic endBasic = endMap.get(end);
+ if (beginBasic == null) {
+ throw new AnalysisEngineProcessException(new IllegalStateException(
+ "No RutaBasic for begin of annotation at offset " + begin));
+ }
+ if (endBasic == null) {
+ throw new AnalysisEngineProcessException(
+ new IllegalStateException("No RutaBasic for end of annotation at offset " + end));
+ }
+ // The boundary RutaBasics must list this annotation among their anchors for its type.
+ Collection<AnnotationFS> beginAnchors = beginBasic.getBeginAnchors(type);
+ if (beginAnchors == null || !beginAnchors.contains(annotation)) {
+ throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+ + type.getName() + "' not registered as begin at offset " + begin));
+ }
+ Collection<AnnotationFS> endAnchors = endBasic.getEndAnchors(type);
+ if (endAnchors == null || !endAnchors.contains(annotation)) {
+ throw new AnalysisEngineProcessException(new IllegalStateException("Annotation of type '"
+ + type.getName() + "' not registered as end at offset " + end));
+ }
+ // Every RutaBasic covered by the annotation must be flagged as part of its type.
+ List<RutaBasic> coveredBasics = JCasUtil.selectCovered(RutaBasic.class, annotation);
+ for (RutaBasic coveredBasic : coveredBasics) {
+ if (!coveredBasic.isPartOf(type)) {
+ throw new AnalysisEngineProcessException(
+ new IllegalStateException("Annotation of type '" + type.getName()
+ + "' not registered as partof at offset [" + begin + "," + end + "]"));
+ }
+ }
+ }
+ }
+
+ private static boolean ignoreType(Type type, Collection<String> ignoreTypeNames, JCas jcas) {
+ // A missing type cannot match any ignore entry, so it is never ignored.
+ if (type == null) {
+ return false;
+ }
+ // RutaBasic is always skipped; assumes its UIMA type name equals the Java class name.
+ if (StringUtils.equals(type.getName(), RutaBasic.class.getName())) {
+ return true;
+ }
+ // Without an ignore list nothing else is skipped.
+ if (ignoreTypeNames == null) {
+ return false;
+ }
+ // Names unknown to the type system are skipped; a listed type also covers what it subsumes.
+ TypeSystem typeSystem = jcas.getTypeSystem();
+
+ for (String typeName : ignoreTypeNames) {
+ Type ignoreType = typeSystem.getType(typeName);
+ if (ignoreType == null) {
+ continue;
+ }
+ if (typeSystem.subsumes(ignoreType, type)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
}
diff --git a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
index d676528..093e35a 100644
--- a/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
+++ b/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java
@@ -62,6 +62,7 @@ import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.ruta.FilterManager;
import org.apache.uima.ruta.ReindexUpdateMode;
+import org.apache.uima.ruta.RutaBasicUtils;
import org.apache.uima.ruta.RutaConstants;
import org.apache.uima.ruta.RutaEnvironment;
import org.apache.uima.ruta.RutaIndexingConfiguration;
@@ -529,6 +530,17 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
private ReindexUpdateMode reindexUpdateMode;
/**
+ * Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+ * and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+ * Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+ * are ignored. Default value is false.
+ */
+ public static final String PARAM_VALIDATE_INTERNAL_INDEXING = "validateInternalIndexing";
+
+ @ConfigurationParameter(name = PARAM_VALIDATE_INTERNAL_INDEXING, mandatory = true, defaultValue = "false")
+ private boolean validateInternalIndexing;
+
+ /**
* This parameter determines positions as invisible if the internal indexing of the corresponding
* RutaBasic annotation is empty.
*/
@@ -663,6 +675,14 @@ public class RutaEngine extends JCasAnnotator_ImplBase {
stream.setGreedyRule(greedyRule);
stream.setMaxRuleMatches(maxRuleMatches);
stream.setMaxRuleElementMatches(maxRuleElementMatches);
+
+ if (validateInternalIndexing) {
+ Collection<String> ignoreTypeNames = new ArrayList<>();
+ ignoreTypeNames.addAll(Arrays.asList(indexSkipTypes));
+ ignoreTypeNames.addAll(Arrays.asList(reindexSkipTypes));
+ RutaBasicUtils.validateInternalIndexing(jcas, ignoreTypeNames);
+ }
+
try {
script.apply(stream, crowd);
} catch (Throwable e) {
diff --git a/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
new file mode 100644
index 0000000..89e11e2
--- /dev/null
+++ b/ruta-core/src/test/java/org/apache/uima/ruta/RutaBasicUtilsTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.ruta;
+
+import java.util.Arrays;
+
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.fit.testing.junit.ManagedJCas;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.type.CW;
+import org.apache.uima.ruta.type.RutaBasic;
+import org.junit.Rule;
+import org.junit.Test;
+
+public class RutaBasicUtilsTest {
+ // Each test obtains its JCas from this uimaFIT JUnit rule.
+ public @Rule ManagedJCas managedJCas = new ManagedJCas();
+ // No RutaBasic is added to the CAS, so validation is expected to throw.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnNoBasics() throws AnalysisEngineProcessException {
+
+ RutaBasicUtils.validateInternalIndexing(managedJCas.get(), null);
+ }
+ // Two RutaBasics with identical offsets [0,1] are expected to be rejected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnDuplicateBasics() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ new RutaBasic(jcas, 0, 1).addToIndexes();
+ new RutaBasic(jcas, 0, 1).addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // The CW begins at 0 but the only RutaBasic begins at 1: an exception is expected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnMissingBasicAtBegin() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ new RutaBasic(jcas, 1, 2).addToIndexes();
+ new CW(jcas, 0, 2).addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // The CW ends at 2 but the only RutaBasic ends at 1: an exception is expected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnMissingBasicAtEnd() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ new RutaBasic(jcas, 0, 1).addToIndexes();
+ new CW(jcas, 0, 2).addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // The CW is only added via addEnd on the RutaBasic, never via addBegin: an exception is expected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnMissingAnnotationAtBegin() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ CW cw = new CW(jcas, 0, 1);
+ cw.addToIndexes();
+ RutaBasic rb = new RutaBasic(jcas, 0, 1);
+ rb.addEnd(cw, cw.getType());
+ rb.addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // The CW is only added via addBegin on the RutaBasic, never via addEnd: an exception is expected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnMissingAnnotationAtEnd() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ CW cw = new CW(jcas, 0, 1);
+ cw.addToIndexes();
+ RutaBasic rb = new RutaBasic(jcas, 0, 1);
+ rb.addBegin(cw, cw.getType());
+ rb.addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // Begin and end are registered; presumably that does not set partof, so an exception is expected.
+ @Test(expected = AnalysisEngineProcessException.class)
+ public void testBreakOnMissingPartof() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ CW cw = new CW(jcas, 0, 1);
+ cw.addToIndexes();
+ RutaBasic rb = new RutaBasic(jcas, 0, 1);
+ rb.addBegin(cw, cw.getType());
+ rb.addEnd(cw, cw.getType());
+ rb.addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+ // The CW is not registered in the RutaBasic, but the ignore list is expected to exempt it.
+ @Test
+ public void testIgnoreTypeNames() throws AnalysisEngineProcessException {
+ JCas jcas = managedJCas.get();
+ new RutaBasic(jcas, 0, 1).addToIndexes();
+ new CW(jcas, 0, 1).addToIndexes();
+ RutaBasicUtils.validateInternalIndexing(jcas, Arrays.asList(CAS.TYPE_NAME_ANNOTATION));
+ }
+ // A Ruta script is applied to real text first; validating afterwards is expected to pass.
+ @Test
+ public void testAllGood() throws Exception {
+ JCas jcas = managedJCas.get();
+ jcas.setDocumentText("This is 1 TEST.");
+ Ruta.apply(jcas.getCas(), "CW{-> TruePositive};");
+ RutaBasicUtils.validateInternalIndexing(jcas, null);
+ }
+}
diff --git a/ruta-docbook/src/docbook/tools.ruta.overview.xml b/ruta-docbook/src/docbook/tools.ruta.overview.xml
index 3107a97..1bce6ab 100644
--- a/ruta-docbook/src/docbook/tools.ruta.overview.xml
+++ b/ruta-docbook/src/docbook/tools.ruta.overview.xml
@@ -927,6 +927,14 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
</row>
<row>
<entry>
+ <link linkend='ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing'>validateInternalIndexing</link>
+ </entry>
+ <entry>Option to validate the internal indexing.
+ </entry>
+ <entry>Single Boolean</entry>
+ </row>
+ <row>
+ <entry>
<link linkend='ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible'>emptyIsInvisible</link>
</entry>
<entry>Option to define empty text positions as invisible.
@@ -1285,6 +1293,16 @@ Document{-> EXEC(MyAnalysisEngine, {MyType1, MyType2})};
Default value is ADDITIVE.
</para>
</section>
+ <section id="ugr.tools.ruta.ae.basic.parameter.validateInternalIndexing">
+ <title>validateInternalIndexing</title>
+ <para>
+ Option to validate the internal indexing in RutaBasic with the current CAS after the indexing
+ and reindexing is performed. Annotations that are not correctly indexed in RutaBasics cause
+ Exceptions. Annotations of types listed in parameter 'indexSkipTypes' and 'reindexSkipTypes'
+ are ignored. Default value is false.
+ </para>
+ </section>
+
<section id="ugr.tools.ruta.ae.basic.parameter.emptyIsInvisible">
<title>emptyIsInvisible</title>
<para>
diff --git a/ruta-parent/pom.xml b/ruta-parent/pom.xml
index 0b0c451..5f70f13 100644
--- a/ruta-parent/pom.xml
+++ b/ruta-parent/pom.xml
@@ -131,7 +131,7 @@
Creative Commons Attribution 3.0 License.
</postNoticeText>
<uimaVersion>2.10.4</uimaVersion>
- <uimafit-version>2.4.0</uimafit-version>
+ <uimafit-version>2.5.1-SNAPSHOT</uimafit-version>
<spring-version>4.3.22.RELEASE</spring-version>
<!--
BACKWARD_COMPATIBLE_IMPLEMENTER - patch version (=.=.+)