You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2019/11/11 14:34:13 UTC
svn commit: r1869661 - in /uima/ruta/trunk/ruta-core: ./
src/main/java/org/apache/uima/ruta/expression/feature/
src/main/java/org/apache/uima/ruta/rule/
src/test/java/org/apache/uima/ruta/rule/
Author: pkluegl
Date: Mon Nov 11 14:34:13 2019
New Revision: 1869661
URL: http://svn.apache.org/viewvc?rev=1869661&view=rev
Log:
UIMA-3862: allow literal matches covering multiple RutaBasics
Added:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java
Modified:
uima/ruta/trunk/ruta-core/pom.xml
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java
Modified: uima/ruta/trunk/ruta-core/pom.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/pom.xml?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/pom.xml (original)
+++ uima/ruta/trunk/ruta-core/pom.xml Mon Nov 11 14:34:13 2019
@@ -30,22 +30,19 @@
<name>Apache UIMA Ruta: ${project.artifactId}</name>
<dependencies>
+
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>ruta-typesystem</artifactId>
<version>${project.parent.version}</version>
</dependency>
+
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-core</artifactId>
<version>${uimaVersion}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
</dependency>
+
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr-runtime</artifactId>
@@ -57,39 +54,46 @@
</exclusion>
</exclusions>
</dependency>
+
<dependency>
<groupId>org.htmlparser</groupId>
<artifactId>htmlparser</artifactId>
<version>1.6</version>
</dependency>
+
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
<version>1.6</version>
</dependency>
+
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
<scope>compile</scope>
</dependency>
+
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-tools</artifactId>
<version>${uimaVersion}</version>
<scope>compile</scope>
</dependency>
+
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-test-util</artifactId>
<version>${uimaVersion}</version>
<scope>test</scope>
</dependency>
+
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.8.1</version>
</dependency>
+
<dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimafit-core</artifactId>
@@ -107,12 +111,14 @@
</exclusion>
</exclusions>
</dependency>
+
<!-- needed for ruta-ep-ide - TODO this should be moved to engine? -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.4</version>
</dependency>
+
<!-- needed for ruta-ep-textruler/ruta-ep-addons - TODO this should be moved to engine? -->
<dependency>
<groupId>org.apache.commons</groupId>
@@ -120,6 +126,11 @@
<version>3.0</version>
</dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
<dependency>
<groupId>org.slf4j</groupId>
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java Mon Nov 11 14:34:13 2019
@@ -90,7 +90,6 @@ public class FeatureMatchExpression exte
Type featureRangeType = null;
TypeSystem typeSystem = stream.getCas().getTypeSystem();
if (feature instanceof TypeFeature) {
- // TODO
if (getArg() instanceof ITypeExpression) {
Type t1 = fs.getType();
ITypeExpression expr = (ITypeExpression) getArg();
Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java Mon Nov 11 14:34:13 2019
@@ -20,9 +20,13 @@
package org.apache.uima.ruta.rule;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.ruta.RutaStream;
@@ -43,18 +47,35 @@ public class RutaLiteralMatcher implemen
public List<AnnotationFS> getMatchingAnnotations(RutaBlock parent, RutaStream stream) {
List<AnnotationFS> result = new ArrayList<AnnotationFS>();
AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
- List<RutaBasic> list = stream.getBasicsInWindow(windowAnnotation);
- // TODO improve matching on literal strings
- for (RutaBasic each : list) {
- MatchContext context = new MatchContext(each, null, null, true);
- context.setParent(parent);
- if (each.getCoveredText().equals(expression.getStringValue(context, stream))) {
- result.add(each);
+
+ MatchContext context = new MatchContext(parent);
+ String text = windowAnnotation.getCoveredText();
+ String literal = expression.getStringValue(context, stream);
+
+ int indexOf = 0;
+ while ((indexOf = text.indexOf(literal, indexOf)) >= 0) {
+
+ int begin = indexOf + windowAnnotation.getBegin();
+ int end = begin + literal.length();
+ indexOf = end;
+
+ AnnotationFS annotation = getAnnotation(begin, end, stream);
+ if (stream.isVisible(annotation)) {
+ result.add(annotation);
}
}
return result;
}
+ private AnnotationFS getAnnotation(int begin, int end, RutaStream stream) {
+ RutaBasic beginAnchor = stream.getBeginAnchor(begin);
+ if (beginAnchor.getEnd() == end) {
+ return beginAnchor;
+ }
+ CAS cas = stream.getCas();
+ return cas.createAnnotation(cas.getAnnotationType(), begin, end);
+ }
+
@Override
public String toString() {
return "\"" + expression.toString() + "\"";
@@ -73,29 +94,64 @@ public class RutaLiteralMatcher implemen
@Override
public Collection<AnnotationFS> getAnnotationsAfter(RutaRuleElement ruleElement,
AnnotationFS annotation, RutaBlock parent, RutaStream stream) {
- return getNextAnnotations(false, annotation, stream, parent);
+
+ RutaBasic basicNextTo = stream.getBasicNextTo(false, annotation);
+ if (basicNextTo == null) {
+ return Collections.emptyList();
+ }
+
+ MatchContext context = new MatchContext(parent);
+ String literal = expression.getStringValue(context, stream);
+
+ if (!StringUtils.startsWith(literal, basicNextTo.getCoveredText())) {
+ return Collections.emptyList();
+ }
+ AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
+ int begin = basicNextTo.getBegin();
+ int end = begin + literal.length();
+ if (begin < windowAnnotation.getBegin() || end > windowAnnotation.getEnd()) {
+ return Collections.emptyList();
+ }
+
+ String substring = windowAnnotation.getCoveredText().substring(begin, end);
+ if (StringUtils.equals(literal, substring)) {
+ AnnotationFS matchedAnnotation = getAnnotation(begin, end, stream);
+ if (stream.isVisible(matchedAnnotation)) {
+ return Arrays.asList(matchedAnnotation);
+ }
+ }
+ return Collections.emptyList();
}
@Override
public Collection<AnnotationFS> getAnnotationsBefore(RutaRuleElement ruleElement,
AnnotationFS annotation, RutaBlock parent, RutaStream stream) {
- return getNextAnnotations(true, annotation, stream, parent);
- }
-
- private Collection<AnnotationFS> getNextAnnotations(boolean before, AnnotationFS annotation,
- RutaStream stream, RutaBlock parent) {
- List<AnnotationFS> result = new ArrayList<AnnotationFS>(1);
- RutaBasic basicNextTo = stream.getBasicNextTo(before, annotation);
+ RutaBasic basicNextTo = stream.getBasicNextTo(true, annotation);
if (basicNextTo == null) {
- return result;
+ return Collections.emptyList();
+ }
+
+ MatchContext context = new MatchContext(parent);
+ String literal = expression.getStringValue(context, stream);
+
+ if (!StringUtils.endsWith(literal, basicNextTo.getCoveredText())) {
+ return Collections.emptyList();
}
- MatchContext context = new MatchContext(annotation, null, null, !before);
- context.setParent(parent);
- String stringValue = expression.getStringValue(context, stream);
- if (stringValue != null && stringValue.equals(basicNextTo.getCoveredText())) {
- result.add(basicNextTo);
+ AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
+ int begin = basicNextTo.getEnd() - literal.length();
+ int end = basicNextTo.getEnd();
+ if (begin < windowAnnotation.getBegin() || end > windowAnnotation.getEnd()) {
+ return Collections.emptyList();
}
- return result;
+
+ String substring = windowAnnotation.getCoveredText().substring(begin, end);
+ if (StringUtils.equals(literal, substring)) {
+ AnnotationFS matchedAnnotation = getAnnotation(begin, end, stream);
+ if (stream.isVisible(matchedAnnotation)) {
+ return Arrays.asList(matchedAnnotation);
+ }
+ }
+ return Collections.emptyList();
}
@Override
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java?rev=1869661&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java Mon Nov 11 14:34:13 2019
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.rule;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.junit.Test;
+
+public class RutaLiteralMatcherTest {
+
+ @Test
+ public void test() throws Exception {
+
+ String text = "This is a test.";
+ String script = "";
+// script += "\"" + text + "\" {-> T1};\n";
+// script += "\"is a\" {-> T2} \"test.\";\n";
+// script += "\"is a test\" {-> T3} @PERIOD;\n";
+// script += "\" \" {-> T4};\n";
+// script += "ADDRETAINTYPE(SPACE);\n\" \" {-> T5};\nREMOVERETAINTYPE(SPACE);\n";
+ script += "\" is a test\" {-> T6} @PERIOD;\n";
+
+ CAS cas = RutaTestUtils.getCAS(text);
+ Ruta.apply(cas, script);
+
+// RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text);
+// RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "is a");
+// RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "is a test");
+// RutaTestUtils.assertAnnotationsEquals(cas, 4, 0);
+// RutaTestUtils.assertAnnotationsEquals(cas, 5, 3, " ", " ", " ");
+ RutaTestUtils.assertAnnotationsEquals(cas, 6, 0);
+ }
+}