You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2019/11/11 14:34:13 UTC

svn commit: r1869661 - in /uima/ruta/trunk/ruta-core: ./ src/main/java/org/apache/uima/ruta/expression/feature/ src/main/java/org/apache/uima/ruta/rule/ src/test/java/org/apache/uima/ruta/rule/

Author: pkluegl
Date: Mon Nov 11 14:34:13 2019
New Revision: 1869661

URL: http://svn.apache.org/viewvc?rev=1869661&view=rev
Log:
UIMA-3862: allow literal matches covering multiple RutaBasics

Added:
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java
Modified:
    uima/ruta/trunk/ruta-core/pom.xml
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java

Modified: uima/ruta/trunk/ruta-core/pom.xml
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/pom.xml?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/pom.xml (original)
+++ uima/ruta/trunk/ruta-core/pom.xml Mon Nov 11 14:34:13 2019
@@ -30,22 +30,19 @@
   <name>Apache UIMA Ruta: ${project.artifactId}</name>
   
   <dependencies>
+  
     <dependency>
       <groupId>org.apache.uima</groupId>
       <artifactId>ruta-typesystem</artifactId>
       <version>${project.parent.version}</version>
     </dependency>
+    
     <dependency>
       <groupId>org.apache.uima</groupId>
       <artifactId>uimaj-core</artifactId>
       <version>${uimaVersion}</version>
-      <scope>compile</scope>
-    </dependency>
-    <dependency>
-      <groupId>junit</groupId>
-      <artifactId>junit</artifactId>
-      <scope>test</scope>
     </dependency>
+    
    <dependency>
       <groupId>org.antlr</groupId>
       <artifactId>antlr-runtime</artifactId>
@@ -57,39 +54,46 @@
         </exclusion>
       </exclusions>
     </dependency>
+    
     <dependency>
       <groupId>org.htmlparser</groupId>
       <artifactId>htmlparser</artifactId>
       <version>1.6</version>
     </dependency>
+    
     <dependency>
 	   <groupId>org.apache.commons</groupId>
 	   <artifactId>commons-text</artifactId>
 	   <version>1.6</version>
 	</dependency>
+    
     <dependency>
       <groupId>commons-collections</groupId>
       <artifactId>commons-collections</artifactId>
       <version>3.2.1</version>
       <scope>compile</scope>
     </dependency>
+    
     <dependency>
       <groupId>org.apache.uima</groupId>
       <artifactId>uimaj-tools</artifactId>
       <version>${uimaVersion}</version>
       <scope>compile</scope>
     </dependency>
+    
     <dependency>
       <groupId>org.apache.uima</groupId>
       <artifactId>uimaj-test-util</artifactId>
       <version>${uimaVersion}</version>
       <scope>test</scope>
     </dependency>
+    
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
       <version>3.8.1</version>
     </dependency>
+    
     <dependency>
       <groupId>org.apache.uima</groupId>
       <artifactId>uimafit-core</artifactId>
@@ -107,12 +111,14 @@
         </exclusion>
       </exclusions>
     </dependency>
+    
     <!-- needed for ruta-ep-ide - TODO this should be moved to engine? -->
     <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>2.4</version>
     </dependency>
+    
     <!-- needed for ruta-ep-textruler/ruta-ep-addons - TODO this should be moved to engine? -->
     <dependency>
       <groupId>org.apache.commons</groupId>
@@ -120,6 +126,11 @@
       <version>3.0</version>
     </dependency>
     
+    <dependency>
+        <groupId>junit</groupId>
+        <artifactId>junit</artifactId>
+        <scope>test</scope>
+    </dependency>
     
     <dependency>
       <groupId>org.slf4j</groupId>

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/expression/feature/FeatureMatchExpression.java Mon Nov 11 14:34:13 2019
@@ -90,7 +90,6 @@ public class FeatureMatchExpression exte
     Type featureRangeType = null;
     TypeSystem typeSystem = stream.getCas().getTypeSystem();
     if (feature instanceof TypeFeature) {
-      // TODO
       if (getArg() instanceof ITypeExpression) {
         Type t1 = fs.getType();
         ITypeExpression expr = (ITypeExpression) getArg();

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java?rev=1869661&r1=1869660&r2=1869661&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RutaLiteralMatcher.java Mon Nov 11 14:34:13 2019
@@ -20,9 +20,13 @@
 package org.apache.uima.ruta.rule;
 
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;
 
+import org.apache.commons.lang3.StringUtils;
+import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.ruta.RutaStream;
@@ -43,18 +47,35 @@ public class RutaLiteralMatcher implemen
   public List<AnnotationFS> getMatchingAnnotations(RutaBlock parent, RutaStream stream) {
     List<AnnotationFS> result = new ArrayList<AnnotationFS>();
     AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
-    List<RutaBasic> list = stream.getBasicsInWindow(windowAnnotation);
-    // TODO improve matching on literal strings
-    for (RutaBasic each : list) {
-      MatchContext context = new MatchContext(each, null, null, true);
-      context.setParent(parent);
-      if (each.getCoveredText().equals(expression.getStringValue(context, stream))) {
-        result.add(each);
+    // Collect every non-overlapping occurrence of the literal in the window's covered text.
+    MatchContext context = new MatchContext(parent);
+    String text = windowAnnotation.getCoveredText();
+    String literal = expression.getStringValue(context, stream);
+    // A null literal would throw in indexOf and an empty one would never advance: match nothing.
+    int indexOf = 0;
+    while (StringUtils.isNotEmpty(literal) && (indexOf = text.indexOf(literal, indexOf)) >= 0) {
+      // indexOf is relative to the window text, whereas annotation offsets are absolute.
+      int begin = indexOf + windowAnnotation.getBegin();
+      int end = begin + literal.length();
+      indexOf += literal.length(); // continue behind this match, still in window-relative terms
+      // getAnnotation reuses a basic spanning exactly [begin, end) or creates a new annotation.
+      AnnotationFS annotation = getAnnotation(begin, end, stream);
+      if (stream.isVisible(annotation)) {
+        result.add(annotation);
       }
     }
     return result;
   }
 
+  private AnnotationFS getAnnotation(int begin, int end, RutaStream stream) {
+    RutaBasic beginAnchor = stream.getBeginAnchor(begin); // presumably null mid-token — TODO confirm
+    if (beginAnchor != null && beginAnchor.getEnd() == end) {
+      return beginAnchor; // an existing basic already spans exactly [begin, end)
+    }
+    CAS cas = stream.getCas();
+    return cas.createAnnotation(cas.getAnnotationType(), begin, end); // no single basic fits
+  }
+
   @Override
   public String toString() {
     return "\"" + expression.toString() + "\"";
@@ -73,29 +94,64 @@ public class RutaLiteralMatcher implemen
   @Override
   public Collection<AnnotationFS> getAnnotationsAfter(RutaRuleElement ruleElement,
           AnnotationFS annotation, RutaBlock parent, RutaStream stream) {
-    return getNextAnnotations(false, annotation, stream, parent);
+    // A match has to begin at the basic next to the given annotation (false = look after it).
+    RutaBasic basicNextTo = stream.getBasicNextTo(false, annotation);
+    if (basicNextTo == null) {
+      return Collections.emptyList();
+    }
+    MatchContext context = new MatchContext(parent);
+    String literal = expression.getStringValue(context, stream);
+    // Pre-check: the literal must start with that basic's text (null-safe, false for null literal).
+    if (!StringUtils.startsWith(literal, basicNextTo.getCoveredText())) {
+      return Collections.emptyList();
+    }
+    AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
+    int begin = basicNextTo.getBegin();
+    int end = begin + literal.length();
+    if (begin < windowAnnotation.getBegin() || end > windowAnnotation.getEnd()) {
+      return Collections.emptyList();
+    }
+    int offset = windowAnnotation.getBegin(); // getCoveredText() is relative to the window begin
+    String substring = windowAnnotation.getCoveredText().substring(begin - offset, end - offset);
+    if (StringUtils.equals(literal, substring)) {
+      AnnotationFS matchedAnnotation = getAnnotation(begin, end, stream);
+      if (stream.isVisible(matchedAnnotation)) {
+        return Arrays.asList(matchedAnnotation);
+      }
+    }
+    return Collections.emptyList();
   }
 
   @Override
   public Collection<AnnotationFS> getAnnotationsBefore(RutaRuleElement ruleElement,
           AnnotationFS annotation, RutaBlock parent, RutaStream stream) {
-    return getNextAnnotations(true, annotation, stream, parent);
-  }
-
-  private Collection<AnnotationFS> getNextAnnotations(boolean before, AnnotationFS annotation,
-          RutaStream stream, RutaBlock parent) {
-    List<AnnotationFS> result = new ArrayList<AnnotationFS>(1);
-    RutaBasic basicNextTo = stream.getBasicNextTo(before, annotation);
+    RutaBasic basicNextTo = stream.getBasicNextTo(true, annotation); // true = look before it
     if (basicNextTo == null) {
-      return result;
+      return Collections.emptyList();
+    }
+
+    MatchContext context = new MatchContext(parent);
+    String literal = expression.getStringValue(context, stream);
+    // Pre-check: the literal must end with that basic's text (null-safe, false for null literal).
+    if (!StringUtils.endsWith(literal, basicNextTo.getCoveredText())) {
+      return Collections.emptyList();
     }
-    MatchContext context = new MatchContext(annotation, null, null, !before);
-    context.setParent(parent);
-    String stringValue = expression.getStringValue(context, stream);
-    if (stringValue != null && stringValue.equals(basicNextTo.getCoveredText())) {
-      result.add(basicNextTo);
+    AnnotationFS windowAnnotation = stream.getDocumentAnnotation();
+    int begin = basicNextTo.getEnd() - literal.length();
+    int end = basicNextTo.getEnd();
+    if (begin < windowAnnotation.getBegin() || end > windowAnnotation.getEnd()) {
+      return Collections.emptyList();
     }
-    return result;
+    int offset = windowAnnotation.getBegin(); // getCoveredText() is relative to the window begin
+    String substring = windowAnnotation.getCoveredText().substring(begin - offset, end - offset);
+    if (StringUtils.equals(literal, substring)) {
+      AnnotationFS matchedAnnotation = getAnnotation(begin, end, stream);
+      if (stream.isVisible(matchedAnnotation)) {
+        return Arrays.asList(matchedAnnotation);
+      }
+    }
+    return Collections.emptyList();
   }
 
   @Override

Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java?rev=1869661&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/rule/RutaLiteralMatcherTest.java Mon Nov 11 14:34:13 2019
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.rule;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.ruta.engine.Ruta;
+import org.apache.uima.ruta.engine.RutaTestUtils;
+import org.junit.Test;
+
+public class RutaLiteralMatcherTest {
+  /** Applies a Ruta script with a literal matcher to a short text and checks the match count. */
+  @Test
+  public void test() throws Exception {
+    // Only the T6 rule is active; the T1-T5 rules and their assertions below are commented out.
+    String text = "This is a test.";
+    String script = "";
+//     script += "\"" + text + "\" {-> T1};\n";
+//    script += "\"is a\" {-> T2} \"test.\";\n";
+//    script += "\"is a test\" {-> T3} @PERIOD;\n";
+//    script += "\" \" {-> T4};\n";
+//    script += "ADDRETAINTYPE(SPACE);\n\" \" {-> T5};\nREMOVERETAINTYPE(SPACE);\n";
+    script += "\" is a test\" {-> T6} @PERIOD;\n";
+    // The active literal starts with a space character and is followed by an anchored PERIOD.
+    CAS cas = RutaTestUtils.getCAS(text);
+    Ruta.apply(cas, script);
+    // NOTE(review): arguments look like (cas, type index, expected count, texts...) — confirm.
+//    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, text);
+//    RutaTestUtils.assertAnnotationsEquals(cas, 2, 1, "is a");
+//    RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "is a test");
+//    RutaTestUtils.assertAnnotationsEquals(cas, 4, 0);
+//    RutaTestUtils.assertAnnotationsEquals(cas, 5, 3, " ", " ", " ");
+    RutaTestUtils.assertAnnotationsEquals(cas, 6, 0); // presumably: no T6 annotation expected
+  }
+}