You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2016/07/19 08:44:28 UTC

svn commit: r1753352 - in /uima/ruta/trunk/ruta-core/src: main/java/org/apache/uima/ruta/RutaStream.java main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java test/java/org/apache/uima/ruta/WildCard2Test.java

Author: pkluegl
Date: Tue Jul 19 08:44:28 2016
New Revision: 1753352

URL: http://svn.apache.org/viewvc?rev=1753352&view=rev
Log:
UIMA-5013
- added dummy annotations in order to use continueMatch instead of startMatch
- added test


Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java Tue Jul 19 08:44:28 2016
@@ -76,6 +76,7 @@ import org.apache.uima.ruta.rule.MatchCo
 import org.apache.uima.ruta.rule.RuleElement;
 import org.apache.uima.ruta.type.RutaAnnotation;
 import org.apache.uima.ruta.type.RutaBasic;
+import org.apache.uima.ruta.type.RutaOptional;
 import org.apache.uima.ruta.utils.RutaListUtils;
 import org.apache.uima.ruta.utils.UIMAUtils;
 import org.apache.uima.ruta.visitor.InferenceCrowd;
@@ -118,6 +119,11 @@ public class RutaStream extends FSIterat
 
   private boolean onlyOnce = false;
 
+  private Annotation documentBeginAnchor;
+
+  private Annotation documentEndAnchor;
+
+
   public RutaStream(CAS cas, Type basicType, FilterManager filter, boolean lowMemoryProfile,
           boolean simpleGreedyForComposed, InferenceCrowd crowd) {
     super();
@@ -134,6 +140,8 @@ public class RutaStream extends FSIterat
       documentAnnotation = cas.getDocumentAnnotation();
       documentAnnotationType = getCas().getDocumentAnnotation().getType();
       basicIt.moveToFirst();
+      documentBeginAnchor = new RutaOptional(getJCas(), 0, 0);
+      documentEndAnchor = new RutaOptional(getJCas(), documentAnnotation.getEnd(), documentAnnotation.getEnd());
     } else {
       documentAnnotation = additionalWindow;
       documentAnnotationType = filter.getWindowType();
@@ -1226,4 +1234,23 @@ public class RutaStream extends FSIterat
     changeOffsets(annotation, annotation.getBegin(), end, modifikator);
   }
 
+  public AnnotationFS getVeryFirstBeforeWindow(boolean direction) {
+    if(direction) {
+      RutaBasic firstBasicOfAll = getFirstBasicOfAll();
+      int begin = firstBasicOfAll.getBegin();
+      if(begin == 0) {
+        return documentBeginAnchor;
+      } else {
+        return getEndAnchor(begin);
+      }
+    } else {
+      RutaBasic lastBasicOfAll = getLastBasicOfAll();
+      int end = lastBasicOfAll.getEnd();
+      if(end == cas.getDocumentAnnotation().getEnd()) {
+        return documentEndAnchor;
+      } else {
+        return getBeginAnchor(end);
+      }
+    }
+  }
 }

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java Tue Jul 19 08:44:28 2016
@@ -124,7 +124,8 @@ public class WildCardRuleElement extends
     List<RuleMatch> result = new ArrayList<RuleMatch>();
     AnnotationFS nextOne = annotation;
     boolean doneHere = false;
-    while (!doneHere && (nextOne = getNextPositionForComposed(cre, after, nextOne, stream)) != null) {
+    while (!doneHere
+            && (nextOne = getNextPositionForComposed(cre, after, nextOne, stream)) != null) {
       int pointer = after ? nextOne.getBegin() : nextOne.getEnd();
       RutaBasic anchor = stream.getAnchor(!after, pointer);
       ComposedRuleElementMatch extendedContainerMatch = containerMatch.copy();
@@ -310,8 +311,8 @@ public class WildCardRuleElement extends
             ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
                     containerMatch, nextDepth);
             if (coveredByWildCard == null) {
-              result = nextElement.startMatch(ruleMatch, ruleApply, nextContainerMatch,
-                      nextElement, stream, crowd);
+              result = nextElement.startMatch(ruleMatch, ruleApply, nextContainerMatch, nextElement,
+                      stream, crowd);
             } else {
               // TODO match and containermatch should be on the correct level!
               result = nextElement.continueMatch(after, coveredByWildCard, ruleMatch, ruleApply,
@@ -336,7 +337,7 @@ public class WildCardRuleElement extends
     while (!doneHere && iterator.isValid() && stream.isVisible(iterator.get())) {
       AnnotationFS nextOne = iterator.get();
       int pointer = after ? nextOne.getBegin() : nextOne.getEnd();
-      RutaBasic anchor = stream.getAnchor(!after, pointer);
+      AnnotationFS anchor = stream.getAnchor(!after, pointer);
 
       ComposedRuleElementMatch extendedContainerMatch = containerMatch.copy();
       RuleMatch extendedMatch = ruleMatch.copy(extendedContainerMatch, after);
@@ -348,15 +349,14 @@ public class WildCardRuleElement extends
         ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
                 extendedContainerMatch, nextDepth);
         if (anchor == null) {
-          result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch,
-                  nextElement, stream, crowd);
-        } else {
-          // TODO match and containermatch should be on the correct level!
-          result = nextElement.continueMatch(after, anchor, extendedMatch, ruleApply,
-                  nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);
+          anchor = stream.getVeryFirstBeforeWindow(after);
         }
+        // TODO match and containermatch should be on the correct level!
+        result = nextElement.continueMatch(after, anchor, extendedMatch, ruleApply,
+                nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);
         List<RuleElementMatch> nextList = nextContainerMatch.getInnerMatches().get(nextElement);
-        if (nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched()) {
+        if (nextList == null || nextList.isEmpty()
+                || !nextList.get(nextList.size() - 1).matched()) {
           moveOn(after, iterator, stream);
         } else {
           doneHere = true;
@@ -456,8 +456,8 @@ public class WildCardRuleElement extends
       } catch (CASException e) {
         e.printStackTrace();
       }
-      RutaFrame window = new RutaFrame(jcas, stream.getDocumentAnnotation().getBegin(), stream
-              .getDocumentAnnotation().getEnd());
+      RutaFrame window = new RutaFrame(jcas, stream.getDocumentAnnotation().getBegin(),
+              stream.getDocumentAnnotation().getEnd());
       if (annotation == null) {
         result = cas.getAnnotationIndex(type).subiterator(window);
       } else {
@@ -511,8 +511,8 @@ public class WildCardRuleElement extends
     while (!doneHere && (indexOf = document.indexOf(stringValue, pointer)) < document.length()) {
       if (indexOf < 0) {
         // can't match, the next next element will see it.
-        ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
-                containerMatch, nextDepth);
+        ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(containerMatch,
+                nextDepth);
         nextElement.continueMatch(after, annotation, ruleMatch, ruleApply, nextContainerMatch,
                 sideStepOrigin, null, stream, crowd);
         doneHere = true;
@@ -529,8 +529,8 @@ public class WildCardRuleElement extends
         ComposedRuleElementMatch nextContainerMatch = getContainerMatchOfNextElement(
                 extendedContainerMatch, nextDepth);
         if (endAnchor == null) {
-          result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch,
-                  nextElement, stream, crowd);
+          result = nextElement.startMatch(extendedMatch, ruleApply, nextContainerMatch, nextElement,
+                  stream, crowd);
         } else {
           result = nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply,
                   nextContainerMatch, sideStepOrigin, nextElement, stream, crowd);

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java?rev=1753352&r1=1753351&r2=1753352&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/WildCard2Test.java Tue Jul 19 08:44:28 2016
@@ -26,60 +26,57 @@ import org.junit.Test;
 
 public class WildCard2Test {
 
-   @Test
-   public void test() {
-   String document =
-   "Ogren, P.V., Wetzler, P.G., Bethard, S.: ClearTK: A UIMA Toolkit for Statistical Natural Language Processing. In: UIMA for NLP workshop at LREC 08. (2008)";
-   document += "\n";
-   document +=
-   "Stephen Soderland, Claire Cardie, and Raymond Mooney. Learning Information Extraction Rules for Semi-Structured and Free Text. In Machine Learning, volume 34, pages 233–272, 1999.";
-   String script = "";
-   script += "RETAINTYPE(BREAK, SPACE);\n";
-   script += "#{-> T6} BREAK #{-> T6};\n";
-   script += "T6{-> TRIM(BREAK, SPACE)};\n";
-   script += "CW{REGEXP(\".\")} PERIOD{->T7};\n";
-   script += "RETAINTYPE;\n";
-   script += "BLOCK(forEach) T6 {}{\n";
-   script += "(# COLON){-> T1} (# PERIOD){-> T2} # \"(\" NUM{REGEXP(\"....\")-> T3} \")\";\n";
-   script +=
-   "(#{-CONTAINS(COLON)} PERIOD{-PARTOF(T7)}){-> T1} (# PERIOD){-> T2} # NUM{REGEXP(\"....\")-> T3};\n";
-   script += "}\n";
-  
-   CAS cas = null;
-   try {
-   cas = RutaTestUtils.getCAS(document);
-   Ruta.apply(cas, script);
-   } catch (Exception e) {
-   e.printStackTrace();
-   }
-  
-   RutaTestUtils.assertAnnotationsEquals(cas, 1, 2, "Ogren, P.V., Wetzler, P.G., Bethard, S.:",
-   "Stephen Soderland, Claire Cardie, and Raymond Mooney.");
-   RutaTestUtils.assertAnnotationsEquals(cas, 2, 2,
-   "ClearTK: A UIMA Toolkit for Statistical Natural Language Processing.",
-   "Learning Information Extraction Rules for Semi-Structured and Free Text.");
-   RutaTestUtils.assertAnnotationsEquals(cas, 3, 2, "2008", "1999");
-  
-   cas.release();
-   }
-  
-   @Test
-   public void testOptional() {
-   String document = "Cw 1 2 3";
-   String script = "(CW #){-> T1} SW?;";
-  
-   CAS cas = null;
-   try {
-   cas = RutaTestUtils.getCAS(document);
-   Ruta.apply(cas, script);
-   } catch (Exception e) {
-   e.printStackTrace();
-   }
-  
-   RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Cw 1 2 3");
-  
-   cas.release();
-   }
+  @Test
+  public void test() {
+    String document = "Ogren, P.V., Wetzler, P.G., Bethard, S.: ClearTK: A UIMA Toolkit for Statistical Natural Language Processing. In: UIMA for NLP workshop at LREC 08. (2008)";
+    document += "\n";
+    document += "Stephen Soderland, Claire Cardie, and Raymond Mooney. Learning Information Extraction Rules for Semi-Structured and Free Text. In Machine Learning, volume 34, pages 233–272, 1999.";
+    String script = "";
+    script += "RETAINTYPE(BREAK, SPACE);\n";
+    script += "#{-> T6} BREAK #{-> T6};\n";
+    script += "T6{-> TRIM(BREAK, SPACE)};\n";
+    script += "CW{REGEXP(\".\")} PERIOD{->T7};\n";
+    script += "RETAINTYPE;\n";
+    script += "BLOCK(forEach) T6 {}{\n";
+    script += "(# COLON){-> T1} (# PERIOD){-> T2} # \"(\" NUM{REGEXP(\"....\")-> T3} \")\";\n";
+    script += "(#{-CONTAINS(COLON)} PERIOD{-PARTOF(T7)}){-> T1} (# PERIOD){-> T2} # NUM{REGEXP(\"....\")-> T3};\n";
+    script += "}\n";
+
+    CAS cas = null;
+    try {
+      cas = RutaTestUtils.getCAS(document);
+      Ruta.apply(cas, script);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 2, "Ogren, P.V., Wetzler, P.G., Bethard, S.:",
+            "Stephen Soderland, Claire Cardie, and Raymond Mooney.");
+    RutaTestUtils.assertAnnotationsEquals(cas, 2, 2,
+            "ClearTK: A UIMA Toolkit for Statistical Natural Language Processing.",
+            "Learning Information Extraction Rules for Semi-Structured and Free Text.");
+    RutaTestUtils.assertAnnotationsEquals(cas, 3, 2, "2008", "1999");
+
+    cas.release();
+  }
+
+  @Test
+  public void testOptional() {
+    String document = "Cw 1 2 3";
+    String script = "(CW #){-> T1} SW?;";
+
+    CAS cas = null;
+    try {
+      cas = RutaTestUtils.getCAS(document);
+      Ruta.apply(cas, script);
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "Cw 1 2 3");
+
+    cas.release();
+  }
 
   @Test
   public void testLookaheadInGreedy() {
@@ -102,5 +99,21 @@ public class WildCard2Test {
 
     cas.release();
   }
+  
+  @Test
+  public void testMatchAtDocumentBegin() throws Exception{
+    String document = "First test. Some test. Last test.";
+    String script = "# CW{-> T1};";
+    script += "CW{-> T2};";
+    script += "T1 # (T2 #){->T3};";
+    script += "T3->{# CW{->T4};};";
+    CAS cas =  RutaTestUtils.getCAS(document);
+    Ruta.apply(cas, script);
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "First");
+    RutaTestUtils.assertAnnotationsEquals(cas, 2, 3, "First", "Some", "Last");
+    RutaTestUtils.assertAnnotationsEquals(cas, 3, 1, "Some test. Last test.");
+    RutaTestUtils.assertAnnotationsEquals(cas, 4, 1, "Some");
+    cas.release();
+  }
 
 }