You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/08/16 19:30:22 UTC

svn commit: r1514808 - in /uima/sandbox/ruta/trunk/ruta-core/src: main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java test/java/org/apache/uima/ruta/QuantifierTest6.java

Author: pkluegl
Date: Fri Aug 16 17:30:22 2013
New Revision: 1514808

URL: http://svn.apache.org/r1514808
Log:
UIMA-3191
- fixed: a failed greedy child match caused the greedy parent to stop matching
- added test

Added:
    uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java
    uima/sandbox/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/QuantifierTest6.java
Modified:
    uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java

Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java?rev=1514808&r1=1514807&r2=1514808&view=diff
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java (original)
+++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/ComposedRuleElement.java Fri Aug 16 17:30:22 2013
@@ -21,11 +21,13 @@ package org.apache.uima.ruta.rule;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
+import java.util.TreeMap;
 
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.jcas.JCas;
@@ -45,6 +47,8 @@ public class ComposedRuleElement extends
 
   private Boolean conjunct = null;
 
+  private Comparator<RuleMatch> ruleMatchComparator = new RuleMatchComparator();
+
   public ComposedRuleElement(List<RuleElement> elements, RuleElementQuantifier quantifier,
           List<AbstractRutaCondition> conditions, List<AbstractRutaAction> actions,
           RuleElementContainer container, RutaBlock parent) {
@@ -108,7 +112,7 @@ public class ComposedRuleElement extends
       for (RuleMatch eachStartRuleMatch : startRuleMatches) {
         AnnotationFS prefixAnnotation = getPrefixAnnotation(eachStartRuleMatch, stream);
         for (RuleElement each : elements) {
-          if(each.equals(anchoringRuleElement)) {
+          if (each.equals(anchoringRuleElement)) {
             continue;
           }
           ComposedRuleElementMatch startElementMatch = (ComposedRuleElementMatch) eachStartRuleMatch
@@ -205,8 +209,7 @@ public class ComposedRuleElement extends
       }
     } else if (conjunct) {
       // conjunctive
-      
-      
+
       // TODO see startMatch()
       Map<RuleMatch, ComposedRuleElementMatch> ruleMatches = new HashMap<RuleMatch, ComposedRuleElementMatch>();
       for (RuleElement each : elements) {
@@ -243,40 +246,42 @@ public class ComposedRuleElement extends
           Map<RuleMatch, ComposedRuleElementMatch> ruleMatches, boolean direction) {
     // TODO hotfix: this needs a correct implementation
     return ruleMatches;
-//    Map<RuleMatch, ComposedRuleElementMatch> result = new HashMap<RuleMatch, ComposedRuleElementMatch>();
-//    Set<Entry<RuleMatch, ComposedRuleElementMatch>> entrySet = ruleMatches.entrySet();
-//    Entry<RuleMatch, ComposedRuleElementMatch> largestEntry = null;
-//    boolean allMatched = true;
-//    AnnotationFS largestAnnotation = null;
-//    for (Entry<RuleMatch, ComposedRuleElementMatch> entry : entrySet) {
-//      RuleMatch ruleMatch = entry.getKey();
-//      ComposedRuleElementMatch elementMatch = entry.getValue();
-//      allMatched &= elementMatch.matched();
-//      AnnotationFS lastMatchedAnnotation = ruleMatch.getLastMatchedAnnotation(getFirstElement(),
-//              direction);
-//      if (largestEntry == null) {
-//        largestEntry = entry;
-//        largestAnnotation = lastMatchedAnnotation;
-//      } else {
-//        if (lastMatchedAnnotation != null
-//                && largestAnnotation != null
-//                && lastMatchedAnnotation.getCoveredText().length() > largestAnnotation
-//                        .getCoveredText().length()) {
-//          largestEntry = entry;
-//          largestAnnotation = lastMatchedAnnotation;
-//        }
-//      }
-//    }
-//    if (allMatched) {
-//      result.put(largestEntry.getKey(), largestEntry.getValue());
-//    }
-//    return result;
+    // Map<RuleMatch, ComposedRuleElementMatch> result = new HashMap<RuleMatch,
+    // ComposedRuleElementMatch>();
+    // Set<Entry<RuleMatch, ComposedRuleElementMatch>> entrySet = ruleMatches.entrySet();
+    // Entry<RuleMatch, ComposedRuleElementMatch> largestEntry = null;
+    // boolean allMatched = true;
+    // AnnotationFS largestAnnotation = null;
+    // for (Entry<RuleMatch, ComposedRuleElementMatch> entry : entrySet) {
+    // RuleMatch ruleMatch = entry.getKey();
+    // ComposedRuleElementMatch elementMatch = entry.getValue();
+    // allMatched &= elementMatch.matched();
+    // AnnotationFS lastMatchedAnnotation = ruleMatch.getLastMatchedAnnotation(getFirstElement(),
+    // direction);
+    // if (largestEntry == null) {
+    // largestEntry = entry;
+    // largestAnnotation = lastMatchedAnnotation;
+    // } else {
+    // if (lastMatchedAnnotation != null
+    // && largestAnnotation != null
+    // && lastMatchedAnnotation.getCoveredText().length() > largestAnnotation
+    // .getCoveredText().length()) {
+    // largestEntry = entry;
+    // largestAnnotation = lastMatchedAnnotation;
+    // }
+    // }
+    // }
+    // if (allMatched) {
+    // result.put(largestEntry.getKey(), largestEntry.getValue());
+    // }
+    // return result;
   }
 
   private Map<RuleMatch, ComposedRuleElementMatch> mergeDisjunctiveRuleMatches(
           Map<RuleMatch, ComposedRuleElementMatch> ruleMatches, boolean direction) {
     // TODO hotfix: this needs a correct implementation
-    Map<RuleMatch, ComposedRuleElementMatch> result = new HashMap<RuleMatch, ComposedRuleElementMatch>();
+    Map<RuleMatch, ComposedRuleElementMatch> result = new TreeMap<RuleMatch, ComposedRuleElementMatch>(
+            ruleMatchComparator);
     Set<Entry<RuleMatch, ComposedRuleElementMatch>> entrySet = ruleMatches.entrySet();
     Entry<RuleMatch, ComposedRuleElementMatch> largestEntry = null;
     AnnotationFS largestAnnotation = null;
@@ -385,7 +390,11 @@ public class ComposedRuleElement extends
       ruleMatch.setMatched((ruleMatch.matched() || removedFailedMatches)
               && (evaluateMatches != null || continueMatch));
       if (failed) {
-        if (nextElement != null) {
+        // TODO failed was caused by a child: should here failed = false?
+        if (!removedFailedMatches && continueMatch) {
+          result = continueOwnMatch(after, annotation, ruleMatch, ruleApply, parentContainerMatch,
+                  sideStepOrigin, null, stream, crowd);
+        } else if (nextElement != null) {
           AnnotationFS backtrackedAnnotation = getBacktrackedAnnotation(evaluateMatches, annotation);
           if (backtrackedAnnotation != null) {
             result = nextElement.continueMatch(after, backtrackedAnnotation, ruleMatch, ruleApply,
@@ -461,7 +470,8 @@ public class ComposedRuleElement extends
     }
   }
 
-  private void doMatch(ComposedRuleElementMatch match, RuleMatch ruleMatch, RutaStream stream, InferenceCrowd crowd) {
+  private void doMatch(ComposedRuleElementMatch match, RuleMatch ruleMatch, RutaStream stream,
+          InferenceCrowd crowd) {
     List<AnnotationFS> textsMatched = match.getTextsMatched();
     if (textsMatched == null || textsMatched.isEmpty()) {
       return;

Added: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java?rev=1514808&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java (added)
+++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/RuleMatchComparator.java Fri Aug 16 17:30:22 2013
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.rule;
+
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.uima.cas.text.AnnotationFS;
+
+/**
+ * Orders rule matches by the text span of their root match, taken from the begin of the first and
+ * the end of the last annotation returned by getTextsMatched(): smaller begin first and, for equal
+ * begins, larger end first. A root match with a null or empty list is treated as the span 0..0.
+ */
+public class RuleMatchComparator implements Comparator<RuleMatch> {
+
+  public RuleMatchComparator() {
+    super();
+  }
+
+  /**
+   * Compares by begin offset (ascending), then by end offset (descending).
+   *
+   * @return -1, 0 or 1
+   */
+  public int compare(RuleMatch m1, RuleMatch m2) {
+    List<AnnotationFS> first = m1.getRootMatch().getTextsMatched();
+    List<AnnotationFS> second = m2.getRootMatch().getTextsMatched();
+    // Offsets stay 0 when nothing was matched.
+    boolean firstHasText = first != null && !first.isEmpty();
+    int firstBegin = firstHasText ? first.get(0).getBegin() : 0;
+    int firstEnd = firstHasText ? first.get(first.size() - 1).getEnd() : 0;
+    boolean secondHasText = second != null && !second.isEmpty();
+    int secondBegin = secondHasText ? second.get(0).getBegin() : 0;
+    int secondEnd = secondHasText ? second.get(second.size() - 1).getEnd() : 0;
+    if (firstBegin != secondBegin) {
+      return firstBegin < secondBegin ? -1 : 1;
+    }
+    if (firstEnd != secondEnd) {
+      // Same begin: the match reaching further (larger end) sorts first.
+      return firstEnd > secondEnd ? -1 : 1;
+    }
+    // Same begin and end: the comparator treats both matches as equal.
+    return 0;
+  }
+}
+

Added: uima/sandbox/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/QuantifierTest6.java
URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/QuantifierTest6.java?rev=1514808&view=auto
==============================================================================
--- uima/sandbox/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/QuantifierTest6.java (added)
+++ uima/sandbox/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/QuantifierTest6.java Fri Aug 16 17:30:22 2013
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.ruta.engine.Ruta;
+import org.junit.Test;
+
+public class QuantifierTest6 {
+  /**
+   * Applies a rule with nested greedy quantifiers to "A1B-2C3-D4E" and expects exactly one T1
+   * annotation covering the whole document (regression test for UIMA-3191).
+   *
+   * @throws Exception
+   *           if the CAS cannot be created or the script fails; propagated so that JUnit reports
+   *           the real cause instead of a follow-up failure on an unusable CAS
+   */
+  @Test
+  public void test() throws Exception {
+    String document = "A1B-2C3-D4E";
+    String script = "Document{->RETAINTYPE(WS)};\n";
+    script += "(CW | NUM)+{-PARTOF(T1)-> MARK(T1, 1, 2)} (SPECIAL{REGEXP(\"-\")} (CW | NUM)+)+;\n";
+    script += "Document{->RETAINTYPE};\n";
+
+    CAS cas = RutaTestUtils.getCAS(document);
+    try {
+      Ruta.apply(cas, script);
+
+      Type t = RutaTestUtils.getTestType(cas, 1);
+      AnnotationIndex<AnnotationFS> ai = cas.getAnnotationIndex(t);
+      assertEquals(1, ai.size());
+      FSIterator<AnnotationFS> iterator = ai.iterator();
+      assertEquals("A1B-2C3-D4E", iterator.next().getCoveredText());
+    } finally {
+      // Release the CAS even when the script or an assertion fails.
+      cas.release();
+    }
+  }
+}