You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2013/03/25 15:19:32 UTC

svn commit: r1460672 - in /uima/sandbox/textmarker/trunk: textmarker-core/src/main/java/org/apache/uima/textmarker/rule/ textmarker-core/src/test/java/org/apache/uima/textmarker/ textmarker-core/src/test/resources/org/apache/uima/textmarker/ textmarker...

Author: pkluegl
Date: Mon Mar 25 14:19:31 2013
New Revision: 1460672

URL: http://svn.apache.org/r1460672
Log:
UIMA-2757
- extended test and fixed problems
- added some documentation

Modified:
    uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/rule/WildCardRuleElement.java
    uima/sandbox/textmarker/trunk/textmarker-core/src/test/java/org/apache/uima/textmarker/WildCardTest.java
    uima/sandbox/textmarker/trunk/textmarker-core/src/test/resources/org/apache/uima/textmarker/WildCardTest.tm
    uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml
    uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml

Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/rule/WildCardRuleElement.java
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/rule/WildCardRuleElement.java?rev=1460672&r1=1460671&r2=1460672&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/rule/WildCardRuleElement.java (original)
+++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/rule/WildCardRuleElement.java Mon Mar 25 14:19:31 2013
@@ -25,17 +25,20 @@ import java.util.Collections;
 import java.util.List;
 
 import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.ConstraintFactory;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.FSTypeConstraint;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.jcas.JCas;
 import org.apache.uima.textmarker.TextMarkerBlock;
 import org.apache.uima.textmarker.TextMarkerStream;
 import org.apache.uima.textmarker.action.AbstractTextMarkerAction;
 import org.apache.uima.textmarker.condition.AbstractTextMarkerCondition;
 import org.apache.uima.textmarker.expression.string.StringExpression;
 import org.apache.uima.textmarker.type.TextMarkerBasic;
+import org.apache.uima.textmarker.type.TextMarkerFrame;
 import org.apache.uima.textmarker.visitor.InferenceCrowd;
 
 public class WildCardRuleElement extends AbstractRuleElement {
@@ -265,11 +268,7 @@ public class WildCardRuleElement extends
         TextMarkerTypeMatcher typeMatcher = (TextMarkerTypeMatcher) re.getMatcher();
         List<Type> types = typeMatcher.getTypes(parent, stream);
         Type type = types.get(0);
-        if (annotation == null) {
-          iterator = cas.getAnnotationIndex(type).iterator();
-        } else {
-          iterator = cas.getAnnotationIndex(type).iterator(annotation);
-        }
+        iterator = getIteratorOfType(type, annotation, stream);
       } else if (matcher instanceof TextMarkerDisjunctiveMatcher) {
         List<Type> types = matcher.getTypes(parent, stream);
         iterator = getIteratorForDisjunctive(cas, types, after, annotation, stream);
@@ -277,13 +276,39 @@ public class WildCardRuleElement extends
         // should not happen
       }
     } else {
-      if (annotation == null) {
-        iterator = cas.getAnnotationIndex(defaultType).iterator();
+      iterator = getIteratorOfType(defaultType, annotation, stream);
+    }
+    return iterator;
+  }
+
+  private FSIterator<AnnotationFS> getIteratorOfType(Type type, AnnotationFS annotation,
+          TextMarkerStream stream) {
+    CAS cas = stream.getCas();
+    FSIterator<AnnotationFS> result = null;
+    if(stream.getDocumentAnnotation().equals(cas.getDocumentAnnotation())) {
+      // no windowing needed
+      if(annotation == null) {
+        result = cas.getAnnotationIndex(type).iterator();
       } else {
-        iterator = cas.getAnnotationIndex(defaultType).iterator(annotation);
+        result = cas.getAnnotationIndex(type).iterator(annotation);
+      }
+    } else {
+      JCas jcas = null;
+      try {
+        jcas = cas.getJCas();
+      } catch (CASException e) {
+        e.printStackTrace();
+      }
+      TextMarkerFrame window = new TextMarkerFrame(jcas, stream.getDocumentAnnotation().getBegin(), stream.getDocumentAnnotation().getEnd());
+      if(annotation == null) {
+        result = cas.getAnnotationIndex(type).subiterator(window);
+      } else {
+        result = cas.getAnnotationIndex(type).subiterator(window);
+        result.moveTo(annotation);
       }
     }
-    return iterator;
+    
+    return result;
   }
 
   private void tryWithNextLiteral(boolean after, AnnotationFS annotation,
@@ -311,8 +336,8 @@ public class WildCardRuleElement extends
         doneHere = true;
         break;
       }
-      TextMarkerBasic anchor = stream.getAnchor(after, indexOf);
-      TextMarkerBasic endAnchor = stream.getAnchor(!after, indexOf);
+      TextMarkerBasic anchor = stream.getAnchor(after, indexOf+delta);
+      TextMarkerBasic endAnchor = stream.getAnchor(!after, indexOf+delta);
       ComposedRuleElementMatch extendedContainerMatch = containerMatch.copy();
       RuleMatch extendedMatch = ruleMatch.copy(extendedContainerMatch);
       AnnotationFS coveredByWildCard = getCoveredByWildCard(after, annotation, anchor, stream);
@@ -375,41 +400,34 @@ public class WildCardRuleElement extends
     }
   }
 
-  private AnnotationFS getCoveredByWildCard(boolean after, AnnotationFS annotation,
-          AnnotationFS nextOne, TextMarkerStream stream) {
-    AnnotationFS afs = null;
+  private AnnotationFS getCoveredByWildCard(boolean after, AnnotationFS last,
+          AnnotationFS next, TextMarkerStream stream) {
     CAS cas = stream.getCas();
     Type type = cas.getAnnotationType();
+    AnnotationFS documentAnnotation = stream.getDocumentAnnotation();
 
-    int lastBegin = 0;
-    int lastEnd = 0;
-    if (annotation != null) {
-      if (after) {
-        lastBegin = annotation.getBegin();
-        lastEnd = annotation.getEnd();
-      } else {
-        lastBegin = annotation.getBegin();
-        lastEnd = annotation.getEnd();
-        // TODO refactor code below
-      }
-    } else {
-      // TODO refactor code below
-    }
-
-    if (nextOne == null) {
-      AnnotationFS documentAnnotation = stream.getDocumentAnnotation();
-      if (after) {
-        afs = cas.createAnnotation(type, lastEnd, documentAnnotation.getEnd());
-      } else {
-        afs = cas.createAnnotation(type, documentAnnotation.getBegin(), lastBegin);
-      }
-    } else {
-      if (after) {
-        afs = cas.createAnnotation(type, lastEnd, nextOne.getBegin());
-      } else {
-        afs = cas.createAnnotation(type, nextOne.getEnd(), lastBegin);
-      }
-    }
+    // order like in the index
+    AnnotationFS before = last;
+    AnnotationFS later = next;
+    if(!after) {
+      before = next;
+      later = last;
+    }
+    
+    // without any information, match on everything
+    int begin = documentAnnotation.getBegin();
+    int end = documentAnnotation.getEnd();
+    
+    // limit offsets
+    if(before != null) {
+      begin = before.getEnd();
+    }
+    if(later != null) {
+      end = later.getBegin();
+    }
+    
+    AnnotationFS afs = cas.createAnnotation(type, begin, end);
+    
     return afs;
   }
 

Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/test/java/org/apache/uima/textmarker/WildCardTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/test/java/org/apache/uima/textmarker/WildCardTest.java?rev=1460672&r1=1460671&r2=1460672&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-core/src/test/java/org/apache/uima/textmarker/WildCardTest.java (original)
+++ uima/sandbox/textmarker/trunk/textmarker-core/src/test/java/org/apache/uima/textmarker/WildCardTest.java Mon Mar 25 14:19:31 2013
@@ -122,6 +122,66 @@ public class WildCardTest {
     iterator = ai.iterator();
     assertEquals("The TextMarker language is an imperative rule language extended with scripting elements.", iterator.next().getCoveredText());
     
+    t = TextMarkerTestUtils.getTestType(cas, 16);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 17);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(4, ai.size());
+    assertEquals("The", iterator.next().getCoveredText());
+    assertEquals("If", iterator.next().getCoveredText());
+    assertEquals("The", iterator.next().getCoveredText());
+    assertEquals("The", iterator.next().getCoveredText());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 18);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 19);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(8, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 20);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 21);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 22);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 23);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 24);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(2, ai.size());
+
+    t = TextMarkerTestUtils.getTestType(cas, 25);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 26);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(9, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 27);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(4, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 28);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(8, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 29);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(1, ai.size());
     
     if (cas != null) {
       cas.release();

Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/test/resources/org/apache/uima/textmarker/WildCardTest.tm
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/test/resources/org/apache/uima/textmarker/WildCardTest.tm?rev=1460672&r1=1460671&r2=1460672&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-core/src/test/resources/org/apache/uima/textmarker/WildCardTest.tm (original)
+++ uima/sandbox/textmarker/trunk/textmarker-core/src/test/resources/org/apache/uima/textmarker/WildCardTest.tm Mon Mar 25 14:19:31 2013
@@ -1,7 +1,7 @@
 PACKAGE org.apache.uima;
 
 
-DECLARE T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14;
+DECLARE T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30;
 
 
 CW{-PARTOF(T1)} #{-> MARK(T1,1,2)};
@@ -21,4 +21,31 @@ Document{-> DYNAMICANCHORING(false)};
 CW{-PARTOF(T11)} #{-> MARK(T11,1,2,3)} (PERIOD | COLON);
 #{-PARTOF(T12) -> MARK(T12,1,2)} (PERIOD | COLON);
 CW{-PARTOF(T13)} #{-> MARK(T13,1,2,3)} (SW PERIOD);
-CW{-PARTOF(T14)} #{-> MARK(T14,1,2,3)} ("elements" PERIOD);
\ No newline at end of file
+CW{-PARTOF(T14)} #{-> MARK(T14,1,2,3)} ("elements" PERIOD);
+
+T4{-PARTOF(T15) -> MARK(T15,1,2)} T4;
+
+BLOCK(window) T15{}{
+    CW{-PARTOF(T16)} #{-> MARK(T16,1,2)};
+    Document{-> DYNAMICANCHORING(true)};
+    #{-PARTOF(T17)-> MARK(T17,1,2)} CW;
+    #{-PARTOF(T18) -> MARK(T18,1,2)} PERIOD;
+    Document{-> DYNAMICANCHORING(false)};
+    CW{-PARTOF(T19)} #{-> MARK(T19,1,2,3)} PERIOD;
+    #{-> MARK(T20)} PERIOD;
+    Document{-> DYNAMICANCHORING(true)};
+    #{-PARTOF(T21) -> MARK(T21)} PERIOD;
+    Document{-> DYNAMICANCHORING(false)};
+    #{-> MARK(T22)};
+    # #{-> MARK(T23)};
+    "Text" "Marker" #{-> MARK(T24)} ".";
+    #{-PARTOF(T25)-> MARK(T25)} ".";
+    CW{-PARTOF(T26)} #{-> MARK(T26,1,2,3)} (PERIOD | COLON);
+    #{-PARTOF(T27) -> MARK(T27,1,2)} (PERIOD | COLON);
+    CW{-PARTOF(T28)} #{-> MARK(T28,1,2,3)} (SW PERIOD);
+    CW{-PARTOF(T29)} #{-> MARK(T29,1,2,3)} ("elements" PERIOD);
+}
+
+
+
+

Modified: uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml?rev=1460672&r1=1460671&r2=1460672&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml (original)
+++ uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml Mon Mar 25 14:19:31 2013
@@ -91,17 +91,19 @@ GroupAssignment        -> TypeExpression
 RuleElements           -> RuleElement+
 RuleElement            -> RuleElementType | RuleElementLiteral
                         | RuleElementComposed | RuleElementDisjunctive
+                        | RuleElementWildCard
 RuleElementType        ->  TypeExpression QuantifierPart?
                                          ("{" Conditions?  Actions? "}")?
 RuleElementWithCA      ->  TypeExpression QuantifierPart?
                                             "{" Conditions?  Actions? "}"
 RuleElementLiteral     ->  SimpleStringExpression QuantifierPart?
-                                            "{" Conditions?  Actions? "}"
+                                          ("{" Conditions?  Actions? "}")?
 RuleElementComposed    -> "(" RuleElements ")" QuantifierPart?
-                                            "{" Conditions?  Actions? "}"
+                                          ("{" Conditions?  Actions? "}")?
 RuleElementDisjunctive -> "(" (TypeExpression | SimpleStringExpression)
                         ("|" (TypeExpression | SimpleStringExpression) )+
-                         ")" QuantifierPart? "{" Conditions?  Actions? }"
+                         ")" QuantifierPart? ("{" Conditions?  Actions? "}")?
+RuleElementWildCard    -> "#" ("{" Conditions?  Actions? "}")?
 QuantifierPart         -> "*" | "*?" | "+" | "+?" | "?" | "??"
                         | "[" NumberExpression "," NumberExpression "]"
                         | "[" NumberExpression "," NumberExpression "]?"

Modified: uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml?rev=1460672&r1=1460671&r2=1460672&view=diff
==============================================================================
--- uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml (original)
+++ uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml Mon Mar 25 14:19:31 2013
@@ -234,6 +234,18 @@ Document{-> MARKFAST(Animal, AnimalsList
     <programlisting><![CDATA[(Animal (COMMA | SEMICOLON))+{-> MARK(AnimalEnum,1,2)} Animal;]]></programlisting>
 
     <para>
+      There is a <quote>wild card</quote> rule element, which can be used to skip some text or annotations until the next rule element is able to match.
+    </para>
+    
+    <programlisting><![CDATA[DECLARE Sentence;
+PERIOD #{-> MARK(Sentence)} PERIOD;]]></programlisting>
+    
+    <para>
+      This rule annotates everything between two <quote>PERIOD</quote> annotations with the type <quote>Sentence</quote>. Please note that the resulting 
+      annotations are probably invisible, if they start or end with a filtered type.
+    </para>
+
+    <para>
       Rule elements can contain more than one condition. The rule in the next example tries to identify headlines, which are bold, 
       underlined and end with a colon.
     </para>