You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by pk...@apache.org on 2016/10/26 11:46:52 UTC

svn commit: r1766669 - in /uima/ruta/trunk/ruta-core/src: main/java/org/apache/uima/ruta/RutaStream.java test/java/org/apache/uima/ruta/EmptyDocumentTest.java

Author: pkluegl
Date: Wed Oct 26 11:46:52 2016
New Revision: 1766669

URL: http://svn.apache.org/viewvc?rev=1766669&view=rev
Log:
UIMA-5161
- more robust against empty docs
- added test

Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java

Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java?rev=1766669&r1=1766668&r2=1766669&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java Wed Oct 26 11:46:52 2016
@@ -123,7 +123,6 @@ public class RutaStream extends FSIterat
 
   private Annotation documentEndAnchor;
 
-
   public RutaStream(CAS cas, Type basicType, FilterManager filter, boolean lowMemoryProfile,
           boolean simpleGreedyForComposed, InferenceCrowd crowd) {
     super();
@@ -141,7 +140,8 @@ public class RutaStream extends FSIterat
       documentAnnotationType = getCas().getDocumentAnnotation().getType();
       basicIt.moveToFirst();
       documentBeginAnchor = new RutaOptional(getJCas(), 0, 0);
-      documentEndAnchor = new RutaOptional(getJCas(), documentAnnotation.getEnd(), documentAnnotation.getEnd());
+      documentEndAnchor = new RutaOptional(getJCas(), documentAnnotation.getEnd(),
+              documentAnnotation.getEnd());
     } else {
       documentAnnotation = additionalWindow;
       documentAnnotationType = filter.getWindowType();
@@ -201,7 +201,7 @@ public class RutaStream extends FSIterat
           annotationIndex = cas.getAnnotationIndex(type);
         }
         for (AnnotationFS a : annotationIndex) {
-          if (a.getBegin() != a.getEnd()) {
+          if (a.getBegin() != a.getEnd() || a.equals(cas.getDocumentAnnotation())) {
             allAnnotations.add(a);
           }
         }
@@ -214,13 +214,12 @@ public class RutaStream extends FSIterat
         anchors.add(a.getBegin());
         anchors.add(a.getEnd());
       }
-      if (anchors.size() == 1) {
+      if (anchors.size() == 0) {
+        // empty document
+        createRutaBasic(0, 0);
+      } else if (anchors.size() == 1) {
         Integer first = anchors.pollFirst();
-        RutaBasic newTMB = new RutaBasic(getJCas(), first, first);
-        newTMB.setLowMemoryProfile(lowMemoryProfile);
-        beginAnchors.put(first, newTMB);
-        endAnchors.put(first, newTMB);
-        cas.addFsToIndexes(newTMB);
+        createRutaBasic(first, first);
       } else {
         while (true) {
           Integer first = anchors.pollFirst();
@@ -229,11 +228,7 @@ public class RutaStream extends FSIterat
           }
           Integer second = anchors.first();
           if (first < second) {
-            RutaBasic newTMB = new RutaBasic(getJCas(), first, second);
-            newTMB.setLowMemoryProfile(lowMemoryProfile);
-            beginAnchors.put(first, newTMB);
-            endAnchors.put(second, newTMB);
-            cas.addFsToIndexes(newTMB);
+            createRutaBasic(first, second);
           }
         }
       }
@@ -277,6 +272,15 @@ public class RutaStream extends FSIterat
     }
   }
 
+  private RutaBasic createRutaBasic(int begin, int end) { // creates, registers and indexes one basic
+    RutaBasic newTMB = new RutaBasic(getJCas(), begin, end);
+    newTMB.setLowMemoryProfile(lowMemoryProfile);
+    beginAnchors.put(begin, newTMB); // key by the basic's own begin offset, not a fixed 0
+    endAnchors.put(end, newTMB); // key by the basic's own end offset, not a fixed 0
+    cas.addFsToIndexes(newTMB);
+    return newTMB;
+  }
+
   public void addAnnotation(AnnotationFS annotation, boolean addToIndex,
           AbstractRuleMatch<? extends AbstractRule> creator) {
     addAnnotation(annotation, addToIndex, true, creator);
@@ -964,8 +968,8 @@ public class RutaStream extends FSIterat
     }
   }
 
-  public void assignFeatureValue(FeatureStructure annotation, Feature feature, IRutaExpression value,
-          MatchContext context) {
+  public void assignFeatureValue(FeatureStructure annotation, Feature feature,
+          IRutaExpression value, MatchContext context) {
     if (feature == null) {
       throw new IllegalArgumentException("Not able to assign feature value (e.g., coveredText).");
     }
@@ -990,14 +994,18 @@ public class RutaStream extends FSIterat
         StringArrayFS array = FSCollectionFactory.createStringArray(cas, new String[] { string });
         annotation.setFeatureValue(feature, array);
       }
-    } else if (rangeName.equals(UIMAConstants.TYPE_INTEGER) || rangeName.equals(UIMAConstants.TYPE_LONG)
-            || rangeName.equals(UIMAConstants.TYPE_SHORT) || rangeName.equals(UIMAConstants.TYPE_BYTE)) {
+    } else if (rangeName.equals(UIMAConstants.TYPE_INTEGER)
+            || rangeName.equals(UIMAConstants.TYPE_LONG)
+            || rangeName.equals(UIMAConstants.TYPE_SHORT)
+            || rangeName.equals(UIMAConstants.TYPE_BYTE)) {
       if (value instanceof INumberExpression) {
         INumberExpression numberExpr = (INumberExpression) value;
         int v = numberExpr.getIntegerValue(context, this);
-        if (annotation instanceof AnnotationFS && StringUtils.equals(feature.getShortName(), CAS.FEATURE_BASE_NAME_BEGIN)) {
+        if (annotation instanceof AnnotationFS
+                && StringUtils.equals(feature.getShortName(), CAS.FEATURE_BASE_NAME_BEGIN)) {
           changeBegin((AnnotationFS) annotation, v, context.getRuleMatch());
-        } else if(annotation instanceof AnnotationFS && StringUtils.equals(feature.getShortName(), CAS.FEATURE_BASE_NAME_END)) {
+        } else if (annotation instanceof AnnotationFS
+                && StringUtils.equals(feature.getShortName(), CAS.FEATURE_BASE_NAME_END)) {
           changeEnd((AnnotationFS) annotation, v, context.getRuleMatch());
         } else {
           annotation.setIntValue(feature, v);
@@ -1082,10 +1090,11 @@ public class RutaStream extends FSIterat
       }
     } else if (value instanceof IAnnotationExpression && !range.isPrimitive()) {
       IAnnotationExpression ae = (IAnnotationExpression) value;
-      boolean rangeSubsumesAnnotation = cas.getTypeSystem().subsumes(cas.getAnnotationType(), range);
-      
+      boolean rangeSubsumesAnnotation = cas.getTypeSystem().subsumes(cas.getAnnotationType(),
+              range);
+
       FeatureStructure a = null;
-      if(rangeSubsumesAnnotation) {
+      if (rangeSubsumesAnnotation) {
         a = ae.getAnnotation(context, this);
       } else {
         a = ae.getFeatureStructure(context, this);
@@ -1214,7 +1223,7 @@ public class RutaStream extends FSIterat
 
   public List<AnnotationFS> getBestGuessedAnnotationsAt(AnnotationFS window, Type type) {
     List<AnnotationFS> result = new ArrayList<AnnotationFS>();
-    if(window == null) {
+    if (window == null) {
       return result;
     }
     TypeSystem typeSystem = getCas().getTypeSystem();
@@ -1232,12 +1241,13 @@ public class RutaStream extends FSIterat
     return result;
   }
 
-  public void changeOffsets(AnnotationFS annotation, int begin, int end, AbstractRuleMatch<? extends AbstractRule> modifikator) {
-    if(!(annotation instanceof Annotation)) {
+  public void changeOffsets(AnnotationFS annotation, int begin, int end,
+          AbstractRuleMatch<? extends AbstractRule> modifikator) {
+    if (!(annotation instanceof Annotation)) {
       return;
     }
     Annotation a = (Annotation) annotation;
-    if(annotation.getBegin() == begin && annotation.getEnd() == end) {
+    if (annotation.getBegin() == begin && annotation.getEnd() == end) {
       return;
     }
     // TODO implement incremental reindexing
@@ -1246,20 +1256,22 @@ public class RutaStream extends FSIterat
     a.setEnd(end);
     addAnnotation(a, true, modifikator);
   }
-  
-  public void changeBegin(AnnotationFS annotation, int begin, AbstractRuleMatch<? extends AbstractRule> modifikator) {
+
+  public void changeBegin(AnnotationFS annotation, int begin,
+          AbstractRuleMatch<? extends AbstractRule> modifikator) {
     changeOffsets(annotation, begin, annotation.getEnd(), modifikator);
   }
-  
-  public void changeEnd(AnnotationFS annotation, int end, AbstractRuleMatch<? extends AbstractRule> modifikator) {
+
+  public void changeEnd(AnnotationFS annotation, int end,
+          AbstractRuleMatch<? extends AbstractRule> modifikator) {
     changeOffsets(annotation, annotation.getBegin(), end, modifikator);
   }
 
   public AnnotationFS getVeryFirstBeforeWindow(boolean direction) {
-    if(direction) {
+    if (direction) {
       RutaBasic firstBasicOfAll = getFirstBasicOfAll();
       int begin = firstBasicOfAll.getBegin();
-      if(begin == 0) {
+      if (begin == 0) {
         return documentBeginAnchor;
       } else {
         return getEndAnchor(begin);
@@ -1267,7 +1279,7 @@ public class RutaStream extends FSIterat
     } else {
       RutaBasic lastBasicOfAll = getLastBasicOfAll();
       int end = lastBasicOfAll.getEnd();
-      if(end == cas.getDocumentAnnotation().getEnd()) {
+      if (end == cas.getDocumentAnnotation().getEnd()) {
         return documentEndAnchor;
       } else {
         return getBeginAnchor(end);

Modified: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java
URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java?rev=1766669&r1=1766668&r2=1766669&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/EmptyDocumentTest.java Wed Oct 26 11:46:52 2016
@@ -25,6 +25,7 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.ruta.engine.Ruta;
 import org.apache.uima.ruta.engine.RutaEngine;
 import org.apache.uima.ruta.engine.RutaTestUtils;
 import org.junit.Test;
@@ -37,8 +38,8 @@ public class EmptyDocumentTest {
     String namespace = RuleInference1Test.class.getPackage().getName().replaceAll("\\.", "/");
     CAS cas = null;
     try {
-      cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION, namespace + "/"
-              + "EmptyDocumentTest.txt", 50);
+      cas = RutaTestUtils.process(namespace + "/" + name + RutaEngine.SCRIPT_FILE_EXTENSION,
+              namespace + "/" + "EmptyDocumentTest.txt", 50);
 
     } catch (Exception e) {
       e.printStackTrace();
@@ -51,10 +52,19 @@ public class EmptyDocumentTest {
     assertEquals(1, ai.size());
     iterator = ai.iterator();
     assertEquals("", iterator.next().getCoveredText());
-    
+
     if (cas != null) {
       cas.release();
     }
 
   }
+
+  @Test
+  public void test2() throws Exception {
+    CAS cas = RutaTestUtils.getCAS("");
+    Ruta.apply(cas, "Document{IS(uima.tcas.DocumentAnnotation) -> T1};");
+    RutaTestUtils.assertAnnotationsEquals(cas, 1, 1, "");
+    // NOTE(review): presumably expects exactly one T1 with empty covered text on an empty doc - confirm
+  }
+
 }