You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2017/01/13 22:01:14 UTC

svn commit: r1778669 - /uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java

Author: schor
Date: Fri Jan 13 22:01:14 2017
New Revision: 1778669

URL: http://svn.apache.org/viewvc?rev=1778669&view=rev
Log:
[UIMA-5250] handle re-computing things when moveToFirst/Last/FS.

Modified:
    uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java

Modified: uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java
URL: http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java?rev=1778669&r1=1778668&r2=1778669&view=diff
==============================================================================
--- uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java (original)
+++ uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java Fri Jan 13 22:01:14 2017
@@ -25,6 +25,7 @@ import java.util.Comparator;
 import java.util.NoSuchElementException;
 import java.util.function.Predicate;
 
+import org.apache.uima.cas.CASRuntimeException;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.cas.Type;
@@ -68,7 +69,14 @@ import org.apache.uima.jcas.tcas.Annotat
  * 
  *   - the traditional one uses the standard comparator for annotations: begin (ascending), end (descending) and
  *     type priority ordering
- *   - the 2nd style uses just a begin value and an end value, no type priority ordering.  
+ *   - the 2nd style uses just a begin value and an end value, no type priority ordering. 
+ *   
+ * Interaction with copy-on-write concurrent modification avoidance
+ *   As with other iterators, the moveToFirst/Last/feature-structure-position "resets" the underlying
+ *     iterators to match their current indexes.  
+ *   This implementation maintains local data: the list form and the isEmpty flag.  These would
+ *     also need recomputing for the above operations, if isIndexesHaveBeenUpdated() is true. 
+ *   
  */
 public class Subiterator<T extends AnnotationFS> implements LowLevelIterator<T> {
 
@@ -103,12 +111,26 @@ public class Subiterator<T extends Annot
   private final boolean isSkipEquals;
   private final BoundsUse boundsUse;
   
-  private final boolean isEmpty;
+  /**
+   * isEmpty is an expensive calculation, involving potentially traversing all the iterators in a particular type hierarchy
+   * It is only done:
+   *   - at init time
+   *   - at moveToFirst/last/FS - these can cause regenerating the copy-on-write objects via the getNonNullCow call
+   *       if no new nonNullCow was obtained, no need to recalculate empty
+   */
+  private boolean isEmpty;
   
   private int prevEnd = 0;  // for unambiguous iterators
+  
+  /**
+   * list form is recalculated at moveToFirst/last/fs, same as isEmpty
+   */
   private boolean isListForm = false;
   
-  private final int startId;
+  /**
+   * startId is recalculated at moveToFirst/last/fs, same as isEmpty
+   */
+  private int startId;
 
   private final int boundBegin;
   private final int boundEnd;
@@ -165,11 +187,11 @@ public class Subiterator<T extends Annot
     this.isTypePriority = (boundsUse == BoundsUse.covering) ? false : isTypePriority;
 
     this.annotationComparator = annotationComparator;
-    this.jcas = (JCasImpl) ((LowLevelIterator<T>)it).ll_getIndex().getCasImpl().getJCas();
+    this.jcas = (JCasImpl) ll_getIndex().getCasImpl().getJCas();
     
     if (boundsUse == BoundsUse.covering) {
       // compute start position and isEmpty setting
-      int span = ((LowLevelIterator<?>)it).ll_maxAnnotSpan();
+      int span = ((LowLevelIterator<?>)it).ll_maxAnnotSpan();  // an optimization, the largest end-begin annotation
       int begin = boundEnd - span;
       if (begin > boundBegin) {
         makeInvalid();
@@ -187,9 +209,13 @@ public class Subiterator<T extends Annot
       coveringStartPos = coveringEndPos = null;
     }
     
+    moveToStartSetEmptyAndId();
+  }
+  
+  private void moveToStartSetEmptyAndId() {
     moveToStart();
     isEmpty = !isValid();
-    startId = isValid() ? get()._id() : 0;
+    startId = isValid() ? get()._id() : 0;    
   }
    
   /** 
@@ -236,7 +262,7 @@ public class Subiterator<T extends Annot
     
     this.annotationComparator = annotationComparator;
     this.isEmpty = isEmpty;
-    this.jcas = (JCasImpl) ((LowLevelIterator<T>)it).ll_getIndex().getCasImpl().getJCas();
+    this.jcas = (JCasImpl) ll_getIndex().getCasImpl().getJCas();
     this.coveringStartPos = coveringStartPos;
     this.coveringEndPos = coveringEndPos;
   }
@@ -326,7 +352,7 @@ public class Subiterator<T extends Annot
   private boolean skipOverBoundingAnnot(boolean forward) {
     boolean moved = false;
     if (isBounded && it.isValid()) { // skip if not bounded or iterator is invalid
-      while (equalToBounds(it.get())) {
+      while (equalToBounds(it.getNvc())) {
         moved = true;
         if (forward) {
           it.moveToNextNvc();
@@ -378,8 +404,21 @@ public class Subiterator<T extends Annot
   }
   
   /**
-   * when covering, skip items where the end < bounds end
-   * may result in iterator becoming invalid
+   * when covering (which is different from coveredBy and sameBeginEnd,
+   *  means get all annotations which span the bounds), 
+   *  skip items where the end < bounds end,
+   *    because those items don't span the bounding FS
+   * 
+   * Edge case: item with same begin and end is considered "covering"
+   *              but subject to exclusion based on equalToBounds skipping
+   * Cases:
+   *   position of begin is after span begin - mark "invalid"
+   *   position of begin is before or == span begin:
+   *     position of end is == or > span end: OK
+   *     position of end is < span end:
+   *     if backward: moveToPrev until get valid position or run out.  if run out, mark invalid
+   *     if forward: move to next until position of begin advances but is <= span begin.
+   *      
    * @param forward
    */
   private void adjustForCovering(boolean forward) {
@@ -387,10 +426,14 @@ public class Subiterator<T extends Annot
       return;
     }
     // moveTo may move to invalid position
+    // if the cur pos item has a begin beyond the bound, it cannot be a covering annotation
     if (it.getNvc().getBegin() > this.boundBegin) {
       makeInvalid();
       return;
     }
+    
+    // skip until get an FS whose end >= boundEnd, it is a candidate.
+    //   stop if begin gets too large (going forwards)
     while (it.isValid() && (it.getNvc().getEnd() < this.boundEnd)) {
       if (forward) {
         it.moveToNextNvc();
@@ -450,10 +493,10 @@ public class Subiterator<T extends Annot
     return false;
   }
     
-  private boolean isBeginEndTypeEqualToBound(Annotation fs) {
-    return fs.getBegin() == boundBegin && fs.getEnd() == boundEnd &&
-            (!isPositionUsesType || fs.getType() == boundType);
-  }
+//  private boolean isBeginEndTypeEqualToBound(Annotation fs) {
+//    return fs.getBegin() == boundBegin && fs.getEnd() == boundEnd &&
+//            (!isPositionUsesType || fs.getType() == boundType);
+//  }
   
   private boolean isBeginEndTypeEqual(Annotation fs, int begin, int end, Type type) {
     return fs.getBegin() == begin && fs.getEnd() == end &&
@@ -531,7 +574,7 @@ public class Subiterator<T extends Annot
     if (isListForm) {
       return (T) this.list.get(this.pos);
     } else {
-      return (T)it.get();
+      return (T)it.getNvc();
     }
   }
 
@@ -554,6 +597,10 @@ public class Subiterator<T extends Annot
       return;
     }
    
+    if (isEmpty) {
+      return;
+    }
+    
     it.moveToNextNvc();
     if (!isAmbiguous) {               // skip until start > prev end
       movePastPrevAnnotation();
@@ -609,6 +656,10 @@ public class Subiterator<T extends Annot
       --this.pos;
       return;
     }
+    
+    if (isEmpty) {
+      return;
+    }
 
     if (!isAmbiguous) {
       // Convert to list form
@@ -651,12 +702,31 @@ public class Subiterator<T extends Annot
    */
   @Override
   public void moveToFirst() {
+    if (isIndexesHaveBeenUpdated()) {
+      resetList();
+      moveToStartSetEmptyAndId();
+      return;
+    }    
+  
+    if (isEmpty) {
+      return;
+    }
+    
     if (isListForm) {
       this.pos = 0;
     } else {
       moveToStart();
     }
   }
+  
+  private void resetList() {
+    if (isListForm) {
+      isListForm = false;
+      if (list != null) {
+        list.clear();
+      };      
+    }
+  }
 
   /*
    * This operation is relatively expensive one time for unambiguous
@@ -665,9 +735,14 @@ public class Subiterator<T extends Annot
    */
   @Override
   public void moveToLast() {
+    if (isIndexesHaveBeenUpdated()) {
+      moveToFirst(); // done to recompute is empty, reset list, recompute bounds, etc.
+    }
+    
     if (isEmpty) {
       return;
     }
+    
     if (!isAmbiguous && !isListForm) {
       convertToListForm();
     }
@@ -710,7 +785,7 @@ public class Subiterator<T extends Annot
    * @param goBackwards when true, continue to backup
    */
   private void moveToJustPastBoundsAndBackup(int begin, int end, Predicate<Annotation> goBackwards) {
-    it.moveTo(new Annotation (jcas, begin, end));
+    it.moveTo(new Annotation(jcas, begin, end));
     if (it.isValid()) {
       Annotation a = it.getNvc();
       while (goBackwards.test(a)) {
@@ -766,12 +841,19 @@ public class Subiterator<T extends Annot
    */
   @Override
   public void moveTo(FeatureStructure fs) {
+    if (isIndexesHaveBeenUpdated()) {
+      moveToFirst(); // done to recompute is empty, reset list, recompute bounds, etc.
+    }
+    
     if (isEmpty) return;
     
     Annotation fsa = (Annotation) fs;
     if (!isAmbiguous && !isListForm) {  // unambiguous must be in list form
       convertToListForm();
     }
+    
+    // need to handle 4 cases of not-covered, covered-by, covering, and same
+    
     if (isListForm) {
       // W A R N I N G - don't use it.xxx forms here, they don't work for list form
       // Don't need strict, skip-over-boundary, or type priority adjustments, 
@@ -821,26 +903,47 @@ public class Subiterator<T extends Annot
       }
     } else {
       // not list form
-      // is ambiguous, may be strict, always bounded
+      // is ambiguous, may be strict.
+      // Always bounded (if unbounded, that's only when subiterator is being used to 
+      // implement "unambiguous", and that mode requires the "list" form above.)
+      // can be one of 3 bounds: coveredBy, covering, and sameBeginEnd.
       it.moveTo(fs);  // may move before, within, or after bounds
-      adjustForTypePriorityBoundingBegin(0);
-      adjustForStrictOrCoveringAndBoundSkip(true);
+      adjustForTypePriorityBoundingBegin(0);  // does nothing if using type priorities
+      adjustForStrictOrCoveringAndBoundSkip(true); // "covering" case adjustments
       
       if (it.isValid()) {
-        // mark invalid if end up outside of bounds after adjustments
+        // if beyond bound, mark invalid 
+        // if before bound, move to first
         Annotation a = it.get();
-        if (a.getBegin() > boundEnd) {
-          makeInvalid();
-        } else if (isTypePriority && annotationComparator.compare(a,  boundingAnnot) < 0) {
-          makeInvalid();
-        } else if (a.getBegin() < boundBegin || 
-                   a.getBegin() > boundEnd ||
-                   (a.getBegin() == boundBegin && a.getEnd() > boundEnd) ||
-                   (isPositionUsesType && 
-                       a.getBegin() == boundBegin && 
-                       a.getEnd() == boundEnd &&
-                       a.getType() != boundType)) {
-          makeInvalid();
+        
+        switch (boundsUse) {
+        case coveredBy:
+        case sameBeginEnd:
+          if (a.getBegin() > boundEnd) {
+            makeInvalid();
+          } else if (isTypePriority && annotationComparator.compare(a,  boundingAnnot) < 0) {
+            // with type priority, position is before bound.
+            moveToFirst();
+          } else { // is not type priority case - see if too low
+            final int b = a.getBegin();
+            final int e = a.getEnd();
+            if (b < boundBegin ||  
+                ( b == boundBegin && e > boundEnd)) {
+              moveToFirst();
+            } else if (isPositionUsesType && 
+                       b == boundBegin && 
+                       e == boundEnd &&
+                       a.getType() != boundType) {
+              /** Subiterator {0} has bound type: {1}, begin: {2}, end: {3}, for coveredBy, not using type priorities, matching FS with same begin end and different type {4}, cannot order these*/
+                     throw new CASRuntimeException(CASRuntimeException.SUBITERATOR_AMBIGUOUS_POSITION_DIFFERENT_TYPES, 
+                         this, boundType.getName(), b, e, a.getType().getName()); 
+                   }
+          }
+          break;
+        case covering:
+          break;
+        default:
+          Misc.internalError();
         }
       }
     }
@@ -873,6 +976,11 @@ public class Subiterator<T extends Annot
     return copy;
   }
 
+  
+  /**
+   * This is unsupported because its expensive to compute
+   * in many cases, and may not be needed.
+   */
   @Override
   public int ll_indexSize() {
     throw new UnsupportedOperationException();
@@ -880,6 +988,9 @@ public class Subiterator<T extends Annot
 
   @Override
   public int ll_maxAnnotSpan() {
+    if (isEmpty) {
+      return 0;
+    }
     return ((LowLevelIterator)it).ll_maxAnnotSpan();
   }
 
@@ -893,6 +1004,14 @@ public class Subiterator<T extends Annot
     it.moveToPrevious();
   }
 
+  /**
+   * Used to determine when some precomputed things (e.g. listform) need to be recalculated
+   * @return true if one or more of the underlying indexes of the underlying iterator have been updated
+   */
+  @Override
+  public boolean isIndexesHaveBeenUpdated() {
+    return ((LowLevelIterator<?>)it).isIndexesHaveBeenUpdated();
+  }
 
 
 //  // makes an extra copy of the items