You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2015/10/30 20:24:18 UTC
svn commit: r1711542 - in
/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas:
impl/FSsTobeAddedback.java impl/Subiterator.java text/AnnotationIndex.java
Author: schor
Date: Fri Oct 30 19:24:17 2015
New Revision: 1711542
URL: http://svn.apache.org/viewvc?rev=1711542&view=rev
Log:
no Jira - fix several error messages whose string had a single quote - these need to be escaped, else the whole rest of the line is presumed quoted and variable substitution into {nnn} fails. Found these using a regex search on all .properties files
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSsTobeAddedback.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationIndex.java
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSsTobeAddedback.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSsTobeAddedback.java?rev=1711542&r1=1711541&r2=1711542&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSsTobeAddedback.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/FSsTobeAddedback.java Fri Oct 30 19:24:17 2015
@@ -68,7 +68,7 @@ abstract class FSsTobeAddedback implemen
protected void log(int fsAddr, FSIndexRepositoryImpl view, int count) {
if (SHOW) {
- log(view, fsAddr);
+ log(fsAddr, view);
System.out.format(", count = %d%n", count);
}
}
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java?rev=1711542&r1=1711541&r2=1711542&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Subiterator.java Fri Oct 30 19:24:17 2015
@@ -84,6 +84,8 @@ public class Subiterator<T extends Annot
private final boolean isBounded;
private boolean isListForm = false;
private final boolean isBeginEndCompare;
+
+ private final int startId;
/**
@@ -129,6 +131,7 @@ public class Subiterator<T extends Annot
this.fsIndexRepo = fsIndexRepo;
moveToStart();
+ startId = (isValid()) ? ((FeatureStructureImpl)get()).getAddress() : 0;
}
@@ -140,7 +143,7 @@ public class Subiterator<T extends Annot
*
*/
private void convertToListForm() {
- moveToStart(); // moves to the start annotation, including moving past equals for annot style, and accomodating strict
+ moveToStart(); // moves to the start annotation, including moving past equals for annot style, and accommodating strict
this.list = new ArrayList<T>();
while (isValid()) {
prevEnd = it.getEnd();
@@ -258,6 +261,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#isValid()
*/
+ @Override
public boolean isValid() {
return isListForm ?
(this.pos >= 0) && (this.pos < this.list.size()) :
@@ -273,6 +277,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#get()
*/
+ @Override
public T get() throws NoSuchElementException {
if (isListForm) {
if ((this.pos >= 0) && (this.pos < this.list.size())) {
@@ -291,6 +296,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#moveToNext()
*/
+ @Override
public void moveToNext() {
if (isListForm) {
++this.pos;
@@ -305,7 +311,12 @@ public class Subiterator<T extends Annot
}
adjustForStrictForward();
- setPrevEnd();
+ if (it.isValid() && (it.get().getBegin() > boundingEnd)) {
+ it.moveToLast();
+ it.moveToNext(); // mark invalid
+ } else {
+ setPrevEnd();
+ }
}
/*
@@ -313,6 +324,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#moveToPrevious()
*/
+ @Override
public void moveToPrevious() {
if (isListForm) {
--this.pos;
@@ -328,8 +340,13 @@ public class Subiterator<T extends Annot
return;
}
- it.moveToPrevious();
- adjustForStrictBackward();
+ if (isValid() && ((FeatureStructureImpl)it.get()).getAddress() == startId) {
+ it.moveToFirst();
+ it.moveToPrevious(); // make it invalid
+ } else {
+ it.moveToPrevious();
+ adjustForStrictBackward();
+ }
}
/*
@@ -337,6 +354,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#moveToFirst()
*/
+ @Override
public void moveToFirst() {
if (isListForm) {
this.pos = 0;
@@ -350,6 +368,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#moveToLast()
*/
+ @Override
public void moveToLast() {
if (isListForm) {
this.pos = this.list.size() - 1;
@@ -392,6 +411,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#moveTo(org.apache.uima.cas.FeatureStructure)
*/
+ @Override
public void moveTo(FeatureStructure fs) {
AnnotationFS fsa = (AnnotationFS) fs;
if (!ambiguous && !isListForm) { // unambiguous must be in list form
@@ -436,10 +456,7 @@ public class Subiterator<T extends Annot
// is ambiguous, may be strict, always bounded (either by annotation or begin / end
it.moveTo(fs);
if (isBeginEndCompare) {
- it.moveTo(fs);
adjustAfterMoveToForBeginEndComparator(fsa);
- } else { // is begin/end bounding
- it.moveTo(fs);
}
adjustForStrictForward();
}
@@ -449,7 +466,7 @@ public class Subiterator<T extends Annot
* @see org.apache.uima.cas.impl.FSIteratorImplBase#moveTo(java.util.Comparator)
*/
@Override
- <TT extends AnnotationFS> void moveTo(final int begin, final int end) {
+ void moveTo(final int begin, final int end) {
if (!ambiguous && !isListForm) { // unambiguous must be in list form
convertToListForm();
}
@@ -496,6 +513,7 @@ public class Subiterator<T extends Annot
*
* @see org.apache.uima.cas.FSIterator#copy()
*/
+ @Override
public FSIterator<T> copy() {
Subiterator<T> copy = new Subiterator<T>(
this.it, this.boundingAnnotation, this.boundingBegin, this.boundingEnd, this.ambiguous, this.strict, this.isBounded, this.fsIndexRepo);
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationIndex.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationIndex.java?rev=1711542&r1=1711541&r2=1711542&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationIndex.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/text/AnnotationIndex.java Fri Oct 30 19:24:17 2015
@@ -78,28 +78,56 @@ public interface AnnotationIndex<T exten
FSIterator<T> iterator(boolean ambiguous);
/**
- * Return a subiterator whose bounds are defined by the input annotation.
+ * Return a subiterator whose bounds are defined by the <code>annot</code>.
* <p>
- * The subiterator will return annotations <code>b</code> s.t. <code>annot < b</code>,
- * <code>annot.getBegin() <= b.getBegin()</code> and
- * <code>annot.getEnd() >= b.getEnd()</code>. For annotations x, y, <code>x < y</code>
- * here is to be interpreted as "x comes before y in the index", according to the rules defined in
+ * The <code>annot</code> is used for 3 purposes:</p>
+ * <ul><li>It is used to compute the position in the index where the iteration starts.</li>
+ * <li>It is used to compute the end point where the iterator stops when moving forward.</li>
+ * <li>It is used to specify which annotations will be skipped while iterating.</li>
+ * </ul>
+ * <p>The starting position is computed by first finding a position
+ * whose annotation compares equal with the <code>annot</code> (this might be one of several), and then
+ * advancing until reaching a position where the annotation there is not equal to the
+ * <code>annot</code>.
+ * If no item in the index is equal (meaning it has the same begin, the same end, and is the same type
+ * as the <code>annot</code>)
+ * then the iterator is positioned to the first annotation
+ * which is greater than the <code>annot</code>, or
+ * if there are no annotations greater than the <code>annot</code>, the iterator is marked invalid.
+ * </p>
+ * <p>The iterator will stop (become invalid) when
+ * <ul><li>it runs out of items in the index going forward or backwards, or</li>
+ * <li>while moving forward, it reaches a point where the annotation at that position has a
+ * start that is beyond the <code>annot's</code> end position, or</li>
+ * <li>while moving backwards, it reaches a position in front of its original starting position.</li>
+ * </ul>
+ * <p>While iterating, it operates like a <code>strict</code> iterator;
+ * annotations whose end positions are > the end position of <code>annot</code> are skipped.
+ * </p>
+ *
+ * <p>This is equivalent to returning annotations <code>b</code> such that</p>
+ * <ul><li><code>annot < b</code>, and</li>
+ * <li><code>annot.getEnd() >= b.getBegin()</code>, skipping <code>b's</code>
+ * whose end position is > annot.getEnd().</li>
+ * </ul>
+ * <p>For annotations x and y, the relation <code>x < y</code>
+ * here is interpreted as "x comes before y in the index", according to the rules defined in
* the description of {@link AnnotationIndex this class}.
* </p>
* <p>
* This definition implies that annotations <code>b</code> that have the same span as
- * <code>annot</code> may or may not be returned by the subiterator. This is determined by the
- * type priorities; the subiterator will only return such an annotation <code>b</code> if the
+ * <code>annot</code> may or may not be returned by the subiterator, depending on type priorities;
+ * the subiterator will only return such an annotation <code>b</code> if the
* type of <code>annot</code> precedes the type of <code>b</code> in the type priorities
* definition. If you have not specified the priority, or if <code>annot</code> and
* <code>b</code> are of the same type, then the behavior is undefined.
* </p>
* <p>
- * For example, if you an annotation <code>s</code> of type <code>Sentence</code> and an
- * annotation <code>p</code> of type <code>Paragraph</code> that have the same span, and you
+ * For example, if you have an annotation <code>S</code> of type <code>Sentence</code> and an
+ * annotation <code>P</code> of type <code>Paragraph</code> that have the same span, and you
* have defined <code>Paragraph</code> before <code>Sentence</code> in your type priorities,
- * then <code>subiterator(p)</code> will give you an iterator that will return <code>s</code>,
- * but <code>subiterator(s)</code> will give you an iterator that will NOT return <code>p</code>.
+ * then <code>subiterator(P)</code> will give you an iterator that will return <code>S</code>,
+ * but <code>subiterator(S)</code> will give you an iterator that will NOT return <code>P</code>.
* The intuition is that a Paragraph is conceptually larger than a Sentence, as defined by the
* type priorities.
* </p>
@@ -116,19 +144,52 @@ public interface AnnotationIndex<T exten
FSIterator<T> subiterator(AnnotationFS annot);
/**
- * Return a subiterator whose bounds are defined by the input annotation.
+ * Return a subiterator whose bounds are defined by the <code>annot</code>.
* <p>
- * A <code>strict</code> subiterator is defined as follows: it will return annotations
- * <code>b</code> s.t. <code>annot < b</code>,
- * <code>annot.getBegin() <= b.getBegin()</code> and
- * <code>annot.getEnd() >= b.getEnd()</code>. For annotations x,y, <code>x < y</code>
+ * The <code>annot</code> is used in 2 or 3 ways.</p>
+ * <ul><li>It specifies the left-most position in the index where the iteration starts.</li>
+ * <li>It specifies an end point where the iterator stops.</li>
+ * <li>If <code>strict</code> is specified, the end point also specifies which annotations
+ * will be skipped while iterating.</li>
+ * </ul>
+ * <p>The starting position is computed by first finding the position
+ * whose annotation compares equal with the <code>annot</code>, and then
+ * advancing until reaching a position where the annotation there is not equal to the
+ * <code>annot</code>.
+ * If no item in the index is equal (meaning it has the same begin, the same end, and is the same type
+ * as the <code>annot</code>)
+ * then the iterator is positioned to the first annotation
+ * which is greater than the <code>annot</code>, or
+ * if there are no annotations greater than the <code>annot</code>, the iterator is marked invalid.
+ * </p>
+ * <p>The iterator will stop (become invalid) when
+ * <ul><li>it runs out of items in the index going forward or backwards, or</li>
+ * <li>while moving forward, it reaches a point where the annotation at that position has a
+ * start that is beyond the <code>annot's</code> end position, or</li>
+ * <li>while moving backwards, it reaches a position in front of its original starting position</li>
+ * </ul>
+ * </p>
+ * <p>Ignoring <code>strict</code> and <code>ambiguous</code> for a moment,
+ * this is equivalent to returning annotations <code>b</code> such that</p>
+ * <ul><li><code>annot < b</code> using the standard annotation comparator, and</li>
+ * <li><code>annot.getEnd() >= b.getBegin()</code>, and also bounded by the index itself.</li>
+ * </ul>
+ * <p>
+ * A <code>strict</code> subiterator skips annotations where
+ * <code>annot.getEnd() < b.getEnd()</code>.
+ * </p>
+ * <p>
+ * An <code>ambiguous = false</code> specification produces an unambiguous iterator, which
+ * computes a subset of the annotations, going forward, such that annotations whose <code>begin</code>
+ * is contained within the previous returned annotation's span, are skipped.
+ * </p>
+ * <p>For annotations x,y, <code>x < y</code>
* here is to be interpreted as "x comes before y in the index", according to the rules defined in
* the description of {@link AnnotationIndex this class}.
* <p>
- * If <code>strict</code> is set to <code>false</code>, the boundary conditions are relaxed
- * as follows: return annotations <code>b</code> s.t. <code>annot < b</code> and
- * <code>annot.getBegin() <= b.getBegin() <= annot.getEnd()</code>. The resulting
- * iterator may also be disambiguated.
+ * If <code>strict = true</code> then annotations whose end is > <code>annot.getEnd()</code>
+ * are skipped.
+ * </p>
* <p>
* These definitions imply that annotations <code>b</code> that have the same span as
* <code>annot</code> may or may not be returned by the subiterator. This is determined by the
@@ -138,11 +199,11 @@ public interface AnnotationIndex<T exten
* <code>b</code> are of the same type, then the behavior is undefined.
* </p>
* <p>
- * For example, if you an annotation <code>s</code> of type <code>Sentence</code> and an
- * annotation <code>p</code> of type <code>Paragraph</code> that have the same span, and you
+ * For example, if you have an annotation <code>S</code> of type <code>Sentence</code> and an
+ * annotation <code>P</code> of type <code>Paragraph</code> that have the same span, and you
* have defined <code>Paragraph</code> before <code>Sentence</code> in your type priorities,
- * then <code>subiterator(p)</code> will give you an iterator that will return <code>s</code>,
- * but <code>subiterator(s)</code> will give you an iterator that will NOT return <code>p</code>.
+ * then <code>subiterator(P)</code> will give you an iterator that will return <code>S</code>,
+ * but <code>subiterator(S)</code> will give you an iterator that will NOT return <code>P</code>.
* The intuition is that a Paragraph is conceptually larger than a Sentence, as defined by the
* type priorities.
* </p>
@@ -152,7 +213,7 @@ public interface AnnotationIndex<T exten
* @param ambiguous
* If set to <code>false</code>, resulting iterator will be unambiguous.
* @param strict
- * Controls if annotations that overlap to the right are considered in or out.
+ * true means to exclude annotations that overlap annot on the right
* @return A subiterator.
*/
FSIterator<T> subiterator(AnnotationFS annot, boolean ambiguous, boolean strict);