You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2019/01/01 18:05:46 UTC

lucene-solr:master: LUCENE-8612: Add Intervals.extend()

Repository: lucene-solr
Updated Branches:
  refs/heads/master 752989fd7 -> 2532a5d31


LUCENE-8612: Add Intervals.extend()


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2532a5d3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2532a5d3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2532a5d3

Branch: refs/heads/master
Commit: 2532a5d31c8e4086120d74c8fb83102b8533777c
Parents: 752989f
Author: Alan Woodward <ro...@apache.org>
Authored: Tue Jan 1 17:50:20 2019 +0000
Committer: Alan Woodward <ro...@apache.org>
Committed: Tue Jan 1 17:55:19 2019 +0000

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../intervals/DifferenceIntervalFunction.java   | 101 ----------------
 .../intervals/ExtendedIntervalIterator.java     | 118 +++++++++++++++++++
 .../intervals/ExtendedIntervalsSource.java      |  83 +++++++++++++
 .../search/intervals/IntervalFunction.java      |  24 +++-
 .../search/intervals/IntervalIterator.java      |   6 +-
 .../search/intervals/IntervalMatches.java       |   4 +-
 .../lucene/search/intervals/Intervals.java      |  24 +++-
 .../search/intervals/TestIntervalQuery.java     |   6 +
 .../lucene/search/intervals/TestIntervals.java  |  32 +++++
 10 files changed, 290 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7ce6f46..e8820cb 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -105,6 +105,10 @@ API Changes
 * LUCENE-8609: Remove IndexWriter#numDocs() and IndexWriter#maxDoc() in favor
   of IndexWriter#getDocStats(). (Simon Willnauer)
 
+* LUCENE-8612: Intervals.extend() treats an interval as if it covered a wider
+  span than it actually does, allowing users to force minimum gaps between
+  intervals in a phrase. (Alan Woodward)
+
 Changes in Runtime Behavior
 
 * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java
index def1d03..8479716 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/DifferenceIntervalFunction.java
@@ -18,7 +18,6 @@
 package org.apache.lucene.search.intervals;
 
 import java.io.IOException;
-import java.util.Objects;
 
 /**
  * A function that takes two interval iterators and combines them to produce a third,
@@ -160,106 +159,6 @@ abstract class DifferenceIntervalFunction {
     }
   }
 
-  /**
-   * Filters the minuend iterator so that only intervals that do not occur within a set number
-   * of positions of intervals from the subtrahend iterator are returned
-   */
-  static class NotWithinFunction extends DifferenceIntervalFunction {
-
-    private final int positions;
-
-    NotWithinFunction(int positions) {
-      this.positions = positions;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (this == o) return true;
-      if (o == null || getClass() != o.getClass()) return false;
-      NotWithinFunction that = (NotWithinFunction) o;
-      return positions == that.positions;
-    }
-
-    @Override
-    public String toString() {
-      return "NOTWITHIN/" + positions;
-    }
-
-    @Override
-    public int hashCode() {
-      return Objects.hash(positions);
-    }
-
-    @Override
-    public IntervalIterator apply(IntervalIterator minuend, IntervalIterator subtrahend) {
-      IntervalIterator notWithin = new IntervalIterator() {
-
-        @Override
-        public int docID() {
-          return subtrahend.docID();
-        }
-
-        @Override
-        public int nextDoc() throws IOException {
-          positioned = false;
-          return subtrahend.nextDoc();
-        }
-
-        @Override
-        public int advance(int target) throws IOException {
-          positioned = false;
-          return subtrahend.advance(target);
-        }
-
-        @Override
-        public long cost() {
-          return subtrahend.cost();
-        }
-
-        boolean positioned = false;
-
-        @Override
-        public int start() {
-          if (positioned == false)
-            return -1;
-          int start = subtrahend.start();
-          return Math.max(0, start - positions);
-        }
-
-        @Override
-        public int end() {
-          if (positioned == false)
-            return -1;
-          int end = subtrahend.end();
-          int newEnd = end + positions;
-          if (newEnd < 0) // check for overflow
-            return Integer.MAX_VALUE;
-          return newEnd;
-        }
-
-        @Override
-        public int gaps() {
-          throw new UnsupportedOperationException();
-        }
-
-        @Override
-        public int nextInterval() throws IOException {
-          if (positioned == false) {
-            positioned = true;
-          }
-          return subtrahend.nextInterval();
-        }
-
-        @Override
-        public float matchCost() {
-          return subtrahend.matchCost();
-        }
-
-      };
-      return NON_OVERLAPPING.apply(minuend, notWithin);
-    }
-  }
-
   private static class NotContainingIterator extends RelativeIterator {
 
     private NotContainingIterator(IntervalIterator minuend, IntervalIterator subtrahend) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalIterator.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalIterator.java
new file mode 100644
index 0000000..843b113
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalIterator.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+
+/**
+ * Wraps an IntervalIterator and extends the bounds of its intervals
+ *
+ * Useful for specifying gaps in an ordered iterator; if you want to match
+ * `a b [2 spaces] c`, you can search for phrase(a, extend(b, 0, 2), c)
+ *
+ * An interval whose extended start would fall below position 0 is not
+ * skipped; its start is clamped to 0
+ */
+class ExtendedIntervalIterator extends IntervalIterator {
+
+  private final IntervalIterator in; // the wrapped iterator; all doc-level calls delegate to it
+  private final int before; // positions subtracted from the delegate's start
+  private final int after; // positions added to the delegate's end
+
+  private boolean positioned; // true once nextInterval() has been called on the current document
+
+  /**
+   * Create a new ExtendedIntervalIterator
+   * @param in      the iterator to wrap
+   * @param before  the number of positions to extend before the delegated interval
+   * @param after   the number of positions to extend beyond the delegated interval
+   */
+  ExtendedIntervalIterator(IntervalIterator in, int before, int after) {
+    this.in = in;
+    this.before = before;
+    this.after = after;
+  }
+
+  @Override
+  public int start() {
+    if (positioned == false) { // nextInterval() has not been called since the last nextDoc()/advance()
+      return -1;
+    }
+    int start = in.start();
+    if (start == NO_MORE_INTERVALS) { // exhausted: pass the sentinel through unshifted
+      return NO_MORE_INTERVALS;
+    }
+    return Math.max(0, start - before); // clamp so the extended start never drops below position 0
+  }
+
+  @Override
+  public int end() {
+    if (positioned == false) { // nextInterval() has not been called since the last nextDoc()/advance()
+      return -1;
+    }
+    int end = in.end();
+    if (end == NO_MORE_INTERVALS) { // exhausted: pass the sentinel through unshifted
+      return NO_MORE_INTERVALS;
+    }
+    end += after;
+    if (end < 0 || end == NO_MORE_INTERVALS) {
+      // int overflow, or the sum landed exactly on the exhausted sentinel
+      end = NO_MORE_INTERVALS - 1; // stay just below the sentinel so the interval still reads as live
+    }
+    return end;
+  }
+
+  @Override
+  public int gaps() {
+    return in.gaps(); // delegate's value as-is; the extensions are not added to it
+  }
+
+  @Override
+  public int nextInterval() throws IOException {
+    positioned = true;
+    in.nextInterval();
+    return start(); // report the extended start (or the sentinel), not the delegate's raw return value
+  }
+
+  @Override
+  public float matchCost() {
+    return in.matchCost();
+  }
+
+  @Override
+  public int docID() {
+    return in.docID();
+  }
+
+  @Override
+  public int nextDoc() throws IOException {
+    positioned = false; // new document: start()/end() report -1 until nextInterval() is called
+    return in.nextDoc();
+  }
+
+  @Override
+  public int advance(int target) throws IOException {
+    positioned = false; // new document: start()/end() report -1 until nextInterval() is called
+    return in.advance(target);
+  }
+
+  @Override
+  public long cost() {
+    return in.cost();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalsSource.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalsSource.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalsSource.java
new file mode 100644
index 0000000..d4e3bfa
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/ExtendedIntervalsSource.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.intervals;
+
+import java.io.IOException;
+import java.util.Objects;
+import java.util.Set;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MatchesIterator;
+
+class ExtendedIntervalsSource extends IntervalsSource { // IntervalsSource whose iterators are wrapped in ExtendedIntervalIterator
+
+  final IntervalsSource source; // the wrapped source
+  private final int before; // positions to extend before each interval
+  private final int after; // positions to extend after each interval
+
+  ExtendedIntervalsSource(IntervalsSource source, int before, int after) {
+    this.source = source;
+    this.before = before;
+    this.after = after;
+  }
+
+  @Override
+  public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
+    IntervalIterator in = source.intervals(field, ctx);
+    if (in == null) { // nothing to wrap: propagate the wrapped source's null
+      return null;
+    }
+    return new ExtendedIntervalIterator(in, before, after);
+  }
+
+  @Override
+  public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
+    MatchesIterator in = source.matches(field, ctx, doc);
+    if (in == null) { // nothing to wrap: propagate the wrapped source's null
+      return null;
+    }
+    IntervalIterator wrapped = new ExtendedIntervalIterator(IntervalMatches.wrapMatches(in, doc), before, after); // view the matches as intervals so the same extension applies
+    return IntervalMatches.asMatches(wrapped, in, doc); // hand back a MatchesIterator built from the extended iterator and the original matches
+  }
+
+  @Override
+  public void extractTerms(String field, Set<Term> terms) {
+    source.extractTerms(field, terms); // extending adds no terms of its own
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+    ExtendedIntervalsSource that = (ExtendedIntervalsSource) o;
+    return before == that.before && // equal only when both extension widths and the wrapped source match
+        after == that.after &&
+        Objects.equals(source, that.source);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(source, before, after); // same three fields that equals() compares
+  }
+
+  @Override
+  public String toString() {
+    return "EXTEND(" + source + "," + before + "," + after + ")";
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java
index 1a5eab6..9460d8d 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalFunction.java
@@ -75,19 +75,19 @@ abstract class IntervalFunction {
     @Override
     public int nextInterval() throws IOException {
       if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
-        return IntervalIterator.NO_MORE_INTERVALS;
+        return start = end = IntervalIterator.NO_MORE_INTERVALS;
       int i = 1;
       while (i < subIterators.size()) {
         while (subIterators.get(i).start() <= subIterators.get(i - 1).end()) {
           if (subIterators.get(i).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
-            return IntervalIterator.NO_MORE_INTERVALS;
+            return start = end = IntervalIterator.NO_MORE_INTERVALS;
         }
         if (subIterators.get(i).start() == subIterators.get(i - 1).end() + 1) {
           i = i + 1;
         }
         else {
           if (subIterators.get(0).nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
-            return IntervalIterator.NO_MORE_INTERVALS;
+            return start = end = IntervalIterator.NO_MORE_INTERVALS;
           i = 1;
         }
       }
@@ -150,6 +150,9 @@ abstract class IntervalFunction {
           i++;
         }
         start = subIterators.get(0).start();
+        if (start == NO_MORE_INTERVALS) {
+          return end = NO_MORE_INTERVALS;
+        }
         firstEnd = subIterators.get(0).end();
         end = subIterators.get(subIterators.size() - 1).end();
         b = subIterators.get(subIterators.size() - 1).start();
@@ -248,7 +251,7 @@ abstract class IntervalFunction {
           if (allowOverlaps == false) {
             while (hasOverlaps(it)) {
               if (it.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
-                return IntervalIterator.NO_MORE_INTERVALS;
+                return start = end = IntervalIterator.NO_MORE_INTERVALS;
             }
           }
           queue.add(it);
@@ -256,7 +259,7 @@ abstract class IntervalFunction {
         }
       }
       if (this.queue.size() < subIterators.length)
-        return IntervalIterator.NO_MORE_INTERVALS;
+        return start = end = IntervalIterator.NO_MORE_INTERVALS;
       // then, minimize it
       do {
         start = queue.top().start();
@@ -408,11 +411,17 @@ abstract class IntervalFunction {
 
         @Override
         public int start() {
+          if (bpos == false) {
+            return NO_MORE_INTERVALS;
+          }
           return a.start();
         }
 
         @Override
         public int end() {
+          if (bpos == false) {
+            return NO_MORE_INTERVALS;
+          }
           return a.end();
         }
 
@@ -427,12 +436,15 @@ abstract class IntervalFunction {
             return IntervalIterator.NO_MORE_INTERVALS;
           while (a.nextInterval() != IntervalIterator.NO_MORE_INTERVALS) {
             while (b.end() < a.end()) {
-              if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS)
+              if (b.nextInterval() == IntervalIterator.NO_MORE_INTERVALS) {
+                bpos = false;
                 return IntervalIterator.NO_MORE_INTERVALS;
+              }
             }
             if (b.start() <= a.start())
               return a.start();
           }
+          bpos = false;
           return IntervalIterator.NO_MORE_INTERVALS;
         }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
index f819aab..305f56c 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalIterator.java
@@ -48,14 +48,16 @@ public abstract class IntervalIterator extends DocIdSetIterator {
   /**
    * The start of the current interval
    *
-   * Returns -1 if {@link #nextInterval()} has not yet been called
+   * Returns -1 if {@link #nextInterval()} has not yet been called, or {@link #NO_MORE_INTERVALS}
+   * once the iterator is exhausted.
    */
   public abstract int start();
 
   /**
    * The end of the current interval
    *
-   * Returns -1 if {@link #nextInterval()} has not yet been called
+   * Returns -1 if {@link #nextInterval()} has not yet been called, or {@link #NO_MORE_INTERVALS}
+   * once the iterator is exhausted.
    */
   public abstract int end();
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java
index a28f6e4..24424d2 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/IntervalMatches.java
@@ -49,12 +49,12 @@ final class IntervalMatches {
 
       @Override
       public int startPosition() {
-        return source.startPosition();
+        return iterator.start();
       }
 
       @Override
       public int endPosition() {
-        return source.endPosition();
+        return iterator.end();
       }
 
       @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
index b0a4829..a98adbd 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/intervals/Intervals.java
@@ -104,6 +104,27 @@ public final class Intervals {
   }
 
   /**
+   * Create an {@link IntervalsSource} that wraps another source, extending its
+   * intervals by a number of positions before and after.
+   *
+   * This can be useful for adding defined gaps in a block query; for example,
+   * to find 'a b [2 arbitrary terms] c', you can call:
+   * <pre>
+   *   Intervals.phrase(Intervals.term("a"), Intervals.extend(Intervals.term("b"), 0, 2), Intervals.term("c"));
+   * </pre>
+   *
+   * Note that calling {@link IntervalIterator#gaps()} on iterators returned by this source
+   * delegates directly to the wrapped iterator, and does not include the extensions.
+   *
+   * @param source the source to extend
+   * @param before how many positions to extend before the delegated interval; the extended start is clamped at 0
+   * @param after  how many positions to extend after the delegated interval; on overflow the end is capped just below {@link IntervalIterator#NO_MORE_INTERVALS}
+   */
+  public static IntervalsSource extend(IntervalsSource source, int before, int after) {
+    return new ExtendedIntervalsSource(source, before, after);
+  }
+
+  /**
    * Create an ordered {@link IntervalsSource}
    *
    * Returns intervals in which the subsources all appear in the given order
@@ -162,7 +183,8 @@ public final class Intervals {
    * @param subtrahend  the {@link IntervalsSource} to filter by
    */
   public static IntervalsSource notWithin(IntervalsSource minuend, int positions, IntervalsSource subtrahend) {
-    return new DifferenceIntervalsSource(minuend, subtrahend, new DifferenceIntervalFunction.NotWithinFunction(positions));
+    return new DifferenceIntervalsSource(minuend, Intervals.extend(subtrahend, positions, positions),
+        DifferenceIntervalFunction.NON_OVERLAPPING);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
index 18c69a7..6110605 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervalQuery.java
@@ -188,4 +188,10 @@ public class TestIntervalQuery extends LuceneTestCase {
     );
     checkHits(q, new int[]{3});
   }
+
+  public void testDefinedGaps() throws IOException { // phrase containing an extended sub-interval
+    Query q = new IntervalQuery(field,
+        Intervals.phrase(Intervals.term("w1"), Intervals.extend(Intervals.term("w2"), 1, 0))); // w2 claims one extra leading position, so exactly one position must sit between w1 and w2
+    checkHits(q, new int[]{ 1, 2, 5 });
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2532a5d3/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
index 0308550..139cea9 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/intervals/TestIntervals.java
@@ -124,6 +124,8 @@ public class TestIntervals extends LuceneTestCase {
             i += 2;
           }
           assertEquals("Wrong number of endpoints in doc " + id, expected[id].length, i);
+          assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.start());
+          assertEquals(IntervalIterator.NO_MORE_INTERVALS, intervals.end());
           if (i > 0)
             matchedDocs++;
         }
@@ -504,4 +506,34 @@ public class TestIntervals extends LuceneTestCase {
     assertMatch(mi, 4, 8, 12, 26);
   }
 
+  public void testDefinedGaps() throws IOException { // extended intervals inside a phrase, then the clamping/saturation bounds
+    IntervalsSource source = Intervals.phrase(
+        Intervals.term("pease"),
+        Intervals.extend(Intervals.term("cold"), 1, 1), // "cold" widened by one position on each side
+        Intervals.term("porridge")
+    );
+    checkIntervals(source, "field1", 3, new int[][]{
+        {},
+        { 3, 7 },
+        { 0, 4 },
+        {},
+        { 3, 7 },
+        {}
+    });
+
+    MatchesIterator mi = getMatches(source, 1, "field1");
+    assertMatch(mi, 3, 7, 20, 55);
+    MatchesIterator sub = mi.getSubMatches();
+    assertNotNull(sub);
+    assertMatch(sub, 3, 3, 20, 25);
+    assertMatch(sub, 4, 6, 35, 39); // NOTE(review): 4..6 looks like "cold" reported with its extension on each side - confirm assertMatch's argument order
+    assertMatch(sub, 7, 7, 47, 55);
+
+    source = Intervals.extend(Intervals.term("w1"), 5, Integer.MAX_VALUE); // start extension of 5 plus the largest possible end extension
+    checkIntervals(source, "field2", 1, new int[][]{
+        {}, {}, {}, {}, {},
+        { 0, Integer.MAX_VALUE - 1, 0, Integer.MAX_VALUE - 1, 5, Integer.MAX_VALUE - 1 } // starts clamp at 0 rather than going negative; every end is capped at Integer.MAX_VALUE - 1
+    });
+  }
+
 }