You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/05 21:44:31 UTC

svn commit: r1671420 - in /lucene/dev/trunk/lucene: ./ core/src/java/org/apache/lucene/search/spans/ core/src/test/org/apache/lucene/search/spans/

Author: rmuir
Date: Sun Apr  5 19:44:31 2015
New Revision: 1671420

URL: http://svn.apache.org/r1671420
Log:
LUCENE-6393: Add two-phase support to SpanPositionCheckQuery and subclasses

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1671420&r1=1671419&r2=1671420&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Sun Apr  5 19:44:31 2015
@@ -45,6 +45,10 @@ New Features
   code with boolean queries, and use two-phased iterators for
   faster intersection by avoiding loading positions in certain cases.
   (Paul Elschot, Terry Smith, Robert Muir via Mike McCandless)
+  
+* LUCENE-6393: Add two-phase support to SpanPositionCheckQuery
+  and its subclasses: SpanPositionRangeQuery, SpanPayloadCheckQuery, 
+  SpanNearPayloadCheckQuery, SpanFirstQuery. (Paul Elschot, Robert Muir)
 
 * LUCENE-6352: Added a new query time join to the join module that uses
   global ordinals, which is faster for subsequent joins between reopens.

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java?rev=1671420&r1=1671419&r2=1671420&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/FilterSpans.java Sun Apr  5 19:44:31 2015
@@ -27,7 +27,7 @@ import org.apache.lucene.search.TwoPhase
  * A {@link Spans} implementation wrapping another spans instance,
  * allowing to override selected methods in a subclass.
  */
-public class FilterSpans extends Spans {
+public abstract class FilterSpans extends Spans {
  
   /** The wrapped spans instance. */
   protected final Spans in;
@@ -89,6 +89,31 @@ public class FilterSpans extends Spans {
   
   @Override
   public TwoPhaseIterator asTwoPhaseIterator() {
-    return in.asTwoPhaseIterator();
-  }
+    TwoPhaseIterator inner = in.asTwoPhaseIterator();
+    if (inner != null) {
+      // wrapped instance has an approximation
+      return new TwoPhaseIterator(inner.approximation()) {
+        @Override
+        public boolean matches() throws IOException {
+          return inner.matches() && twoPhaseCurrentDocMatches();
+        }
+      };
+    } else {
+      // wrapped instance has no approximation, but 
+      // we can still defer matching until absolutely needed.
+      return new TwoPhaseIterator(in) {
+        @Override
+        public boolean matches() throws IOException {
+          return twoPhaseCurrentDocMatches();
+        }
+      };
+    }
+  }
+  
+  /**
+   * Returns true if the current document matches.
+   * <p>
+   * This is called during two-phase processing.
+   */
+  public abstract boolean twoPhaseCurrentDocMatches() throws IOException;
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java?rev=1671420&r1=1671419&r2=1671420&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java Sun Apr  5 19:44:31 2015
@@ -106,6 +106,11 @@ public class NearSpansUnordered extends
     }
 
     @Override
+    public boolean twoPhaseCurrentDocMatches() throws IOException {
+      return true; // we don't modify the spans, we just capture information from it.
+    }
+
+    @Override
     public String toString() {
       return "NearSpansUnordered.SpansCell(" + in.toString() + ")";
     }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java?rev=1671420&r1=1671419&r2=1671420&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java Sun Apr  5 19:44:31 2015
@@ -127,45 +127,27 @@ public abstract class SpanPositionCheckQ
 
     @Override
     public int nextDoc() throws IOException {
-      if (in.nextDoc() == NO_MORE_DOCS)
-        return NO_MORE_DOCS;
-
-      return toNextDocWithAllowedPosition();
+      while (true) {
+        int doc = in.nextDoc();
+        if (doc == NO_MORE_DOCS) {
+          return NO_MORE_DOCS;
+        } else if (twoPhaseCurrentDocMatches()) {
+          return doc;
+        }
+      }
     }
 
     @Override
     public int advance(int target) throws IOException {
-      if (in.advance(target) == NO_MORE_DOCS)
-        return NO_MORE_DOCS;
-
-      return toNextDocWithAllowedPosition();
-    }
-
-    @SuppressWarnings("fallthrough")
-    protected int toNextDocWithAllowedPosition() throws IOException {
-      startPos = in.nextStartPosition();
-      assert startPos != NO_MORE_POSITIONS;
-      for (;;) {
-        switch(acceptPosition(in)) {
-          case YES:
-            atFirstInCurrentDoc = true;
-            return in.docID();
-          case NO:
-            startPos = in.nextStartPosition();
-            if (startPos != NO_MORE_POSITIONS) {
-              break;
-            }
-            // else fallthrough
-          case NO_MORE_IN_CURRENT_DOC:
-            if (in.nextDoc() == NO_MORE_DOCS) {
-              startPos = -1;
-              return NO_MORE_DOCS;
-            }
-            startPos = in.nextStartPosition();
-            assert startPos != NO_MORE_POSITIONS : "no start position at doc="+in.docID();
-            break;
+      int doc = in.advance(target);
+      while (doc != NO_MORE_DOCS) {
+        if (twoPhaseCurrentDocMatches()) {
+          break;
         }
+        doc = in.nextDoc();
       }
+
+      return doc;
     }
 
     @Override
@@ -190,6 +172,30 @@ public abstract class SpanPositionCheckQ
         }
       }
     }
+    
+    // return true if the current document matches
+    @SuppressWarnings("fallthrough")
+    public boolean twoPhaseCurrentDocMatches() throws IOException {
+      atFirstInCurrentDoc = false;
+      startPos = in.nextStartPosition();
+      assert startPos != NO_MORE_POSITIONS;
+      for (;;) {
+        switch(acceptPosition(in)) {
+          case YES:
+            atFirstInCurrentDoc = true;
+            return true;
+          case NO:
+            startPos = in.nextStartPosition();
+            if (startPos != NO_MORE_POSITIONS) {
+              break;
+            }
+            // else fallthrough
+          case NO_MORE_IN_CURRENT_DOC:
+            startPos = -1;
+            return false;
+        }
+      }
+    }
 
     @Override
     public int startPosition() {

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java?rev=1671420&r1=1671419&r2=1671420&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/spans/TestSpanSearchEquivalence.java Sun Apr  5 19:44:31 2015
@@ -158,6 +158,79 @@ public class TestSpanSearchEquivalence e
     }
   }
   
+  /** SpanPositionRangeQuery(A, M, N) ⊆ TermQuery(A) */
+  public void testSpanRangeTerm() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 5; i++) {
+      for (int j = 0; j < 5; j++) {
+        Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
+        Query q2 = new TermQuery(t1);
+        assertSubsetOf(q1, q2);
+      }
+    }
+  }
+  
+  /** SpanPositionRangeQuery(A, M, N) ⊆ SpanPositionRangeQuery(A, M, N+1) */
+  public void testSpanRangeTermIncreasingEnd() throws Exception {
+    Term t1 = randomTerm();
+    for (int i = 0; i < 5; i++) {
+      for (int j = 0; j < 5; j++) {
+        Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j);
+        Query q2 = new SpanPositionRangeQuery(new SpanTermQuery(t1), i, i+j+1);
+        assertSubsetOf(q1, q2);
+      }
+    }
+  }
+  
+  /** SpanPositionRangeQuery(A, 0, ∞) = TermQuery(A) */
+  public void testSpanRangeTermEverything() throws Exception {
+    Term t1 = randomTerm();
+    Query q1 = new SpanPositionRangeQuery(new SpanTermQuery(t1), 0, Integer.MAX_VALUE);
+    Query q2 = new TermQuery(t1);
+    assertSameSet(q1, q2);
+  }
+  
+  /** SpanPositionRangeQuery([A B], M, N) ⊆ SpanNearQuery([A B]) */
+  public void testSpanRangeNear() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 5; i++) {
+      for (int j = 0; j < 5; j++) {
+        Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
+        Query q2 = nearQuery;
+        assertSubsetOf(q1, q2);
+      }
+    }
+  }
+  
+  /** SpanPositionRangeQuery([A B], M, N) ⊆ SpanPositionRangeQuery([A B], M, N+1) */
+  public void testSpanRangeNearIncreasingEnd() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    for (int i = 0; i < 5; i++) {
+      for (int j = 0; j < 5; j++) {
+        Query q1 = new SpanPositionRangeQuery(nearQuery, i, i+j);
+        Query q2 = new SpanPositionRangeQuery(nearQuery, i, i+j+1);
+        assertSubsetOf(q1, q2);
+      }
+    }
+  }
+  
+  /** SpanPositionRangeQuery([A B], 0, ∞) = SpanNearQuery([A B]) */
+  public void testSpanRangeNearEverything() throws Exception {
+    Term t1 = randomTerm();
+    Term t2 = randomTerm();
+    SpanQuery subquery[] = new SpanQuery[] { new SpanTermQuery(t1), new SpanTermQuery(t2) };
+    SpanQuery nearQuery = new SpanNearQuery(subquery, 10, true);
+    Query q1 = new SpanPositionRangeQuery(nearQuery, 0, Integer.MAX_VALUE);
+    Query q2 = nearQuery;
+    assertSameSet(q1, q2);
+  }
+  
   /** SpanFirstQuery(A, N) ⊆ TermQuery(A) */
   public void testSpanFirstTerm() throws Exception {
     Term t1 = randomTerm();
@@ -187,7 +260,6 @@ public class TestSpanSearchEquivalence e
   }
   
   /** SpanFirstQuery([A B], N) ⊆ SpanNearQuery([A B]) */
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
   public void testSpanFirstNear() throws Exception {
     Term t1 = randomTerm();
     Term t2 = randomTerm();
@@ -201,7 +273,6 @@ public class TestSpanSearchEquivalence e
   }
   
   /** SpanFirstQuery([A B], N) ⊆ SpanFirstQuery([A B], N+1) */
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
   public void testSpanFirstNearIncreasing() throws Exception {
     Term t1 = randomTerm();
     Term t2 = randomTerm();
@@ -215,7 +286,6 @@ public class TestSpanSearchEquivalence e
   }
   
   /** SpanFirstQuery([A B], ∞) = SpanNearQuery([A B]) */
-  @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/LUCENE-6393")
   public void testSpanFirstNearEverything() throws Exception {
     Term t1 = randomTerm();
     Term t2 = randomTerm();