You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/24 07:33:44 UTC

svn commit: r1675780 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/java/org/apache/lucene/search/spans/ lucene/core/src/java/org/apache/lucene/util/

Author: rmuir
Date: Fri Apr 24 05:33:43 2015
New Revision: 1675780

URL: http://svn.apache.org/r1675780
Log:
LUCENE-6373: complete two phase doc id iteration support for Spans

Added:
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiPriorityQueue.java
      - copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisiPriorityQueue.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
      - copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
      - copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
      - copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
Removed:
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ScorerPriorityQueue.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Apr 24 05:33:43 2015
@@ -21,6 +21,10 @@ New Features
   FilterSpans to just have an accept(Spans candidate) method for
   subclasses. (Robert Muir)
 
+* LUCENE-6373: SpanOrQuery shares disjunction logic with boolean
+  queries, and supports two-phased iterators to avoid loading
+  positions when possible. (Paul Elschot via Robert Muir)
+
 * LUCENE-6352: Added a new query time join to the join module that uses
   global ordinals, which is faster for subsequent joins between reopens.
   (Martijn van Groningen, Adrien Grand)

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java Fri Apr 24 05:33:43 2015
@@ -23,7 +23,6 @@ import java.util.Comparator;
 import java.util.List;
 
 import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.search.spans.Spans;
 
 /** A conjunction of DocIdSetIterators.
  * This iterates over the doc ids that are present in each given DocIdSetIterator.
@@ -35,20 +34,16 @@ public class ConjunctionDISI extends Doc
   /** Create a conjunction over the provided iterators, taking advantage of
    *  {@link TwoPhaseIterator}. */
   public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
+    assert iterators.size() >= 2;
     final List<DocIdSetIterator> allIterators = new ArrayList<>();
     final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
-    for (DocIdSetIterator iterator : iterators) {
-      TwoPhaseIterator twoPhaseIterator = null;
-      if (iterator instanceof Scorer) { 
-        twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
-      } else if (iterator instanceof Spans) {
-        twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
-      }
-      if (twoPhaseIterator != null) {
-        allIterators.add(twoPhaseIterator.approximation());
-        twoPhaseIterators.add(twoPhaseIterator);
+    for (DocIdSetIterator iter : iterators) {
+      TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
+      if (twoPhaseIter != null) {
+        allIterators.add(twoPhaseIter.approximation());
+        twoPhaseIterators.add(twoPhaseIter);
       } else { // no approximation support, use the iterator as-is
-        allIterators.add(iterator);
+        allIterators.add(iter);
       }
     }
 
@@ -63,6 +58,7 @@ public class ConjunctionDISI extends Doc
   final DocIdSetIterator[] others;
 
   ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
+    assert iterators.size() >= 2;
     // Sort the array the first time to allow the least frequent DocsEnum to
     // lead the matching.
     CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Fri Apr 24 05:33:43 2015
@@ -19,8 +19,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /**
  * The Scorer for DisjunctionMaxQuery.  The union of all documents generated by the the subquery scorers
  * is generated in document number order.  The score for each document is the maximum of the scores computed
@@ -48,11 +46,11 @@ final class DisjunctionMaxScorer extends
   }
 
   @Override
-  protected float score(ScorerWrapper topList) throws IOException {
+  protected float score(DisiWrapper<Scorer> topList) throws IOException {
     float scoreSum = 0;
     float scoreMax = 0;
-    for (ScorerWrapper w = topList; w != null; w = w.next) {
-      final float subScore = w.scorer.score();
+    for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+      final float subScore = w.iterator.score();
       scoreSum += subScore;
       if (subScore > scoreMax) {
         scoreMax = subScore;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java Fri Apr 24 05:33:43 2015
@@ -22,29 +22,27 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /**
  * Base class for Scorers that score disjunctions.
  */
 abstract class DisjunctionScorer extends Scorer {
 
   private final boolean needsScores;
-  private final ScorerPriorityQueue subScorers;
+  private final DisiPriorityQueue<Scorer> subScorers;
   private final long cost;
 
   /** Linked list of scorers which are on the current doc */
-  private ScorerWrapper topScorers;
+  private DisiWrapper<Scorer> topScorers;
 
   protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
     super(weight);
     if (subScorers.size() <= 1) {
       throw new IllegalArgumentException("There must be at least 2 subScorers");
     }
-    this.subScorers = new ScorerPriorityQueue(subScorers.size());
+    this.subScorers = new DisiPriorityQueue<Scorer>(subScorers.size());
     long cost = 0;
     for (Scorer scorer : subScorers) {
-      final ScorerWrapper w = new ScorerWrapper(scorer);
+      final DisiWrapper<Scorer> w = new DisiWrapper<>(scorer);
       cost += w.cost;
       this.subScorers.add(w);
     }
@@ -52,69 +50,17 @@ abstract class DisjunctionScorer extends
     this.needsScores = needsScores;
   }
 
-  /**
-   * A {@link DocIdSetIterator} which is a disjunction of the approximations of
-   * the provided iterators.
-   */
-  private static class DisjunctionDISIApproximation extends DocIdSetIterator {
-
-    final ScorerPriorityQueue subScorers;
-    final long cost;
-
-    DisjunctionDISIApproximation(ScorerPriorityQueue subScorers) {
-      this.subScorers = subScorers;
-      long cost = 0;
-      for (ScorerWrapper w : subScorers) {
-        cost += w.cost;
-      }
-      this.cost = cost;
-    }
-
-    @Override
-    public long cost() {
-      return cost;
-    }
-
-    @Override
-    public int docID() {
-     return subScorers.top().doc;
-    }
-
-    @Override
-    public int nextDoc() throws IOException {
-      ScorerWrapper top = subScorers.top();
-      final int doc = top.doc;
-      do {
-        top.doc = top.approximation.nextDoc();
-        top = subScorers.updateTop();
-      } while (top.doc == doc);
-
-      return top.doc;
-    }
-
-    @Override
-    public int advance(int target) throws IOException {
-      ScorerWrapper top = subScorers.top();
-      do {
-        top.doc = top.approximation.advance(target);
-        top = subScorers.updateTop();
-      } while (top.doc < target);
-
-      return top.doc;
-    }
-  }
-
   @Override
   public TwoPhaseIterator asTwoPhaseIterator() {
     boolean hasApproximation = false;
-    for (ScorerWrapper w : subScorers) {
+    for (DisiWrapper<Scorer> w : subScorers) {
       if (w.twoPhaseView != null) {
         hasApproximation = true;
         break;
       }
     }
 
-    if (hasApproximation == false) {
+    if (! hasApproximation) {
       // none of the sub scorers supports approximations
       return null;
     }
@@ -122,13 +68,13 @@ abstract class DisjunctionScorer extends
     // note it is important to share the same pq as this scorer so that
     // rebalancing the pq through the approximation will also rebalance
     // the pq in this scorer.
-    return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
+    return new TwoPhaseIterator(new DisjunctionDISIApproximation<Scorer>(subScorers)) {
 
       @Override
       public boolean matches() throws IOException {
-        ScorerWrapper topScorers = subScorers.topList();
+        DisiWrapper<Scorer> topScorers = subScorers.topList();
         // remove the head of the list as long as it does not match
-        while (topScorers.twoPhaseView != null && topScorers.twoPhaseView.matches() == false) {
+        while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
           topScorers = topScorers.next;
           if (topScorers == null) {
             return false;
@@ -138,9 +84,9 @@ abstract class DisjunctionScorer extends
         if (needsScores) {
           // if scores or freqs are needed, we also need to remove scorers
           // from the top list that do not actually match
-          ScorerWrapper previous = topScorers;
-          for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
-            if (w.twoPhaseView != null && w.twoPhaseView.matches() == false) {
+          DisiWrapper<Scorer> previous = topScorers;
+          for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
+            if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
               // w does not match, remove it
               previous.next = w.next;
             } else {
@@ -175,10 +121,10 @@ abstract class DisjunctionScorer extends
   @Override
   public final int nextDoc() throws IOException {
     topScorers = null;
-    ScorerWrapper top = subScorers.top();
+    DisiWrapper<Scorer> top = subScorers.top();
     final int doc = top.doc;
     do {
-      top.doc = top.scorer.nextDoc();
+      top.doc = top.iterator.nextDoc();
       top = subScorers.updateTop();
     } while (top.doc == doc);
 
@@ -188,9 +134,9 @@ abstract class DisjunctionScorer extends
   @Override
   public final int advance(int target) throws IOException {
     topScorers = null;
-    ScorerWrapper top = subScorers.top();
+    DisiWrapper<Scorer> top = subScorers.top();
     do {
-      top.doc = top.scorer.advance(target);
+      top.doc = top.iterator.advance(target);
       top = subScorers.updateTop();
     } while (top.doc < target);
 
@@ -203,7 +149,7 @@ abstract class DisjunctionScorer extends
       topScorers = subScorers.topList();
     }
     int freq = 1;
-    for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
+    for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
       freq += 1;
     }
     return freq;
@@ -218,13 +164,13 @@ abstract class DisjunctionScorer extends
   }
 
   /** Compute the score for the given linked list of scorers. */
-  protected abstract float score(ScorerWrapper topList) throws IOException;
+  protected abstract float score(DisiWrapper<Scorer> topList) throws IOException;
 
   @Override
   public final Collection<ChildScorer> getChildren() {
     ArrayList<ChildScorer> children = new ArrayList<>();
-    for (ScorerWrapper scorer : subScorers) {
-      children.add(new ChildScorer(scorer.scorer, "SHOULD"));
+    for (DisiWrapper<Scorer> scorer : subScorers) {
+      children.add(new ChildScorer(scorer.iterator, "SHOULD"));
     }
     return children;
   }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Fri Apr 24 05:33:43 2015
@@ -20,8 +20,6 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.List;
 
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
 /** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
  * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. 
  */
@@ -39,11 +37,11 @@ final class DisjunctionSumScorer extends
   }
 
   @Override
-  protected float score(ScorerWrapper topList) throws IOException {
+  protected float score(DisiWrapper<Scorer> topList) throws IOException {
     double score = 0;
     int freq = 0;
-    for (ScorerWrapper w = topList; w != null; w = w.next) {
-      score += w.scorer.score();
+    for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+      score += w.iterator.score();
       freq += 1;
     }
     return (float)score * coord[freq];

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java Fri Apr 24 05:33:43 2015
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
+import org.apache.lucene.search.spans.Spans;
+
 /**
  * This abstract class defines methods to iterate over a set of non-decreasing
  * doc ids. Note that this class assumes it iterates on doc Ids, and therefore
@@ -175,4 +177,5 @@ public abstract class DocIdSetIterator {
    * completely inaccurate.
    */
   public abstract long cost();
+  
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java Fri Apr 24 05:33:43 2015
@@ -23,12 +23,11 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
 import org.apache.lucene.util.PriorityQueue;
 
-import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;
+import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
+import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
+import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
 
 /**
  * A {@link Scorer} for {@link BooleanQuery} when
@@ -83,17 +82,17 @@ final class MinShouldMatchSumScorer exte
 
   // list of scorers which 'lead' the iteration and are currently
   // positioned on 'doc'
-  ScorerWrapper lead;
+  DisiWrapper<Scorer> lead;
   int doc;  // current doc ID of the leads
   int freq; // number of scorers on the desired doc ID
 
   // priority queue of scorers that are too advanced compared to the current
   // doc. Ordered by doc ID.
-  final ScorerPriorityQueue head;
+  final DisiPriorityQueue<Scorer> head;
 
   // priority queue of scorers which are behind the current doc.
   // Ordered by cost.
-  final ScorerWrapper[] tail;
+  final DisiWrapper<Scorer>[] tail;
   int tailSize;
 
   final Collection<ChildScorer> childScorers;
@@ -113,13 +112,13 @@ final class MinShouldMatchSumScorer exte
     this.coord = coord;
     this.doc = -1;
 
-    head = new ScorerPriorityQueue(scorers.size() - minShouldMatch + 1);
+    head = new DisiPriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1);
     // there can be at most minShouldMatch - 1 scorers beyond the current position
     // otherwise we might be skipping over matching documents
-    tail = new ScorerWrapper[minShouldMatch - 1];
+    tail = new DisiWrapper[minShouldMatch - 1];
 
     for (Scorer scorer : scorers) {
-      addLead(new ScorerWrapper(scorer));
+      addLead(new DisiWrapper<Scorer>(scorer));
     }
 
     List<ChildScorer> children = new ArrayList<>();
@@ -145,13 +144,13 @@ final class MinShouldMatchSumScorer exte
     // We are moving to the next doc ID, so scorers in 'lead' need to go in
     // 'tail'. If there is not enough space in 'tail', then we take the least
     // costly scorers and advance them.
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(s);
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
       if (evicted != null) {
         if (evicted.doc == doc) {
-          evicted.doc = evicted.scorer.nextDoc();
+          evicted.doc = evicted.iterator.nextDoc();
         } else {
-          evicted.doc = evicted.scorer.advance(doc + 1);
+          evicted.doc = evicted.iterator.advance(doc + 1);
         }
         head.add(evicted);
       }
@@ -164,23 +163,23 @@ final class MinShouldMatchSumScorer exte
   @Override
   public int advance(int target) throws IOException {
     // Same logic as in nextDoc
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(s);
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
       if (evicted != null) {
-        evicted.doc = evicted.scorer.advance(target);
+        evicted.doc = evicted.iterator.advance(target);
         head.add(evicted);
       }
     }
 
     // But this time there might also be scorers in 'head' behind the desired
     // target so we need to do the same thing that we did on 'lead' on 'head'
-    ScorerWrapper headTop = head.top();
+    DisiWrapper<Scorer> headTop = head.top();
     while (headTop.doc < target) {
-      final ScorerWrapper evicted = insertTailWithOverFlow(headTop);
+      final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(headTop);
       // We know that the tail is full since it contains at most
       // minShouldMatch - 1 entries and we just moved at least minShouldMatch
       // entries to it, so evicted is not null
-      evicted.doc = evicted.scorer.advance(target);
+      evicted.doc = evicted.iterator.advance(target);
       headTop = head.updateTop(evicted);
     }
 
@@ -188,20 +187,20 @@ final class MinShouldMatchSumScorer exte
     return doNext();
   }
 
-  private void addLead(ScorerWrapper lead) {
+  private void addLead(DisiWrapper<Scorer> lead) {
     lead.next = this.lead;
     this.lead = lead;
     freq += 1;
   }
 
   private void pushBackLeads() throws IOException {
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
       addTail(s);
     }
   }
 
-  private void advanceTail(ScorerWrapper top) throws IOException {
-    top.doc = top.scorer.advance(doc);
+  private void advanceTail(DisiWrapper<Scorer> top) throws IOException {
+    top.doc = top.iterator.advance(doc);
     if (top.doc == doc) {
       addLead(top);
     } else {
@@ -210,7 +209,7 @@ final class MinShouldMatchSumScorer exte
   }
 
   private void advanceTail() throws IOException {
-    final ScorerWrapper top = popTail();
+    final DisiWrapper<Scorer> top = popTail();
     advanceTail(top);
   }
 
@@ -276,8 +275,8 @@ final class MinShouldMatchSumScorer exte
     // we need to know about all matches
     updateFreq();
     double score = 0;
-    for (ScorerWrapper s = lead; s != null; s = s.next) {
-      score += s.scorer.score();
+    for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+      score += s.iterator.score();
     }
     return coord[freq] * (float) score;
   }
@@ -289,12 +288,12 @@ final class MinShouldMatchSumScorer exte
   }
 
   /** Insert an entry in 'tail' and evict the least-costly scorer if full. */
-  private ScorerWrapper insertTailWithOverFlow(ScorerWrapper s) {
+  private DisiWrapper<Scorer> insertTailWithOverFlow(DisiWrapper<Scorer> s) {
     if (tailSize < tail.length) {
       addTail(s);
       return null;
     } else if (tail.length >= 1) {
-      final ScorerWrapper top = tail[0];
+      final DisiWrapper<Scorer> top = tail[0];
       if (top.cost < s.cost) {
         tail[0] = s;
         downHeapCost(tail, tailSize);
@@ -305,16 +304,16 @@ final class MinShouldMatchSumScorer exte
   }
 
   /** Add an entry to 'tail'. Fails if over capacity. */
-  private void addTail(ScorerWrapper s) {
+  private void addTail(DisiWrapper<Scorer> s) {
     tail[tailSize] = s;
     upHeapCost(tail, tailSize);
     tailSize += 1;
   }
 
   /** Pop the least-costly scorer from 'tail'. */
-  private ScorerWrapper popTail() {
+  private DisiWrapper<Scorer> popTail() {
     assert tailSize > 0;
-    final ScorerWrapper result = tail[0];
+    final DisiWrapper<Scorer> result = tail[0];
     tail[0] = tail[--tailSize];
     downHeapCost(tail, tailSize);
     return result;
@@ -322,8 +321,8 @@ final class MinShouldMatchSumScorer exte
 
   /** Heap helpers */
 
-  private static void upHeapCost(ScorerWrapper[] heap, int i) {
-    final ScorerWrapper node = heap[i];
+  private static void upHeapCost(DisiWrapper<Scorer>[] heap, int i) {
+    final DisiWrapper<Scorer> node = heap[i];
     final long nodeCost = node.cost;
     int j = parentNode(i);
     while (j >= 0 && nodeCost < heap[j].cost) {
@@ -334,9 +333,9 @@ final class MinShouldMatchSumScorer exte
     heap[i] = node;
   }
 
-  private static void downHeapCost(ScorerWrapper[] heap, int size) {
+  private static void downHeapCost(DisiWrapper<Scorer>[] heap, int size) {
     int i = 0;
-    final ScorerWrapper node = heap[0];
+    final DisiWrapper<Scorer> node = heap[0];
     int j = leftNode(i);
     if (j < size) {
       int k = rightNode(j);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java Fri Apr 24 05:33:43 2015
@@ -20,9 +20,13 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Objects;
 
+import org.apache.lucene.search.spans.Spans;
+
 /**
- * Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
- * a {@link DocIdSetIterator}. When the {@link #approximation()}'s
+ * Returned by {@link Scorer#asTwoPhaseIterator()}
+ * and  {@link Spans#asTwoPhaseIterator()}
+ * to expose an approximation of a {@link DocIdSetIterator}.
+ * When the {@link #approximation()}'s
  * {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
  * return, {@link #matches()} needs to be checked in order to know whether the
  * returned doc ID actually matches.
@@ -89,4 +93,16 @@ public abstract class TwoPhaseIterator {
    *  {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
   public abstract boolean matches() throws IOException;
 
+  /**
+   * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
+   * when available * otherwise returns null.
+   */
+  public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
+    return (iter instanceof Scorer)
+            ? ((Scorer) iter).asTwoPhaseIterator()
+            : (iter instanceof Spans)
+            ? ((Spans) iter).asTwoPhaseIterator()
+            : null;
+  }
+
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java Fri Apr 24 05:33:43 2015
@@ -31,9 +31,13 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.DisiPriorityQueue;
+import org.apache.lucene.search.DisiWrapper;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.DisjunctionDISIApproximation;
+
 
 /** Matches the union of its clauses.
  */
@@ -42,7 +46,7 @@ public class SpanOrQuery extends SpanQue
   private String field;
 
   /** Construct a SpanOrQuery merging the provided clauses.
-   * All clauses must have the same field. 
+   * All clauses must have the same field.
    */
   public SpanOrQuery(SpanQuery... clauses) {
     this.clauses = new ArrayList<>(clauses.length);
@@ -146,35 +150,16 @@ public class SpanOrQuery extends SpanQue
   }
 
 
-  private class SpanQueue extends PriorityQueue<Spans> {
-    public SpanQueue(int size) {
-      super(size);
-    }
-
-    @Override
-    protected final boolean lessThan(Spans spans1, Spans spans2) {
-      if (spans1.docID() == spans2.docID()) {
-        if (spans1.startPosition() == spans2.startPosition()) {
-          return spans1.endPosition() < spans2.endPosition();
-        } else {
-          return spans1.startPosition() < spans2.startPosition();
-        }
-      } else {
-        return spans1.docID() < spans2.docID();
-      }
-    }
-  }
-
   @Override
   public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
   throws IOException {
 
     final ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
 
-    for (SpanQuery seq : clauses) {
-      Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
-      if (subSpan != null) {
-        subSpans.add(subSpan);
+    for (SpanQuery sq : clauses) {
+      Spans spans = sq.getSpans(context, acceptDocs, termContexts);
+      if (spans != null) {
+        subSpans.add(spans);
       }
     }
 
@@ -184,114 +169,168 @@ public class SpanOrQuery extends SpanQue
       return subSpans.get(0);
     }
 
-    final SpanQueue queue = new SpanQueue(clauses.size());
+    final DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
     for (Spans spans : subSpans) {
-      queue.add(spans);
+      byDocQueue.add(new DisiWrapper<>(spans));
     }
 
+    final SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
+
     return new Spans() {
+      Spans topPositionSpans = null;
 
       @Override
       public int nextDoc() throws IOException {
-        if (queue.size() == 0) { // all done
-          return NO_MORE_DOCS;
-        }
-
-        int currentDoc = top().docID();
-
-        if (currentDoc == -1) { // initially
-          return advance(0);
-        }
-
+        topPositionSpans = null;
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        int currentDoc = topDocSpans.doc;
         do {
-          if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
-            queue.updateTop();
-          } else {
-            queue.pop(); // exhausted a clause
-            if (queue.size() == 0) {
-              return NO_MORE_DOCS;
-            }
-          }
-          // assert queue.size() > 0;
-          int doc = top().docID();
-          if (doc > currentDoc) {
-            return doc;
-          }
-        } while (true);
+          topDocSpans.doc = topDocSpans.iterator.nextDoc();
+          topDocSpans = byDocQueue.updateTop();
+        } while (topDocSpans.doc == currentDoc);
+        return topDocSpans.doc;
       }
 
-      private Spans top() {
-        return queue.top();
+      @Override
+      public int advance(int target) throws IOException {
+        topPositionSpans = null;
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        do {
+          topDocSpans.doc = topDocSpans.iterator.advance(target);
+          topDocSpans = byDocQueue.updateTop();
+        } while (topDocSpans.doc < target);
+        return topDocSpans.doc;
       }
 
       @Override
-      public int advance(int target) throws IOException {
+      public int docID() {
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        return topDocSpans.doc;
+      }
 
-        while ((queue.size() > 0) && (top().docID() < target)) {
-          if (top().advance(target) != NO_MORE_DOCS) {
-            queue.updateTop();
-          } else {
-            queue.pop();
+      @Override
+      public TwoPhaseIterator asTwoPhaseIterator() {
+        boolean hasApproximation = false;
+        for (DisiWrapper<Spans> w : byDocQueue) {
+          if (w.twoPhaseView != null) {
+            hasApproximation = true;
+            break;
           }
         }
 
-        return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
-      }
+        if (! hasApproximation) { // none of the sub spans supports approximations
+          return null;
+        }
 
-      @Override
-      public int docID() {
-        return (queue == null) ? -1
-              : (queue.size() > 0) ? top().docID()
-              : NO_MORE_DOCS;
+        return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
+          @Override
+          public boolean matches() throws IOException {
+            return twoPhaseCurrentDocMatches();
+          }
+        };
       }
+      
+      int lastDocTwoPhaseMatched = -1;
+
+      boolean twoPhaseCurrentDocMatches() throws IOException {
+        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+        // remove the head of the list as long as it does not match
+        final int currentDoc = listAtCurrentDoc.doc;
+        while (listAtCurrentDoc.twoPhaseView != null) {
+          if (listAtCurrentDoc.twoPhaseView.matches()) {
+            // use this spans for positions at current doc:
+            listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
+            break;
+          }
+          // do not use this spans for positions at current doc:
+          listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
+          listAtCurrentDoc = listAtCurrentDoc.next;
+          if (listAtCurrentDoc == null) {
+            return false;
+          }
+        }
+        lastDocTwoPhaseMatched = currentDoc;
+        topPositionSpans = null;
+        return true;
+      }
+
+      void fillPositionQueue() throws IOException { // called at first nextStartPosition
+        assert byPositionQueue.size() == 0;
+        // add all matching Spans at current doc to byPositionQueue
+        DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+        while (listAtCurrentDoc != null) {
+          Spans spansAtDoc = listAtCurrentDoc.iterator;
+          if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
+            if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
+              if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
+                spansAtDoc = null;
+              } else {
+                if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
+                  if (! listAtCurrentDoc.twoPhaseView.matches()) {
+                    spansAtDoc = null;
+                  }
+                }
+              } 
+            }
+          }
 
+          if (spansAtDoc != null) {
+            assert spansAtDoc.docID() == listAtCurrentDoc.doc;
+            assert spansAtDoc.startPosition() == -1;
+            spansAtDoc.nextStartPosition();
+            assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
+            byPositionQueue.add(spansAtDoc);
+          }
+          listAtCurrentDoc = listAtCurrentDoc.next;
+        }
+        assert byPositionQueue.size() > 0;
+      }
+        
       @Override
       public int nextStartPosition() throws IOException {
-        top().nextStartPosition();
-        queue.updateTop();
-        int startPos = top().startPosition();
-        while (startPos == -1) { // initially at this doc
-          top().nextStartPosition();
-          queue.updateTop();
-          startPos = top().startPosition();
+        DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+        assert topDocSpans.doc != NO_MORE_DOCS;
+        if (topPositionSpans == null) {
+          byPositionQueue.clear();
+          fillPositionQueue(); // fills byPositionQueue at first position
+          topPositionSpans = byPositionQueue.top();
+        } else {
+          topPositionSpans.nextStartPosition();
+          topPositionSpans = byPositionQueue.updateTop();
         }
-        return startPos;
+        return topPositionSpans.startPosition();
       }
 
       @Override
       public int startPosition() {
-        return top().startPosition();
+        return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
       }
 
       @Override
       public int endPosition() {
-        return top().endPosition();
+        return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
       }
 
       @Override
       public Collection<byte[]> getPayload() throws IOException {
-        ArrayList<byte[]> result = null;
-        Spans theTop = top();
-        if (theTop != null && theTop.isPayloadAvailable()) {
-          result = new ArrayList<>(theTop.getPayload());
-        }
-        return result;
+        return topPositionSpans == null
+                ? null
+                : topPositionSpans.isPayloadAvailable()
+                ? new ArrayList<>(topPositionSpans.getPayload())
+                : null;
       }
 
       @Override
       public boolean isPayloadAvailable() throws IOException {
-        Spans top = top();
-        return top != null && top.isPayloadAvailable();
+        return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
       }
 
       @Override
       public String toString() {
-          return "spans("+SpanOrQuery.this+")@"+
-            ((queue == null)?"START"
-             :(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
+        return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
       }
 
-      private long cost = -1;
+      long cost = -1;
 
       @Override
       public long cost() {
@@ -303,8 +342,8 @@ public class SpanOrQuery extends SpanQue
         }
         return cost;
       }
-
     };
   }
 
 }
+

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java Fri Apr 24 05:33:43 2015
@@ -86,11 +86,12 @@ public abstract class Spans extends DocI
    *
    * Note that the returned {@link TwoPhaseIterator}'s
    * {@link TwoPhaseIterator#approximation() approximation} must
-   * advance synchronously with this iterator: advancing the approximation must
+   * advance documents synchronously with this iterator:
+   * advancing the approximation must
    * advance this iterator and vice-versa.
    *
-   * Implementing this method is typically useful on {@link Spans}s
-   * that have a high per-document overhead in order to confirm matches.
+   * Implementing this method is typically useful on a {@link Spans}
+   * that has a high per-document overhead for confirming matches.
    *
    * The default implementation returns {@code null}.
    */

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java Fri Apr 24 05:33:43 2015
@@ -89,7 +89,7 @@ public abstract class PriorityQueue<T> {
    * value (i.e., {@link #lessThan} should always favor the
    * non-sentinel values).<br>
    * 
-   * By default, this method returns false, which means the queue will not be
+   * By default, this method returns null, which means the queue will not be
    * filled with sentinel values. Otherwise, the value returned will be used to
    * pre-populate the queue. Adds sentinel values to the queue.<br>
    *