You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/24 07:33:44 UTC
svn commit: r1675780 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/search/spans/
lucene/core/src/java/org/apache/lucene/util/
Author: rmuir
Date: Fri Apr 24 05:33:43 2015
New Revision: 1675780
URL: http://svn.apache.org/r1675780
Log:
LUCENE-6373: complete two phase doc id iteration support for Spans
Added:
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiPriorityQueue.java
- copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisiPriorityQueue.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
- copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
- copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionDISIApproximation.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
- copied unchanged from r1675776, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionQueue.java
Removed:
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ScorerPriorityQueue.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Apr 24 05:33:43 2015
@@ -21,6 +21,10 @@ New Features
FilterSpans to just have an accept(Spans candidate) method for
subclasses. (Robert Muir)
+* LUCENE-6373: SpanOrQuery shares disjunction logic with boolean
+ queries, and supports two-phased iterators to avoid loading
+ positions when possible. (Paul Elschot via Robert Muir)
+
* LUCENE-6352: Added a new query time join to the join module that uses
global ordinals, which is faster for subsequent joins between reopens.
(Martijn van Groningen, Adrien Grand)
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java Fri Apr 24 05:33:43 2015
@@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.util.CollectionUtil;
-import org.apache.lucene.search.spans.Spans;
/** A conjunction of DocIdSetIterators.
* This iterates over the doc ids that are present in each given DocIdSetIterator.
@@ -35,20 +34,16 @@ public class ConjunctionDISI extends Doc
/** Create a conjunction over the provided iterators, taking advantage of
* {@link TwoPhaseIterator}. */
public static ConjunctionDISI intersect(List<? extends DocIdSetIterator> iterators) {
+ assert iterators.size() >= 2;
final List<DocIdSetIterator> allIterators = new ArrayList<>();
final List<TwoPhaseIterator> twoPhaseIterators = new ArrayList<>();
- for (DocIdSetIterator iterator : iterators) {
- TwoPhaseIterator twoPhaseIterator = null;
- if (iterator instanceof Scorer) {
- twoPhaseIterator = ((Scorer) iterator).asTwoPhaseIterator();
- } else if (iterator instanceof Spans) {
- twoPhaseIterator = ((Spans) iterator).asTwoPhaseIterator();
- }
- if (twoPhaseIterator != null) {
- allIterators.add(twoPhaseIterator.approximation());
- twoPhaseIterators.add(twoPhaseIterator);
+ for (DocIdSetIterator iter : iterators) {
+ TwoPhaseIterator twoPhaseIter = TwoPhaseIterator.asTwoPhaseIterator(iter);
+ if (twoPhaseIter != null) {
+ allIterators.add(twoPhaseIter.approximation());
+ twoPhaseIterators.add(twoPhaseIter);
} else { // no approximation support, use the iterator as-is
- allIterators.add(iterator);
+ allIterators.add(iter);
}
}
@@ -63,6 +58,7 @@ public class ConjunctionDISI extends Doc
final DocIdSetIterator[] others;
ConjunctionDISI(List<? extends DocIdSetIterator> iterators) {
+ assert iterators.size() >= 2;
// Sort the array the first time to allow the least frequent DocsEnum to
// lead the matching.
CollectionUtil.timSort(iterators, new Comparator<DocIdSetIterator>() {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Fri Apr 24 05:33:43 2015
@@ -19,8 +19,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
/**
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers
* is generated in document number order. The score for each document is the maximum of the scores computed
@@ -48,11 +46,11 @@ final class DisjunctionMaxScorer extends
}
@Override
- protected float score(ScorerWrapper topList) throws IOException {
+ protected float score(DisiWrapper<Scorer> topList) throws IOException {
float scoreSum = 0;
float scoreMax = 0;
- for (ScorerWrapper w = topList; w != null; w = w.next) {
- final float subScore = w.scorer.score();
+ for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+ final float subScore = w.iterator.score();
scoreSum += subScore;
if (subScore > scoreMax) {
scoreMax = subScore;
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java Fri Apr 24 05:33:43 2015
@@ -22,29 +22,27 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
/**
* Base class for Scorers that score disjunctions.
*/
abstract class DisjunctionScorer extends Scorer {
private final boolean needsScores;
- private final ScorerPriorityQueue subScorers;
+ private final DisiPriorityQueue<Scorer> subScorers;
private final long cost;
/** Linked list of scorers which are on the current doc */
- private ScorerWrapper topScorers;
+ private DisiWrapper<Scorer> topScorers;
protected DisjunctionScorer(Weight weight, List<Scorer> subScorers, boolean needsScores) {
super(weight);
if (subScorers.size() <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
- this.subScorers = new ScorerPriorityQueue(subScorers.size());
+ this.subScorers = new DisiPriorityQueue<Scorer>(subScorers.size());
long cost = 0;
for (Scorer scorer : subScorers) {
- final ScorerWrapper w = new ScorerWrapper(scorer);
+ final DisiWrapper<Scorer> w = new DisiWrapper<>(scorer);
cost += w.cost;
this.subScorers.add(w);
}
@@ -52,69 +50,17 @@ abstract class DisjunctionScorer extends
this.needsScores = needsScores;
}
- /**
- * A {@link DocIdSetIterator} which is a disjunction of the approximations of
- * the provided iterators.
- */
- private static class DisjunctionDISIApproximation extends DocIdSetIterator {
-
- final ScorerPriorityQueue subScorers;
- final long cost;
-
- DisjunctionDISIApproximation(ScorerPriorityQueue subScorers) {
- this.subScorers = subScorers;
- long cost = 0;
- for (ScorerWrapper w : subScorers) {
- cost += w.cost;
- }
- this.cost = cost;
- }
-
- @Override
- public long cost() {
- return cost;
- }
-
- @Override
- public int docID() {
- return subScorers.top().doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- ScorerWrapper top = subScorers.top();
- final int doc = top.doc;
- do {
- top.doc = top.approximation.nextDoc();
- top = subScorers.updateTop();
- } while (top.doc == doc);
-
- return top.doc;
- }
-
- @Override
- public int advance(int target) throws IOException {
- ScorerWrapper top = subScorers.top();
- do {
- top.doc = top.approximation.advance(target);
- top = subScorers.updateTop();
- } while (top.doc < target);
-
- return top.doc;
- }
- }
-
@Override
public TwoPhaseIterator asTwoPhaseIterator() {
boolean hasApproximation = false;
- for (ScorerWrapper w : subScorers) {
+ for (DisiWrapper<Scorer> w : subScorers) {
if (w.twoPhaseView != null) {
hasApproximation = true;
break;
}
}
- if (hasApproximation == false) {
+ if (! hasApproximation) {
// none of the sub scorers supports approximations
return null;
}
@@ -122,13 +68,13 @@ abstract class DisjunctionScorer extends
// note it is important to share the same pq as this scorer so that
// rebalancing the pq through the approximation will also rebalance
// the pq in this scorer.
- return new TwoPhaseIterator(new DisjunctionDISIApproximation(subScorers)) {
+ return new TwoPhaseIterator(new DisjunctionDISIApproximation<Scorer>(subScorers)) {
@Override
public boolean matches() throws IOException {
- ScorerWrapper topScorers = subScorers.topList();
+ DisiWrapper<Scorer> topScorers = subScorers.topList();
// remove the head of the list as long as it does not match
- while (topScorers.twoPhaseView != null && topScorers.twoPhaseView.matches() == false) {
+ while (topScorers.twoPhaseView != null && ! topScorers.twoPhaseView.matches()) {
topScorers = topScorers.next;
if (topScorers == null) {
return false;
@@ -138,9 +84,9 @@ abstract class DisjunctionScorer extends
if (needsScores) {
// if scores or freqs are needed, we also need to remove scorers
// from the top list that do not actually match
- ScorerWrapper previous = topScorers;
- for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
- if (w.twoPhaseView != null && w.twoPhaseView.matches() == false) {
+ DisiWrapper<Scorer> previous = topScorers;
+ for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
+ if (w.twoPhaseView != null && ! w.twoPhaseView.matches()) {
// w does not match, remove it
previous.next = w.next;
} else {
@@ -175,10 +121,10 @@ abstract class DisjunctionScorer extends
@Override
public final int nextDoc() throws IOException {
topScorers = null;
- ScorerWrapper top = subScorers.top();
+ DisiWrapper<Scorer> top = subScorers.top();
final int doc = top.doc;
do {
- top.doc = top.scorer.nextDoc();
+ top.doc = top.iterator.nextDoc();
top = subScorers.updateTop();
} while (top.doc == doc);
@@ -188,9 +134,9 @@ abstract class DisjunctionScorer extends
@Override
public final int advance(int target) throws IOException {
topScorers = null;
- ScorerWrapper top = subScorers.top();
+ DisiWrapper<Scorer> top = subScorers.top();
do {
- top.doc = top.scorer.advance(target);
+ top.doc = top.iterator.advance(target);
top = subScorers.updateTop();
} while (top.doc < target);
@@ -203,7 +149,7 @@ abstract class DisjunctionScorer extends
topScorers = subScorers.topList();
}
int freq = 1;
- for (ScorerWrapper w = topScorers.next; w != null; w = w.next) {
+ for (DisiWrapper<Scorer> w = topScorers.next; w != null; w = w.next) {
freq += 1;
}
return freq;
@@ -218,13 +164,13 @@ abstract class DisjunctionScorer extends
}
/** Compute the score for the given linked list of scorers. */
- protected abstract float score(ScorerWrapper topList) throws IOException;
+ protected abstract float score(DisiWrapper<Scorer> topList) throws IOException;
@Override
public final Collection<ChildScorer> getChildren() {
ArrayList<ChildScorer> children = new ArrayList<>();
- for (ScorerWrapper scorer : subScorers) {
- children.add(new ChildScorer(scorer.scorer, "SHOULD"));
+ for (DisiWrapper<Scorer> scorer : subScorers) {
+ children.add(new ChildScorer(scorer.iterator, "SHOULD"));
}
return children;
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Fri Apr 24 05:33:43 2015
@@ -20,8 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.List;
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
-
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
@@ -39,11 +37,11 @@ final class DisjunctionSumScorer extends
}
@Override
- protected float score(ScorerWrapper topList) throws IOException {
+ protected float score(DisiWrapper<Scorer> topList) throws IOException {
double score = 0;
int freq = 0;
- for (ScorerWrapper w = topList; w != null; w = w.next) {
- score += w.scorer.score();
+ for (DisiWrapper<Scorer> w = topList; w != null; w = w.next) {
+ score += w.iterator.score();
freq += 1;
}
return (float)score * coord[freq];
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/DocIdSetIterator.java Fri Apr 24 05:33:43 2015
@@ -19,6 +19,8 @@ package org.apache.lucene.search;
import java.io.IOException;
+import org.apache.lucene.search.spans.Spans;
+
/**
* This abstract class defines methods to iterate over a set of non-decreasing
* doc ids. Note that this class assumes it iterates on doc Ids, and therefore
@@ -175,4 +177,5 @@ public abstract class DocIdSetIterator {
* completely inaccurate.
*/
public abstract long cost();
+
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/MinShouldMatchSumScorer.java Fri Apr 24 05:33:43 2015
@@ -23,12 +23,11 @@ import java.util.Collection;
import java.util.Collections;
import java.util.List;
-import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
import org.apache.lucene.util.PriorityQueue;
-import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
-import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;
+import static org.apache.lucene.search.DisiPriorityQueue.leftNode;
+import static org.apache.lucene.search.DisiPriorityQueue.parentNode;
+import static org.apache.lucene.search.DisiPriorityQueue.rightNode;
/**
* A {@link Scorer} for {@link BooleanQuery} when
@@ -83,17 +82,17 @@ final class MinShouldMatchSumScorer exte
// list of scorers which 'lead' the iteration and are currently
// positioned on 'doc'
- ScorerWrapper lead;
+ DisiWrapper<Scorer> lead;
int doc; // current doc ID of the leads
int freq; // number of scorers on the desired doc ID
// priority queue of scorers that are too advanced compared to the current
// doc. Ordered by doc ID.
- final ScorerPriorityQueue head;
+ final DisiPriorityQueue<Scorer> head;
// priority queue of scorers which are behind the current doc.
// Ordered by cost.
- final ScorerWrapper[] tail;
+ final DisiWrapper<Scorer>[] tail;
int tailSize;
final Collection<ChildScorer> childScorers;
@@ -113,13 +112,13 @@ final class MinShouldMatchSumScorer exte
this.coord = coord;
this.doc = -1;
- head = new ScorerPriorityQueue(scorers.size() - minShouldMatch + 1);
+ head = new DisiPriorityQueue<Scorer>(scorers.size() - minShouldMatch + 1);
// there can be at most minShouldMatch - 1 scorers beyond the current position
// otherwise we might be skipping over matching documents
- tail = new ScorerWrapper[minShouldMatch - 1];
+ tail = new DisiWrapper[minShouldMatch - 1];
for (Scorer scorer : scorers) {
- addLead(new ScorerWrapper(scorer));
+ addLead(new DisiWrapper<Scorer>(scorer));
}
List<ChildScorer> children = new ArrayList<>();
@@ -145,13 +144,13 @@ final class MinShouldMatchSumScorer exte
// We are moving to the next doc ID, so scorers in 'lead' need to go in
// 'tail'. If there is not enough space in 'tail', then we take the least
// costly scorers and advance them.
- for (ScorerWrapper s = lead; s != null; s = s.next) {
- final ScorerWrapper evicted = insertTailWithOverFlow(s);
+ for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+ final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
if (evicted != null) {
if (evicted.doc == doc) {
- evicted.doc = evicted.scorer.nextDoc();
+ evicted.doc = evicted.iterator.nextDoc();
} else {
- evicted.doc = evicted.scorer.advance(doc + 1);
+ evicted.doc = evicted.iterator.advance(doc + 1);
}
head.add(evicted);
}
@@ -164,23 +163,23 @@ final class MinShouldMatchSumScorer exte
@Override
public int advance(int target) throws IOException {
// Same logic as in nextDoc
- for (ScorerWrapper s = lead; s != null; s = s.next) {
- final ScorerWrapper evicted = insertTailWithOverFlow(s);
+ for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+ final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(s);
if (evicted != null) {
- evicted.doc = evicted.scorer.advance(target);
+ evicted.doc = evicted.iterator.advance(target);
head.add(evicted);
}
}
// But this time there might also be scorers in 'head' behind the desired
// target so we need to do the same thing that we did on 'lead' on 'head'
- ScorerWrapper headTop = head.top();
+ DisiWrapper<Scorer> headTop = head.top();
while (headTop.doc < target) {
- final ScorerWrapper evicted = insertTailWithOverFlow(headTop);
+ final DisiWrapper<Scorer> evicted = insertTailWithOverFlow(headTop);
// We know that the tail is full since it contains at most
// minShouldMatch - 1 entries and we just moved at least minShouldMatch
// entries to it, so evicted is not null
- evicted.doc = evicted.scorer.advance(target);
+ evicted.doc = evicted.iterator.advance(target);
headTop = head.updateTop(evicted);
}
@@ -188,20 +187,20 @@ final class MinShouldMatchSumScorer exte
return doNext();
}
- private void addLead(ScorerWrapper lead) {
+ private void addLead(DisiWrapper<Scorer> lead) {
lead.next = this.lead;
this.lead = lead;
freq += 1;
}
private void pushBackLeads() throws IOException {
- for (ScorerWrapper s = lead; s != null; s = s.next) {
+ for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
addTail(s);
}
}
- private void advanceTail(ScorerWrapper top) throws IOException {
- top.doc = top.scorer.advance(doc);
+ private void advanceTail(DisiWrapper<Scorer> top) throws IOException {
+ top.doc = top.iterator.advance(doc);
if (top.doc == doc) {
addLead(top);
} else {
@@ -210,7 +209,7 @@ final class MinShouldMatchSumScorer exte
}
private void advanceTail() throws IOException {
- final ScorerWrapper top = popTail();
+ final DisiWrapper<Scorer> top = popTail();
advanceTail(top);
}
@@ -276,8 +275,8 @@ final class MinShouldMatchSumScorer exte
// we need to know about all matches
updateFreq();
double score = 0;
- for (ScorerWrapper s = lead; s != null; s = s.next) {
- score += s.scorer.score();
+ for (DisiWrapper<Scorer> s = lead; s != null; s = s.next) {
+ score += s.iterator.score();
}
return coord[freq] * (float) score;
}
@@ -289,12 +288,12 @@ final class MinShouldMatchSumScorer exte
}
/** Insert an entry in 'tail' and evict the least-costly scorer if full. */
- private ScorerWrapper insertTailWithOverFlow(ScorerWrapper s) {
+ private DisiWrapper<Scorer> insertTailWithOverFlow(DisiWrapper<Scorer> s) {
if (tailSize < tail.length) {
addTail(s);
return null;
} else if (tail.length >= 1) {
- final ScorerWrapper top = tail[0];
+ final DisiWrapper<Scorer> top = tail[0];
if (top.cost < s.cost) {
tail[0] = s;
downHeapCost(tail, tailSize);
@@ -305,16 +304,16 @@ final class MinShouldMatchSumScorer exte
}
/** Add an entry to 'tail'. Fails if over capacity. */
- private void addTail(ScorerWrapper s) {
+ private void addTail(DisiWrapper<Scorer> s) {
tail[tailSize] = s;
upHeapCost(tail, tailSize);
tailSize += 1;
}
/** Pop the least-costly scorer from 'tail'. */
- private ScorerWrapper popTail() {
+ private DisiWrapper<Scorer> popTail() {
assert tailSize > 0;
- final ScorerWrapper result = tail[0];
+ final DisiWrapper<Scorer> result = tail[0];
tail[0] = tail[--tailSize];
downHeapCost(tail, tailSize);
return result;
@@ -322,8 +321,8 @@ final class MinShouldMatchSumScorer exte
/** Heap helpers */
- private static void upHeapCost(ScorerWrapper[] heap, int i) {
- final ScorerWrapper node = heap[i];
+ private static void upHeapCost(DisiWrapper<Scorer>[] heap, int i) {
+ final DisiWrapper<Scorer> node = heap[i];
final long nodeCost = node.cost;
int j = parentNode(i);
while (j >= 0 && nodeCost < heap[j].cost) {
@@ -334,9 +333,9 @@ final class MinShouldMatchSumScorer exte
heap[i] = node;
}
- private static void downHeapCost(ScorerWrapper[] heap, int size) {
+ private static void downHeapCost(DisiWrapper<Scorer>[] heap, int size) {
int i = 0;
- final ScorerWrapper node = heap[0];
+ final DisiWrapper<Scorer> node = heap[0];
int j = leftNode(i);
if (j < size) {
int k = rightNode(j);
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java Fri Apr 24 05:33:43 2015
@@ -20,9 +20,13 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Objects;
+import org.apache.lucene.search.spans.Spans;
+
/**
- * Returned by {@link Scorer#asTwoPhaseIterator()} to expose an approximation of
- * a {@link DocIdSetIterator}. When the {@link #approximation()}'s
+ * Returned by {@link Scorer#asTwoPhaseIterator()}
+ * and {@link Spans#asTwoPhaseIterator()}
+ * to expose an approximation of a {@link DocIdSetIterator}.
+ * When the {@link #approximation()}'s
* {@link DocIdSetIterator#nextDoc()} or {@link DocIdSetIterator#advance(int)}
* return, {@link #matches()} needs to be checked in order to know whether the
* returned doc ID actually matches.
@@ -89,4 +93,16 @@ public abstract class TwoPhaseIterator {
* {@link DocIdSetIterator#NO_MORE_DOCS} -- and at most once. */
public abstract boolean matches() throws IOException;
+ /**
+ * Returns a {@link TwoPhaseIterator} for this {@link DocIdSetIterator}
+ * when available * otherwise returns null.
+ */
+ public static TwoPhaseIterator asTwoPhaseIterator(DocIdSetIterator iter) {
+ return (iter instanceof Scorer)
+ ? ((Scorer) iter).asTwoPhaseIterator()
+ : (iter instanceof Spans)
+ ? ((Spans) iter).asTwoPhaseIterator()
+ : null;
+ }
+
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java Fri Apr 24 05:33:43 2015
@@ -31,9 +31,13 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.DisiPriorityQueue;
+import org.apache.lucene.search.DisiWrapper;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.DisjunctionDISIApproximation;
+
/** Matches the union of its clauses.
*/
@@ -42,7 +46,7 @@ public class SpanOrQuery extends SpanQue
private String field;
/** Construct a SpanOrQuery merging the provided clauses.
- * All clauses must have the same field.
+ * All clauses must have the same field.
*/
public SpanOrQuery(SpanQuery... clauses) {
this.clauses = new ArrayList<>(clauses.length);
@@ -146,35 +150,16 @@ public class SpanOrQuery extends SpanQue
}
- private class SpanQueue extends PriorityQueue<Spans> {
- public SpanQueue(int size) {
- super(size);
- }
-
- @Override
- protected final boolean lessThan(Spans spans1, Spans spans2) {
- if (spans1.docID() == spans2.docID()) {
- if (spans1.startPosition() == spans2.startPosition()) {
- return spans1.endPosition() < spans2.endPosition();
- } else {
- return spans1.startPosition() < spans2.startPosition();
- }
- } else {
- return spans1.docID() < spans2.docID();
- }
- }
- }
-
@Override
public Spans getSpans(final LeafReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts)
throws IOException {
final ArrayList<Spans> subSpans = new ArrayList<>(clauses.size());
- for (SpanQuery seq : clauses) {
- Spans subSpan = seq.getSpans(context, acceptDocs, termContexts);
- if (subSpan != null) {
- subSpans.add(subSpan);
+ for (SpanQuery sq : clauses) {
+ Spans spans = sq.getSpans(context, acceptDocs, termContexts);
+ if (spans != null) {
+ subSpans.add(spans);
}
}
@@ -184,114 +169,168 @@ public class SpanOrQuery extends SpanQue
return subSpans.get(0);
}
- final SpanQueue queue = new SpanQueue(clauses.size());
+ final DisiPriorityQueue<Spans> byDocQueue = new DisiPriorityQueue<>(subSpans.size());
for (Spans spans : subSpans) {
- queue.add(spans);
+ byDocQueue.add(new DisiWrapper<>(spans));
}
+ final SpanPositionQueue byPositionQueue = new SpanPositionQueue(subSpans.size()); // when empty use -1
+
return new Spans() {
+ Spans topPositionSpans = null;
@Override
public int nextDoc() throws IOException {
- if (queue.size() == 0) { // all done
- return NO_MORE_DOCS;
- }
-
- int currentDoc = top().docID();
-
- if (currentDoc == -1) { // initially
- return advance(0);
- }
-
+ topPositionSpans = null;
+ DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+ int currentDoc = topDocSpans.doc;
do {
- if (top().nextDoc() != NO_MORE_DOCS) { // move top to next doc
- queue.updateTop();
- } else {
- queue.pop(); // exhausted a clause
- if (queue.size() == 0) {
- return NO_MORE_DOCS;
- }
- }
- // assert queue.size() > 0;
- int doc = top().docID();
- if (doc > currentDoc) {
- return doc;
- }
- } while (true);
+ topDocSpans.doc = topDocSpans.iterator.nextDoc();
+ topDocSpans = byDocQueue.updateTop();
+ } while (topDocSpans.doc == currentDoc);
+ return topDocSpans.doc;
}
- private Spans top() {
- return queue.top();
+ @Override
+ public int advance(int target) throws IOException {
+ topPositionSpans = null;
+ DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+ do {
+ topDocSpans.doc = topDocSpans.iterator.advance(target);
+ topDocSpans = byDocQueue.updateTop();
+ } while (topDocSpans.doc < target);
+ return topDocSpans.doc;
}
@Override
- public int advance(int target) throws IOException {
+ public int docID() {
+ DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+ return topDocSpans.doc;
+ }
- while ((queue.size() > 0) && (top().docID() < target)) {
- if (top().advance(target) != NO_MORE_DOCS) {
- queue.updateTop();
- } else {
- queue.pop();
+ @Override
+ public TwoPhaseIterator asTwoPhaseIterator() {
+ boolean hasApproximation = false;
+ for (DisiWrapper<Spans> w : byDocQueue) {
+ if (w.twoPhaseView != null) {
+ hasApproximation = true;
+ break;
}
}
- return (queue.size() > 0) ? top().docID() : NO_MORE_DOCS;
- }
+ if (! hasApproximation) { // none of the sub spans supports approximations
+ return null;
+ }
- @Override
- public int docID() {
- return (queue == null) ? -1
- : (queue.size() > 0) ? top().docID()
- : NO_MORE_DOCS;
+ return new TwoPhaseIterator(new DisjunctionDISIApproximation<Spans>(byDocQueue)) {
+ @Override
+ public boolean matches() throws IOException {
+ return twoPhaseCurrentDocMatches();
+ }
+ };
}
+
+ int lastDocTwoPhaseMatched = -1;
+
+ boolean twoPhaseCurrentDocMatches() throws IOException {
+ DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+ // remove the head of the list as long as it does not match
+ final int currentDoc = listAtCurrentDoc.doc;
+ while (listAtCurrentDoc.twoPhaseView != null) {
+ if (listAtCurrentDoc.twoPhaseView.matches()) {
+ // use this spans for positions at current doc:
+ listAtCurrentDoc.lastApproxMatchDoc = currentDoc;
+ break;
+ }
+ // do not use this spans for positions at current doc:
+ listAtCurrentDoc.lastApproxNonMatchDoc = currentDoc;
+ listAtCurrentDoc = listAtCurrentDoc.next;
+ if (listAtCurrentDoc == null) {
+ return false;
+ }
+ }
+ lastDocTwoPhaseMatched = currentDoc;
+ topPositionSpans = null;
+ return true;
+ }
+
+ void fillPositionQueue() throws IOException { // called at first nextStartPosition
+ assert byPositionQueue.size() == 0;
+ // add all matching Spans at current doc to byPositionQueue
+ DisiWrapper<Spans> listAtCurrentDoc = byDocQueue.topList();
+ while (listAtCurrentDoc != null) {
+ Spans spansAtDoc = listAtCurrentDoc.iterator;
+ if (lastDocTwoPhaseMatched == listAtCurrentDoc.doc) { // matched by DisjunctionDisiApproximation
+ if (listAtCurrentDoc.twoPhaseView != null) { // matched by approximation
+ if (listAtCurrentDoc.lastApproxNonMatchDoc == listAtCurrentDoc.doc) { // matches() returned false
+ spansAtDoc = null;
+ } else {
+ if (listAtCurrentDoc.lastApproxMatchDoc != listAtCurrentDoc.doc) {
+ if (! listAtCurrentDoc.twoPhaseView.matches()) {
+ spansAtDoc = null;
+ }
+ }
+ }
+ }
+ }
+ if (spansAtDoc != null) {
+ assert spansAtDoc.docID() == listAtCurrentDoc.doc;
+ assert spansAtDoc.startPosition() == -1;
+ spansAtDoc.nextStartPosition();
+ assert spansAtDoc.startPosition() != NO_MORE_POSITIONS;
+ byPositionQueue.add(spansAtDoc);
+ }
+ listAtCurrentDoc = listAtCurrentDoc.next;
+ }
+ assert byPositionQueue.size() > 0;
+ }
+
@Override
public int nextStartPosition() throws IOException {
- top().nextStartPosition();
- queue.updateTop();
- int startPos = top().startPosition();
- while (startPos == -1) { // initially at this doc
- top().nextStartPosition();
- queue.updateTop();
- startPos = top().startPosition();
+ DisiWrapper<Spans> topDocSpans = byDocQueue.top();
+ assert topDocSpans.doc != NO_MORE_DOCS;
+ if (topPositionSpans == null) {
+ byPositionQueue.clear();
+ fillPositionQueue(); // fills byPositionQueue at first position
+ topPositionSpans = byPositionQueue.top();
+ } else {
+ topPositionSpans.nextStartPosition();
+ topPositionSpans = byPositionQueue.updateTop();
}
- return startPos;
+ return topPositionSpans.startPosition();
}
@Override
public int startPosition() {
- return top().startPosition();
+ return topPositionSpans == null ? -1 : topPositionSpans.startPosition();
}
@Override
public int endPosition() {
- return top().endPosition();
+ return topPositionSpans == null ? -1 : topPositionSpans.endPosition();
}
@Override
public Collection<byte[]> getPayload() throws IOException {
- ArrayList<byte[]> result = null;
- Spans theTop = top();
- if (theTop != null && theTop.isPayloadAvailable()) {
- result = new ArrayList<>(theTop.getPayload());
- }
- return result;
+ return topPositionSpans == null
+ ? null
+ : topPositionSpans.isPayloadAvailable()
+ ? new ArrayList<>(topPositionSpans.getPayload())
+ : null;
}
@Override
public boolean isPayloadAvailable() throws IOException {
- Spans top = top();
- return top != null && top.isPayloadAvailable();
+ return (topPositionSpans != null) && topPositionSpans.isPayloadAvailable();
}
@Override
public String toString() {
- return "spans("+SpanOrQuery.this+")@"+
- ((queue == null)?"START"
- :(queue.size()>0?(docID()+": "+top().startPosition()+" - "+top().endPosition()):"END"));
+ return "spanOr("+SpanOrQuery.this+")@"+docID()+": "+startPosition()+" - "+endPosition();
}
- private long cost = -1;
+ long cost = -1;
@Override
public long cost() {
@@ -303,8 +342,8 @@ public class SpanOrQuery extends SpanQue
}
return cost;
}
-
};
}
}
+
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/spans/Spans.java Fri Apr 24 05:33:43 2015
@@ -86,11 +86,12 @@ public abstract class Spans extends DocI
*
* Note that the returned {@link TwoPhaseIterator}'s
* {@link TwoPhaseIterator#approximation() approximation} must
- * advance synchronously with this iterator: advancing the approximation must
+ * advance documents synchronously with this iterator:
+ * advancing the approximation must
* advance this iterator and vice-versa.
*
- * Implementing this method is typically useful on {@link Spans}s
- * that have a high per-document overhead in order to confirm matches.
+ * Implementing this method is typically useful on a {@link Spans}
+ * that has a high per-document overhead for confirming matches.
*
* The default implementation returns {@code null}.
*/
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java?rev=1675780&r1=1675779&r2=1675780&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java Fri Apr 24 05:33:43 2015
@@ -89,7 +89,7 @@ public abstract class PriorityQueue<T> {
* value (i.e., {@link #lessThan} should always favor the
* non-sentinel values).<br>
*
- * By default, this method returns false, which means the queue will not be
+ * By default, this method returns null, which means the queue will not be
* filled with sentinel values. Otherwise, the value returned will be used to
* pre-populate the queue. Adds sentinel values to the queue.<br>
*