You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/13 17:58:33 UTC
svn commit: r1659601 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/test/org/apache/lucene/search/
Author: jpountz
Date: Fri Feb 13 16:58:33 2015
New Revision: 1659601
URL: http://svn.apache.org/r1659601
Log:
LUCENE-6198: Two-phase execution for phrase queries and conjunctions.
Added:
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
- copied unchanged from r1659599, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
- copied, changed from r1659599, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
- copied, changed from r1659599, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Scorer.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1659601&r1=1659600&r2=1659601&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Feb 13 16:58:33 2015
@@ -49,6 +49,11 @@ Optimizations
* LUCENE-6233 Speed up CheckIndex when the index has term vectors
(Robert Muir, Mike McCandless)
+* LUCENE-6198: Added the TwoPhaseDocIdSetIterator API, exposed on scorers which
+ is for now only used on phrase queries and conjunctions in order to check
+ positions lazily if the phrase query is in a conjunction with other queries.
+ (Robert Muir, Adrien Grand)
+
API Changes
* LUCENE-6204, LUCENE-6208: Simplify CompoundFormat: remove files()
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java?rev=1659601&r1=1659600&r2=1659601&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java Fri Feb 13 16:58:33 2015
@@ -20,18 +20,14 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Comparator;
import java.util.List;
-import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {
- protected int lastDoc = -1;
- protected final DocsAndFreqs[] docsAndFreqs;
- private final DocsAndFreqs lead;
+ private final ConjunctionDISI disi;
private final Scorer[] scorers;
private final float coord;
@@ -44,68 +40,28 @@ class ConjunctionScorer extends Scorer {
super(weight);
assert required.containsAll(scorers);
this.coord = coord;
- this.docsAndFreqs = new DocsAndFreqs[required.size()];
- for (int i = 0; i < required.size(); ++i) {
- docsAndFreqs[i] = new DocsAndFreqs(required.get(i));
- }
- // Sort the array the first time to allow the least frequent DocsEnum to
- // lead the matching.
- ArrayUtil.timSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
- @Override
- public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
- return Long.compare(o1.cost, o2.cost);
- }
- });
-
- lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection
-
+ this.disi = ConjunctionDISI.intersect(required);
this.scorers = scorers.toArray(new Scorer[scorers.size()]);
}
- private int doNext(int doc) throws IOException {
- for(;;) {
- // doc may already be NO_MORE_DOCS here, but we don't check explicitly
- // since all scorers should advance to NO_MORE_DOCS, match, then
- // return that value.
- advanceHead: for(;;) {
- for (int i = 1; i < docsAndFreqs.length; i++) {
- // invariant: docsAndFreqs[i].doc <= doc at this point.
-
- // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
- // on the previous iteration and the advance on the lead scorer exactly matched.
- if (docsAndFreqs[i].doc < doc) {
- docsAndFreqs[i].doc = docsAndFreqs[i].iterator.advance(doc);
-
- if (docsAndFreqs[i].doc > doc) {
- // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
- doc = docsAndFreqs[i].doc;
- break advanceHead;
- }
- }
- }
- // success - all DocsEnums are on the same doc
- return doc;
- }
- // advance head for next iteration
- doc = lead.doc = lead.iterator.advance(doc);
- }
+ @Override
+ public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+ return disi.asTwoPhaseIterator();
}
@Override
public int advance(int target) throws IOException {
- lead.doc = lead.iterator.advance(target);
- return lastDoc = doNext(lead.doc);
+ return disi.advance(target);
}
@Override
public int docID() {
- return lastDoc;
+ return disi.docID();
}
@Override
public int nextDoc() throws IOException {
- lead.doc = lead.iterator.nextDoc();
- return lastDoc = doNext(lead.doc);
+ return disi.nextDoc();
}
@Override
@@ -120,7 +76,7 @@ class ConjunctionScorer extends Scorer {
@Override
public int freq() {
- return docsAndFreqs.length;
+ return scorers.length;
}
@Override
@@ -145,12 +101,12 @@ class ConjunctionScorer extends Scorer {
@Override
public long cost() {
- return lead.iterator.cost();
+ return disi.cost();
}
@Override
public Collection<ChildScorer> getChildren() {
- ArrayList<ChildScorer> children = new ArrayList<>(docsAndFreqs.length);
+ ArrayList<ChildScorer> children = new ArrayList<>();
for (Scorer scorer : scorers) {
children.add(new ChildScorer(scorer, "MUST"));
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java?rev=1659601&r1=1659600&r2=1659601&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java Fri Feb 13 16:58:33 2015
@@ -18,9 +18,11 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;
@@ -49,10 +51,11 @@ final class ExactPhraseScorer extends Sc
}
}
+ private final ConjunctionDISI conjunction;
+
private final ChunkState[] chunkStates;
private final PostingsEnum lead;
- private int docID = -1;
private int freq;
private final Similarity.SimScorer docScorer;
@@ -72,49 +75,46 @@ final class ExactPhraseScorer extends Sc
// min(cost)
cost = lead.cost();
+ List<DocIdSetIterator> iterators = new ArrayList<>();
for(int i=0;i<postings.length;i++) {
chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position);
+ iterators.add(postings[i].postings);
}
+ conjunction = ConjunctionDISI.intersect(iterators);
}
-
+
+ @Override
+ public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+ return new TwoPhaseDocIdSetIterator() {
+
+ @Override
+ public boolean matches() throws IOException {
+ return phraseFreq() > 0;
+ }
+
+ @Override
+ public DocIdSetIterator approximation() {
+ return conjunction;
+ }
+ };
+ }
+
private int doNext(int doc) throws IOException {
- for(;;) {
- // TODO: don't dup this logic from conjunctionscorer :)
- advanceHead: for(;;) {
- for (int i = 1; i < chunkStates.length; i++) {
- final PostingsEnum de = chunkStates[i].posEnum;
- if (de.docID() < doc) {
- int d = de.advance(doc);
-
- if (d > doc) {
- // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
- doc = d;
- break advanceHead;
- }
- }
- }
- // all DocsEnums are on the same doc
- if (doc == NO_MORE_DOCS) {
- return doc;
- } else if (phraseFreq() > 0) {
- return doc; // success: matches phrase
- } else {
- doc = lead.nextDoc(); // doesn't match phrase
- }
+ for (;; doc = conjunction.nextDoc()) {
+ if (doc == NO_MORE_DOCS || phraseFreq() > 0) {
+ return doc;
}
- // advance head for next iteration
- doc = lead.advance(doc);
}
}
@Override
public int nextDoc() throws IOException {
- return docID = doNext(lead.nextDoc());
+ return doNext(conjunction.nextDoc());
}
@Override
public int advance(int target) throws IOException {
- return docID = doNext(lead.advance(target));
+ return doNext(conjunction.advance(target));
}
@Override
@@ -149,12 +149,12 @@ final class ExactPhraseScorer extends Sc
@Override
public int docID() {
- return docID;
+ return conjunction.docID();
}
@Override
public float score() {
- return docScorer.score(docID, freq);
+ return docScorer.score(docID(), freq);
}
private int phraseFreq() throws IOException {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Scorer.java?rev=1659601&r1=1659600&r2=1659601&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Scorer.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/Scorer.java Fri Feb 13 16:58:33 2015
@@ -60,7 +60,7 @@ public abstract class Scorer extends Pos
* {@link LeafCollector#collect}.
*/
public abstract float score() throws IOException;
-
+
/** returns parent Weight
* @lucene.experimental
*/
@@ -99,4 +99,23 @@ public abstract class Scorer extends Pos
this.relationship = relationship;
}
}
+
+ /**
+ * Optional method: Return a {@link TwoPhaseDocIdSetIterator} view of this
+ * {@link Scorer}. A return value of {@code null} indicates that
+ * two-phase iteration is not supported.
+ *
+ * Note that the returned {@link TwoPhaseDocIdSetIterator}'s
+ * {@link TwoPhaseDocIdSetIterator#approximation() approximation} must
+ * advance synchronously with this iterator: advancing the approximation must
+ * advance this iterator and vice-versa.
+ *
+ * Implementing this method is typically useful on {@link Scorer}s
+ * that have a high per-document overhead in order to confirm matches.
+ *
+ * The default implementation returns {@code null}.
+ */
+ public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
+ return null;
+ }
}
Copied: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java (from r1659599, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java?p2=lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java&r1=1659599&r2=1659601&rev=1659601&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/search/TwoPhaseDocIdSetIterator.java Fri Feb 13 16:58:33 2015
@@ -30,7 +30,7 @@ public abstract class TwoPhaseDocIdSetIt
/** Return a {@link DocIdSetIterator} view of the provided
* {@link TwoPhaseDocIdSetIterator}. */
- public static DocIdSetIterator asDocIdSetIterator(TwoPhaseDocIdSetIterator twoPhaseIterator) {
+ public static DocIdSetIterator asDocIdSetIterator(final TwoPhaseDocIdSetIterator twoPhaseIterator) {
final DocIdSetIterator approximation = twoPhaseIterator.approximation();
return new DocIdSetIterator() {
Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java?rev=1659601&r1=1659600&r2=1659601&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQuery.java Fri Feb 13 16:58:33 2015
@@ -590,4 +590,33 @@ public class TestBooleanQuery extends Lu
w.close();
dir.close();
}
+
+ public void testConjunctionSupportsApproximations() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ Field f = newTextField("field", "a b c", Field.Store.NO);
+ doc.add(f);
+ w.addDocument(doc);
+ w.commit();
+
+ DirectoryReader reader = w.getReader();
+ final IndexSearcher searcher = new IndexSearcher(reader);
+
+ PhraseQuery pq = new PhraseQuery();
+ pq.add(new Term("field", "a"));
+ pq.add(new Term("field", "b"));
+
+ BooleanQuery q = new BooleanQuery();
+ q.add(pq, Occur.MUST);
+ q.add(new TermQuery(new Term("field", "c")), Occur.FILTER);
+
+ final Weight weight = searcher.createNormalizedWeight(q, random().nextBoolean());
+ final Scorer scorer = weight.scorer(reader.leaves().get(0), null);
+ assertNotNull(scorer.asTwoPhaseIterator());
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
}
Copied: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java (from r1659599, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java?p2=lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java&p1=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java&r1=1659599&r2=1659601&rev=1659601&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java Fri Feb 13 16:58:33 2015
@@ -55,7 +55,7 @@ public class TestConjunctionDISI extends
* an exception in order to make sure that {@link ConjunctionDISI} takes
* advantage of the {@link TwoPhaseDocIdSetIterator} view.
*/
- private static Scorer scorer(DocIdSetIterator it, TwoPhaseDocIdSetIterator twoPhaseIterator) {
+ private static Scorer scorer(final DocIdSetIterator it, final TwoPhaseDocIdSetIterator twoPhaseIterator) {
return new Scorer(null) {
@Override