You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/01/30 10:20:48 UTC
[3/6] lucene-solr:master: LUCENE-7654: To-parent block joins should
implement two-phase iteration.
LUCENE-7654: To-parent block joins should implement two-phase iteration.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9d5dc0cf
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9d5dc0cf
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9d5dc0cf
Branch: refs/heads/master
Commit: 9d5dc0cf573cf8fc75dd7799844db2cb0fa52da8
Parents: 076662d
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Jan 30 10:46:22 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Mon Jan 30 10:46:22 2017 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
.../search/join/ToParentBlockJoinQuery.java | 336 ++++++++++---------
.../lucene/search/join/TestBlockJoin.java | 5 +-
.../search/join/TestBlockJoinValidation.java | 19 --
4 files changed, 189 insertions(+), 175 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6a0d17d..17a528e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -135,6 +135,10 @@ Optimizations
whether BKD cells are entirely within the distance close to the dateline.
(Adrien Grand)
+* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
+ computes scores lazily in order to be faster when used in conjunctions.
+ (Adrien Grand)
+
Build
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index 6369eea..2837423 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -20,15 +20,18 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Locale;
+
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.ScorerSupplier;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
@@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
private final ScoreMode scoreMode;
/** Create a ToParentBlockJoinQuery.
- *
+ *
* @param childQuery Query matching child documents.
* @param parentsFilter Filter identifying the parent documents.
* @param scoreMode How to aggregate multiple child scores
@@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores, boost), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
}
-
+
/** Return our child query. */
public Query getChildQuery() {
return childQuery;
@@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
this.scoreMode = scoreMode;
}
- // NOTE: acceptDocs applies (and is checked) only in the
- // parent document space
@Override
- public Scorer scorer(LeafReaderContext readerContext) throws IOException {
-
- final Scorer childScorer = in.scorer(readerContext);
- if (childScorer == null) {
- // No matches
+ public Scorer scorer(LeafReaderContext context) throws IOException {
+ final ScorerSupplier scorerSupplier = scorerSupplier(context);
+ if (scorerSupplier == null) {
return null;
}
+ return scorerSupplier.get(false);
+ }
- final int firstChildDoc = childScorer.iterator().nextDoc();
- if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
- // No matches
+ // NOTE: acceptDocs applies (and is checked) only in the
+ // parent document space
+ @Override
+ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
+ final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
+ if (childScorerSupplier == null) {
return null;
}
// NOTE: this does not take accept docs into account, the responsibility
// to not match deleted docs is on the scorer
- final BitSet parents = parentsFilter.getBitSet(readerContext);
-
+ final BitSet parents = parentsFilter.getBitSet(context);
if (parents == null) {
// No matches
return null;
}
- return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
+ return new ScorerSupplier() {
+
+ @Override
+ public Scorer get(boolean randomAccess) throws IOException {
+ return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
+ }
+
+ @Override
+ public long cost() {
+ return childScorerSupplier.cost();
+ }
+ };
}
@Override
@@ -154,180 +168,191 @@ public class ToParentBlockJoinQuery extends Query {
return Explanation.noMatch("Not a match");
}
}
-
- static class BlockJoinScorer extends Scorer {
- private final Scorer childScorer;
+
+ private static class ParentApproximation extends DocIdSetIterator {
+
+ private final DocIdSetIterator childApproximation;
private final BitSet parentBits;
- private final ScoreMode scoreMode;
- private int parentDoc = -1;
- private int prevParentDoc;
- private float parentScore;
- private int parentFreq;
- private int nextChildDoc;
- private int childDocUpto;
-
- public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
- super(weight);
- //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ private int doc = -1;
+
+ ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
+ this.childApproximation = childApproximation;
this.parentBits = parentBits;
- this.childScorer = childScorer;
- this.scoreMode = scoreMode;
- nextChildDoc = firstChildDoc;
}
@Override
- public Collection<ChildScorer> getChildren() {
- return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ public int docID() {
+ return doc;
}
@Override
- public DocIdSetIterator iterator() {
- return new DocIdSetIterator() {
- final DocIdSetIterator childIt = childScorer.iterator();
-
- @Override
- public int nextDoc() throws IOException {
- //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
- if (nextChildDoc == NO_MORE_DOCS) {
- //System.out.println(" end");
- return parentDoc = NO_MORE_DOCS;
- }
-
- // Gather all children sharing the same parent as
- // nextChildDoc
-
- parentDoc = parentBits.nextSetBit(nextChildDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- //System.out.println(" parentDoc=" + parentDoc);
- assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
-
- float totalScore = 0;
- float maxScore = Float.NEGATIVE_INFINITY;
- float minScore = Float.POSITIVE_INFINITY;
-
- childDocUpto = 0;
- parentFreq = 0;
- do {
-
- //System.out.println(" c=" + nextChildDoc);
- if (scoreMode != ScoreMode.None) {
- // TODO: specialize this into dedicated classes per-scoreMode
- final float childScore = childScorer.score();
- final int childFreq = childScorer.freq();
- maxScore = Math.max(childScore, maxScore);
- minScore = Math.min(childScore, minScore);
- totalScore += childScore;
- parentFreq += childFreq;
- }
- childDocUpto++;
- nextChildDoc = childIt.nextDoc();
- } while (nextChildDoc < parentDoc);
-
- // Parent & child docs are supposed to be
- // orthogonal:
- checkOrthogonal(nextChildDoc, parentDoc);
-
- switch(scoreMode) {
- case Avg:
- parentScore = totalScore / childDocUpto;
- break;
- case Max:
- parentScore = maxScore;
- break;
- case Min:
- parentScore = minScore;
- break;
- case Total:
- parentScore = totalScore;
- break;
- case None:
- break;
- }
-
- //System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
- return parentDoc;
- }
-
- @Override
- public int advance(int parentTarget) throws IOException {
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
- //System.out.println("Q.advance parentTarget=" + parentTarget);
- if (parentTarget == NO_MORE_DOCS) {
- return parentDoc = NO_MORE_DOCS;
- }
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= parentBits.length()) {
+ return doc = NO_MORE_DOCS;
+ }
+ final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
+ int childDoc = childApproximation.docID();
+ if (childDoc < firstChildTarget) {
+ childDoc = childApproximation.advance(firstChildTarget);
+ }
+ if (childDoc >= parentBits.length() - 1) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = parentBits.nextSetBit(childDoc + 1);
+ }
- if (parentTarget == 0) {
- // Callers should only be passing in a docID from
- // the parent space, so this means this parent
- // has no children (it got docID 0), so it cannot
- // possibly match. We must handle this case
- // separately otherwise we pass invalid -1 to
- // prevSetBit below:
- return nextDoc();
- }
+ @Override
+ public long cost() {
+ return childApproximation.cost();
+ }
+ }
- prevParentDoc = parentBits.prevSetBit(parentTarget-1);
+ private static class ParentTwoPhase extends TwoPhaseIterator {
- //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
- assert prevParentDoc >= parentDoc;
- if (prevParentDoc > nextChildDoc) {
- nextChildDoc = childIt.advance(prevParentDoc);
- // System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
- //} else {
- //System.out.println(" skip childScorer advance");
- }
+ private final ParentApproximation parentApproximation;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
- // Parent & child docs are supposed to be orthogonal:
- checkOrthogonal(nextChildDoc, prevParentDoc);
+ ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
+ super(parentApproximation);
+ this.parentApproximation = parentApproximation;
+ this.childApproximation = childTwoPhase.approximation();
+ this.childTwoPhase = childTwoPhase;
+ }
- final int nd = nextDoc();
- //System.out.println(" return nextParentDoc=" + nd);
- return nd;
+ @Override
+ public boolean matches() throws IOException {
+ assert childApproximation.docID() < parentApproximation.docID();
+ do {
+ if (childTwoPhase.matches()) {
+ return true;
}
+ } while (childApproximation.nextDoc() < parentApproximation.docID());
+ return false;
+ }
- @Override
- public int docID() {
- return parentDoc;
- }
+ @Override
+ public float matchCost() {
+ // TODO: how could we compute a match cost?
+ return childTwoPhase.matchCost() + 10;
+ }
+ }
- @Override
- public long cost() {
- return childIt.cost();
- }
- };
+ static class BlockJoinScorer extends Scorer {
+ private final Scorer childScorer;
+ private final BitSet parentBits;
+ private final ScoreMode scoreMode;
+ private final DocIdSetIterator childApproximation;
+ private final TwoPhaseIterator childTwoPhase;
+ private final ParentApproximation parentApproximation;
+ private final ParentTwoPhase parentTwoPhase;
+ private float score;
+ private int freq;
+
+ public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
+ super(weight);
+ //System.out.println("Q.init firstChildDoc=" + firstChildDoc);
+ this.parentBits = parentBits;
+ this.childScorer = childScorer;
+ this.scoreMode = scoreMode;
+ childTwoPhase = childScorer.twoPhaseIterator();
+ if (childTwoPhase == null) {
+ childApproximation = childScorer.iterator();
+ parentApproximation = new ParentApproximation(childApproximation, parentBits);
+ parentTwoPhase = null;
+ } else {
+ childApproximation = childTwoPhase.approximation();
+ parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
+ parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
+ }
}
- private void checkOrthogonal(int childDoc, int parentDoc) {
- if (childDoc==parentDoc) {
- throw new IllegalStateException("Child query must not match same docs with parent filter. "
- + "Combine them as must clauses (+) to find a problem doc. "
- + "docId=" + nextChildDoc + ", " + childScorer.getClass());
-
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ if (parentTwoPhase == null) {
+ // the approximation is exact
+ return parentApproximation;
+ } else {
+ return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
}
}
@Override
+ public TwoPhaseIterator twoPhaseIterator() {
+ return parentTwoPhase;
+ }
+
+ @Override
public int docID() {
- return parentDoc;
+ return parentApproximation.docID();
}
@Override
public float score() throws IOException {
- return parentScore;
+ setScoreAndFreq();
+ return score;
}
@Override
- public int freq() {
- return parentFreq;
+ public int freq() throws IOException {
+ setScoreAndFreq();
+ return freq;
+ }
+
+ private void setScoreAndFreq() throws IOException {
+ if (childApproximation.docID() >= parentApproximation.docID()) {
+ return;
+ }
+ double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
+ int freq = 1;
+ while (childApproximation.nextDoc() < parentApproximation.docID()) {
+ if (childTwoPhase == null || childTwoPhase.matches()) {
+ final float childScore = childScorer.score();
+ freq += 1;
+ switch (scoreMode) {
+ case Total:
+ case Avg:
+ score += childScore;
+ break;
+ case Min:
+ score = Math.min(score, childScore);
+ break;
+ case Max:
+ score = Math.min(score, childScore);
+ break;
+ case None:
+ break;
+ default:
+ throw new AssertionError();
+ }
+ }
+ }
+ if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
+ throw new IllegalStateException("Child query must not match same docs with parent filter. "
+ + "Combine them as must clauses (+) to find a problem doc. "
+ + "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
+ }
+ if (scoreMode == ScoreMode.Avg) {
+ score /= freq;
+ }
+ this.score = (float) score;
+ this.freq = freq;
}
public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
+ int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
- int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
+ int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
Explanation bestChild = null;
int matches = 0;
@@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
}
}
+ assert freq() == matches;
return Explanation.match(score(), String.format(Locale.ROOT,
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index f508f84..a13e66f 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
}
- final Query childQuery;
+ Query childQuery;
if (random().nextInt(3) == 2) {
final int childFieldID = random().nextInt(childFields.length);
childQuery = new TermQuery(new Term("child" + childFieldID,
@@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
childQuery = bq.build();
}
+ if (random().nextBoolean()) {
+ childQuery = new RandomApproximationQuery(childQuery, random());
+ }
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9d5dc0cf/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
----------------------------------------------------------------------
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
index aa68d09..cb3762c 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinValidation.java
@@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
}
- public void testAdvanceValidationForToParentBjq() throws Exception {
- int randomChildNumber = getRandomChildNumber(0);
- // we need to make advance method meet wrong document, so random child number
- // in BJQ must be greater than child number in Boolean clause
- int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
- Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
- ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
- // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
- BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
- WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
- conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
- conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
-
- IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
- indexSearcher.search(conjunctionQuery.build(), 1);
- });
- assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
- }
-
public void testNextDocValidationForToChildBjq() throws Exception {
Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));