You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2019/04/05 13:24:18 UTC
[lucene-solr] branch master updated: LUCENE-8701:
ToParentBlockJoinQuery now creates a child scorer that disallows skipping
over non-competitive documents if the score of a parent depends on the
score of multiple children (avg, max,
min). Additionally the score mode `none` that assigns a constant score to
each parent can early terminate the collection of top scores.
This is an automated email from the ASF dual-hosted git repository.
jimczi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 4b0c36b LUCENE-8701: ToParentBlockJoinQuery now creates a child scorer that disallows skipping over non-competitive documents if the score of a parent depends on the score of multiple children (avg, max, min). Additionally the score mode `none` that assigns a constant score to each parent can early terminate the collection of top scores.
4b0c36b is described below
commit 4b0c36b17b6253e0aad36deda5cbf87c44084ec5
Author: jimczi <ji...@apache.org>
AuthorDate: Fri Apr 5 15:24:09 2019 +0200
LUCENE-8701: ToParentBlockJoinQuery now creates a child scorer that disallows skipping over
non-competitive documents if the score of a parent depends on the score of multiple
children (avg, max, min). Additionally the score mode `none` that assigns a constant score to
each parent can early terminate the collection of top scores.
---
lucene/CHANGES.txt | 5 +
.../lucene/search/join/ToParentBlockJoinQuery.java | 26 ++++-
.../apache/lucene/search/join/TestBlockJoin.java | 6 +-
.../lucene/search/join/TestBlockJoinScorer.java | 111 +++++++++++++++++++++
4 files changed, 145 insertions(+), 3 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4927d93..ce5c81f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -88,6 +88,11 @@ Improvements
* LUCENE-8750: Implements setMissingValue() on sort fields produced from
DoubleValuesSource and LongValuesSource (Mike Sokolov via Alan Woodward)
+* LUCENE-8701: ToParentBlockJoinQuery now creates a child scorer that disallows skipping over
+ non-competitive documents if the score of a parent depends on the score of multiple
+ children (avg, max, min). Additionally the score mode `none` that assigns a constant score to
+ each parent can early terminate the collection of top scores. (Jim Ferenczi)
+
Changes in Runtime Behavior
* LUCENE-8671: Load FST off-heap also for ID-like fields if reader is not opened
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
index c42cf39..dfb4e8e 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
@@ -24,6 +24,7 @@ import java.util.Locale;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterWeight;
@@ -93,7 +94,18 @@ public class ToParentBlockJoinQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, org.apache.lucene.search.ScoreMode weightScoreMode, float boost) throws IOException {
- return new BlockJoinWeight(this, childQuery.createWeight(searcher, weightScoreMode, boost), parentsFilter, weightScoreMode.needsScores() ? scoreMode : ScoreMode.None);
+ ScoreMode childScoreMode = weightScoreMode.needsScores() ? scoreMode : ScoreMode.None;
+ final Weight childWeight;
+ if (childScoreMode == ScoreMode.None) {
+ // we don't need to compute a score for the child query so we wrap
+ // it under a constant score query that can early terminate if the
+ // minimum score is greater than 0 and the total hits that match the
+ // query is not requested.
+ childWeight = searcher.rewrite(new ConstantScoreQuery(childQuery)).createWeight(searcher, weightScoreMode, 0f);
+ } else {
+ childWeight = childQuery.createWeight(searcher, weightScoreMode, boost);
+ }
+ return new BlockJoinWeight(this, childWeight, parentsFilter, childScoreMode);
}
/** Return our child query. */
@@ -318,9 +330,19 @@ public class ToParentBlockJoinQuery extends Query {
@Override
public float getMaxScore(int upTo) throws IOException {
+ if (scoreMode == ScoreMode.None) {
+ return childScorer.getMaxScore(upTo);
+ }
return Float.POSITIVE_INFINITY;
}
+ @Override
+ public void setMinCompetitiveScore(float minScore) throws IOException {
+ if (scoreMode == ScoreMode.None) {
+ childScorer.setMinCompetitiveScore(minScore);
+ }
+ }
+
private void setScoreAndFreq() throws IOException {
if (childApproximation.docID() >= parentApproximation.docID()) {
return;
@@ -329,7 +351,7 @@ public class ToParentBlockJoinQuery extends Query {
int freq = 1;
while (childApproximation.nextDoc() < parentApproximation.docID()) {
if (childTwoPhase == null || childTwoPhase.matches()) {
- final float childScore = childScorer.score();
+ final float childScore = scoreMode == ScoreMode.None ? 0 : childScorer.score();
freq += 1;
switch (scoreMode) {
case Total:
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
index a82d33e..14ab72a 100644
--- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java
@@ -813,7 +813,11 @@ public class TestBlockJoin extends LuceneTestCase {
if ("sum of:".equals(childWeightExplanation.getDescription())) {
childWeightExplanation = childWeightExplanation.getDetails()[0];
}
- assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
+ if (agg == ScoreMode.None) {
+ assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("ConstantScore("));
+ } else {
+ assertTrue("Wrong child weight description", childWeightExplanation.getDescription().startsWith("weight(child"));
+ }
}
}
diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinScorer.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinScorer.java
new file mode 100644
index 0000000..1d2e286
--- /dev/null
+++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoinScorer.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.join;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BitSet;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestBlockJoinScorer extends LuceneTestCase {
+ public void testScoreNone() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir,
+ newIndexWriterConfig().setMergePolicy(
+ // retain doc id order
+ newLogMergePolicy(random().nextBoolean())
+ )
+ );
+ List<Document> docs = new ArrayList<>();
+ for (int i = 0; i < 10; i++) {
+ docs.clear();
+ for (int j = 0; j < i; j++) {
+ Document child = new Document();
+ child.add(newStringField("value", Integer.toString(j), Field.Store.YES));
+ docs.add(child);
+ }
+ Document parent = new Document();
+ parent.add(newStringField("docType", "parent", Field.Store.NO));
+ parent.add(newStringField("value", Integer.toString(i), Field.Store.NO));
+ docs.add(parent);
+ w.addDocuments(docs);
+ }
+ w.forceMerge(1);
+
+ IndexReader reader = w.getReader();
+ w.close();
+ IndexSearcher searcher = newSearcher(reader);
+
+ // Create a filter that defines "parent" documents in the index - here, those with docType=parent
+ BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "parent")));
+ CheckJoinIndex.check(reader, parentsFilter);
+
+ Query childQuery = new MatchAllDocsQuery();
+ ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(childQuery, parentsFilter,
+ org.apache.lucene.search.join.ScoreMode.None);
+
+ Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1);
+ LeafReaderContext context = searcher.getIndexReader().leaves().get(0);
+
+ Scorer scorer = weight.scorer(context);
+ BitSet bits = parentsFilter.getBitSet(reader.leaves().get(0));
+ int parent = 0;
+ for (int i = 0; i < 9; i++) {
+ parent = bits.nextSetBit(parent + 1);
+ assertEquals(parent, scorer.iterator().nextDoc());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+
+ scorer = weight.scorer(context);
+ scorer.setMinCompetitiveScore(0f);
+ parent = 0;
+ for (int i = 0; i < 9; i++) {
+ parent = bits.nextSetBit(parent + 1);
+ assertEquals(parent, scorer.iterator().nextDoc());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+
+ scorer = weight.scorer(context);
+ scorer.setMinCompetitiveScore(Math.nextUp(0f));
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+
+ scorer = weight.scorer(context);
+ assertEquals(2, scorer.iterator().nextDoc());
+ scorer.setMinCompetitiveScore(Math.nextUp(0f));
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());
+
+ reader.close();
+ dir.close();
+ }
+}
\ No newline at end of file