You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by za...@apache.org on 2021/11/19 05:36:42 UTC
[lucene] branch main updated: LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)
This is an automated email from the ASF dual-hosted git repository.
zacharymorn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 07ee3ba LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)
07ee3ba is described below
commit 07ee3ba83a4c9f3abc24bf9d3fbb3c3102c4a102
Author: zacharymorn <za...@yahoo.com>
AuthorDate: Thu Nov 18 21:36:38 2021 -0800
LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)
---
.../lucene/sandbox/search/CombinedFieldQuery.java | 2 +-
.../sandbox/search/MultiNormsLeafSimScorer.java | 8 +++
.../sandbox/search/TestCombinedFieldQuery.java | 78 ++++++++++++++++++++++
3 files changed, 87 insertions(+), 1 deletion(-)
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
index d3187a0..fccd6ce 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
@@ -418,7 +418,7 @@ public final class CombinedFieldQuery extends Query implements Accountable {
}
MultiNormsLeafSimScorer scoringSimScorer =
- new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
+ new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
LeafSimScorer nonScoringSimScorer =
new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
// we use termscorers + disjunction as an impl detail
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
index ba1d69a..ebc98df 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
@@ -21,8 +21,10 @@ import static org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashSet;
import java.util.List;
import java.util.Objects;
+import java.util.Set;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.Explanation;
@@ -61,7 +63,13 @@ final class MultiNormsLeafSimScorer {
if (needsScores) {
final List<NumericDocValues> normsList = new ArrayList<>();
final List<Float> weightList = new ArrayList<>();
+ final Set<String> duplicateCheckingSet = new HashSet<>();
for (FieldAndWeight field : normFields) {
+ assert duplicateCheckingSet.add(field.field)
+ : "There is a duplicated field ["
+ + field.field
+ + "] used to construct MultiNormsLeafSimScorer";
+
NumericDocValues norms = reader.getNormValues(field.field);
if (norms != null) {
normsList.add(norms);
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
index 7798a2a..4a18b2c 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
@@ -16,6 +16,10 @@
*/
package org.apache.lucene.sandbox.search;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.atMost;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import java.io.IOException;
import java.util.Arrays;
@@ -165,6 +169,80 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
dir.close();
}
+ public void testScoringWithMultipleFieldTermsMatch() throws IOException { // two query terms over two weighted fields
+ int numMatchDoc = randomIntBetween(100, 500); // number of documents indexed below
+ int numHits = atMost(100); // hit count handed to the collector
+ int boost1 = Math.max(1, random().nextInt(5)); // weight for field "a", clamped to at least 1
+ int boost2 = Math.max(1, random().nextInt(5)); // weight for field "b", clamped to at least 1
+
+ Directory dir = newDirectory();
+ Similarity similarity = randomCompatibleSimilarity();
+
+ IndexWriterConfig iwc = new IndexWriterConfig();
+ iwc.setSimilarity(similarity); // same similarity is set on the searcher below
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+ // index documents that may match the query terms "foo" and "zoo"
+ for (int i = 0; i < numMatchDoc; i++) {
+ Document doc = new Document();
+
+ int freqA = random().nextInt(20) + 1; // 1..20 "foo" values added to field "a"
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo", Store.NO));
+ }
+
+ freqA = random().nextInt(20) + 1; // variable reused: count of "foo<j>" values for field "a"
+ if (randomBoolean()) { // these non-query values are only added on a coin flip
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "foo" + j, Store.NO));
+ }
+ }
+
+ freqA = random().nextInt(20) + 1; // variable reused again: 1..20 "zoo" values for field "a"
+ for (int j = 0; j < freqA; j++) {
+ doc.add(new TextField("a", "zoo", Store.NO));
+ }
+
+ int freqB = random().nextInt(20) + 1; // 1..20 "zoo" values added to field "b"
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "zoo", Store.NO));
+ }
+
+ freqB = random().nextInt(20) + 1; // variable reused: count of "zoo<j>" values for field "b"
+ if (randomBoolean()) { // these non-query values are only added on a coin flip
+ for (int j = 0; j < freqB; j++) {
+ doc.add(new TextField("b", "zoo" + j, Store.NO));
+ }
+ }
+
+ int freqC = random().nextInt(20) + 1; // 1..20 "bla<j>" values for field "c"
+ for (int j = 0; j < freqC; j++) {
+ doc.add(new TextField("c", "bla" + j, Store.NO)); // field "c" is not one of the query's fields
+ }
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(similarity);
+
+ CombinedFieldQuery query = // fields "a" and "b" with their weights, terms "foo" and "zoo"
+ new CombinedFieldQuery.Builder()
+ .addField("a", (float) boost1)
+ .addField("b", (float) boost2)
+ .addTerm(new BytesRef("foo"))
+ .addTerm(new BytesRef("zoo"))
+ .build();
+
+ TopScoreDocCollector completeCollector =
+ TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE);
+ searcher.search(query, completeCollector); // no assertions follow; the test only runs the search
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+
public void testNormsDisabled() throws IOException {
Directory dir = newDirectory();
Similarity similarity = randomCompatibleSimilarity();