You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by za...@apache.org on 2021/11/19 05:36:42 UTC

[lucene] branch main updated: LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)

This is an automated email from the ASF dual-hosted git repository.

zacharymorn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 07ee3ba  LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)
07ee3ba is described below

commit 07ee3ba83a4c9f3abc24bf9d3fbb3c3102c4a102
Author: zacharymorn <za...@yahoo.com>
AuthorDate: Thu Nov 18 21:36:38 2021 -0800

    LUCENE-10236: Update field-weight used in CombinedFieldQuery scoring calculation (#444)
---
 .../lucene/sandbox/search/CombinedFieldQuery.java  |  2 +-
 .../sandbox/search/MultiNormsLeafSimScorer.java    |  8 +++
 .../sandbox/search/TestCombinedFieldQuery.java     | 78 ++++++++++++++++++++++
 3 files changed, 87 insertions(+), 1 deletion(-)

diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
index d3187a0..fccd6ce 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java
@@ -418,7 +418,7 @@ public final class CombinedFieldQuery extends Query implements Accountable {
       }
 
       MultiNormsLeafSimScorer scoringSimScorer =
-          new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
+          new MultiNormsLeafSimScorer(simWeight, context.reader(), fieldAndWeights.values(), true);
       LeafSimScorer nonScoringSimScorer =
           new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
       // we use termscorers + disjunction as an impl detail
diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
index ba1d69a..ebc98df 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/MultiNormsLeafSimScorer.java
@@ -21,8 +21,10 @@ import static org.apache.lucene.sandbox.search.CombinedFieldQuery.FieldAndWeight
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
+import java.util.Set;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.search.Explanation;
@@ -61,7 +63,13 @@ final class MultiNormsLeafSimScorer {
     if (needsScores) {
       final List<NumericDocValues> normsList = new ArrayList<>();
       final List<Float> weightList = new ArrayList<>();
+      final Set<String> duplicateCheckingSet = new HashSet<>();
       for (FieldAndWeight field : normFields) {
+        assert duplicateCheckingSet.add(field.field)
+            : "There is a duplicated field ["
+                + field.field
+                + "] used to construct MultiNormsLeafSimScorer";
+
         NumericDocValues norms = reader.getNormValues(field.field);
         if (norms != null) {
           normsList.add(norms);
diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
index 7798a2a..4a18b2c 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/search/TestCombinedFieldQuery.java
@@ -16,6 +16,10 @@
  */
 package org.apache.lucene.sandbox.search;
 
+import static com.carrotsearch.randomizedtesting.RandomizedTest.atMost;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
+
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import java.io.IOException;
 import java.util.Arrays;
@@ -165,6 +169,80 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
     dir.close();
   }
 
+  public void testScoringWithMultipleFieldTermsMatch() throws IOException {
+    int numMatchDoc = randomIntBetween(100, 500);
+    int numHits = Math.max(1, atMost(100)); // never 0: the collector requires numHits > 0
+    int boost1 = Math.max(1, random().nextInt(5));
+    int boost2 = Math.max(1, random().nextInt(5));
+
+    Directory dir = newDirectory();
+    Similarity similarity = randomCompatibleSimilarity();
+
+    IndexWriterConfig iwc = new IndexWriterConfig();
+    iwc.setSimilarity(similarity);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+    // every doc has "foo" and "zoo" in field a and "zoo" in field b, so both query terms match
+    for (int i = 0; i < numMatchDoc; i++) {
+      Document doc = new Document();
+
+      int freqA = random().nextInt(20) + 1;
+      for (int j = 0; j < freqA; j++) {
+        doc.add(new TextField("a", "foo", Store.NO));
+      }
+
+      freqA = random().nextInt(20) + 1;
+      if (randomBoolean()) {
+        for (int j = 0; j < freqA; j++) {
+          doc.add(new TextField("a", "foo" + j, Store.NO));
+        }
+      }
+
+      freqA = random().nextInt(20) + 1;
+      for (int j = 0; j < freqA; j++) {
+        doc.add(new TextField("a", "zoo", Store.NO));
+      }
+
+      int freqB = random().nextInt(20) + 1;
+      for (int j = 0; j < freqB; j++) {
+        doc.add(new TextField("b", "zoo", Store.NO));
+      }
+
+      freqB = random().nextInt(20) + 1;
+      if (randomBoolean()) {
+        for (int j = 0; j < freqB; j++) {
+          doc.add(new TextField("b", "zoo" + j, Store.NO));
+        }
+      }
+
+      int freqC = random().nextInt(20) + 1;
+      for (int j = 0; j < freqC; j++) {
+        doc.add(new TextField("c", "bla" + j, Store.NO));
+      }
+      w.addDocument(doc);
+    }
+
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    searcher.setSimilarity(similarity);
+
+    CombinedFieldQuery query =
+        new CombinedFieldQuery.Builder()
+            .addField("a", (float) boost1)
+            .addField("b", (float) boost2)
+            .addTerm(new BytesRef("foo"))
+            .addTerm(new BytesRef("zoo"))
+            .build();
+    // no score checks below: the search only has to complete without throwing
+    TopScoreDocCollector completeCollector =
+        TopScoreDocCollector.create(numHits, null, Integer.MAX_VALUE);
+    searcher.search(query, completeCollector);
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
   public void testNormsDisabled() throws IOException {
     Directory dir = newDirectory();
     Similarity similarity = randomCompatibleSimilarity();