You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/07/28 14:44:54 UTC
[lucene-solr] branch branch_8x updated: LUCENE-10039: Fix single-field scoring for CombinedFieldQuery (#2535)

This is an automated email from the ASF dual-hosted git repository.

julietibs pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new e317622  LUCENE-10039: Fix single-field scoring for CombinedFieldQuery (#2535)
e317622 is described below

commit e31762253fcf7ef85fa0c09fdb40d3daf201a9d1
Author: Julie Tibshirani <ju...@gmail.com>
AuthorDate: Wed Jul 28 17:28:02 2021 +0300

    LUCENE-10039: Fix single-field scoring for CombinedFieldQuery (#2535)
    
    When there's only one field, CombinedFieldQuery will ignore its weight while
    scoring. This makes the scoring inconsistent, since the field weight is supposed
    to multiply its term frequency.
    
    This PR removes the optimizations around single-field scoring to make sure the
    weight is always taken into account. These optimizations are not critical since
    it should be uncommon to use CombinedFieldQuery with only one field.
    
    This backport also incorporates the part of LUCENE-9823 that applies to
    CombinedFieldQuery. We no longer rewrite single-field queries, which can also
    change their scoring.
---
 lucene/CHANGES.txt                                 |  3 ++
 .../apache/lucene/search/CombinedFieldQuery.java   | 23 +---------
 .../lucene/search/MultiNormsLeafSimScorer.java     |  2 -
 .../lucene/search/TestCombinedFieldQuery.java      | 52 +++++++++++++++++-----
 4 files changed, 46 insertions(+), 34 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index ff7c3db..db68902 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -76,6 +76,9 @@ Bug Fixes
 * LUCENE-10026: Fix CombinedFieldQuery equals and hashCode, which ensures
   query rewrites don't drop CombinedFieldQuery clauses. (Julie Tibshirani)
 
+* LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single
+  field. (Julie Tibshirani)
+
 Other
 ---------------------
 
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CombinedFieldQuery.java
index 6a4f16c..2d6bf93 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/CombinedFieldQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/CombinedFieldQuery.java
@@ -235,22 +235,9 @@ public final class CombinedFieldQuery extends Query implements Accountable {
 
   @Override
   public Query rewrite(IndexReader reader) throws IOException {
-    // optimize zero and single field cases
-    if (terms.length == 0) {
+    if (terms.length == 0 || fieldAndWeights.isEmpty()) {
       return new BooleanQuery.Builder().build();
     }
-    // single field and one term
-    if (fieldTerms.length == 1) {
-      return new TermQuery(fieldTerms[0]);
-    }
-    // single field and multiple terms
-    if (fieldAndWeights.size() == 1) {
-      SynonymQuery.Builder builder = new SynonymQuery.Builder(fieldTerms[0].field());
-      for (Term term : fieldTerms) {
-        builder.addTerm(term);
-      }
-      return builder.build();
-    }
     return this;
   }
 
@@ -421,13 +408,7 @@ public final class CombinedFieldQuery extends Query implements Accountable {
         return null;
       }
 
-      // we must optimize this case (term not in segment), disjunctions require >= 2 subs
-      if (iterators.size() == 1) {
-        final LeafSimScorer scoringSimScorer =
-            new LeafSimScorer(simWeight, context.reader(), fields.get(0).field, true);
-        return new TermScorer(this, iterators.get(0), scoringSimScorer);
-      }
-      final MultiNormsLeafSimScorer scoringSimScorer =
+      MultiNormsLeafSimScorer scoringSimScorer =
           new MultiNormsLeafSimScorer(simWeight, context.reader(), fields, true);
       LeafSimScorer nonScoringSimScorer =
           new LeafSimScorer(simWeight, context.reader(), "pseudo_field", false);
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
index 802a53a..4783604 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/MultiNormsLeafSimScorer.java
@@ -69,8 +69,6 @@ final class MultiNormsLeafSimScorer {
 
       if (normsList.isEmpty()) {
         norms = null;
-      } else if (normsList.size() == 1) {
-        norms = normsList.get(0);
       } else {
         final NumericDocValues[] normsArr = normsList.toArray(new NumericDocValues[0]);
         final float[] weightArr = new float[normsList.size()];
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java
index 8e2fba1..147f447 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestCombinedFieldQuery.java
@@ -57,17 +57,6 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
     actual = searcher.rewrite(builder.build());
     assertEquals(actual, new MatchNoDocsQuery());
     builder.addTerm(new BytesRef("foo"));
-    actual = searcher.rewrite(builder.build());
-    assertEquals(actual, new TermQuery(new Term("field", "foo")));
-    builder.addTerm(new BytesRef("bar"));
-    actual = searcher.rewrite(builder.build());
-    assertEquals(
-        actual,
-        new SynonymQuery.Builder("field")
-            .addTerm(new Term("field", "foo"))
-            .addTerm(new Term("field", "bar"))
-            .build());
-    builder.addField("another_field", 1f);
     Query query = builder.build();
     actual = searcher.rewrite(query);
     assertEquals(actual, query);
@@ -319,6 +308,47 @@ public class TestCombinedFieldQuery extends LuceneTestCase {
     dir.close();
   }
 
+  public void testCopyFieldWithSingleField() throws IOException {
+    Directory dir = newDirectory();
+    Similarity similarity = randomCompatibleSimilarity();
+
+    IndexWriterConfig iwc = new IndexWriterConfig();
+    iwc.setSimilarity(similarity);
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+
+    int boost = Math.max(1, random().nextInt(5));
+    int numMatch = atLeast(10);
+    for (int i = 0; i < numMatch; i++) {
+      Document doc = new Document();
+      int freqA = random().nextInt(5) + 1;
+      for (int j = 0; j < freqA; j++) {
+        doc.add(new TextField("a", "foo", Store.NO));
+      }
+
+      int freqB = freqA * boost;
+      for (int j = 0; j < freqB; j++) {
+        doc.add(new TextField("b", "foo", Store.NO));
+      }
+
+      w.addDocument(doc);
+    }
+
+    IndexReader reader = w.getReader();
+    IndexSearcher searcher = newSearcher(reader);
+    searcher.setSimilarity(similarity);
+    CombinedFieldQuery query =
+        new CombinedFieldQuery.Builder()
+            .addField("a", (float) boost)
+            .addTerm(new BytesRef("foo"))
+            .build();
+
+    checkExpectedHits(searcher, numMatch, query, new TermQuery(new Term("b", "foo")));
+
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
   public void testCopyFieldWithMissingFields() throws IOException {
     Directory dir = newDirectory();
     Similarity similarity = randomCompatibleSimilarity();