You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by an...@apache.org on 2023/03/08 18:19:16 UTC

[solr] branch main updated: SOLR-16643: reRankOperator=multiply/replace options (#1431)

This is an automated email from the ASF dual-hosted git repository.

andywebb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 3ac9714557d SOLR-16643: reRankOperator=multiply/replace options (#1431)
3ac9714557d is described below

commit 3ac9714557d4cc4995aa932c584eb48ce91eabdc
Author: Andy Webb <an...@apache.org>
AuthorDate: Wed Mar 8 18:19:08 2023 +0000

    SOLR-16643: reRankOperator=multiply/replace options (#1431)
    
    The new options can be used to replace or multiply (rather than add to) documents' original scores with the output of the ReRankQuery.
    
    Co-authored-by: Andy Webb <an...@apache.org>
    Co-authored-by: Christine Poerschke <cp...@apache.org>
    Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
 solr/CHANGES.txt                                   |  2 +
 .../org/apache/solr/search/ReRankOperator.java     | 42 +++++++++++++++
 .../apache/solr/search/ReRankQParserPlugin.java    | 59 +++++++++++++++++-----
 .../solr/search/TestReRankQParserPlugin.java       | 48 +++++++++++++++++-
 .../query-guide/pages/query-re-ranking.adoc        | 32 +++++++++++-
 5 files changed, 168 insertions(+), 15 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f5be3131cf0..bc9448b48e1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -63,6 +63,8 @@ New Features
 
 * SOLR-16646: New function query operator isnan to verify if value is NaN (Gabriel Magno via Kevin Risden)
 
+* SOLR-16643: Add reRankOperator=multiply/replace options to rerank query parser (Andy Webb, Christine Poerschke, Mikhail Khludnev)
+
 Improvements
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankOperator.java b/solr/core/src/java/org/apache/solr/search/ReRankOperator.java
new file mode 100644
index 00000000000..0b6583e095e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/ReRankOperator.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.util.Locale;
+import org.apache.solr.common.SolrException;
+
+public enum ReRankOperator {
+  ADD,
+  MULTIPLY,
+  REPLACE;
+
+  public static ReRankOperator get(String p) {
+    if (p != null) {
+      try {
+        return ReRankOperator.valueOf(p.toUpperCase(Locale.ROOT));
+      } catch (Exception ex) {
+        throw new SolrException(
+            SolrException.ErrorCode.BAD_REQUEST, "Invalid reRankOperator: " + p);
+      }
+    }
+    return null;
+  }
+
+  public String toLower() {
+    return toString().toLowerCase(Locale.ROOT);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
index 5e3f25e9890..6804942acd6 100644
--- a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
@@ -44,6 +44,9 @@ public class ReRankQParserPlugin extends QParserPlugin {
   public static final String RERANK_WEIGHT = "reRankWeight";
   public static final double RERANK_WEIGHT_DEFAULT = 2.0d;
 
+  public static final String RERANK_OPERATOR = "reRankOperator";
+  public static final String RERANK_OPERATOR_DEFAULT = "add";
+
   @Override
   public QParser createParser(
       String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
@@ -68,21 +71,43 @@ public class ReRankQParserPlugin extends QParserPlugin {
       Query reRankQuery = reRankParser.parse();
 
       int reRankDocs = localParams.getInt(RERANK_DOCS, RERANK_DOCS_DEFAULT);
-      reRankDocs = Math.max(1, reRankDocs); //
+      reRankDocs = Math.max(1, reRankDocs);
 
       double reRankWeight = localParams.getDouble(RERANK_WEIGHT, RERANK_WEIGHT_DEFAULT);
 
-      return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight);
+      ReRankOperator reRankOperator =
+          ReRankOperator.get(localParams.get(RERANK_OPERATOR, RERANK_OPERATOR_DEFAULT));
+
+      return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, reRankOperator);
     }
   }
 
   private static final class ReRankQueryRescorer extends QueryRescorer {
 
-    final double reRankWeight;
+    final BiFloatFunction scoreCombiner;
+
+    @FunctionalInterface
+    interface BiFloatFunction {
+      float func(float a, float b);
+    }
 
-    public ReRankQueryRescorer(Query reRankQuery, double reRankWeight) {
+    public ReRankQueryRescorer(
+        Query reRankQuery, double reRankWeight, ReRankOperator reRankOperator) {
       super(reRankQuery);
-      this.reRankWeight = reRankWeight;
+      switch (reRankOperator) {
+        case ADD:
+          scoreCombiner = (score, second) -> (float) (score + reRankWeight * second);
+          break;
+        case MULTIPLY:
+          scoreCombiner = (score, second) -> (float) (score * reRankWeight * second);
+          break;
+        case REPLACE:
+          scoreCombiner = (score, second) -> (float) (reRankWeight * second);
+          break;
+        default:
+          scoreCombiner = null;
+          throw new IllegalArgumentException("Unexpected: reRankOperator=" + reRankOperator);
+      }
     }
 
     @Override
@@ -90,7 +115,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
         float firstPassScore, boolean secondPassMatches, float secondPassScore) {
       float score = firstPassScore;
       if (secondPassMatches) {
-        score = (float) (score + reRankWeight * secondPassScore);
+        return scoreCombiner.func(score, secondPassScore);
       }
       return score;
     }
@@ -99,6 +124,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
   private static final class ReRankQuery extends AbstractReRankQuery {
     private final Query reRankQuery;
     private final double reRankWeight;
+    private final ReRankOperator reRankOperator;
 
     @Override
     public int hashCode() {
@@ -106,7 +132,8 @@ public class ReRankQParserPlugin extends QParserPlugin {
           + mainQuery.hashCode()
           + reRankQuery.hashCode()
           + (int) reRankWeight
-          + reRankDocs;
+          + reRankDocs
+          + reRankOperator.hashCode();
     }
 
     @Override
@@ -118,13 +145,19 @@ public class ReRankQParserPlugin extends QParserPlugin {
       return mainQuery.equals(rrq.mainQuery)
           && reRankQuery.equals(rrq.reRankQuery)
           && reRankWeight == rrq.reRankWeight
-          && reRankDocs == rrq.reRankDocs;
+          && reRankDocs == rrq.reRankDocs
+          && reRankOperator.equals(rrq.reRankOperator);
     }
 
-    public ReRankQuery(Query reRankQuery, int reRankDocs, double reRankWeight) {
-      super(defaultQuery, reRankDocs, new ReRankQueryRescorer(reRankQuery, reRankWeight));
+    public ReRankQuery(
+        Query reRankQuery, int reRankDocs, double reRankWeight, ReRankOperator reRankOperator) {
+      super(
+          defaultQuery,
+          reRankDocs,
+          new ReRankQueryRescorer(reRankQuery, reRankWeight, reRankOperator));
       this.reRankQuery = reRankQuery;
       this.reRankWeight = reRankWeight;
+      this.reRankOperator = reRankOperator;
     }
 
     @Override
@@ -135,13 +168,15 @@ public class ReRankQParserPlugin extends QParserPlugin {
       sb.append(" mainQuery='").append(mainQuery.toString()).append("' ");
       sb.append(RERANK_QUERY).append("='").append(reRankQuery.toString()).append("' ");
       sb.append(RERANK_DOCS).append('=').append(reRankDocs).append(' ');
-      sb.append(RERANK_WEIGHT).append('=').append(reRankWeight).append('}');
+      sb.append(RERANK_WEIGHT).append('=').append(reRankWeight).append(' ');
+      sb.append(RERANK_OPERATOR).append('=').append(reRankOperator.toLower()).append('}');
       return sb.toString();
     }
 
     @Override
     protected Query rewrite(Query rewrittenMainQuery) throws IOException {
-      return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight).wrap(rewrittenMainQuery);
+      return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, reRankOperator)
+          .wrap(rewrittenMainQuery);
     }
   }
 }
diff --git a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
index 8b978ff96e5..b25d725c0a7 100644
--- a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
+++ b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
@@ -16,7 +16,9 @@
  */
 package org.apache.solr.search;
 
+import java.util.Locale;
 import java.util.Map;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 import org.apache.solr.SolrTestCaseJ4;
@@ -57,6 +59,8 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
 
     assertEquals(ReRankQParserPlugin.RERANK_WEIGHT, "reRankWeight");
     assertEquals(ReRankQParserPlugin.RERANK_WEIGHT_DEFAULT, 2.0d, 0.0d);
+
+    assertEquals(ReRankQParserPlugin.RERANK_OPERATOR, "reRankOperator");
   }
 
   @Test
@@ -126,6 +130,48 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
         "//result/doc[5]/str[@name='id'][.='1']",
         "//result/doc[6]/str[@name='id'][.='5']");
 
+    // check that each of the reRankOperators returns the expected score for item 3
+    for (Map.Entry<String, String> scoreByOp :
+        Map.of("add", "10002.1", "multiply", "1000.2", "replace", "10002.0").entrySet()) {
+      params = new ModifiableSolrParams();
+      String operation = scoreByOp.getKey();
+      if (random().nextBoolean()) {
+        operation = operation.toUpperCase(Locale.ROOT);
+      }
+      final Function<String, String> rerankQueryByOp =
+          op ->
+              "{!"
+                  + ReRankQParserPlugin.NAME
+                  + " "
+                  + ReRankQParserPlugin.RERANK_QUERY
+                  + "=$rqq "
+                  + ReRankQParserPlugin.RERANK_OPERATOR
+                  + "="
+                  + op
+                  + " "
+                  + ReRankQParserPlugin.RERANK_DOCS
+                  + "=200}";
+      params.add("rq", rerankQueryByOp.apply(operation));
+      params.add("q", "term_s:YYYY^=0.1"); // force score=0.1
+      params.add("rqq", "{!edismax bf=$bff}*:*"); // returns 1 + $bff
+      params.add("bff", "field(test_ti)"); // test_ti=5000 for item 3
+      params.add("start", "0");
+      params.add("rows", "6");
+      params.add("df", "text");
+      params.add("fl", "id,score");
+      assertQ(
+          req(params),
+          "*[count(//doc)=6]",
+          "//result/doc[1]/str[@name='id'][.='3']",
+          "//result/doc[1]/float[@name='score'][.='" + scoreByOp.getValue() + "']");
+      final String badOp =
+          random().nextBoolean()
+              ? operation + operation
+              : operation.substring(0, operation.length() - 1);
+      params.set("rq", rerankQueryByOp.apply(badOp));
+      assertQEx("Wrong reRankOperation:" + badOp, req(params), SolrException.ErrorCode.BAD_REQUEST);
+    }
+
     params = new ModifiableSolrParams();
     params.add(
         "rq",
@@ -960,7 +1006,7 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
               "id_p_i", String.valueOf(i),
               "field_t",
                   IntStream.range(0, numDocs)
-                      .mapToObj(val -> Integer.toString(val))
+                      .mapToObj(Integer::toString)
                       .collect(Collectors.joining(" "))));
     }
     assertU(commit());
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
index 82d971fa30a..fe32836c458 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
@@ -67,9 +67,18 @@ This number will be treated as a minimum, and may be increased internally automa
 |Optional |Default: `2.0`
 |===
 +
-A multiplicative factor that will be applied to the score from the reRankQuery for each of the top matching documents, before that score is added to the original score.
+A multiplicative factor that will be applied to the score from the reRankQuery for each of the top matching documents, before that score is combined with the original score.
 
-In the example below, the top 1000 documents matching the query "greetings" will be re-ranked using the query "(hi hello hey hiya)".
+`reRankOperator`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `add`
+|===
++
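+Determines how the score from the reRankQuery, multiplied by the `reRankWeight`, is combined with the original score. Accepted values are `add`, `multiply` and `replace` (case-insensitive); by default (`add`) the weighted reRankQuery score is added to the original score.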
+
+In the example below using the default `add` behaviour, the top 1000 documents matching the query "greetings" will be re-ranked using the query "(hi hello hey hiya)".
 The resulting scores for each of those 1000 documents will be 3 times their score from the "(hi hello hey hiya)", plus the score from the original "greetings" query:
 
 [source,text]
@@ -79,6 +88,25 @@ q=greetings&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=3}&rqq=(hi
 
 If a document matches the original query, but does not match the re-ranking query, the document's original score will remain.
 
+Setting `reRankOperator` to `multiply` will instead multiply the three numbers together: the original score, the `reRankWeight` and the score from the reRankQuery. This means that other multiplying operations such as xref:edismax-query-parser.adoc#extended-dismax-parameters[eDisMax `boost` functions] can be converted to Re-Rank operations.
+
+In the example below, the scores for the top 1000 documents matching the query "phone" will be multiplied by a function of the `price` field.
+
+[source,text]
+----
+q=phone&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=1 reRankOperator=multiply}&rqq={!func v=div(1,sum(1,price))}
+----
+
+Setting `reRankOperator` to `replace` will replace the original score with the score from the reRankQuery multiplied by the `reRankWeight`, so the final scores can be independent of documents' original scores.
+
+In the example below, the scores for the top 1000 documents matching the query "phone" will be replaced with a function of the `price` field.
+
+[source,text]
+----
+q=phone&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=1 reRankOperator=replace}&rqq={!func v=div(1,sum(1,price))}
+----
+
+
 === LTR Query Parser
 
 The `ltr` stands for Learning To Rank, please see xref:learning-to-rank.adoc[] for more detailed information.