You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by an...@apache.org on 2023/03/09 08:58:36 UTC
[solr] branch branch_9x updated: SOLR-16643: reRankOperator=multiply/replace options (#1431)
This is an automated email from the ASF dual-hosted git repository.
andywebb pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new def38eabf28 SOLR-16643: reRankOperator=multiply/replace options (#1431)
def38eabf28 is described below
commit def38eabf28b445acc4e846b8c212a8a4c534266
Author: Andy Webb <an...@apache.org>
AuthorDate: Wed Mar 8 18:19:08 2023 +0000
SOLR-16643: reRankOperator=multiply/replace options (#1431)
The new options can be used to replace or multiply (rather than add to) documents' original scores with the output of the ReRankQuery.
Co-authored-by: Andy Webb <an...@apache.org>
Co-authored-by: Christine Poerschke <cp...@apache.org>
Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
solr/CHANGES.txt | 2 +
.../org/apache/solr/search/ReRankOperator.java | 42 +++++++++++++++
.../apache/solr/search/ReRankQParserPlugin.java | 59 +++++++++++++++++-----
.../solr/search/TestReRankQParserPlugin.java | 48 +++++++++++++++++-
.../query-guide/pages/query-re-ranking.adoc | 32 +++++++++++-
5 files changed, 168 insertions(+), 15 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 43e832cf8c4..fd0ac47f104 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -33,6 +33,8 @@ New Features
* SOLR-16646: New function query operator isnan to verify if value is NaN (Gabriel Magno via Kevin Risden)
+* SOLR-16643: Add reRankOperator=multiply/replace options to rerank query parser (Andy Webb, Christine Poerschke, Mikhail Khludnev)
+
Improvements
---------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankOperator.java b/solr/core/src/java/org/apache/solr/search/ReRankOperator.java
new file mode 100644
index 00000000000..0b6583e095e
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/ReRankOperator.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.util.Locale;
+import org.apache.solr.common.SolrException;
+
+public enum ReRankOperator {
+ ADD,
+ MULTIPLY,
+ REPLACE;
+
+ public static ReRankOperator get(String p) {
+ if (p != null) {
+ try {
+ return ReRankOperator.valueOf(p.toUpperCase(Locale.ROOT));
+ } catch (Exception ex) {
+ throw new SolrException(
+ SolrException.ErrorCode.BAD_REQUEST, "Invalid reRankOperator: " + p);
+ }
+ }
+ return null;
+ }
+
+ public String toLower() {
+ return toString().toLowerCase(Locale.ROOT);
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
index 5e3f25e9890..6804942acd6 100644
--- a/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/ReRankQParserPlugin.java
@@ -44,6 +44,9 @@ public class ReRankQParserPlugin extends QParserPlugin {
public static final String RERANK_WEIGHT = "reRankWeight";
public static final double RERANK_WEIGHT_DEFAULT = 2.0d;
+ public static final String RERANK_OPERATOR = "reRankOperator";
+ public static final String RERANK_OPERATOR_DEFAULT = "add";
+
@Override
public QParser createParser(
String query, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
@@ -68,21 +71,43 @@ public class ReRankQParserPlugin extends QParserPlugin {
Query reRankQuery = reRankParser.parse();
int reRankDocs = localParams.getInt(RERANK_DOCS, RERANK_DOCS_DEFAULT);
- reRankDocs = Math.max(1, reRankDocs); //
+ reRankDocs = Math.max(1, reRankDocs);
double reRankWeight = localParams.getDouble(RERANK_WEIGHT, RERANK_WEIGHT_DEFAULT);
- return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight);
+ ReRankOperator reRankOperator =
+ ReRankOperator.get(localParams.get(RERANK_OPERATOR, RERANK_OPERATOR_DEFAULT));
+
+ return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, reRankOperator);
}
}
private static final class ReRankQueryRescorer extends QueryRescorer {
- final double reRankWeight;
+ final BiFloatFunction scoreCombiner;
+
+ @FunctionalInterface
+ interface BiFloatFunction {
+ float func(float a, float b);
+ }
- public ReRankQueryRescorer(Query reRankQuery, double reRankWeight) {
+ public ReRankQueryRescorer(
+ Query reRankQuery, double reRankWeight, ReRankOperator reRankOperator) {
super(reRankQuery);
- this.reRankWeight = reRankWeight;
+ switch (reRankOperator) {
+ case ADD:
+ scoreCombiner = (score, second) -> (float) (score + reRankWeight * second);
+ break;
+ case MULTIPLY:
+ scoreCombiner = (score, second) -> (float) (score * reRankWeight * second);
+ break;
+ case REPLACE:
+ scoreCombiner = (score, second) -> (float) (reRankWeight * second);
+ break;
+ default:
+ scoreCombiner = null;
+ throw new IllegalArgumentException("Unexpected: reRankOperator=" + reRankOperator);
+ }
}
@Override
@@ -90,7 +115,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
float firstPassScore, boolean secondPassMatches, float secondPassScore) {
float score = firstPassScore;
if (secondPassMatches) {
- score = (float) (score + reRankWeight * secondPassScore);
+ return scoreCombiner.func(score, secondPassScore);
}
return score;
}
@@ -99,6 +124,7 @@ public class ReRankQParserPlugin extends QParserPlugin {
private static final class ReRankQuery extends AbstractReRankQuery {
private final Query reRankQuery;
private final double reRankWeight;
+ private final ReRankOperator reRankOperator;
@Override
public int hashCode() {
@@ -106,7 +132,8 @@ public class ReRankQParserPlugin extends QParserPlugin {
+ mainQuery.hashCode()
+ reRankQuery.hashCode()
+ (int) reRankWeight
- + reRankDocs;
+ + reRankDocs
+ + reRankOperator.hashCode();
}
@Override
@@ -118,13 +145,19 @@ public class ReRankQParserPlugin extends QParserPlugin {
return mainQuery.equals(rrq.mainQuery)
&& reRankQuery.equals(rrq.reRankQuery)
&& reRankWeight == rrq.reRankWeight
- && reRankDocs == rrq.reRankDocs;
+ && reRankDocs == rrq.reRankDocs
+ && reRankOperator.equals(rrq.reRankOperator);
}
- public ReRankQuery(Query reRankQuery, int reRankDocs, double reRankWeight) {
- super(defaultQuery, reRankDocs, new ReRankQueryRescorer(reRankQuery, reRankWeight));
+ public ReRankQuery(
+ Query reRankQuery, int reRankDocs, double reRankWeight, ReRankOperator reRankOperator) {
+ super(
+ defaultQuery,
+ reRankDocs,
+ new ReRankQueryRescorer(reRankQuery, reRankWeight, reRankOperator));
this.reRankQuery = reRankQuery;
this.reRankWeight = reRankWeight;
+ this.reRankOperator = reRankOperator;
}
@Override
@@ -135,13 +168,15 @@ public class ReRankQParserPlugin extends QParserPlugin {
sb.append(" mainQuery='").append(mainQuery.toString()).append("' ");
sb.append(RERANK_QUERY).append("='").append(reRankQuery.toString()).append("' ");
sb.append(RERANK_DOCS).append('=').append(reRankDocs).append(' ');
- sb.append(RERANK_WEIGHT).append('=').append(reRankWeight).append('}');
+ sb.append(RERANK_WEIGHT).append('=').append(reRankWeight).append(' ');
+ sb.append(RERANK_OPERATOR).append('=').append(reRankOperator.toLower()).append('}');
return sb.toString();
}
@Override
protected Query rewrite(Query rewrittenMainQuery) throws IOException {
- return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight).wrap(rewrittenMainQuery);
+ return new ReRankQuery(reRankQuery, reRankDocs, reRankWeight, reRankOperator)
+ .wrap(rewrittenMainQuery);
}
}
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
index 8b978ff96e5..b25d725c0a7 100644
--- a/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
+++ b/solr/core/src/test/org/apache/solr/search/TestReRankQParserPlugin.java
@@ -16,7 +16,9 @@
*/
package org.apache.solr.search;
+import java.util.Locale;
import java.util.Map;
+import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.solr.SolrTestCaseJ4;
@@ -57,6 +59,8 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
assertEquals(ReRankQParserPlugin.RERANK_WEIGHT, "reRankWeight");
assertEquals(ReRankQParserPlugin.RERANK_WEIGHT_DEFAULT, 2.0d, 0.0d);
+
+ assertEquals(ReRankQParserPlugin.RERANK_OPERATOR, "reRankOperator");
}
@Test
@@ -126,6 +130,48 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"//result/doc[5]/str[@name='id'][.='1']",
"//result/doc[6]/str[@name='id'][.='5']");
+ // check that each of the reRankOperators returns the expected score for item 3
+ for (Map.Entry<String, String> scoreByOp :
+ Map.of("add", "10002.1", "multiply", "1000.2", "replace", "10002.0").entrySet()) {
+ params = new ModifiableSolrParams();
+ String operation = scoreByOp.getKey();
+ if (random().nextBoolean()) {
+ operation = operation.toUpperCase(Locale.ROOT);
+ }
+ final Function<String, String> rerankQueryByOp =
+ op ->
+ "{!"
+ + ReRankQParserPlugin.NAME
+ + " "
+ + ReRankQParserPlugin.RERANK_QUERY
+ + "=$rqq "
+ + ReRankQParserPlugin.RERANK_OPERATOR
+ + "="
+ + op
+ + " "
+ + ReRankQParserPlugin.RERANK_DOCS
+ + "=200}";
+ params.add("rq", rerankQueryByOp.apply(operation));
+ params.add("q", "term_s:YYYY^=0.1"); // force score=0.1
+ params.add("rqq", "{!edismax bf=$bff}*:*"); // returns 1 + $bff
+ params.add("bff", "field(test_ti)"); // test_ti=5000 for item 3
+ params.add("start", "0");
+ params.add("rows", "6");
+ params.add("df", "text");
+ params.add("fl", "id,score");
+ assertQ(
+ req(params),
+ "*[count(//doc)=6]",
+ "//result/doc[1]/str[@name='id'][.='3']",
+ "//result/doc[1]/float[@name='score'][.='" + scoreByOp.getValue() + "']");
+ final String badOp =
+ random().nextBoolean()
+ ? operation + operation
+ : operation.substring(0, operation.length() - 1);
+ params.set("rq", rerankQueryByOp.apply(badOp));
+ assertQEx("Wrong reRankOperation:" + badOp, req(params), SolrException.ErrorCode.BAD_REQUEST);
+ }
+
params = new ModifiableSolrParams();
params.add(
"rq",
@@ -960,7 +1006,7 @@ public class TestReRankQParserPlugin extends SolrTestCaseJ4 {
"id_p_i", String.valueOf(i),
"field_t",
IntStream.range(0, numDocs)
- .mapToObj(val -> Integer.toString(val))
+ .mapToObj(Integer::toString)
.collect(Collectors.joining(" "))));
}
assertU(commit());
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
index 82d971fa30a..fe32836c458 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/query-re-ranking.adoc
@@ -67,9 +67,18 @@ This number will be treated as a minimum, and may be increased internally automa
|Optional |Default: `2.0`
|===
+
-A multiplicative factor that will be applied to the score from the reRankQuery for each of the top matching documents, before that score is added to the original score.
+A multiplicative factor that will be applied to the score from the reRankQuery for each of the top matching documents, before that score is combined with the original score.
-In the example below, the top 1000 documents matching the query "greetings" will be re-ranked using the query "(hi hello hey hiya)".
+`reRankOperator`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `add`
+|===
++
+By default the score from the reRankQuery multiplied by the `reRankWeight` is added to the original score.
+
+In the example below using the default `add` behaviour, the top 1000 documents matching the query "greetings" will be re-ranked using the query "(hi hello hey hiya)".
The resulting scores for each of those 1000 documents will be 3 times their score from the "(hi hello hey hiya)" query, plus the score from the original "greetings" query:
[source,text]
@@ -79,6 +88,25 @@ q=greetings&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=3}&rqq=(hi
If a document matches the original query, but does not match the re-ranking query, the document's original score will remain.
+Setting `reRankOperator` to `multiply` will instead multiply the three numbers together: the original score, the `reRankWeight` and the score from the reRankQuery. This means that other multiplying operations such as xref:edismax-query-parser.adoc#extended-dismax-parameters[eDisMax `boost` functions] can be converted to Re-Rank operations.
+
+In the example below, the scores for the top 1000 documents matching the query "phone" will be multiplied by a function of the `price` field.
+
+[source,text]
+----
+q=phone&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=1 reRankOperator=multiply}&rqq={!func v=div(1,sum(1,price))}
+----
+
+Setting `reRankOperator` to `replace` will replace the original score with the score from the reRankQuery multiplied by the `reRankWeight`, so the final scores can be independent of documents' original scores.
+
+In the example below, the scores for the top 1000 documents matching the query "phone" will be replaced with a function of the `price` field.
+
+[source,text]
+----
+q=phone&rq={!rerank reRankQuery=$rqq reRankDocs=1000 reRankWeight=1 reRankOperator=replace}&rqq={!func v=div(1,sum(1,price))}
+----
+
+
=== LTR Query Parser
The `ltr` stands for Learning To Rank, please see xref:learning-to-rank.adoc[] for more detailed information.