You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/11/24 10:24:12 UTC

[lucene-solr] branch branch_8x updated: SOLR-15015: added support to parametric Interleaving algorithm (#2096)

This is an automated email from the ASF dual-hosted git repository.

abenedetti pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 6dbb4d5  SOLR-15015: added support to parametric Interleaving algorithm (#2096)
6dbb4d5 is described below

commit 6dbb4d5a6413f7cf0b81af3a988699a1d4a369ef
Author: Alessandro Benedetti <a....@sease.io>
AuthorDate: Tue Nov 24 10:11:08 2020 +0000

    SOLR-15015: added support to parametric Interleaving algorithm (#2096)
    
    (cherry picked from commit ca040402d9470969d7f8fe81c5bf4125e9344cde)
---
 solr/CHANGES.txt                                   |   2 +
 .../apache/solr/ltr/interleaving/Interleaving.java |   5 +-
 .../apache/solr/ltr/search/LTRQParserPlugin.java   |  10 +-
 .../org/apache/solr/ltr/TestLTRQParserPlugin.java  |  28 ---
 .../interleaving/TestLTRQParserInterleaving.java   | 206 +++++++++++++++++++++
 solr/solr-ref-guide/src/learning-to-rank.adoc      |   8 +
 6 files changed, 229 insertions(+), 30 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3706f95..de94708 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -24,6 +24,8 @@ Improvements
 
 * SOLR-14683: Metrics API should ensure consistent placeholders for missing values. (ab)
 
+* SOLR-15015: Add interleaving algorithm parameter support in Learning To Rank (Alessandro Benedetti)
+
 Optimizations
 ---------------------
 * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant)
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java
index 1038aca..db04443 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/interleaving/Interleaving.java
@@ -18,6 +18,7 @@
 package org.apache.solr.ltr.interleaving;
 
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.ltr.interleaving.algorithms.TeamDraftInterleaving;
 
 /**
@@ -36,8 +37,10 @@ public interface Interleaving {
    static Interleaving getImplementation(String algorithm) {
       switch(algorithm) {
          case TEAM_DRAFT:
-         default:
             return new TeamDraftInterleaving();
+         default:
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                "Unknown Interleaving algorithm: " + algorithm);
       }
    }
 }
\ No newline at end of file
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
index b5fb7a9..0bc0b67 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/search/LTRQParserPlugin.java
@@ -78,6 +78,13 @@ public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAwa
    **/
   public static final String RERANK_DOCS = "reRankDocs";
 
+  /** query parser plugin: default interleaving algorithm **/
+  public static final String DEFAULT_INTERLEAVING_ALGORITHM = Interleaving.TEAM_DRAFT;
+
+  /** query parser plugin: the param that selects the interleaving algorithm to use **/
+  public static final String INTERLEAVING_ALGORITHM = "interleavingAlgorithm";
+
+
   @Override
   @SuppressWarnings({"unchecked"})
   public void init(@SuppressWarnings("rawtypes") NamedList args) {
@@ -209,8 +216,9 @@ public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAwa
         SolrQueryRequestContextUtils.setScoringQueries(req, new LTRScoringQuery[] { rerankingQuery });
         return new LTRQuery(rerankingQuery, reRankDocs);
       } else {
+        String interleavingAlgorithm = localParams.get(INTERLEAVING_ALGORITHM, DEFAULT_INTERLEAVING_ALGORITHM);
         SolrQueryRequestContextUtils.setScoringQueries(req, rerankingQueries);
-        return new LTRInterleavingQuery(Interleaving.getImplementation(Interleaving.TEAM_DRAFT),rerankingQueries, reRankDocs);
+        return new LTRInterleavingQuery(Interleaving.getImplementation(interleavingAlgorithm), rerankingQueries, reRankDocs);
       }
     }
   }
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
index e613210..6730b9b 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
@@ -52,20 +52,6 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
-  public void interleavingLtrTooManyModelsTest() throws Exception {
-    final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
-    final SolrQuery query = new SolrQuery();
-    query.setQuery(solrQuery);
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-    query.add("fv", "true");
-    query.add("rq", "{!ltr model=modelA model=modelB model=C reRankDocs=100}");
-
-    final String res = restTestHarness.query("/query" + query.toQueryString());
-    assert (res.contains("Must provide one or two models in the request"));
-  }
-
-  @Test
   public void ltrModelIdDoesNotExistTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
@@ -94,20 +80,6 @@ public class TestLTRQParserPlugin extends TestRerankBase {
   }
 
   @Test
-  public void interleavingLtrModelIsEmptyTest() throws Exception {
-    final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
-    final SolrQuery query = new SolrQuery();
-    query.setQuery(solrQuery);
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-    query.add("fv", "true");
-    query.add("rq", "{!ltr model=6029760550880411648 model=\"\" reRankDocs=100}");
-
-    final String res = restTestHarness.query("/query" + query.toQueryString());
-    assert (res.contains("the model 1 is empty"));
-  }
-
-  @Test
   public void ltrBadRerankDocsTest() throws Exception {
     final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
     final SolrQuery query = new SolrQuery();
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java
new file mode 100644
index 0000000..ebaeae1
--- /dev/null
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/interleaving/TestLTRQParserInterleaving.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.interleaving;
+
+import java.util.Random;
+
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.ltr.TestRerankBase;
+import org.apache.solr.ltr.feature.SolrFeature;
+import org.apache.solr.ltr.interleaving.algorithms.TeamDraftInterleaving;
+import org.apache.solr.ltr.model.LinearModel;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestLTRQParserInterleaving extends TestRerankBase {
+  @Before
+  public void before() throws Exception {
+    setuptest(false);
+
+    assertU(adoc("id", "1", "title", "w1", "description", "w5", "popularity",
+        "1"));
+    assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description",
+        "w2 2asd asdd didid", "popularity", "2"));
+    assertU(adoc("id", "3", "title", "w1", "description", "w5", "popularity",
+        "3"));
+    assertU(adoc("id", "4", "title", "w1", "description", "w1", "popularity",
+        "6"));
+    assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
+        "5"));
+    assertU(adoc("id", "6", "title", "w6 w2", "description", "w1 w2",
+        "popularity", "6"));
+    assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description",
+        "w6 w2 w3 w4 w5 w8", "popularity", "88888"));
+    assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description",
+        "w1 w1 w1 w2 w2 w5", "popularity", "88888"));
+    assertU(commit());
+
+    loadFeaturesAndModelsForInterleaving();
+  }
+
+  @After
+  public void after() throws Exception {
+    aftertest();
+  }
+
+  protected void loadFeaturesAndModelsForInterleaving() throws Exception {
+    loadFeature("featureA1", SolrFeature.class.getName(),
+        "{\"fq\":[\"{!terms f=popularity}88888\"]}");
+    loadFeature("featureA2", SolrFeature.class.getName(),
+        "{\"fq\":[\"{!terms f=title}${user_query}\"]}");
+    loadFeature("featureAB", SolrFeature.class.getName(),
+        "{\"fq\":[\"{!terms f=title}${user_query}\"]}");
+    loadFeature("featureB1", SolrFeature.class.getName(),
+        "{\"fq\":[\"{!terms f=popularity}6\"]}");
+    loadFeature("featureB2", SolrFeature.class.getName(),
+        "{\"fq\":[\"{!terms f=description}${user_query}\"]}");
+    loadFeature("featureC1", SolrFeature.class.getName(),"featureStore2",
+        "{\"fq\":[\"{!terms f=popularity}6\"]}");
+    loadFeature("featureC2", SolrFeature.class.getName(),"featureStore2",
+        "{\"fq\":[\"{!terms f=popularity}1\"]}");
+
+    loadModel("modelA", LinearModel.class.getName(),
+        new String[]{"featureA1", "featureA2", "featureAB"},
+        "{\"weights\":{\"featureA1\":3.0, \"featureA2\":9.0, \"featureAB\":27.0}}");
+
+    loadModel("modelB", LinearModel.class.getName(),
+        new String[]{"featureB1", "featureB2", "featureAB"},
+        "{\"weights\":{\"featureB1\":2.0, \"featureB2\":4.0, \"featureAB\":8.0}}");
+
+    loadModel("modelC", LinearModel.class.getName(),
+        new String[]{"featureC1", "featureC2"},"featureStore2",
+        "{\"weights\":{\"featureC1\":5.0, \"featureC2\":25.0}}");
+  }
+
+  @Test
+  public void interleavingLtrNotSupportedAlgorithmTest() throws Exception {
+    final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
+    final SolrQuery query = new SolrQuery();
+    query.setQuery(solrQuery);
+    query.add("fl", "*, score");
+    query.add("rows", "4");
+    query.add("fv", "true");
+    query.add("rq", "{!ltr model=modelA model=modelB interleavingAlgorithm=unsupported reRankDocs=100}");
+
+    final String res = restTestHarness.query("/query" + query.toQueryString());
+    assert (res.contains("Unknown Interleaving algorithm: unsupported"));
+  }
+
+  @Test
+  public void interleavingLtrTooManyModelsTest() throws Exception {
+    final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
+    final SolrQuery query = new SolrQuery();
+    query.setQuery(solrQuery);
+    query.add("fl", "*, score");
+    query.add("rows", "4");
+    query.add("fv", "true");
+    query.add("rq", "{!ltr model=modelA model=modelB model=C reRankDocs=100}");
+
+    final String res = restTestHarness.query("/query" + query.toQueryString());
+    assert (res.contains("Must provide one or two models in the request"));
+  }
+
+  @Test
+  public void interleavingLtrModelIsEmptyTest() throws Exception {
+    final String solrQuery = "_query_:{!edismax qf='title' mm=100% v='bloomberg' tie=0.1}";
+    final SolrQuery query = new SolrQuery();
+    query.setQuery(solrQuery);
+    query.add("fl", "*, score");
+    query.add("rows", "4");
+    query.add("fv", "true");
+    query.add("rq", "{!ltr model=modelA model=\"\" reRankDocs=100}");
+
+    final String res = restTestHarness.query("/query" + query.toQueryString());
+    assert (res.contains("the model 1 is empty"));
+  }
+
+  @Test
+  public void defaultAlgorithm_shouldApplyTeamDraftInterleaving() throws Exception {
+    TeamDraftInterleaving.setRANDOM(new Random(10101010));//Random Boolean Choices Generation from Seed: [0,1,1]
+
+    final SolrQuery query = new SolrQuery();
+    query.setQuery("*:*");
+    query.add("fl", "*, score,interleavingPick:[interleaving]");
+    query.add("rows", "10");
+    query.add("debugQuery", "true");
+    query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
+    query.add("rq",
+        "{!ltr model=modelA model=modelB reRankDocs=10 efi.user_query='w5'}");
+
+    /*
+    Doc1 = "featureB2=1.0", ScoreA(0), ScoreB(4)
+    Doc3 = "featureB2=1.0", ScoreA(0), ScoreB(4)
+    Doc4 = "featureB1=1.0", ScoreA(0), ScoreB(2)
+    Doc7 ="featureA1=1.0,featureA2=1.0,featureAB=1.0,featureB2=1.0", ScoreA(39), ScoreB(12)
+    Doc8 = "featureA1=1.0,featureB2=1.0", ScoreA(3), ScoreB(4)
+    ModelARerankedList = [7,8,1,3,4]
+    ModelBRerankedList = [7,1,3,8,4]
+   
+    Random Boolean Choices Generation from Seed: [0,1,1]
+    */
+    String[] expectedInterleavingPicks = new String[]{"modelA", "modelB", "modelB", "modelA", "modelB"};
+    int[] expectedInterleaved = new int[]{7, 1, 3, 8, 4};
+
+    String[] tests = new String[11];
+    tests[0] = "/response/numFound/==5";
+    for (int i = 1; i <= 5; i++) {
+      tests[i] = "/response/docs/[" + (i - 1) + "]/id==\"" + expectedInterleaved[(i - 1)] + "\"";
+      tests[i + 5] = "/response/docs/[" + (i - 1) + "]/interleavingPick==" + expectedInterleavingPicks[(i - 1)];
+    }
+    assertJQ("/query" + query.toQueryString(), tests);
+
+  }
+  
+  @Test
+  public void teamDraftAlgorithm_shouldApplyTeamDraftInterleaving() throws Exception {
+    TeamDraftInterleaving.setRANDOM(new Random(10101010));//Random Boolean Choices Generation from Seed: [0,1,1]
+
+    final SolrQuery query = new SolrQuery();
+    query.setQuery("*:*");
+    query.add("fl", "*, score,interleavingPick:[interleaving]");
+    query.add("rows", "10");
+    query.add("debugQuery", "true");
+    query.add("fq", "{!terms f=title}w1"); // 1,3,4,7,8
+    query.add("rq",
+        "{!ltr model=modelA model=modelB reRankDocs=10 interleavingAlgorithm=TeamDraft efi.user_query='w5'}");
+
+    /*
+    Doc1 = "featureB2=1.0", ScoreA(0), ScoreB(4)
+    Doc3 = "featureB2=1.0", ScoreA(0), ScoreB(4)
+    Doc4 = "featureB1=1.0", ScoreA(0), ScoreB(2)
+    Doc7 ="featureA1=1.0,featureA2=1.0,featureAB=1.0,featureB2=1.0", ScoreA(39), ScoreB(12)
+    Doc8 = "featureA1=1.0,featureB2=1.0", ScoreA(3), ScoreB(4)
+    ModelARerankedList = [7,8,1,3,4]
+    ModelBRerankedList = [7,1,3,8,4]
+   
+    Random Boolean Choices Generation from Seed: [0,1,1]
+    */
+    String[] expectedInterleavingPicks = new String[]{"modelA", "modelB", "modelB", "modelA", "modelB"};
+    int[] expectedInterleaved = new int[]{7, 1, 3, 8, 4};
+
+    String[] tests = new String[11];
+    tests[0] = "/response/numFound/==5";
+    for (int i = 1; i <= 5; i++) {
+      tests[i] = "/response/docs/[" + (i - 1) + "]/id==\"" + expectedInterleaved[(i - 1)] + "\"";
+      tests[i + 5] = "/response/docs/[" + (i - 1) + "]/interleavingPick==" + expectedInterleavingPicks[(i - 1)];
+    }
+    assertJQ("/query" + query.toQueryString(), tests);
+
+  }
+
+}
diff --git a/solr/solr-ref-guide/src/learning-to-rank.adoc b/solr/solr-ref-guide/src/learning-to-rank.adoc
index 3699ba2..6f54b86 100644
--- a/solr/solr-ref-guide/src/learning-to-rank.adoc
+++ b/solr/solr-ref-guide/src/learning-to-rank.adoc
@@ -329,6 +329,14 @@ The output XML will include the model picked for each search result, resembling
   }}
 ----
 
+=== Running a Rerank Query with Interleaving Using a Specific Algorithm
+To rerank the results of a query, interleaving two models using a specific algorithm, add the `interleavingAlgorithm` local parameter to the ltr query parser, for example:
+
+[source,text]
+http://localhost:8983/solr/techproducts/query?q=test&rq={!ltr model=myModelA model=myModelB reRankDocs=100 interleavingAlgorithm=TeamDraft}&fl=id,score
+
+Currently the only (and default) algorithm supported is 'TeamDraft'.
+
 === External Feature Information
 
 The {solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/ValueFeature.html[ValueFeature] and {solr-javadocs}/solr-ltr/org/apache/solr/ltr/feature/SolrFeature.html[SolrFeature] classes support the use of external feature information, `efi` for short.