You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by mk...@apache.org on 2024/02/21 21:38:30 UTC

(solr) branch branch_9x updated: SOLR-17058: Request param to disable distributed stats request at query time (#2291)

This is an automated email from the ASF dual-hosted git repository.

mkhl pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 2eb0b9e8d6e SOLR-17058: Request param to disable distributed stats request at query time (#2291)
2eb0b9e8d6e is described below

commit 2eb0b9e8d6e3aee50313f88b38490469bae0101d
Author: weiwang19 <we...@gmail.com>
AuthorDate: Wed Feb 21 13:38:25 2024 -0800

    SOLR-17058: Request param to disable distributed stats request at query time (#2291)
    
    * SOLR-17058: Request param to disable distributed stats request at query time (#2046)
    
    Co-authored-by: wwang30 <we...@walmart.com>
    Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
 solr/CHANGES.txt                                   |  2 +
 .../solr/handler/component/QueryComponent.java     |  9 +++-
 .../solr/handler/component/ResponseBuilder.java    | 10 +++-
 .../java/org/apache/solr/search/QueryCommand.java  |  9 ++++
 .../org/apache/solr/search/QueryResultKey.java     | 16 ++++++-
 .../org/apache/solr/search/SolrIndexSearcher.java  |  8 +++-
 .../src/test/org/apache/solr/cli/PostToolTest.java |  5 +-
 .../org/apache/solr/core/QueryResultKeyTest.java   | 17 +++++++
 .../apache/solr/search/stats/TestDistribIDF.java   | 53 ++++++++++++++++++++++
 .../pages/solrcloud-distributed-requests.adoc      | 11 +++++
 .../apache/solr/common/params/CommonParams.java    |  9 ++++
 11 files changed, 139 insertions(+), 10 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 609164f7cfb..3cea0081583 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -29,6 +29,8 @@ Improvements
 * SOLR-17159: bin/solr post now has proper unit testing.  Users can specify a --dry-run option to 
   simulate posting documents without sending them to Solr. (Eric Pugh)
 
+* SOLR-17058: Add 'distrib.statsCache' parameter to disable distributed stats requests at query time. (Wei Wang, Mikhail Khludnev)
+
 Optimizations
 ---------------------
 * SOLR-17144: Close searcherExecutor thread per core after 1 minute (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index dc74c4d8491..e154b79b1f6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -168,6 +168,9 @@ public class QueryComponent extends SearchComponent {
       rb.setQueryString(queryString);
     }
 
+    // set the flag for distributed stats
+    rb.setEnableDistribStats(params.getBool(CommonParams.DISTRIB_STATS_CACHE, true));
+
     try {
       QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
       Query q = parser.getQuery();
@@ -365,6 +368,7 @@ public class QueryComponent extends SearchComponent {
     QueryCommand cmd = rb.createQueryCommand();
     cmd.setTimeAllowed(timeAllowed);
     cmd.setMinExactCount(getMinExactCount(params));
+    cmd.setEnableDistribStats(rb.isEnableDistribStats());
 
     boolean isCancellableQuery = params.getBool(CommonParams.IS_QUERY_CANCELLABLE, false);
 
@@ -736,8 +740,9 @@ public class QueryComponent extends SearchComponent {
 
   protected void createDistributedStats(ResponseBuilder rb) {
     StatsCache cache = rb.req.getSearcher().getStatsCache();
-    if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
-        || rb.getSortSpec().includesScore()) {
+    if (rb.isEnableDistribStats()
+        && ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
+            || rb.getSortSpec().includesScore())) {
       ShardRequest sreq = cache.retrieveStatsRequest(rb);
       if (sreq != null) {
         rb.addRequest(this, sreq);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
index bb06eac78e1..f3e79675414 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
@@ -73,9 +73,9 @@ public class ResponseBuilder {
 
   private boolean isCancellation;
   private String cancellationUUID;
-
   private String taskStatusCheckUUID;
   private boolean isTaskListRequest;
+  private boolean isEnableDistribStats = true;
 
   private QParser qparser = null;
   private String queryString = null;
@@ -547,4 +547,12 @@ public class ResponseBuilder {
   public String getTaskStatusCheckUUID() {
     return taskStatusCheckUUID;
   }
+
+  public void setEnableDistribStats(boolean isEnableDistribStats) {
+    this.isEnableDistribStats = isEnableDistribStats;
+  }
+
+  public boolean isEnableDistribStats() {
+    return isEnableDistribStats;
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/search/QueryCommand.java b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
index 8da580d95ec..023f845d680 100755
--- a/solr/core/src/java/org/apache/solr/search/QueryCommand.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
@@ -40,6 +40,7 @@ public class QueryCommand {
   private long timeAllowed = -1;
   private int minExactCount = Integer.MAX_VALUE;
   private CursorMark cursorMark;
+  private boolean enableDistribStats = true;
 
   public CursorMark getCursorMark() {
     return cursorMark;
@@ -245,4 +246,12 @@ public class QueryCommand {
   public boolean isQueryCancellable() {
     return isQueryCancellable;
   }
+
+  public void setEnableDistribStats(boolean enableDistribStats) {
+    this.enableDistribStats = enableDistribStats;
+  }
+
+  public boolean isEnableDistribStats() {
+    return enableDistribStats;
+  }
 }
diff --git a/solr/core/src/java/org/apache/solr/search/QueryResultKey.java b/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
index caeeb337895..72c6b474d06 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
@@ -40,20 +40,31 @@ public final class QueryResultKey implements Accountable {
   final List<Query> filters;
   final int nc_flags; // non-comparable flags... ignored by hashCode and equals
   final int minExactCount;
-
+  final boolean enableDistribStats;
   private final int hc; // cached hashCode
   private final long ramBytesUsed; // cached
 
   public QueryResultKey(Query query, List<Query> filters, Sort sort, int nc_flags) {
-    this(query, filters, sort, nc_flags, Integer.MAX_VALUE);
+    this(query, filters, sort, nc_flags, Integer.MAX_VALUE, true);
   }
 
   public QueryResultKey(
       Query query, List<Query> filters, Sort sort, int nc_flags, int minExactCount) {
+    this(query, filters, sort, nc_flags, minExactCount, true);
+  }
+
+  public QueryResultKey(
+      Query query,
+      List<Query> filters,
+      Sort sort,
+      int nc_flags,
+      int minExactCount,
+      boolean enableDistribStats) {
     this.query = query;
     this.sort = sort;
     this.nc_flags = nc_flags;
     this.minExactCount = minExactCount;
+    this.enableDistribStats = enableDistribStats;
 
     int h = query.hashCode();
 
@@ -113,6 +124,7 @@ public final class QueryResultKey implements Accountable {
     if (!this.query.equals(other.query)) return false;
     if (!unorderedCompare(this.filters, other.filters)) return false;
     if (this.minExactCount != other.minExactCount) return false;
+    if (this.enableDistribStats != other.enableDistribStats) return false;
 
     for (int i = 0; i < sfields.size(); i++) {
       SortField sf1 = this.sfields.get(i);
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 35f34e30e1f..a3c7f3c3690 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -1578,7 +1578,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       // all of the current flags can be reused during warming,
       // so set all of them on the cache key.
       key =
-          new QueryResultKey(q, cmd.getFilterList(), cmd.getSort(), flags, cmd.getMinExactCount());
+          new QueryResultKey(
+              q,
+              cmd.getFilterList(),
+              cmd.getSort(),
+              flags,
+              cmd.getMinExactCount(),
+              cmd.isEnableDistribStats());
       if ((flags & NO_CHECK_QCACHE) == 0) {
         superset = queryResultCache.get(key);
 
diff --git a/solr/core/src/test/org/apache/solr/cli/PostToolTest.java b/solr/core/src/test/org/apache/solr/cli/PostToolTest.java
index 3912067d0d8..94f108ea5ac 100644
--- a/solr/core/src/test/org/apache/solr/cli/PostToolTest.java
+++ b/solr/core/src/test/org/apache/solr/cli/PostToolTest.java
@@ -55,10 +55,7 @@ public class PostToolTest extends SolrCloudTestCase {
 
   @BeforeClass
   public static void setupCluster() throws Exception {
-    configureCluster(2)
-        .addConfig(
-            "conf1", configset("cloud-minimal"))
-        .configure();
+    configureCluster(2).addConfig("conf1", configset("cloud-minimal")).configure();
   }
 
   @Test
diff --git a/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java b/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
index d663b6db600..e910e99097e 100644
--- a/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
+++ b/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
@@ -210,6 +210,23 @@ public class QueryResultKeyTest extends SolrTestCaseJ4 {
         new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
   }
 
+  public void testDisableDistribStats() {
+    int[] nums = smallArrayOfRandomNumbers();
+    final Query base = new FlatHashTermQuery("base");
+    assertKeyEquals(
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, Integer.MAX_VALUE, true),
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
+    assertKeyEquals(
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 10, true),
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 10));
+    assertKeyNotEquals(
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, Integer.MAX_VALUE, false),
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
+    assertKeyNotEquals(
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 20, false),
+        new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 20));
+  }
+
   /** does bi-directional equality check as well as verifying hashCode */
   public void assertKeyEquals(QueryResultKey key1, QueryResultKey key2) {
     assertNotNull(key1);
diff --git a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
index a8bb1ecb69c..ec1a0f571db 100644
--- a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
+++ b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
@@ -31,7 +31,9 @@ import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.CompositeIdRouter;
 import org.apache.solr.common.cloud.ImplicitDocRouter;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.util.NamedList;
 import org.apache.solr.embedded.JettySolrRunner;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -263,4 +265,55 @@ public class TestDistribIDF extends SolrTestCaseJ4 {
     solrCluster.getSolrClient().commit("collection1_local");
     solrCluster.getSolrClient().commit("collection2_local");
   }
+
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testDisableDistribStats() throws Exception {
+
+    // single collection with implicit router
+    final String COLLECTION = "collection1";
+    createCollection(COLLECTION, "conf1", ImplicitDocRouter.NAME);
+    SolrClient client = solrCluster.getSolrClient();
+
+    SolrInputDocument doc = new SolrInputDocument();
+    doc.setField("id", "1");
+    doc.setField("cat", "tv");
+    doc.addField(ShardParams._ROUTE_, "a");
+    client.add(COLLECTION, doc);
+
+    doc = new SolrInputDocument();
+    doc.setField("id", "2");
+    doc.setField("cat", "ipad");
+    doc.addField(ShardParams._ROUTE_, "b");
+    client.add(COLLECTION, doc);
+
+    client.commit(COLLECTION);
+    // distributed stats implicitly enabled by default
+    SolrQuery query =
+        new SolrQuery(
+            "q", "cat:tv",
+            "fl", "id,score",
+            "debug", "track");
+    QueryResponse rsp = client.query(COLLECTION, query);
+    NamedList<Object> track = (NamedList<Object>) rsp.getDebugMap().get("track");
+    assertNotNull(track);
+    assertNotNull("stats cache hit", track.get("PARSE_QUERY"));
+
+    // distributed stats explicitly disabled
+    query.set(CommonParams.DISTRIB_STATS_CACHE, "false");
+    query.set(CommonParams.Q, "{!terms f=id}1,2");
+    rsp = client.query(COLLECTION, query);
+    track = (NamedList<Object>) rsp.getDebugMap().get("track");
+    assertNotNull(track);
+    assertNull("NO stats cache hit", track.get("PARSE_QUERY"));
+    assertNotNull("just search", track.get("EXECUTE_QUERY"));
+
+    // distributed stats explicitly enabled
+    query.set(CommonParams.DISTRIB_STATS_CACHE, "true");
+    query.set(CommonParams.Q, "cat:electronics");
+    rsp = client.query(COLLECTION, query);
+    track = (NamedList<Object>) rsp.getDebugMap().get("track");
+    assertNotNull(track);
+    assertNotNull("stats cache hit", track.get("PARSE_QUERY"));
+  }
 }
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
index b8af8e821a2..605183e27e8 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
@@ -446,6 +446,17 @@ For example, the following line makes Solr use the `ExactStatsCache` implementat
 <statsCache class="org.apache.solr.search.stats.ExactStatsCache"/>
 ----
 
+=== distrib.statsCache Parameter
+
+The query parameter `distrib.statsCache` defaults to `true`. If set to `false`, distributed calls to fetch global term stats are turned off for this query. This can reduce overhead for queries that do not utilize distributed IDF for score calculation.
+
+[source,xml]
+----
+http://localhost:8987/solr/collection1/select?q=*%3A*&wt=json&fq={!terms f=id}id1,id2&distrib.statsCache=false
+----
+
+
+
 == Avoiding Distributed Deadlock
 
 Each shard serves top-level query requests and then makes sub-requests to all of the other shards.
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
index 90bef61db96..cc9e07e959d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
@@ -295,6 +295,15 @@ public interface CommonParams {
   @Deprecated(since = "9.4")
   String DISABLE_REQUEST_ID = "disableRequestId";
 
+  /**
+   * Parameter to control the distributed term statistics request for the current query when
+   * distributed IDF is enabled in solrconfig.
+   *
+   * <p>Defaults to 'true' if not specified. The distributed stats request is disabled by setting
+   * it to 'false'.
+   */
+  String DISTRIB_STATS_CACHE = "distrib.statsCache";
+
   /** Request Purpose parameter added to each internal shard request when using debug=track */
   String REQUEST_PURPOSE = "requestPurpose";