You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by mk...@apache.org on 2024/02/21 09:05:55 UTC
(solr) branch main updated: SOLR-17058: Request param to disable distributed stats request at query time (#2046)
This is an automated email from the ASF dual-hosted git repository.
mkhl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new bcb9f144974 SOLR-17058: Request param to disable distributed stats request at query time (#2046)
bcb9f144974 is described below
commit bcb9f144974ed07aa3b66766302474542067b522
Author: weiwang19 <we...@gmail.com>
AuthorDate: Wed Feb 21 01:05:48 2024 -0800
SOLR-17058: Request param to disable distributed stats request at query time (#2046)
* add param to disable distrib stats at query time
---------
Co-authored-by: wwang30 <we...@walmart.com>
Co-authored-by: Mikhail Khludnev <mk...@apache.org>
---
solr/CHANGES.txt | 2 +
.../solr/handler/component/QueryComponent.java | 9 +++-
.../solr/handler/component/ResponseBuilder.java | 10 +++-
.../java/org/apache/solr/search/QueryCommand.java | 9 ++++
.../org/apache/solr/search/QueryResultKey.java | 16 ++++++-
.../org/apache/solr/search/SolrIndexSearcher.java | 8 +++-
.../org/apache/solr/core/QueryResultKeyTest.java | 17 +++++++
.../apache/solr/search/stats/TestDistribIDF.java | 53 ++++++++++++++++++++++
.../pages/solrcloud-distributed-requests.adoc | 11 +++++
.../apache/solr/common/params/CommonParams.java | 9 ++++
10 files changed, 138 insertions(+), 6 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a8b0fba04c9..232d5926cf1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -100,6 +100,8 @@ Improvements
* SOLR-17159: bin/solr post now has proper unit testing. Users can specify a --dry-run option to
simulate posting documents without sending them to Solr. (Eric Pugh)
+* SOLR-17058: Add 'distrib.statsCache' parameter to disable distributed stats requests at query time. (Wei Wang, Mikhail Khludnev)
+
Optimizations
---------------------
* SOLR-17144: Close searcherExecutor thread per core after 1 minute (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index dc74c4d8491..e154b79b1f6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -168,6 +168,9 @@ public class QueryComponent extends SearchComponent {
rb.setQueryString(queryString);
}
+ // set the flag for distributed stats
+ rb.setEnableDistribStats(params.getBool(CommonParams.DISTRIB_STATS_CACHE, true));
+
try {
QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
Query q = parser.getQuery();
@@ -365,6 +368,7 @@ public class QueryComponent extends SearchComponent {
QueryCommand cmd = rb.createQueryCommand();
cmd.setTimeAllowed(timeAllowed);
cmd.setMinExactCount(getMinExactCount(params));
+ cmd.setEnableDistribStats(rb.isEnableDistribStats());
boolean isCancellableQuery = params.getBool(CommonParams.IS_QUERY_CANCELLABLE, false);
@@ -736,8 +740,9 @@ public class QueryComponent extends SearchComponent {
protected void createDistributedStats(ResponseBuilder rb) {
StatsCache cache = rb.req.getSearcher().getStatsCache();
- if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
- || rb.getSortSpec().includesScore()) {
+ if (rb.isEnableDistribStats()
+ && ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0
+ || rb.getSortSpec().includesScore())) {
ShardRequest sreq = cache.retrieveStatsRequest(rb);
if (sreq != null) {
rb.addRequest(this, sreq);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
index ea90503ef5c..fb7349185d5 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
@@ -72,9 +72,9 @@ public class ResponseBuilder {
private boolean isCancellation;
private String cancellationUUID;
-
private String taskStatusCheckUUID;
private boolean isTaskListRequest;
+ private boolean isEnableDistribStats = true;
private QParser qparser = null;
private String queryString = null;
@@ -520,4 +520,12 @@ public class ResponseBuilder {
public String getTaskStatusCheckUUID() {
return taskStatusCheckUUID;
}
+
+ public void setEnableDistribStats(boolean isEnableDistribStats) {
+ this.isEnableDistribStats = isEnableDistribStats;
+ }
+
+ public boolean isEnableDistribStats() {
+ return isEnableDistribStats;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/search/QueryCommand.java b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
index 33bfdc28070..dce9eaa53f9 100755
--- a/solr/core/src/java/org/apache/solr/search/QueryCommand.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
@@ -39,6 +39,7 @@ public class QueryCommand {
private long timeAllowed = -1;
private int minExactCount = Integer.MAX_VALUE;
private CursorMark cursorMark;
+ private boolean enableDistribStats = true;
public CursorMark getCursorMark() {
return cursorMark;
@@ -220,4 +221,12 @@ public class QueryCommand {
public boolean isQueryCancellable() {
return isQueryCancellable;
}
+
+ public void setEnableDistribStats(boolean enableDistribStats) {
+ this.enableDistribStats = enableDistribStats;
+ }
+
+ public boolean isEnableDistribStats() {
+ return enableDistribStats;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/search/QueryResultKey.java b/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
index caeeb337895..72c6b474d06 100644
--- a/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryResultKey.java
@@ -40,20 +40,31 @@ public final class QueryResultKey implements Accountable {
final List<Query> filters;
final int nc_flags; // non-comparable flags... ignored by hashCode and equals
final int minExactCount;
-
+ final boolean enableDistribStats;
private final int hc; // cached hashCode
private final long ramBytesUsed; // cached
public QueryResultKey(Query query, List<Query> filters, Sort sort, int nc_flags) {
- this(query, filters, sort, nc_flags, Integer.MAX_VALUE);
+ this(query, filters, sort, nc_flags, Integer.MAX_VALUE, true);
}
public QueryResultKey(
Query query, List<Query> filters, Sort sort, int nc_flags, int minExactCount) {
+ this(query, filters, sort, nc_flags, minExactCount, true);
+ }
+
+ public QueryResultKey(
+ Query query,
+ List<Query> filters,
+ Sort sort,
+ int nc_flags,
+ int minExactCount,
+ boolean enableDistribStats) {
this.query = query;
this.sort = sort;
this.nc_flags = nc_flags;
this.minExactCount = minExactCount;
+ this.enableDistribStats = enableDistribStats;
int h = query.hashCode();
@@ -113,6 +124,7 @@ public final class QueryResultKey implements Accountable {
if (!this.query.equals(other.query)) return false;
if (!unorderedCompare(this.filters, other.filters)) return false;
if (this.minExactCount != other.minExactCount) return false;
+ if (this.enableDistribStats != other.enableDistribStats) return false;
for (int i = 0; i < sfields.size(); i++) {
SortField sf1 = this.sfields.get(i);
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index e73a1554fa3..1711042bded 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -1566,7 +1566,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// all the current flags can be reused during warming,
// so set all of them on the cache key.
key =
- new QueryResultKey(q, cmd.getFilterList(), cmd.getSort(), flags, cmd.getMinExactCount());
+ new QueryResultKey(
+ q,
+ cmd.getFilterList(),
+ cmd.getSort(),
+ flags,
+ cmd.getMinExactCount(),
+ cmd.isEnableDistribStats());
if ((flags & NO_CHECK_QCACHE) == 0) {
superset = queryResultCache.get(key);
diff --git a/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java b/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
index d663b6db600..e910e99097e 100644
--- a/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
+++ b/solr/core/src/test/org/apache/solr/core/QueryResultKeyTest.java
@@ -210,6 +210,23 @@ public class QueryResultKeyTest extends SolrTestCaseJ4 {
new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
}
+ public void testDisableDistribStats() {
+ int[] nums = smallArrayOfRandomNumbers();
+ final Query base = new FlatHashTermQuery("base");
+ assertKeyEquals(
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, Integer.MAX_VALUE, true),
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
+ assertKeyEquals(
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 10, true),
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 10));
+ assertKeyNotEquals(
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, Integer.MAX_VALUE, false),
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0));
+ assertKeyNotEquals(
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 20, false),
+ new QueryResultKey(base, buildFiltersFromNumbers(nums), null, 0, 20));
+ }
+
/** does bi-directional equality check as well as verifying hashCode */
public void assertKeyEquals(QueryResultKey key1, QueryResultKey key2) {
assertNotNull(key1);
diff --git a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
index 602572d5150..e43c1dd7344 100644
--- a/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
+++ b/solr/core/src/test/org/apache/solr/search/stats/TestDistribIDF.java
@@ -31,7 +31,9 @@ import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
+import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams;
+import org.apache.solr.common.util.NamedList;
import org.apache.solr.embedded.JettySolrRunner;
import org.junit.Test;
import org.slf4j.Logger;
@@ -263,4 +265,55 @@ public class TestDistribIDF extends SolrTestCaseJ4 {
solrCluster.getSolrClient().commit("collection1_local");
solrCluster.getSolrClient().commit("collection2_local");
}
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testDisableDistribStats() throws Exception {
+
+ // single collection with implicit router
+ final String COLLECTION = "collection1";
+ createCollection(COLLECTION, "conf1", ImplicitDocRouter.NAME);
+ SolrClient client = solrCluster.getSolrClient();
+
+ SolrInputDocument doc = new SolrInputDocument();
+ doc.setField("id", "1");
+ doc.setField("cat", "tv");
+ doc.addField(ShardParams._ROUTE_, "a");
+ client.add(COLLECTION, doc);
+
+ doc = new SolrInputDocument();
+ doc.setField("id", "2");
+ doc.setField("cat", "ipad");
+ doc.addField(ShardParams._ROUTE_, "b");
+ client.add(COLLECTION, doc);
+
+ client.commit(COLLECTION);
+ // distributed stats implicitly enabled by default
+ SolrQuery query =
+ new SolrQuery(
+ "q", "cat:tv",
+ "fl", "id,score",
+ "debug", "track");
+ QueryResponse rsp = client.query(COLLECTION, query);
+ NamedList<Object> track = (NamedList<Object>) rsp.getDebugMap().get("track");
+ assertNotNull(track);
+ assertNotNull("stats cache hit", track.get("PARSE_QUERY"));
+
+ // distributed stats explicitly disabled
+ query.set(CommonParams.DISTRIB_STATS_CACHE, "false");
+ query.set(CommonParams.Q, "{!terms f=id}1,2");
+ rsp = client.query(COLLECTION, query);
+ track = (NamedList<Object>) rsp.getDebugMap().get("track");
+ assertNotNull(track);
+ assertNull("NO stats cache hit", track.get("PARSE_QUERY"));
+ assertNotNull("just search", track.get("EXECUTE_QUERY"));
+
+ // distributed stats explicitly enabled
+ query.set(CommonParams.DISTRIB_STATS_CACHE, "true");
+ query.set(CommonParams.Q, "cat:electronics");
+ rsp = client.query(COLLECTION, query);
+ track = (NamedList<Object>) rsp.getDebugMap().get("track");
+ assertNotNull(track);
+ assertNotNull("stats cache hit", track.get("PARSE_QUERY"));
+ }
}
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
index 68cf6070e6a..48f5331ae0b 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-distributed-requests.adoc
@@ -447,6 +447,17 @@ For example, the following line makes Solr use the `ExactStatsCache` implementat
<statsCache class="org.apache.solr.search.stats.ExactStatsCache"/>
----
+=== distrib.statsCache Parameter
+
+The query parameter `distrib.statsCache` defaults to `true`. If set to `false`, distributed calls to fetch global term stats are turned off for this query. This can reduce overhead for queries that do not rely on distributed IDF for score calculation.
+
+[source,text]
+----
+http://localhost:8987/solr/collection1/select?q=*%3A*&wt=json&fq={!terms f=id}id1,id2&distrib.statsCache=false
+----
+
+
+
== Avoiding Distributed Deadlock
Each shard serves top-level query requests and then makes sub-requests to all of the other shards.
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
index 90bef61db96..cc9e07e959d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
@@ -295,6 +295,15 @@ public interface CommonParams {
@Deprecated(since = "9.4")
String DISABLE_REQUEST_ID = "disableRequestId";
+ /**
+ * Parameter that controls whether the distributed term statistics request is sent for the
+ * current query when distributed IDF is enabled in solrconfig.
+ *
+ * <p>Defaults to 'true' if not specified. Setting it to 'false' disables the distributed stats
+ * request for that query.
+ */
+ String DISTRIB_STATS_CACHE = "distrib.statsCache";
+
/** Request Purpose parameter added to each internal shard request when using debug=track */
String REQUEST_PURPOSE = "requestPurpose";