You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by md...@apache.org on 2020/12/23 19:18:14 UTC

[lucene-solr] branch branch_8x updated: SOLR-14413 allow timeAllowed and cursorMark parameters

This is an automated email from the ASF dual-hosted git repository.

mdrob pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new b13011e  SOLR-14413 allow timeAllowed and cursorMark parameters
b13011e is described below

commit b13011e97aeddf7c8af1e0bd851c167665187f36
Author: John Gallagher <jg...@slack-corp.com>
AuthorDate: Fri Apr 17 12:51:35 2020 -0400

    SOLR-14413 allow timeAllowed and cursorMark parameters
    
    closes #1436
---
 solr/CHANGES.txt                                   |  2 +
 .../solr/handler/component/QueryComponent.java     |  5 --
 .../solr/handler/component/SearchHandler.java      |  8 ++
 .../collection1/conf/solrconfig-deeppaging.xml     | 11 ++-
 .../src/test/org/apache/solr/CursorPagingTest.java | 87 ++++++++++++++++++++--
 .../apache/solr/cloud/DistribCursorPagingTest.java |  7 --
 .../src/common-query-parameters.adoc               |  4 +-
 solr/solr-ref-guide/src/pagination-of-results.adoc |  1 +
 8 files changed, 103 insertions(+), 22 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 928701e..96df9c8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -41,6 +41,8 @@ Improvements
 
 * SOLR-14987: Reuse HttpSolrClient per node vs. one per Solr core when using CloudSolrStream (Timothy Potter)
 
+* SOLR-14413: Allow timeAllowed and cursorMark parameters together. (John Gallagher via Mike Drob)
+
 Optimizations
 ---------------------
 * SOLR-14975: Optimize CoreContainer.getAllCoreNames, getLoadedCoreNames and getCoreDescriptors. (Bruno Roustant)
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index 853da1c..a08c41d 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -358,11 +358,6 @@ public class QueryComponent extends SearchComponent
 
     // -1 as flag if not set.
     long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
-    if (null != rb.getCursorMark() && 0 < timeAllowed) {
-      // fundamentally incompatible
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
-                              CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
-    }
 
     QueryCommand cmd = rb.createQueryCommand();
     cmd.setTimeAllowed(timeAllowed);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
index 24cc706..39e9795 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -34,6 +34,7 @@ import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CursorMarkParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.util.NamedList;
@@ -379,6 +380,13 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware,
         log.warn("Query: {}; ", req.getParamString(), ex);
         if( rb.rsp.getResponse() == null) {
           rb.rsp.addResponse(new SolrDocumentList());
+
+          // If a cursorMark was passed, and we didn't progress, set
+          // the nextCursorMark to the same position
+          String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
+          if (null != cursorStr) {
+            rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT, cursorStr);
+          }
         }
         if(rb.isDebug()) {
           NamedList debug = new NamedList();
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-deeppaging.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-deeppaging.xml
index 812a51f..004a4e7 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-deeppaging.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-deeppaging.xml
@@ -30,12 +30,21 @@
 
   <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
 
+  <searchComponent name="delayingSearchComponent"
+                   class="org.apache.solr.search.DelayingSearchComponent"/>
+
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <arr name="first-components">
+      <str>delayingSearchComponent</str>
+    </arr>
+  </requestHandler>
+
   <updateHandler class="solr.DirectUpdateHandler2">
     <updateLog>
       <str name="dir">${solr.ulog.dir:}</str>
     </updateLog>
   </updateHandler>
-  
+
   <!-- deep paging better play nice with caching -->
   <query>
     <!-- no wautowarming, it screws up our ability to sanity check cache stats in tests -->
diff --git a/solr/core/src/test/org/apache/solr/CursorPagingTest.java b/solr/core/src/test/org/apache/solr/CursorPagingTest.java
index 2210e18..f601cb0 100644
--- a/solr/core/src/test/org/apache/solr/CursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/CursorPagingTest.java
@@ -25,6 +25,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import org.apache.lucene.util.SentinelIntSet;
 import org.apache.lucene.util.TestUtil;
@@ -46,6 +48,7 @@ import org.junit.BeforeClass;
 import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_NEXT;
 import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_PARAM;
 import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;
+import static org.apache.solr.common.params.CommonParams.TIME_ALLOWED;
 import static org.apache.solr.common.util.Utils.fromJSONString;
 
 /**
@@ -104,13 +107,6 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
                       CURSOR_MARK_PARAM, CURSOR_MARK_START),
                ErrorCode.BAD_REQUEST, "_docid_");
 
-    // using cursor w/ timeAllowed
-    assertFail(params("q", "*:*", 
-                      "sort", "id desc", 
-                      CommonParams.TIME_ALLOWED, "1000",
-                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
-               ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);
-
     // using cursor w/ grouping
     assertFail(params("q", "*:*", 
                       "sort", "id desc", 
@@ -500,6 +496,83 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
   }
 
   /**
+   * test that timeAllowed parameter can be used with cursors
+   * uses DelayingSearchComponent in solrconfig-deeppaging.xml
+   */
+  public void testTimeAllowed() throws Exception {
+    String wontExceedTimeout = "10000";
+    int numDocs = 1000;
+    List<String> ids = IntStream.range(0, 1000).mapToObj(String::valueOf).collect(Collectors.toList());
+    // Shuffle to test ordering
+    Collections.shuffle(ids, random());
+    for (String id : ids) {
+      assertU(adoc("id", id, "name", "a" + id));
+      if (random().nextInt(numDocs) == 0) {
+        assertU(commit());  // sometimes make multiple segments
+      }
+    }
+    assertU(commit());
+
+    Collections.sort(ids);
+
+    String cursorMark, nextCursorMark = CURSOR_MARK_START;
+
+    SolrParams params = params("q", "name:a*",
+        "fl", "id",
+        "sort", "id asc",
+        "rows", "50",
+        "sleep", "10");
+
+    List<String> foundDocIds = new ArrayList<>();
+    String[] timeAllowedVariants = {"1", "50", wontExceedTimeout};
+    int partialCount = 0;
+    do {
+      cursorMark = nextCursorMark;
+      for (String timeAllowed : timeAllowedVariants) {
+
+        // execute the query
+        String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark, TIME_ALLOWED, timeAllowed));
+
+        @SuppressWarnings({"unchecked"})
+        Map<Object, Object> response = (Map<Object, Object>) fromJSONString(json);
+        @SuppressWarnings({"unchecked"})
+        Map<Object, Object> responseHeader = (Map<Object, Object>) response.get("responseHeader");
+        @SuppressWarnings({"unchecked"})
+        Map<Object, Object> responseBody = (Map<Object, Object>) response.get("response");
+        nextCursorMark = (String) response.get(CURSOR_MARK_NEXT);
+
+        // count occurance of partialResults (confirm at the end at least one)
+        if (responseHeader.containsKey("partialResults")) {
+          partialCount++;
+        }
+
+        // add the ids found (confirm we found all at the end in correct order)
+        @SuppressWarnings({"unchecked"})
+        List<Map<Object, Object>> docs = (List<Map<Object, Object>>) (responseBody.get("docs"));
+        for (Map<Object, Object> doc : docs) {
+          foundDocIds.add(doc.get("id").toString());
+        }
+
+        // break out of the timeAllowed variants as soon as we progress
+        if (!cursorMark.equals(nextCursorMark)) {
+          break;
+        }
+      }
+    } while (!cursorMark.equals(nextCursorMark));
+
+    List<String> sortedFoundDocIds = new ArrayList<>();
+    sortedFoundDocIds.addAll(foundDocIds);
+    Collections.sort(sortedFoundDocIds);
+    // Note: it is not guaranteed that all docs will be found, because a query may time out
+    // before reaching all segments, this causes documents in the skipped segements to be skipped
+    // in the overall result set as the cursor pages through.
+    assertEquals("Should have found last doc id eventually", ids.get(ids.size() -1), foundDocIds.get(foundDocIds.size() -1));
+    assertEquals("Documents arrived in sorted order within and between pages", sortedFoundDocIds, foundDocIds);
+    assertTrue("Should have experienced at least one partialResult", partialCount > 0);
+  }
+
+
+  /**
    * test that our assumptions about how caches are affected hold true
    */
   public void testCacheImpacts() throws Exception {
diff --git a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
index 0b488f0..b006145 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DistribCursorPagingTest.java
@@ -132,13 +132,6 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
                       CURSOR_MARK_PARAM, CURSOR_MARK_START),
                ErrorCode.BAD_REQUEST, "_docid_");
 
-    // using cursor w/ timeAllowed
-    assertFail(params("q", "*:*", 
-                      "sort", "id desc", 
-                      CommonParams.TIME_ALLOWED, "1000",
-                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
-               ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);
-
     // using cursor w/ grouping
     assertFail(params("q", "*:*", 
                       "sort", "id desc", 
diff --git a/solr/solr-ref-guide/src/common-query-parameters.adoc b/solr/solr-ref-guide/src/common-query-parameters.adoc
index c101a8b..b650f92 100644
--- a/solr/solr-ref-guide/src/common-query-parameters.adoc
+++ b/solr/solr-ref-guide/src/common-query-parameters.adoc
@@ -206,7 +206,7 @@ The default value of this parameter is blank, which causes no extra "explain inf
 
 == timeAllowed Parameter
 
-This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, <<faceting.adoc#faceting,facet>> counts, and result <<the-stats-component.adoc#the-stats-component,stats>> may not be accurate for the entire result set. In case of expiration, if `omitHeader` isn't set to `true` the response header contains a special flag called `partialResults`.
+This parameter specifies the amount of time, in milliseconds, allowed for a search to complete. If this time expires before the search is complete, any partial results will be returned, but values such as `numFound`, <<faceting.adoc#faceting,facet>> counts, and result <<the-stats-component.adoc#the-stats-component,stats>> may not be accurate for the entire result set. In case of expiration, if `omitHeader` isn't set to `true` the response header contains a special flag called `partialRes [...]
 
 [source,json]
 ----
@@ -252,7 +252,7 @@ The default value of this parameter is `false`.
 
 This parameter may be set to either `true` or `false`.
 
-If set to `true`, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is `false`.
+If set to `true`, this parameter excludes the header from the returned results. The header contains information about the request, such as the time it took to complete. The default value for this parameter is `false`. When using parameters such as <<common-query-parameters.adoc#timeallowed-parameter,`timeAllowed`>>, and <<solrcloud-query-routing-and-read-tolerance.adoc#shards-tolerant-parameter,`shards.tolerant`>>, which can lead to partial results, it is advisable to keep the header, so [...]
 
 == wt Parameter
 
diff --git a/solr/solr-ref-guide/src/pagination-of-results.adoc b/solr/solr-ref-guide/src/pagination-of-results.adoc
index 7be5ac7..491f135 100644
--- a/solr/solr-ref-guide/src/pagination-of-results.adoc
+++ b/solr/solr-ref-guide/src/pagination-of-results.adoc
@@ -97,6 +97,7 @@ There are a few important constraints to be aware of when using `cursorMark` par
 
 . `cursorMark` and `start` are mutually exclusive parameters.
 * Your requests must either not include a `start` parameter, or it must be specified with a value of "```0```".
+. When using the <<common-query-parameters.adoc#timeallowed-parameter,`timeAllowed` request param>>, partial results may be returned.  If time expires before the search is complete - as indicated when the `responseHeader` includes `"partialResults": true`, some matching documents may have been skipped. Additionally, if `cursorMark` matches `nextCursorMark`, you cannot be sure that there are no more results. In these situation, consider increasing `timeAllowed` and reissuing the query.  W [...]
 . `sort` clauses must include the uniqueKey field (either `asc` or `desc`).
 * If `id` is your uniqueKey field, then sort parameters like `id asc` and `name asc, id desc` would both work fine, but `name asc` by itself would not
 . Sorts including <<working-with-dates.adoc#working-with-dates,Date Math>> based functions that involve calculations relative to `NOW` will cause confusing results, since every document will get a new sort value on every subsequent request. This can easily result in cursors that never end, and constantly return the same documents over and over – even if the documents are never updated.