You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2020/06/18 16:29:50 UTC

[lucene-solr] branch jira/solr-14537 updated: SOLR-14537: Add "batchSize" param to adjust buffer size per request.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch jira/solr-14537
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/jira/solr-14537 by this push:
     new 830e357  SOLR-14537: Add "batchSize" param to adjust buffer size per request.
830e357 is described below

commit 830e35747be2a7e7a76da8b75a723b7d78be5e26
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Thu Jun 18 18:29:12 2020 +0200

    SOLR-14537: Add "batchSize" param to adjust buffer size per request.
---
 .../src/java/org/apache/solr/handler/export/ExportWriter.java | 11 +++++++----
 solr/solr-ref-guide/src/exporting-result-sets.adoc            |  2 ++
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index 862e507..ad403a3 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -87,17 +87,18 @@ import static org.apache.solr.common.util.Utils.makeMap;
  * {@link ExportWriter} gathers and sorts the documents for a core using "stream sorting".
  * <p>
  * Stream sorting works by repeatedly processing and modifying a bitmap of matching documents.  Each pass over the
- * bitmap identifies the smallest {@link #DOCUMENT_BATCH_SIZE} docs that haven't been sent yet and stores them in a
+ * bitmap identifies the smallest batch of docs (batch size defaults to {@link #DEFAULT_BATCH_SIZE}) that haven't been sent yet and stores them in a
  * Priority Queue.  They are then exported (written across the wire) and marked as sent (unset in the bitmap).
  * This process repeats until all matching documents have been sent.
  * <p>
- * This streaming approach is light on memory (only up to 2x {@link #DOCUMENT_BATCH_SIZE} documents are ever stored in memory at
+ * This streaming approach is light on memory (only up to 2x batch size documents are ever stored in memory at
  * once), and it allows {@link ExportWriter} to scale well with regard to numDocs.
  */
 public class ExportWriter implements SolrCore.RawWriter, Closeable {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static final int DOCUMENT_BATCH_SIZE = 30000;
+  public static final String BATCH_SIZE_PARAM = "batchSize";
+  public static final int DEFAULT_BATCH_SIZE = 30000;
 
   private OutputStreamWriter respWriter;
   final SolrQueryRequest req;
@@ -105,6 +106,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
   final StreamContext initialStreamContext;
   final SolrMetricsContext solrMetricsContext;
   final String metricsPath;
+  final int batchSize;
   StreamExpression streamExpression;
   StreamContext streamContext;
   FieldWriter[] fieldWriters;
@@ -128,6 +130,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     this.initialStreamContext = initialStreamContext;
     this.solrMetricsContext = solrMetricsContext;
     this.metricsPath = metricsPath;
+    this.batchSize = req.getParams().getInt(BATCH_SIZE_PARAM, DEFAULT_BATCH_SIZE);
     identifyLowestSortingDocTimer = solrMetricsContext.timer("identifyLowestSortingDoc", metricsPath);
     transferBatchToBufferTimer = solrMetricsContext.timer("transferBatchToBuffer", metricsPath);
     writeOutputBufferTimer = solrMetricsContext.timer("writeOutputBuffer", metricsPath);
@@ -350,7 +353,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
 
   protected void writeDocs(SolrQueryRequest req, OutputStream os, IteratorWriter.ItemWriter writer, Sort sort) throws IOException {
     List<LeafReaderContext> leaves = req.getSearcher().getTopReaderContext().leaves();
-    final int queueSize = Math.min(DOCUMENT_BATCH_SIZE, totalHits);
+    final int queueSize = Math.min(batchSize, totalHits);
 
     ExportBuffers buffers = new ExportBuffers(this, leaves, req.getSearcher(), os, writer, sort, queueSize, totalHits,
         writeOutputBufferTimer, fillerWaitTimer, writerWaitTimer);
diff --git a/solr/solr-ref-guide/src/exporting-result-sets.adoc b/solr/solr-ref-guide/src/exporting-result-sets.adoc
index 8a072f2..674ab96 100644
--- a/solr/solr-ref-guide/src/exporting-result-sets.adoc
+++ b/solr/solr-ref-guide/src/exporting-result-sets.adoc
@@ -39,6 +39,8 @@ You can use `/export` to make requests to export the result set of a query.
 
 All queries must include `sort` and `fl` parameters, or the query will return an error. Filter queries are also supported.
 
+The optional parameter `batchSize` determines the size of the internal buffers for partial results. The default value is 30000, but users may want to specify smaller values to limit memory use (at the cost of degraded performance), or higher values to improve export performance — the relationship is not linear, and larger values don't bring proportionally larger performance increases.
+
 The supported response writers are `json` and `javabin`. For backward compatibility reasons, `wt=xsort` is also supported as input, but it behaves the same as `wt=json`. The default output format is `json`.
 
 Here is an example of an export request of some indexed log data: