You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/03/10 09:58:59 UTC

[lucene] 04/05: SOLR-14608: Add queueSize parameter and improve code readability

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch jira/SOLR-14608-export-merge
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 4c03a46a09f09ba871d2631980e6351ddebaeb30
Author: Joel Bernstein <jb...@apache.org>
AuthorDate: Thu Jan 14 16:22:34 2021 -0500

    SOLR-14608: Add queueSize parameter and improve code readability
---
 .../apache/solr/handler/export/ExportWriter.java   | 49 +++++++++++-----------
 .../apache/solr/handler/export/StringValue.java    |  2 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
index c2e4b55..4db66e1 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java
@@ -90,15 +90,16 @@ import static org.apache.solr.common.util.Utils.makeMap;
  * bitmap identifies the smallest docs (default is {@link #DEFAULT_BATCH_SIZE}) that haven't been sent yet and stores them in a
  * Priority Queue.  They are then exported (written across the wire) and marked as sent (unset in the bitmap).
  * This process repeats until all matching documents have been sent.
- * <p>
- * This streaming approach is light on memory (only up to 2x batch size documents are ever stored in memory at
- * once), and it allows {@link ExportWriter} to scale well with regard to numDocs.
  */
 public class ExportWriter implements SolrCore.RawWriter, Closeable {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   public static final String BATCH_SIZE_PARAM = "batchSize";
+  public static final String QUEUE_SIZE_PARAM = "queueSize";
+
   public static final int DEFAULT_BATCH_SIZE = 30000;
+  public static final int DEFAULT_QUEUE_SIZE = 150000;
+
 
   private OutputStreamWriter respWriter;
   final SolrQueryRequest req;
@@ -106,7 +107,10 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
   final StreamContext initialStreamContext;
   final SolrMetricsContext solrMetricsContext;
   final String metricsPath;
+  //The batch size for the output writer thread.
   final int batchSize;
+  //The max combined size of the segment level priority queues.
+  final int priorityQueueSize;
   StreamExpression streamExpression;
   StreamContext streamContext;
   FieldWriter[] fieldWriters;
@@ -126,8 +130,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     this.initialStreamContext = initialStreamContext;
     this.solrMetricsContext = solrMetricsContext;
     this.metricsPath = metricsPath;
-    // may be too tricky to get this right? always use default for now
-    //this.batchSize = req.getParams().getInt(BATCH_SIZE_PARAM, DEFAULT_BATCH_SIZE);
+    this.priorityQueueSize = req.getParams().getInt(QUEUE_SIZE_PARAM, DEFAULT_QUEUE_SIZE);
     this.batchSize = DEFAULT_BATCH_SIZE;
   }
 
@@ -662,7 +665,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
       int[] sizes = new int[leaves.size()];
       for (int i = 0; i < leaves.size(); i++) {
         long maxDoc = leaves.get(i).reader().maxDoc();
-        int sortQueueSize = Math.min((int) (((double) maxDoc / (double) totalDocs) * 200000), batchSize);
+        int sortQueueSize = Math.min((int) (((double) maxDoc / (double) totalDocs) * this.priorityQueueSize), batchSize);
         sizes[i] = sortQueueSize;
       }
 
@@ -700,28 +703,26 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
     }
 
     public SortDoc next() throws IOException {
-      SortDoc sortDoc = null;
+      SortDoc _sortDoc = null;
       if (index > -1) {
-        sortDoc = outDocs[index--];
+        _sortDoc = outDocs[index--];
       } else {
         topDocs();
         if (index > -1) {
-          sortDoc = outDocs[index--];
+          _sortDoc = outDocs[index--];
         }
       }
 
-      if (sortDoc != null) {
+      if (_sortDoc != null) {
         //Clear the bit so it's not loaded again.
-        bits.clear(sortDoc.docId);
+        bits.clear(_sortDoc.docId);
 
         //Load the global ordinal (only matters for strings)
-        sortDoc.setGlobalValues(nextDoc);
+        _sortDoc.setGlobalValues(nextDoc);
 
-        //Save this doc so we don't have to lookup the global ordinal for the next doc if they have the same segment ordinal.
-        //lastDoc.setValues(sortDoc);
-        nextDoc.setValues(sortDoc);
+        nextDoc.setValues(_sortDoc);
         //We are now done with this doc.
-        sortDoc.reset();
+        _sortDoc.reset();
       } else {
         nextDoc = null;
       }
@@ -732,13 +733,13 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
       try {
         queue.reset();
         SortDoc top = queue.top();
-        sortDoc.setNextReader(context);
+        this.sortDoc.setNextReader(context);
         DocIdSetIterator it = new BitSetIterator(bits, 0); // cost is not useful here
         int docId;
         while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          sortDoc.setValues(docId);
-          if (top.lessThan(sortDoc)) {
-            top.setValues(sortDoc);
+          this.sortDoc.setValues(docId);
+          if (top.lessThan(this.sortDoc)) {
+            top.setValues(this.sortDoc);
             top = queue.updateTop();
           }
         }
@@ -746,10 +747,10 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
         //Pop the queue and load up the array.
         index = -1;
 
-        SortDoc sortDoc;
-        while ((sortDoc = queue.pop()) != null) {
-          if (sortDoc.docId > -1) {
-            outDocs[++index] = sortDoc;
+        SortDoc _sortDoc;
+        while ((_sortDoc = queue.pop()) != null) {
+          if (_sortDoc.docId > -1) {
+            outDocs[++index] = _sortDoc;
           }
         }
       } catch (Exception e) {
diff --git a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
index 2550c07..df57c9e 100644
--- a/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
+++ b/solr/core/src/java/org/apache/solr/handler/export/StringValue.java
@@ -161,6 +161,6 @@ class StringValue implements SortValue {
   }
 
   public String toString() {
-    return "HERE:"+Integer.toString(this.currentOrd);
+    return Integer.toString(this.currentOrd);
   }
 }