You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/04/10 01:10:18 UTC
lucene-solr:master: SOLR-8922: optimize DocSetCollector to produce less garbage

Repository: lucene-solr
Updated Branches:
  refs/heads/master d377e7fd3 -> cfba58f0d


SOLR-8922: optimize DocSetCollector to produce less garbage


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/cfba58f0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/cfba58f0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/cfba58f0

Branch: refs/heads/master
Commit: cfba58f0d0adecab495c8ea073f38b0e53f5481f
Parents: d377e7f
Author: yonik <yo...@apache.org>
Authored: Sat Apr 9 19:10:02 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Sat Apr 9 19:10:02 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  5 ++
 .../org/apache/solr/search/DocSetCollector.java | 84 ++++++++++++++++++--
 2 files changed, 82 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cfba58f0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1175123..b0047a1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,11 @@ Optimizations
   
 * SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
 
+* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
+  produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
+  as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
+
+
 Other Changes
 ----------------------
 * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cfba58f0/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
index 8e529d9..25b12c5 100644
--- a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
@@ -17,6 +17,7 @@
 package org.apache.solr.search;
 
 import java.io.IOException;
+import java.util.ArrayList;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Scorer;
@@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
   // in case there aren't that many hits, we may not want a very sparse
   // bit array.  Optimistically collect the first few docs in an array
   // in case there are only a few.
-  final int[] scratch;
+  final ExpandingIntArray scratch;
 
   public DocSetCollector(int maxDoc) {
     this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
   public DocSetCollector(int smallSetSize, int maxDoc) {
     this.smallSetSize = smallSetSize;
     this.maxDoc = maxDoc;
-    this.scratch = new int[smallSetSize];
+    this.scratch = new ExpandingIntArray(smallSetSize);
   }
 
   @Override
@@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
     // than scanning through a potentially huge bit vector.
     // FUTURE: when search methods all start returning docs in order, maybe
     // we could have a ListDocSet() and use the collected array directly.
-    if (pos < scratch.length) {
-      scratch[pos]=doc;
+    if (pos < smallSetSize) {
+      scratch.add(pos, doc);
     } else {
       // this conditional could be removed if BitSet was preallocated, but that
       // would take up more memory, and add more GC time...
@@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
   }
 
   public DocSet getDocSet() {
-    if (pos<=scratch.length) {
+    if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
-      return new SortedIntDocSet(scratch, pos);
+      return new SortedIntDocSet(scratch.toArray(), pos);
     } else {
       // set the bits for ids that were collected in the array
-      for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
+      scratch.copyTo(bits);
       return new BitDocSet(bits,pos);
     }
   }
@@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
   protected void doSetNextReader(LeafReaderContext context) throws IOException {
     this.base = context.docBase;
   }
+
+  protected static class ExpandingIntArray {
+    private static final int[] EMPTY = new int[0];
+    private int[] currentAddArray = null;
+    private int indexForNextAddInCurrentAddArray = 0;
+    private int size = 0;
+    private final int smallSetSize;
+    private ArrayList<int[]> arrays;
+
+    public ExpandingIntArray(int smallSetSize) {
+      this.smallSetSize = smallSetSize;
+      this.currentAddArray = EMPTY;
+    }
+
+    private void addNewArray() {
+      int arrSize = Math.max(10, currentAddArray.length << 1);
+      arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
+      this.currentAddArray = new int[arrSize];
+      if (arrays == null) {
+        arrays = new ArrayList<>();
+      }
+      arrays.add(this.currentAddArray);
+      indexForNextAddInCurrentAddArray = 0;
+      // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
+    }
+
+    public void add(int index, int value) {
+      // assert index == size; // only appending is supported
+      if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
+        addNewArray();
+      }
+      currentAddArray[indexForNextAddInCurrentAddArray++] = value;
+      size++;
+    }
+
+    public void copyTo(FixedBitSet bits) {
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          for (int j = 0; j < intsToCopy; j++) {
+            bits.set(srcArray[j]);
+          }
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+    }
+
+    public int[] toArray() {
+      int[] result = new int[size];
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+      return result;
+    }
+
+    public int size() {
+      return size;
+    }
+  }
 }