You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/04/10 01:10:18 UTC
lucene-solr:master: SOLR-8922: optimize DocSetCollector to produce
less garbage
Repository: lucene-solr
Updated Branches:
refs/heads/master d377e7fd3 -> cfba58f0d
SOLR-8922: optimize DocSetCollector to produce less garbage
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/cfba58f0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/cfba58f0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/cfba58f0
Branch: refs/heads/master
Commit: cfba58f0d0adecab495c8ea073f38b0e53f5481f
Parents: d377e7f
Author: yonik <yo...@apache.org>
Authored: Sat Apr 9 19:10:02 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Sat Apr 9 19:10:02 2016 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 5 ++
.../org/apache/solr/search/DocSetCollector.java | 84 ++++++++++++++++++--
2 files changed, 82 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cfba58f0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1175123..b0047a1 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,11 @@ Optimizations
* SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
+* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
+ produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
+ as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
+
+
Other Changes
----------------------
* SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cfba58f0/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
index 8e529d9..25b12c5 100644
--- a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java
@@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.io.IOException;
+import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
@@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
// in case there aren't that many hits, we may not want a very sparse
// bit array. Optimistically collect the first few docs in an array
// in case there are only a few.
- final int[] scratch;
+ final ExpandingIntArray scratch;
public DocSetCollector(int maxDoc) {
this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
public DocSetCollector(int smallSetSize, int maxDoc) {
this.smallSetSize = smallSetSize;
this.maxDoc = maxDoc;
- this.scratch = new int[smallSetSize];
+ this.scratch = new ExpandingIntArray(smallSetSize);
}
@Override
@@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
- if (pos < scratch.length) {
- scratch[pos]=doc;
+ if (pos < smallSetSize) {
+ scratch.add(pos, doc);
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
@@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
}
public DocSet getDocSet() {
- if (pos<=scratch.length) {
+ if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
- return new SortedIntDocSet(scratch, pos);
+ return new SortedIntDocSet(scratch.toArray(), pos);
} else {
// set the bits for ids that were collected in the array
- for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
+ scratch.copyTo(bits);
return new BitDocSet(bits,pos);
}
}
@@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.base = context.docBase;
}
+
+ protected static class ExpandingIntArray {
+ private static final int[] EMPTY = new int[0];
+ private int[] currentAddArray = null;
+ private int indexForNextAddInCurrentAddArray = 0;
+ private int size = 0;
+ private final int smallSetSize;
+ private ArrayList<int[]> arrays;
+
+ public ExpandingIntArray(int smallSetSize) {
+ this.smallSetSize = smallSetSize;
+ this.currentAddArray = EMPTY;
+ }
+
+ private void addNewArray() {
+ int arrSize = Math.max(10, currentAddArray.length << 1);
+ arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
+ this.currentAddArray = new int[arrSize];
+ if (arrays == null) {
+ arrays = new ArrayList<>();
+ }
+ arrays.add(this.currentAddArray);
+ indexForNextAddInCurrentAddArray = 0;
+ // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
+ }
+
+ public void add(int index, int value) {
+ // assert index == size; // only appending is supported
+ if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
+ addNewArray();
+ }
+ currentAddArray[indexForNextAddInCurrentAddArray++] = value;
+ size++;
+ }
+
+ public void copyTo(FixedBitSet bits) {
+ if (size > 0) {
+ int resultPos = 0;
+ for (int i = 0; i < arrays.size(); i++) {
+ int[] srcArray = arrays.get(i);
+ int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+ for (int j = 0; j < intsToCopy; j++) {
+ bits.set(srcArray[j]);
+ }
+ resultPos += intsToCopy;
+ }
+ assert resultPos == size;
+ }
+ }
+
+ public int[] toArray() {
+ int[] result = new int[size];
+ if (size > 0) {
+ int resultPos = 0;
+ for (int i = 0; i < arrays.size(); i++) {
+ int[] srcArray = arrays.get(i);
+ int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+ System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
+ resultPos += intsToCopy;
+ }
+ assert resultPos == size;
+ }
+ return result;
+ }
+
+ public int size() {
+ return size;
+ }
+ }
}