You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2015/05/13 19:37:15 UTC
svn commit: r1679250 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/
solr/core/src/java/org/apache/solr/handler/component/StatsField.java
solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
Author: hossman
Date: Wed May 13 17:37:14 2015
New Revision: 1679250
URL: http://svn.apache.org/r1679250
Log:
SOLR-6968: perf tweak: eliminate use of SPARSE storage option since it has some pathologically bad behavior for some set sizes (particularly when merging shard responses) (merge r1679241)
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java?rev=1679250&r1=1679249&r2=1679250&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java Wed May 13 17:37:14 2015
@@ -57,6 +57,7 @@ import org.apache.solr.search.SolrIndexS
import org.apache.solr.search.SyntaxError;
import net.agkn.hll.HLL;
+import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
@@ -727,7 +728,14 @@ public class StatsField {
return hasher;
}
public HLL newHLL() {
- return new HLL(getLog2m(), getRegwidth());
+ // Although it (in theory) saves memory for "medium" size sets, the SPARSE type seems to have
+ // some nasty impacts on response time as it gets larger - particularly in distrib requests.
+ // Merging large SPARSE HLLs is much much slower then merging FULL HLLs with the same num docs
+ //
+ // TODO: add more tunning options for this.
+ return new HLL(getLog2m(), getRegwidth(), -1 /* auto explict threshold */,
+ false /* no sparse representation */, HLLType.EMPTY);
+
}
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java?rev=1679250&r1=1679249&r2=1679250&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java Wed May 13 17:37:14 2015
@@ -35,6 +35,7 @@ import org.apache.solr.schema.*;
import com.tdunning.math.stats.AVLTreeDigest;
import net.agkn.hll.HLL;
+import net.agkn.hll.HLLType;
import com.google.common.hash.Hashing;
import com.google.common.hash.HashFunction;
@@ -139,7 +140,8 @@ abstract class AbstractStatsValues<T> im
* Hash function that must be used by implementations of {@link #hash}
*/
protected final HashFunction hasher;
- private final HLL hll;
+ // if null, no HLL logic can be computed; not final because of "union" optimization (see below)
+ private HLL hll;
// facetField facetValue
protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
@@ -212,7 +214,17 @@ abstract class AbstractStatsValues<T> im
if (computeCardinality) {
byte[] data = (byte[]) stv.get("cardinality");
- hll.union(HLL.fromBytes(data));
+ HLL other = HLL.fromBytes(data);
+ if (hll.getType().equals(HLLType.EMPTY)) {
+ // The HLL.union method goes out of it's way not to modify the "other" HLL.
+ // Which means in the case of merging into an "EMPTY" HLL (garunteed to happen at
+ // least once in every coordination of shard requests) it always clones all
+ // of the internal storage -- but since we're going to throw "other" away after
+ // the merge, this just means a short term doubling of RAM that we can skip.
+ hll = other;
+ } else {
+ hll.union(other);
+ }
}
updateTypeSpecificStats(stv);