You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2015/03/24 17:23:52 UTC
svn commit: r1668926 - in /lucene/dev/branches/branch_5x: ./ dev-tools/
lucene/ lucene/analysis/
lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/
lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/
lucene...
Author: hossman
Date: Tue Mar 24 16:23:50 2015
New Revision: 1668926
URL: http://svn.apache.org/r1668926
Log:
SOLR-6350: StatsComponent now supports Percentiles (merge r1668922)
Added:
lucene/dev/branches/branch_5x/solr/licenses/t-digest-3.0.jar.sha1
- copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-3.0.jar.sha1
lucene/dev/branches/branch_5x/solr/licenses/t-digest-LICENSE-ASL.txt
- copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-LICENSE-ASL.txt
lucene/dev/branches/branch_5x/solr/licenses/t-digest-NOTICE.txt
- copied unchanged from r1668922, lucene/dev/trunk/solr/licenses/t-digest-NOTICE.txt
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/dev-tools/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/BUILD.txt (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (props changed)
lucene/dev/branches/branch_5x/lucene/JRE_VERSION_MIGRATION.txt (props changed)
lucene/dev/branches/branch_5x/lucene/LICENSE.txt (props changed)
lucene/dev/branches/branch_5x/lucene/MIGRATE.txt (props changed)
lucene/dev/branches/branch_5x/lucene/NOTICE.txt (props changed)
lucene/dev/branches/branch_5x/lucene/README.txt (props changed)
lucene/dev/branches/branch_5x/lucene/SYSTEM_REQUIREMENTS.txt (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/ (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/Lucene47WordDelimiterFilter.java (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/ASCIITLD.jflex-macro (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/SUPPLEMENTARY.jflex-macro (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.java (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/StandardTokenizerImpl40.jflex (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.java (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/UAX29URLEmailTokenizerImpl40.jflex (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/std40/package.html (props changed)
lucene/dev/branches/branch_5x/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLucene47WordDelimiterFilter.java (props changed)
lucene/dev/branches/branch_5x/lucene/backward-codecs/ (props changed)
lucene/dev/branches/branch_5x/lucene/benchmark/ (props changed)
lucene/dev/branches/branch_5x/lucene/build.xml (props changed)
lucene/dev/branches/branch_5x/lucene/classification/ (props changed)
lucene/dev/branches/branch_5x/lucene/classification/build.xml (props changed)
lucene/dev/branches/branch_5x/lucene/classification/ivy.xml (props changed)
lucene/dev/branches/branch_5x/lucene/classification/src/ (props changed)
lucene/dev/branches/branch_5x/lucene/codecs/ (props changed)
lucene/dev/branches/branch_5x/lucene/common-build.xml (props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions2.java (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestSort.java (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/search/TestTotalHitCountCollector.java (props changed)
lucene/dev/branches/branch_5x/lucene/demo/ (props changed)
lucene/dev/branches/branch_5x/lucene/expressions/ (props changed)
lucene/dev/branches/branch_5x/lucene/facet/ (props changed)
lucene/dev/branches/branch_5x/lucene/grouping/ (props changed)
lucene/dev/branches/branch_5x/lucene/highlighter/ (props changed)
lucene/dev/branches/branch_5x/lucene/ivy-ignore-conflicts.properties (props changed)
lucene/dev/branches/branch_5x/lucene/ivy-settings.xml (props changed)
lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (contents, props changed)
lucene/dev/branches/branch_5x/lucene/join/ (props changed)
lucene/dev/branches/branch_5x/lucene/licenses/ (props changed)
lucene/dev/branches/branch_5x/lucene/memory/ (props changed)
lucene/dev/branches/branch_5x/lucene/misc/ (props changed)
lucene/dev/branches/branch_5x/lucene/module-build.xml (props changed)
lucene/dev/branches/branch_5x/lucene/queries/ (props changed)
lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionQuerySort.java (props changed)
lucene/dev/branches/branch_5x/lucene/queryparser/ (props changed)
lucene/dev/branches/branch_5x/lucene/replicator/ (props changed)
lucene/dev/branches/branch_5x/lucene/sandbox/ (props changed)
lucene/dev/branches/branch_5x/lucene/site/ (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/ (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/bbox/ (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/PrefixTreeFacetCounter.java (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/java/org/apache/lucene/spatial/util/ShapeAreaValueSource.java (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/test-files/data/simple-bbox.txt (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/test-files/simple-Queries-BBox.txt (props changed)
lucene/dev/branches/branch_5x/lucene/spatial/src/test/org/apache/lucene/spatial/bbox/ (props changed)
lucene/dev/branches/branch_5x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/ (props changed)
lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/cranky/ (props changed)
lucene/dev/branches/branch_5x/lucene/tools/ (props changed)
lucene/dev/branches/branch_5x/lucene/version.properties (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/LICENSE.txt (props changed)
lucene/dev/branches/branch_5x/solr/NOTICE.txt (props changed)
lucene/dev/branches/branch_5x/solr/README.txt (props changed)
lucene/dev/branches/branch_5x/solr/bin/ (props changed)
lucene/dev/branches/branch_5x/solr/build.xml (props changed)
lucene/dev/branches/branch_5x/solr/cloud-dev/ (props changed)
lucene/dev/branches/branch_5x/solr/common-build.xml (props changed)
lucene/dev/branches/branch_5x/solr/contrib/ (props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/ivy.xml
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/request/DocValuesStats.java (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaManager.java (props changed)
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/core/TestConfig.java (props changed)
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
lucene/dev/branches/branch_5x/solr/example/ (props changed)
lucene/dev/branches/branch_5x/solr/licenses/ (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpclient-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpclient-NOTICE.txt (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpcore-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpcore-NOTICE.txt (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpmime-LICENSE-ASL.txt (props changed)
lucene/dev/branches/branch_5x/solr/licenses/httpmime-NOTICE.txt (props changed)
lucene/dev/branches/branch_5x/solr/scripts/ (props changed)
lucene/dev/branches/branch_5x/solr/server/ (props changed)
lucene/dev/branches/branch_5x/solr/site/ (props changed)
lucene/dev/branches/branch_5x/solr/site/SYSTEM_REQUIREMENTS.mdtext (props changed)
lucene/dev/branches/branch_5x/solr/solrj/ (props changed)
lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java
lucene/dev/branches/branch_5x/solr/test-framework/ (props changed)
lucene/dev/branches/branch_5x/solr/webapp/ (props changed)
Modified: lucene/dev/branches/branch_5x/lucene/ivy-versions.properties
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/ivy-versions.properties?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/ivy-versions.properties (original)
+++ lucene/dev/branches/branch_5x/lucene/ivy-versions.properties Tue Mar 24 16:23:50 2015
@@ -50,6 +50,8 @@ com.sun.jersey.version = 1.9
/com.sun.mail/javax.mail = 1.5.1
/com.sun.xml.bind/jaxb-impl = 2.2.3-1
+
+/com.tdunning/t-digest = 3.0
/com.thoughtworks.paranamer/paranamer = 2.3
/com.typesafe/config = 1.0.2
/com.uwyn/jhighlight = 1.0
@@ -243,3 +245,4 @@ org.slf4j.version = 1.7.7
/org.xerial.snappy/snappy-java = 1.0.5
/rome/rome = 1.0
/xerces/xercesImpl = 2.9.1
+
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Tue Mar 24 16:23:50 2015
@@ -146,6 +146,8 @@ New Features
* SOLR-7245: Temporary ZK election or connection loss should not stall indexing
due to leader initiated recovery (Ramkumar Aiyengar)
+* SOLR-6350: StatsComponent now supports Percentiles (Xu Zhang, hossman)
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/ivy.xml?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/ivy.xml (original)
+++ lucene/dev/branches/branch_5x/solr/core/ivy.xml Tue Mar 24 16:23:50 2015
@@ -87,6 +87,9 @@
<dependency org="org.apache.hadoop" name="hadoop-minikdc" rev="${/org.apache.hadoop/hadoop-minikdc}" conf="test.MiniKdc"/>
<dependency org="org.apache.directory.server" name="apacheds-all" rev="${/org.apache.directory.server/apacheds-all}" conf="test.MiniKdc"/>
+ <!-- StatsComponents percentiles Dependencies-->
+ <dependency org="com.tdunning" name="t-digest" rev="${/com.tdunning/t-digest}" conf="compile->*"/>
+
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java Tue Mar 24 16:23:50 2015
@@ -52,8 +52,6 @@ public class StatsComponent extends Sear
@Override
public void process(ResponseBuilder rb) throws IOException {
if (!rb.doStats) return;
-
- boolean isShard = rb.req.getParams().getBool(ShardParams.IS_SHARD, false);
Map<String, StatsValues> statsValues = new LinkedHashMap<>();
for (StatsField statsField : rb._statsInfo.getStatsFields()) {
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsField.java Tue Mar 24 16:23:50 2015
@@ -81,8 +81,34 @@ public class StatsField {
mean(false, sum, count),
sumOfSquares(true),
stddev(false, sum, count, sumOfSquares),
- calcdistinct(true);
-
+ calcdistinct(true),
+ percentiles(true){
+ /** special for percentiles **/
+ boolean parseParams(StatsField sf) {
+ String percentileParas = sf.localParams.get(this.name());
+ if (percentileParas != null) {
+ List<Double> percentiles = new ArrayList<Double>();
+ try {
+ for (String percentile : StrUtils.splitSmart(percentileParas, ',')) {
+ percentiles.add(Double.parseDouble(percentile));
+ }
+ if (!percentiles.isEmpty()) {
+ sf.percentilesList.addAll(percentiles);
+ sf.tdigestCompression = sf.localParams.getDouble("tdigestCompression",
+ sf.tdigestCompression);
+ return true;
+ }
+ } catch (NumberFormatException e) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse "
+ + StatsParams.STATS_FIELD + " local params: " + sf.localParams + " due to: "
+ + e.getMessage(), e);
+ }
+
+ }
+ return false;
+ }
+ };
+
private final List<Stat> distribDeps;
/**
@@ -123,6 +149,12 @@ public class StatsField {
public EnumSet<Stat> getDistribDeps() {
return EnumSet.copyOf(this.distribDeps);
}
+
+ /** return value of true means user is requesting this stat */
+ boolean parseParams(StatsField sf) {
+ return sf.localParams.getBool(this.name(), false);
+ }
+
}
/**
@@ -144,8 +176,12 @@ public class StatsField {
private final List<String> excludeTagList;
private final EnumSet<Stat> statsToCalculate = EnumSet.noneOf(Stat.class);
private final EnumSet<Stat> statsInResponse = EnumSet.noneOf(Stat.class);
+ private final List<Double> percentilesList= new ArrayList<Double>();
private final boolean isShard;
-
+
+ private double tdigestCompression = 100.0D;
+
+
/**
* @param rb the current request/response
* @param statsParam the raw {@link StatsParams#STATS_FIELD} string
@@ -168,7 +204,6 @@ public class StatsField {
this.localParams = localParams;
-
String parserName = localParams.get(QueryParsing.TYPE);
SchemaField sf = null;
ValueSource vs = null;
@@ -220,7 +255,7 @@ public class StatsField {
this.topLevelCalcDistinct = null == schemaField
? params.getBool(StatsParams.STATS_CALC_DISTINCT, false)
: params.getFieldBool(schemaField.getName(), StatsParams.STATS_CALC_DISTINCT, false);
-
+
populateStatsSets();
String[] facets = params.getFieldParams(key, StatsParams.STATS_FACET);
@@ -451,30 +486,28 @@ public class StatsField {
return "StatsField<" + originalParam + ">";
}
-
/**
* A helper method which inspects the {@link #localParams} associated with this StatsField,
* and uses them to populate the {@link #statsInResponse} and {@link #statsToCalculate} data
* structures
*/
private void populateStatsSets() {
-
boolean statSpecifiedByLocalParam = false;
// local individual stat
Iterator<String> itParams = localParams.getParameterNamesIterator();
+
while (itParams.hasNext()) {
String paramKey = itParams.next();
- Stat stat = Stat.forName(paramKey);
- if (stat != null) {
- statSpecifiedByLocalParam = true;
- // TODO: this isn't going to work for planned "non-boolean' stats - eg: SOLR-6350, SOLR-6968
- if (localParams.getBool(paramKey, false)) {
- statsInResponse.add(stat);
- statsToCalculate.addAll(stat.getDistribDeps());
- }
+ Stat stat = Stat.forName(paramKey);
+ if (stat != null) {
+ statSpecifiedByLocalParam = true;
+ if (stat.parseParams(this)) {
+ statsInResponse.add(stat);
+ statsToCalculate.addAll(stat.getDistribDeps());
}
+ }
}
-
+
// if no individual stat setting.
if ( ! statSpecifiedByLocalParam ) {
statsInResponse.addAll(DEFAULT_STATS);
@@ -505,5 +538,15 @@ public class StatsField {
return false;
}
-
+ public List<Double> getPercentilesList() {
+ return percentilesList;
+ }
+
+ public boolean getIsShard() {
+ return isShard;
+ }
+
+ public double getTdigestCompression() {
+ return tdigestCompression;
+ }
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/StatsValuesFactory.java Tue Mar 24 16:23:50 2015
@@ -19,6 +19,7 @@ package org.apache.solr.handler.componen
import java.io.IOException;
import java.util.*;
+import java.nio.ByteBuffer;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
@@ -31,26 +32,33 @@ import org.apache.solr.common.util.Simpl
import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.schema.*;
+import com.tdunning.math.stats.AVLTreeDigest;
+
/**
- * Factory class for creating instance of {@link org.apache.solr.handler.component.StatsValues}
+ * Factory class for creating instance of
+ * {@link org.apache.solr.handler.component.StatsValues}
*/
public class StatsValuesFactory {
/**
- * Creates an instance of StatsValues which supports values from the specified {@link StatsField}
+ * Creates an instance of StatsValues which supports values from the specified
+ * {@link StatsField}
*
- * @param statsField {@link StatsField} whose statistics will be created by the resulting {@link StatsValues}
- * @return Instance of {@link StatsValues} that will create statistics from values from the specified {@link StatsField}
+ * @param statsField
+ * {@link StatsField} whose statistics will be created by the
+ * resulting {@link StatsValues}
+ * @return Instance of {@link StatsValues} that will create statistics from
+ * values from the specified {@link StatsField}
*/
public static StatsValues createStatsValues(StatsField statsField) {
-
+
final SchemaField sf = statsField.getSchemaField();
-
+
if (null == sf) {
// function stats
return new NumericStatsValues(statsField);
- }
-
+ }
+
final FieldType fieldType = sf.getType(); // TODO: allow FieldType to provide impl.
if (TrieDateField.class.isInstance(fieldType)) {
@@ -62,27 +70,32 @@ public class StatsValuesFactory {
} else if (sf.getType().getClass().equals(EnumField.class)) {
return new EnumStatsValues(statsField);
} else {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field type " + fieldType + " is not currently supported");
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Field type " + fieldType + " is not currently supported");
}
}
}
/**
- * Abstract implementation of {@link org.apache.solr.handler.component.StatsValues}
- * that provides the default behavior for most StatsValues implementations.
+ * Abstract implementation of
+ * {@link org.apache.solr.handler.component.StatsValues} that provides the
+ * default behavior for most StatsValues implementations.
*
- * There are very few requirements placed on what statistics concrete implementations
- * should collect, with the only required statistics being the minimum and maximum values.
+ * There are very few requirements placed on what statistics concrete
+ * implementations should collect, with the only required statistics being the
+ * minimum and maximum values.
*/
abstract class AbstractStatsValues<T> implements StatsValues {
private static final String FACETS = "facets";
-
+
/** Tracks all data about tthe stats we need to collect */
final protected StatsField statsField;
/** may be null if we are collecting stats directly from a function ValueSource */
final protected SchemaField sf;
- /** may be null if we are collecting stats directly from a function ValueSource */
+ /**
+ * may be null if we are collecting stats directly from a function ValueSource
+ */
final protected FieldType ft;
// final booleans from StatsField to allow better inlining & JIT optimizing
@@ -99,17 +112,17 @@ abstract class AbstractStatsValues<T> im
* {@link #setNextReader} is called at least once
*/
private ValueSource valueSource;
- /**
- * Context to use when retrieving FunctionValues, will be null until/unless
+ /**
+ * Context to use when retrieving FunctionValues, will be null until/unless
* {@link #setNextReader} is called at least once
*/
private Map vsContext;
- /**
- * Values to collect, will be null until/unless {@link #setNextReader} is called
- * at least once
+ /**
+ * Values to collect, will be null until/unless {@link #setNextReader} is
+ * called at least once
*/
protected FunctionValues values;
-
+
protected T max;
protected T min;
protected long missing;
@@ -117,9 +130,9 @@ abstract class AbstractStatsValues<T> im
protected long countDistinct;
protected final Set<T> distinctValues;
- // facetField facetValue
- protected Map<String, Map<String, StatsValues>> facets = new HashMap<>();
-
+ // facetField facetValue
+ protected Map<String,Map<String, StatsValues>> facets = new HashMap<>();
+
protected AbstractStatsValues(StatsField statsField) {
this.statsField = statsField;
this.computeCount = statsField.calculateStats(Stat.count);
@@ -136,10 +149,11 @@ abstract class AbstractStatsValues<T> im
// duplicate code between "NumericSchemaFieldStatsValues" and
// "NumericValueSourceStatsValues" which would have diff parent classes
//
- // part of the complexity here being that the StatsValues API serves two
- // masters: collecting concrete Values from things like DocValuesStats and
- // the distributed aggregation logic, but also collecting docIds which it then
- // uses to go out and pull concreate values from the ValueSource
+ // part of the complexity here being that the StatsValues API serves two
+ // masters: collecting concrete Values from things like DocValuesStats and
+ // the distributed aggregation logic, but also collecting docIds which it
+ // then
+ // uses to go out and pull concreate values from the ValueSource
// (from a func, or single valued field)
if (null != statsField.getSchemaField()) {
assert null == statsField.getValueSource();
@@ -152,7 +166,7 @@ abstract class AbstractStatsValues<T> im
this.ft = null;
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -173,12 +187,12 @@ abstract class AbstractStatsValues<T> im
updateMinMax((T) stv.get("min"), (T) stv.get("max"));
}
updateTypeSpecificStats(stv);
-
+
NamedList f = (NamedList) stv.get(FACETS);
if (f == null) {
return;
}
-
+
for (int i = 0; i < f.size(); i++) {
String field = f.getName(i);
NamedList vals = (NamedList) f.getVal(i);
@@ -198,16 +212,18 @@ abstract class AbstractStatsValues<T> im
}
}
}
-
+
/**
* {@inheritDoc}
*/
@Override
public void accumulate(BytesRef value, int count) {
if (null == ft) {
- throw new IllegalStateException("Can't collect & convert BytesRefs on stats that do't use a a FieldType: " + statsField);
+ throw new IllegalStateException(
+ "Can't collect & convert BytesRefs on stats that do't use a a FieldType: "
+ + statsField);
}
- T typedValue = (T)ft.toObject(sf, value);
+ T typedValue = (T) ft.toObject(sf, value);
accumulate(typedValue, count);
}
@@ -224,7 +240,7 @@ abstract class AbstractStatsValues<T> im
}
updateTypeSpecificStats(value, count);
}
-
+
/**
* {@inheritDoc}
*/
@@ -234,7 +250,7 @@ abstract class AbstractStatsValues<T> im
missing++;
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -242,7 +258,7 @@ abstract class AbstractStatsValues<T> im
public void addMissing(int count) {
missing += count;
}
-
+
/**
* {@inheritDoc}
*/
@@ -250,7 +266,7 @@ abstract class AbstractStatsValues<T> im
public void addFacet(String facetName, Map<String, StatsValues> facetValues) {
facets.put(facetName, facetValues);
}
-
+
/**
* {@inheritDoc}
*/
@@ -274,113 +290,138 @@ abstract class AbstractStatsValues<T> im
res.add("distinctValues", distinctValues);
res.add("countDistinct", countDistinct);
}
-
+
addTypeSpecificStats(res);
if (!facets.isEmpty()) {
-
+
// add the facet stats
- NamedList<NamedList<?>> nl = new SimpleOrderedMap<>();
- for (Map.Entry<String, Map<String, StatsValues>> entry : facets.entrySet()) {
- NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<>();
- nl.add(entry.getKey(), nl2);
- for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
- nl2.add(e2.getKey(), e2.getValue().getStatsValues());
- }
- }
- res.add(FACETS, nl);
+ NamedList<NamedList<?>> nl = new SimpleOrderedMap<>();
+ for (Map.Entry<String,Map<String,StatsValues>> entry : facets.entrySet()) {
+ NamedList<NamedList<?>> nl2 = new SimpleOrderedMap<>();
+ nl.add(entry.getKey(), nl2);
+ for (Map.Entry<String,StatsValues> e2 : entry.getValue().entrySet()) {
+ nl2.add(e2.getKey(), e2.getValue().getStatsValues());
+ }
+ }
+
+ res.add(FACETS, nl);
}
return res;
}
-
+
/**
* {@inheritDoc}
*/
- public void setNextReader(LeafReaderContext ctx) throws IOException {
+ public void setNextReader(LeafReaderContext ctx) throws IOException {
if (valueSource == null) {
// first time we've collected local values, get the right ValueSource
valueSource = (null == ft)
- ? statsField.getValueSource()
+ ? statsField.getValueSource()
: ft.getValueSource(sf, null);
vsContext = ValueSource.newContext(statsField.getSearcher());
}
values = valueSource.getValues(vsContext, ctx);
}
-
+
/**
* Updates the minimum and maximum statistics based on the given values
*
- * @param min Value that the current minimum should be updated against
- * @param max Value that the current maximum should be updated against
+ * @param min
+ * Value that the current minimum should be updated against
+ * @param max
+ * Value that the current maximum should be updated against
*/
protected abstract void updateMinMax(T min, T max);
-
+
/**
* Updates the type specific statistics based on the given value
*
- * @param value Value the statistics should be updated against
- * @param count Number of times the value is being accumulated
+ * @param value
+ * Value the statistics should be updated against
+ * @param count
+ * Number of times the value is being accumulated
*/
protected abstract void updateTypeSpecificStats(T value, int count);
-
+
/**
* Updates the type specific statistics based on the values in the given list
*
- * @param stv List containing values the current statistics should be updated against
+ * @param stv
+ * List containing values the current statistics should be updated
+ * against
*/
protected abstract void updateTypeSpecificStats(NamedList stv);
-
+
/**
* Add any type specific statistics to the given NamedList
*
- * @param res NamedList to add the type specific statistics too
+ * @param res
+ * NamedList to add the type specific statistics too
*/
protected abstract void addTypeSpecificStats(NamedList<Object> res);
}
- /**
+/**
* Implementation of StatsValues that supports Double values
*/
class NumericStatsValues extends AbstractStatsValues<Number> {
-
+
double sum;
double sumOfSquares;
+
+ AVLTreeDigest tdigest;
double minD; // perf optimization, only valid if (null != this.min)
double maxD; // perf optimization, only valid if (null != this.max)
-
+
final protected boolean computeSum;
final protected boolean computeSumOfSquares;
+ final protected boolean computePercentiles;
public NumericStatsValues(StatsField statsField) {
super(statsField);
+
this.computeSum = statsField.calculateStats(Stat.sum);
this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);
+
+ this.computePercentiles = statsField.calculateStats(Stat.percentiles);
+ if ( computePercentiles ) {
+
+ tdigest = new AVLTreeDigest(statsField.getTdigestCompression());
+ }
}
-
+
@Override
public void accumulate(int docID) {
if (values.exists(docID)) {
- accumulate((Number) values.objectVal(docID), 1);
+ Number value = (Number) values.objectVal(docID);
+ accumulate(value, 1);
} else {
missing();
}
}
-
+
/**
* {@inheritDoc}
*/
@Override
public void updateTypeSpecificStats(NamedList stv) {
if (computeSum) {
- sum += ((Number)stv.get("sum")).doubleValue();
+ sum += ((Number) stv.get("sum")).doubleValue();
}
if (computeSumOfSquares) {
- sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue();
+ sumOfSquares += ((Number) stv.get("sumOfSquares")).doubleValue();
+ }
+
+ if (computePercentiles) {
+ byte[] data = (byte[]) stv.get("percentiles");
+ ByteBuffer buf = ByteBuffer.wrap(data);
+ tdigest.add(AVLTreeDigest.fromBytes(buf));
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -393,9 +434,12 @@ class NumericStatsValues extends Abstrac
if (computeSum) {
sum += value * count;
}
+ if (computePercentiles) {
+ tdigest.add(value, count);
+ }
}
-
- /**
+
+ /**
* {@inheritDoc}
*/
@Override
@@ -424,11 +468,13 @@ class NumericStatsValues extends Abstrac
}
}
}
-
+
/**
- * Adds sum, sumOfSquares, mean and standard deviation statistics to the given NamedList
+ * Adds sum, sumOfSquares, mean, stddev, and percentiles to the given
+ * NamedList
*
- * @param res NamedList to add the type specific statistics too
+ * @param res
+ * NamedList to add the type specific statistics too
*/
@Override
protected void addTypeSpecificStats(NamedList<Object> res) {
@@ -444,8 +490,32 @@ class NumericStatsValues extends Abstrac
if (statsField.includeInResponse(Stat.stddev)) {
res.add("stddev", getStandardDeviation());
}
+ if (statsField.includeInResponse(Stat.percentiles)) {
+ if (statsField.getIsShard()) {
+ // as of current t-digest version, smallByteSize() internally does a full conversion in
+ // order to determine what the size is (can't be precomputed?) .. so rather then
+ // serialize to a ByteBuffer twice, allocate the max possible size buffer,
+ // serialize once, and then copy only the byte[] subset that we need, and free up the buffer
+ ByteBuffer buf = ByteBuffer.allocate(tdigest.byteSize()); // upper bound
+ tdigest.asSmallBytes(buf);
+ res.add("percentiles", Arrays.copyOf(buf.array(), buf.position()) );
+ } else {
+ NamedList<Object> percentileNameList = new NamedList<Object>();
+ for (Double percentile : statsField.getPercentilesList()) {
+ // Empty document set case
+ if (tdigest.size() == 0) {
+ percentileNameList.add(percentile.toString(), null);
+ } else {
+ Double cutoff = tdigest.quantile(percentile / 100);
+ percentileNameList.add(percentile.toString(), cutoff);
+ }
+ }
+ res.add("percentiles", percentileNameList);
+ }
+ }
}
-
+
+
/**
* Calculates the standard deviation statistic
*
@@ -455,8 +525,9 @@ class NumericStatsValues extends Abstrac
if (count <= 1.0D) {
return 0.0D;
}
-
+
return Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (count * (count - 1.0D)));
+
}
}
@@ -464,11 +535,11 @@ class NumericStatsValues extends Abstrac
* Implementation of StatsValues that supports EnumField values
*/
class EnumStatsValues extends AbstractStatsValues<EnumFieldValue> {
-
+
public EnumStatsValues(StatsField statsField) {
super(statsField);
}
-
+
/**
* {@inheritDoc}
*/
@@ -483,7 +554,7 @@ class EnumStatsValues extends AbstractSt
missing();
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -503,7 +574,7 @@ class EnumStatsValues extends AbstractSt
}
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -511,7 +582,7 @@ class EnumStatsValues extends AbstractSt
protected void updateTypeSpecificStats(NamedList stv) {
// No type specific stats
}
-
+
/**
* {@inheritDoc}
*/
@@ -519,7 +590,7 @@ class EnumStatsValues extends AbstractSt
protected void updateTypeSpecificStats(EnumFieldValue value, int count) {
// No type specific stats
}
-
+
/**
* Adds no type specific statistics
*/
@@ -527,19 +598,17 @@ class EnumStatsValues extends AbstractSt
protected void addTypeSpecificStats(NamedList<Object> res) {
// Add no statistics
}
-
-
+
}
/**
- * /**
- * Implementation of StatsValues that supports Date values
+ * Implementation of StatsValues that supports Date values
*/
class DateStatsValues extends AbstractStatsValues<Date> {
-
+
private long sum = 0;
double sumOfSquares = 0;
-
+
final protected boolean computeSum;
final protected boolean computeSumOfSquares;
@@ -548,7 +617,7 @@ class DateStatsValues extends AbstractSt
this.computeSum = statsField.calculateStats(Stat.sum);
this.computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);
}
-
+
@Override
public void accumulate(int docID) {
if (values.exists(docID)) {
@@ -557,7 +626,7 @@ class DateStatsValues extends AbstractSt
missing();
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -567,10 +636,10 @@ class DateStatsValues extends AbstractSt
sum += ((Date) stv.get("sum")).getTime();
}
if (computeSumOfSquares) {
- sumOfSquares += ((Number)stv.get("sumOfSquares")).doubleValue();
+ sumOfSquares += ((Number) stv.get("sumOfSquares")).doubleValue();
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -584,8 +653,8 @@ class DateStatsValues extends AbstractSt
sum += value * count;
}
}
-
- /**
+
+ /**
* {@inheritDoc}
*/
@Override
@@ -601,11 +670,12 @@ class DateStatsValues extends AbstractSt
}
}
}
-
+
/**
* Adds sum and mean statistics to the given NamedList
*
- * @param res NamedList to add the type specific statistics too
+ * @param res
+ * NamedList to add the type specific statistics to
*/
@Override
protected void addTypeSpecificStats(NamedList<Object> res) {
@@ -623,10 +693,9 @@ class DateStatsValues extends AbstractSt
}
}
-
-
/**
- * Calculates the standard deviation. For dates, this is really the MS deviation
+ * Calculates the standard deviation. For dates, this is really the
+ * deviation in milliseconds
*
* @return Standard deviation statistic
*/
@@ -634,7 +703,8 @@ class DateStatsValues extends AbstractSt
if (count <= 1) {
return 0.0D;
}
- return Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (count * (count - 1.0D)));
+ return Math.sqrt(((count * sumOfSquares) - (sum * sum))
+ / (count * (count - 1.0D)));
}
}
@@ -642,24 +712,25 @@ class DateStatsValues extends AbstractSt
* Implementation of StatsValues that supports String values
*/
class StringStatsValues extends AbstractStatsValues<String> {
-
+
public StringStatsValues(StatsField statsField) {
super(statsField);
}
-
+
@Override
public void accumulate(int docID) {
if (values.exists(docID)) {
String value = values.strVal(docID);
- if (value != null)
+ if (value != null) {
accumulate(value, 1);
- else
+ } else {
missing();
+ }
} else {
missing();
}
}
-
+
/**
* {@inheritDoc}
*/
@@ -667,7 +738,7 @@ class StringStatsValues extends Abstract
protected void updateTypeSpecificStats(NamedList stv) {
// No type specific stats
}
-
+
/**
* {@inheritDoc}
*/
@@ -675,8 +746,8 @@ class StringStatsValues extends Abstract
protected void updateTypeSpecificStats(String value, int count) {
// No type specific stats
}
-
- /**
+
+ /**
* {@inheritDoc}
*/
@Override
@@ -688,7 +759,7 @@ class StringStatsValues extends Abstract
this.max = max(this.max, max);
}
}
-
+
/**
* Adds no type specific statistics
*/
@@ -696,13 +767,17 @@ class StringStatsValues extends Abstract
protected void addTypeSpecificStats(NamedList<Object> res) {
// Add no statistics
}
-
- /**
- * Determines which of the given Strings is the maximum, as computed by {@link String#compareTo(String)}
+
+ /**
+ * Determines which of the given Strings is the maximum, as computed by
+ * {@link String#compareTo(String)}
*
- * @param str1 String to compare against b
- * @param str2 String compared against a
- * @return str1 if it is considered greater by {@link String#compareTo(String)}, str2 otherwise
+ * @param str1
+ * String to compare against str2
+ * @param str2
+ * String compared against str1
+ * @return str1 if it is considered greater by
+ * {@link String#compareTo(String)}, str2 otherwise
*/
private static String max(String str1, String str2) {
if (str1 == null) {
@@ -712,13 +787,17 @@ class StringStatsValues extends Abstract
}
return (str1.compareTo(str2) > 0) ? str1 : str2;
}
-
+
/**
- * Determines which of the given Strings is the minimum, as computed by {@link String#compareTo(String)}
+ * Determines which of the given Strings is the minimum, as computed by
+ * {@link String#compareTo(String)}
*
- * @param str1 String to compare against b
- * @param str2 String compared against a
- * @return str1 if it is considered less by {@link String#compareTo(String)}, str2 otherwise
+ * @param str1
+ * String to compare against str2
+ * @param str2
+ * String compared against str1
+ * @return str1 if it is considered less by {@link String#compareTo(String)},
+ * str2 otherwise
*/
private static String min(String str1, String str2) {
if (str1 == null) {
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Tue Mar 24 16:23:50 2015
@@ -51,6 +51,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.EnumSet;
@@ -392,6 +393,48 @@ public class TestDistributedSearch exten
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_a);
query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", tdate_b);
+
+ query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+ "{!percentiles='1,2,3,4,5'}" + i1);
+
+ query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+ "{!percentiles='1,20,30,40,98,99,99.9'}" + i1);
+
+ rsp = query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+ "{!percentiles='1.0,99.999,0.001'}" + tlong);
+ { // don't leak variables
+ Double[] expectedKeys = new Double[] { 1.0D, 99.999D, 0.001D };
+ Double[] expectedVals = new Double[] { 2.0D, 4320.0D, 2.0D };
+ FieldStatsInfo s = rsp.getFieldStatsInfo().get(tlong);
+ assertNotNull("no stats for " + tlong, s);
+
+ Map<Double,Double> p = s.getPercentiles();
+ assertNotNull("no percentils", p);
+ assertEquals("insufficient percentiles", expectedKeys.length, p.size());
+ Iterator<Double> actualKeys = p.keySet().iterator();
+ for (int i = 0; i < expectedKeys.length; i++) {
+ Double expectedKey = expectedKeys[i];
+ assertTrue("Ran out of actual keys as of : "+ i + "->" +expectedKey,
+ actualKeys.hasNext());
+ assertEquals(expectedKey, actualKeys.next());
+ assertEquals("percentiles are off: " + p.toString(),
+ expectedVals[i], p.get(expectedKey), 1.0D);
+ }
+
+ // only percentiles were requested, so every other stat should be null
+ assertNull("expected null for count", s.getMin());
+ assertNull("expected null for count", s.getMean());
+ assertNull("expected null for count", s.getCount());
+ assertNull("expected null for calcDistinct", s.getCountDistinct());
+ assertNull("expected null for distinct vals", s.getDistinctValues());
+ assertNull("expected null for max", s.getMax());
+ assertNull("expected null for missing", s.getMissing());
+ assertNull("expected null for stddev", s.getStddev());
+ assertNull("expected null for sum", s.getSum());
+ }
+
+ query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field",
+ "{!percentiles='1,20,50,80,99'}" + tdate_a);
query("q","*:*", "sort",i1+" desc", "stats", "true",
"fq", "{!tag=nothing}-*:*",
@@ -437,6 +480,7 @@ public class TestDistributedSearch exten
assertNull("expected null for missing", s.getMissing());
assertNull("expected null for stddev", s.getStddev());
assertNull("expected null for sum", s.getSum());
+ assertNull("expected null for percentiles", s.getPercentiles());
// sanity check deps relationship
for (Stat dep : EnumSet.of(Stat.sum, Stat.count)) {
@@ -492,6 +536,7 @@ public class TestDistributedSearch exten
assertNull("expected null for max", s.getMax());
assertNull("expected null for missing", s.getMissing());
assertNull("expected null for sum", s.getSum());
+ assertNull("expected null for percentiles", s.getPercentiles());
}
// request stats, but disable them all via param refs
@@ -512,6 +557,7 @@ public class TestDistributedSearch exten
assertNull("expected null for max", s.getMax());
assertNull("expected null for missing", s.getMissing());
assertNull("expected null for sum", s.getSum());
+ assertNull("expected null for percentiles", s.getPercentiles());
}
final String[] stats = new String[] {
@@ -596,6 +642,7 @@ public class TestDistributedSearch exten
assertNull(p+" expected null for missing", s.getMissing());
assertNull(p+" expected null for stddev", s.getStddev());
assertNull(p+" expected null for sum", s.getSum());
+ assertNull(p+" expected null for percentiles", s.getPercentiles());
}
@@ -630,6 +677,7 @@ public class TestDistributedSearch exten
assertNull(p+" expected null for missing", s.getMissing());
assertNull(p+" expected null for stddev", s.getStddev());
assertNull(p+" expected null for sum", s.getSum());
+ assertNull(p+"expected null for percentiles", s.getPercentiles());
}
@@ -654,6 +702,7 @@ public class TestDistributedSearch exten
assertNull("expected null for max", s.getMax());
assertNull("expected null for missing", s.getMissing());
assertNull("expected null for sum", s.getSum());
+ assertNull("expected null for percentiles", s.getPercentiles());
}
// look at stats on non numeric fields
@@ -662,7 +711,11 @@ public class TestDistributedSearch exten
// result in no stats being computed but this at least lets us sanity check that for each
// of these field+stats(s) combinations we get consistent results between the distribted
// request and the single node situation.
- EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
+ //
+ // NOTE: percentiles excluded because it doesn't support simple 'true/false' syntax
+ // (and since it doesn't work for non-numerics anyway, we aren't missing any coverage here)
+ EnumSet<Stat> allStats = EnumSet.complementOf(EnumSet.of(Stat.percentiles));
+
int numTotalStatQueries = 0;
// don't go overboard, just do all permutations of 1 or 2 stat params, for each field & query
final int numStatParamsAtOnce = 2;
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java Tue Mar 24 16:23:50 2015
@@ -16,6 +16,7 @@ package org.apache.solr.handler.componen
* limitations under the License.
*/
+import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
@@ -36,6 +37,9 @@ import org.apache.solr.common.params.Com
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.StatsParams;
+import org.apache.solr.common.util.Base64;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.request.LocalSolrQueryRequest;
@@ -45,10 +49,10 @@ import org.apache.solr.schema.SchemaFiel
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.commons.math3.util.Combinations;
+import com.tdunning.math.stats.AVLTreeDigest;
import org.junit.BeforeClass;
-
/**
* Statistics Component Test
*/
@@ -1051,7 +1055,7 @@ public class StatsComponentTest extends
);
}
}
-
+
public void testEnumFieldTypeStatus() throws Exception {
clearIndex();
@@ -1141,8 +1145,9 @@ public class StatsComponentTest extends
assertU(adoc("id", "1", "a_f", "2.3", "b_f", "9.7", "a_i", "9", "foo_t", "how now brown cow"));
assertU(commit());
+ AVLTreeDigest tdigest = new AVLTreeDigest(100);
+
// some quick sanity check assertions...
-
// trivial check that we only get the exact 2 we ask for
assertQ("ask for and get only 2 stats",
req("q","*:*", "stats", "true",
@@ -1169,40 +1174,59 @@ public class StatsComponentTest extends
, "count(" + kpre + "*)=0"
);
- double sum = 0;
- double sumOfSquares = 0;
- final int count = 20;
- for (int i = 0; i < count; i++) {
- assertU(adoc("id", String.valueOf(i), "a_f", "2.3", "b_f", "9.7", "a_i", String.valueOf(i%10), "foo_t", "how now brown cow"));
- sum+=i%10;
- sumOfSquares+=(i%10)*(i%10);
- }
+ double sum = 0;
+ double sumOfSquares = 0;
+ final int count = 20;
+ for (int i = 0; i < count; i++) {
+ assertU(adoc("id", String.valueOf(i), "a_f", "2.3", "b_f", "9.7", "a_i",
+ String.valueOf(i % 10), "foo_t", "how now brown cow"));
+ tdigest.add(i % 10);
+ sum += i % 10;
+ sumOfSquares += (i % 10) * (i % 10);
+ }
- assertU(commit());
-
- EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
-
- Map<Stat, String> expectedStats = new HashMap<>();
- expectedStats.put(Stat.min, "0.0");
- expectedStats.put(Stat.max, "9.0");
- expectedStats.put(Stat.missing, "0");
- expectedStats.put(Stat.sum, String.valueOf(sum));
- expectedStats.put(Stat.count, String.valueOf(count));
- expectedStats.put(Stat.mean, String.valueOf(sum/count));
- expectedStats.put(Stat.sumOfSquares, String.valueOf(sumOfSquares));
- expectedStats.put(Stat.stddev, String.valueOf(Math.sqrt(((count * sumOfSquares) - (sum * sum)) / (20 * (count - 1.0D)))));
- expectedStats.put(Stat.calcdistinct, "10");
+ assertU(commit());
+
+ ByteBuffer buf = ByteBuffer.allocate(tdigest.smallByteSize());
+ tdigest.asSmallBytes(buf);
+ EnumSet<Stat> allStats = EnumSet.allOf(Stat.class);
+
+ Map<Stat,String> expectedStats = new HashMap<>();
+ expectedStats.put(Stat.min, "0.0");
+ expectedStats.put(Stat.max, "9.0");
+ expectedStats.put(Stat.missing, "0");
+ expectedStats.put(Stat.sum, String.valueOf(sum));
+ expectedStats.put(Stat.count, String.valueOf(count));
+ expectedStats.put(Stat.mean, String.valueOf(sum / count));
+ expectedStats.put(Stat.sumOfSquares, String.valueOf(sumOfSquares));
+ expectedStats.put(Stat.stddev, String.valueOf(Math.sqrt(((count * sumOfSquares) - (sum * sum))/ (20 * (count - 1.0D)))));
+ expectedStats.put(Stat.calcdistinct, "10");
+ // NOTE: per shard expected value
+ expectedStats.put(Stat.percentiles, Base64.byteArrayToBase64(buf.array(), 0, buf.array().length));
+
+ Map<Stat,String> expectedType = new HashMap<>();
+ expectedType.put(Stat.min, "double");
+ expectedType.put(Stat.max, "double");
+ expectedType.put(Stat.missing, "long");
+ expectedType.put(Stat.sum, "double");
+ expectedType.put(Stat.count, "long");
+ expectedType.put(Stat.mean, "double");
+ expectedType.put(Stat.sumOfSquares, "double");
+ expectedType.put(Stat.stddev, "double");
+ expectedType.put(Stat.calcdistinct, "long");
+ expectedType.put(Stat.percentiles, "str");
- Map<Stat, String> expectedType = new HashMap<>();
- expectedType.put(Stat.min, "double");
- expectedType.put(Stat.max, "double");
- expectedType.put(Stat.missing, "long");
- expectedType.put(Stat.sum, "double");
- expectedType.put(Stat.count, "long");
- expectedType.put(Stat.mean, "double");
- expectedType.put(Stat.sumOfSquares, "double");
- expectedType.put(Stat.stddev, "double");
- expectedType.put(Stat.calcdistinct, "long");
+ Map<Stat,String> localParasInput = new HashMap<>();
+ localParasInput.put(Stat.min, "true");
+ localParasInput.put(Stat.max, "true");
+ localParasInput.put(Stat.missing, "true");
+ localParasInput.put(Stat.sum, "true");
+ localParasInput.put(Stat.count, "true");
+ localParasInput.put(Stat.mean, "true");
+ localParasInput.put(Stat.sumOfSquares, "true");
+ localParasInput.put(Stat.stddev, "true");
+ localParasInput.put(Stat.calcdistinct, "true");
+ localParasInput.put(Stat.percentiles, "'90, 99'");
// canary in the coal mine
assertEquals("size of expectedStats doesn't match all known stats; " +
@@ -1233,13 +1257,15 @@ public class StatsComponentTest extends
"[@name='" + key + "'][.='" + expectedStats.get(perShardStat) + "']");
// even if we go out of our way to exclude the dependent stats,
// the shard should return them since they are a dependency for the requested stat
- exclude.append(perShardStat + "=false ");
+ if (!stat.equals(Stat.percentiles)){
+ exclude.append(perShardStat + "=false ");
+ }
}
testParas.add("count(" + kpre + "*)=" + (distribDeps.size() + calcdistinctFudge));
assertQ("ask for only "+stat+", with isShard=true, and expect only deps: " + distribDeps,
req("q", "*:*", "isShard", "true", "stats", "true",
- "stats.field", "{!key=k " + exclude + stat + "=true}a_i")
+ "stats.field", "{!key=k " + exclude + stat +"=" + localParasInput.get(stat) + "}a_i")
, testParas.toArray(new String[testParas.size()])
);
}
@@ -1265,8 +1291,17 @@ public class StatsComponentTest extends
calcdistinctFudge++;
testParas.add("count(" + kpre + "arr[@name='distinctValues']/*)=10");
}
- paras.append(stat + "=true ");
- testParas.add(kpre + expectedType.get(stat) + "[@name='" + key + "'][.='" + expectedStats.get(stat) + "']");
+ paras.append(stat + "=" + localParasInput.get(stat)+ " ");
+
+ if (!stat.equals(Stat.percentiles)){
+ testParas.add(kpre + expectedType.get(stat) + "[@name='" + key + "'][.='" + expectedStats.get(stat) + "']");
+ } else {
+ testParas.add("count(" + kpre + "lst[@name='percentiles']/*)=2");
+ String p90 = "" + tdigest.quantile(0.90D);
+ String p99 = "" + tdigest.quantile(0.99D);
+ testParas.add(kpre + "lst[@name='percentiles']/double[@name='90.0'][.="+p90+"]");
+ testParas.add(kpre + "lst[@name='percentiles']/double[@name='99.0'][.="+p99+"]");
+ }
}
paras.append("}a_i");
@@ -1279,7 +1314,6 @@ public class StatsComponentTest extends
);
}
}
-
}
// Test for Solr-6349
@@ -1402,6 +1436,90 @@ public class StatsComponentTest extends
}
}
+ // simple percentiles test
+ public void testPercentiles() throws Exception {
+
+ // NOTE: deliberately not in numeric order
+ String percentiles = "10.0,99.9,1.0,2.0,20.0,30.0,40.0,50.0,60.0,70.0,80.0,98.0,99.0";
+ List <String> percentilesList = StrUtils.splitSmart(percentiles, ',');
+
+ // test empty case
+ SolrQueryRequest query = req("q", "*:*", "stats", "true",
+ "stats.field", "{!percentiles='" + percentiles + "'}stat_f");
+ try {
+ SolrQueryResponse rsp = h.queryAndResponse(null, query);
+ NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+ for (int i = 0; i < percentilesList.size(); i++) {
+ // ensure exact order, but all values should be null (empty result set)
+ assertEquals(percentilesList.get(i), pout.getName(i));
+ assertEquals(null, pout.getVal(i));
+ }
+ } finally {
+ query.close();
+ }
+
+ int id = 0;
+ // add trivial docs to test basic percentiles
+ for (int i = 0; i < 100; i++) {
+ // add the same values multiple times (diff docs)
+ for (int j =0; j < 5; j++) {
+ assertU(adoc("id", ++id+"", "stat_f", ""+i));
+ }
+ }
+
+ assertU(commit());
+
+ query = req("q", "*:*", "stats", "true",
+ "stats.field", "{!percentiles='" + percentiles + "'}stat_f");
+ try {
+ SolrQueryResponse rsp = h.queryAndResponse(null, query);
+ NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+ for (int i = 0; i < percentilesList.size(); i++) {
+ String p = percentilesList.get(i);
+ assertEquals(p, pout.getName(i));
+ assertEquals(Double.parseDouble(p), pout.getVal(i), 1.0D);
+
+ }
+ } finally {
+ query.close();
+ }
+
+ // test request for no percentiles
+ query = req("q", "*:*", "stats", "true",
+ "stats.field", "{!percentiles=''}stat_f");
+ try {
+ SolrQueryResponse rsp = h.queryAndResponse(null, query);
+ NamedList<Double> pout = extractPercentils(rsp, "stat_f");
+ assertNull(pout);
+ } finally {
+ query.close();
+ }
+
+ // non-numeric types don't support percentiles
+ assertU(adoc("id", ++id+"", "stat_dt", "1999-05-03T04:55:01Z"));
+ assertU(adoc("id", ++id+"", "stat_s", "cow"));
+
+ assertU(commit());
+
+ query = req("q", "*:*", "stats", "true",
+ "stats.field", "{!percentiles='" + percentiles + "'}stat_dt",
+ "stats.field", "{!percentiles='" + percentiles + "'}stat_s");
+
+ try {
+ SolrQueryResponse rsp = h.queryAndResponse(null, query);
+ assertNull(extractPercentils(rsp, "stat_dt"));
+ assertNull(extractPercentils(rsp, "stat_s"));
+ } finally {
+ query.close();
+ }
+
+ }
+
+ private NamedList<Double> extractPercentils(SolrQueryResponse rsp, String key) {
+ return ((NamedList<NamedList<NamedList<NamedList<Double>>>> )
+ rsp.getValues().get("stats")).get("stats_fields").get(key).get("percentiles");
+ }
+
/**
* given a comboSize and an EnumSet of Stats, generates iterators that produce every possible
* enum combination of that size
@@ -1435,5 +1553,4 @@ public class StatsComponentTest extends
};
}
}
-
}
Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java?rev=1668926&r1=1668925&r2=1668926&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/response/FieldStatsInfo.java Tue Mar 24 16:23:50 2015
@@ -22,6 +22,7 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -47,6 +48,8 @@ public class FieldStatsInfo implements S
Map<String,List<FieldStatsInfo>> facets;
+ Map<Double, Double> percentiles;
+
public FieldStatsInfo( NamedList<Object> nl, String fname )
{
name = fname;
@@ -96,6 +99,13 @@ public class FieldStatsInfo implements S
vals.add( new FieldStatsInfo( vnl.getVal(i), n ) );
}
}
+ } else if ( "percentiles".equals( entry.getKey() ) ){
+ @SuppressWarnings("unchecked")
+ NamedList<Object> fields = (NamedList<Object>) entry.getValue();
+ percentiles = new LinkedHashMap<>();
+ for( Map.Entry<String, Object> ev : fields ) {
+ percentiles.put(Double.parseDouble(ev.getKey()), (Double)ev.getValue());
+ }
}
else {
throw new RuntimeException( "unknown key: "+entry.getKey() + " ["+entry.getValue()+"]" );
@@ -136,6 +146,10 @@ public class FieldStatsInfo implements S
if( stddev != null ) {
sb.append( " stddev:").append(stddev);
}
+ if( percentiles != null ) {
+ sb.append( " percentiles:").append(percentiles);
+ }
+
sb.append( " }" );
return sb.toString();
}
@@ -155,7 +169,7 @@ public class FieldStatsInfo implements S
public Object getSum() {
return sum;
}
-
+
public Long getCount() {
return count;
}
@@ -188,4 +202,11 @@ public class FieldStatsInfo implements S
return facets;
}
+ /**
+ * The percentiles requested if any, otherwise null. If non-null then the
+ * iteration order will match the order the percentiles were originally specified in.
+ */
+ public Map<Double, Double> getPercentiles() {
+ return percentiles;
+ }
}