You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mu...@apache.org on 2019/12/05 05:41:42 UTC
[lucene-solr] branch branch_8x updated: SOLR-11706: add support for
aggregation on multivalued fields
This is an automated email from the ASF dual-hosted git repository.
munendrasn pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 2d2f4b9 SOLR-11706: add support for aggregation on multivalued fields
2d2f4b9 is described below
commit 2d2f4b95212db921c4161e0a17f79114f865ba95
Author: Munendra S N <mu...@apache.org>
AuthorDate: Thu Dec 5 10:48:22 2019 +0530
SOLR-11706: add support for aggregation on multivalued fields
* min, max, sum, sumsq, avg, stddev, variance, percentile aggregations
in JSON facets now support multivalued fields
---
solr/CHANGES.txt | 3 +
.../org/apache/solr/search/ValueSourceParser.java | 31 ++-
.../java/org/apache/solr/search/facet/AggUtil.java | 53 ++++
.../java/org/apache/solr/search/facet/AvgAgg.java | 200 +++++++++++++-
.../org/apache/solr/search/facet/DocValuesAcc.java | 211 +++++++++++++++
.../org/apache/solr/search/facet/MinMaxAgg.java | 185 ++++++++++++-
.../apache/solr/search/facet/PercentileAgg.java | 293 ++++++++++++++++++++-
.../java/org/apache/solr/search/facet/SlotAcc.java | 21 +-
.../org/apache/solr/search/facet/StddevAgg.java | 72 ++++-
.../java/org/apache/solr/search/facet/SumAgg.java | 86 +++++-
.../org/apache/solr/search/facet/SumsqAgg.java | 86 +++++-
.../solr/search/facet/UnInvertedFieldAcc.java | 113 ++++++++
.../org/apache/solr/search/facet/VarianceAgg.java | 72 ++++-
.../test-files/solr/collection1/conf/schema.xml | 1 +
.../solr/handler/component/StatsComponentTest.java | 143 +++++-----
...stributedFacetSimpleRefinementLongTailTest.java | 15 +-
.../apache/solr/search/facet/TestJsonFacets.java | 108 +++++++-
17 files changed, 1554 insertions(+), 139 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e113ca9..63b2e27 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -88,6 +88,9 @@ Improvements
* SOLR-13968: Support postingsFormat and docValuesFormat in schema fields. (Bruno Roustant)
+* SOLR-11706: Add support for aggregation on multivalued fields in JSON facets. min, max, avg, sum, sumsq, stddev,
+ variance, percentile aggregations now have support for multivalued fields. (hossman, Munendra S N)
+
Optimizations
---------------------
(No changes)
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 6eeff2b..64cadb6 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -985,35 +985,35 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("agg_sum", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new SumAgg(fp.parseValueSource());
+ return new SumAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
addParser("agg_avg", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new AvgAgg(fp.parseValueSource());
+ return new AvgAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
addParser("agg_sumsq", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new SumsqAgg(fp.parseValueSource());
+ return new SumsqAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
addParser("agg_variance", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new VarianceAgg(fp.parseValueSource());
+ return new VarianceAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
addParser("agg_stddev", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new StddevAgg(fp.parseValueSource());
+ return new StddevAgg(fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
@@ -1054,7 +1054,26 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
}
});
- addParser("agg_percentile", new PercentileAgg.Parser());
+ addParser("agg_percentile", new ValueSourceParser() {
+ @Override
+ public ValueSource parse(FunctionQParser fp) throws SyntaxError {
+ List<Double> percentiles = new ArrayList<>();
+ ValueSource vs = fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE);
+ while (fp.hasMoreArguments()) {
+ double val = fp.parseDouble();
+ if (val<0 || val>100) {
+ throw new SyntaxError("requested percentile must be between 0 and 100. got " + val);
+ }
+ percentiles.add(val);
+ }
+
+ if (percentiles.isEmpty()) {
+ throw new SyntaxError("expected percentile(valsource,percent1[,percent2]*) EXAMPLE:percentile(myfield,50)");
+ }
+
+ return new PercentileAgg(vs, percentiles);
+ }
+ });
addParser("agg_" + RelatednessAgg.NAME, new ValueSourceParser() {
@Override
diff --git a/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java b/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java
new file mode 100644
index 0000000..3370fd9
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/AggUtil.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+
+public class AggUtil {
+
+  private AggUtil() {
+  }
+
+  /**
+   * Computes and returns average for given sum and count; 0 is returned when count is 0
+   */
+  public static double avg(double sum, long count) {
+    // todo: should we return NAN when count==0?
+    return count == 0 ? 0.0d : sum / count;
+  }
+
+  /**
+   * Computes and returns uncorrected standard deviation for given sum of squares, sum and count; 0 when count is 0
+   */
+  public static double stdDev(double sumSq, double sum, long count) {
+    // todo: switch to corrected stddev SOLR-11725
+    // todo: should we return NAN when count==0?
+    // variance() is clamped at 0, so rounding noise can no longer push a negative number into sqrt (=> NaN)
+    return Math.sqrt(variance(sumSq, sum, count));
+  }
+
+  /**
+   * Computes and returns uncorrected variance for given sum of squares, sum and count; 0 when count is 0
+   */
+  public static double variance(double sumSq, double sum, long count) {
+    // todo: switch to corrected variance SOLR-11725
+    // todo: should we return NAN when count==0?
+    // sumSq/count - (sum/count)^2 can dip just below 0 from floating-point cancellation; clamp it (NaN still propagates)
+    return count == 0 ? 0 : Math.max(0.0d, (sumSq / count) - Math.pow(sum / count, 2));
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java
index ebc6459..716bb80 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/AvgAgg.java
@@ -17,9 +17,18 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
import java.util.List;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.function.FieldNameValueSource;
public class AvgAgg extends SimpleAggValueSource {
@@ -29,7 +38,31 @@ public class AvgAgg extends SimpleAggValueSource {
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new AvgSlotAcc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource) vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new AvgSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new AvgSortedSetAcc(fcontext, sf, numSlots);
+ }
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new AvgUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new AvgSlotAcc(vs, fcontext, numSlots);
}
@Override
@@ -43,7 +76,7 @@ public class AvgAgg extends SimpleAggValueSource {
@Override
public void merge(Object facetResult, Context mcontext1) {
- List<Number> numberList = (List<Number>)facetResult;
+ List<Number> numberList = (List<Number>) facetResult;
num += numberList.get(0).longValue();
sum += numberList.get(1).doubleValue();
}
@@ -51,8 +84,167 @@ public class AvgAgg extends SimpleAggValueSource {
@Override
protected double getDouble() {
// TODO: is it worth to try and cache?
- return num==0 ? 0.0d : sum/num;
+ return AggUtil.avg(sum, num);
+ }
+ }
+
+  class AvgSortedNumericAcc extends DoubleSortedNumericDVAcc {
+    int[] counts; // number of values added into result[slot]; the inherited result[] holds the running sum
+
+    public AvgSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+      this.counts = new int[numSlots];
+    }
+
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      for (int i = 0, count = values.docValueCount(); i < count; i++) {
+        result[slot] += getDouble(values.nextValue());
+        counts[slot]++;
+      }
+    }
+
+    private double avg(int slot) {
+      return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      return Double.compare(avg(slotA), avg(slotB));
+    }
+
+    @Override
+    public Object getValue(int slot) {
+      if (!fcontext.isShard()) {
+        return avg(slot);
+      }
+      // shard response is [count, sum]; the merger adds these up before dividing
+      List<Number> lst = new ArrayList<>(2);
+      lst.add(counts[slot]);
+      lst.add(result[slot]);
+      return lst;
     }
-  };
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      Arrays.fill(counts, 0);
+    }
+
+    @Override
+    public void resize(Resizer resizer) {
+      super.resize(resizer);
+      this.counts = resizer.resize(counts, 0); // resize() returns the resized copy; it must be stored
+    }
+  }
+
+  class AvgSortedSetAcc extends DoubleSortedSetDVAcc {
+    int[] counts; // number of values added into result[slot]; the inherited result[] holds the running sum
+
+    public AvgSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+      this.counts = new int[numSlots];
+    }
+
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      long ord;
+      while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+        BytesRef term = values.lookupOrd(ord);
+        Object obj = sf.getType().toObject(sf, term);
+        double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue(); // dates count as epoch millis
+        result[slot] += val;
+        counts[slot]++;
+      }
+    }
+
+    private double avg(int slot) {
+      return AggUtil.avg(result[slot], counts[slot]);
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      return Double.compare(avg(slotA), avg(slotB));
+    }
+
+    @Override
+    public Object getValue(int slot) {
+      if (!fcontext.isShard()) {
+        return avg(slot);
+      }
+      // shard response is [count, sum]; the merger adds these up before dividing
+      List<Number> lst = new ArrayList<>(2);
+      lst.add(counts[slot]);
+      lst.add(result[slot]);
+      return lst;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      Arrays.fill(counts, 0);
+    }
+
+    @Override
+    public void resize(Resizer resizer) {
+      super.resize(resizer);
+      this.counts = resizer.resize(counts, 0); // resize() returns the resized copy; it must be stored
+    }
+  }
+
+  class AvgUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
+    int[] counts; // number of values added into result[slot]; the inherited result[] holds the running sum
+
+    public AvgUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+      this.counts = new int[numSlots];
+    }
+
+    @Override
+    public void call(int termNum) {
+      try {
+        BytesRef term = docToTerm.lookupOrd(termNum);
+        Object obj = sf.getType().toObject(sf, term);
+        double val = obj instanceof Date? ((Date)obj).getTime(): ((Number)obj).doubleValue(); // dates count as epoch millis
+        result[currentSlot] += val;
+        counts[currentSlot]++;
+      } catch (IOException e) {
+        // find a better way to do it
+        throw new UncheckedIOException(e);
+      }
+    }
+
+    private double avg(int slot) {
+      return AggUtil.avg(result[slot], counts[slot]);
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      return Double.compare(avg(slotA), avg(slotB));
+    }
+
+    @Override
+    public Object getValue(int slot) {
+      if (!fcontext.isShard()) {
+        return avg(slot);
+      }
+      // shard response is [count, sum]; the merger adds these up before dividing
+      List<Number> lst = new ArrayList<>(2);
+      lst.add(counts[slot]);
+      lst.add(result[slot]);
+      return lst;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      super.reset();
+      Arrays.fill(counts, 0);
+    }
+
+    @Override
+    public void resize(Resizer resizer) {
+      super.resize(resizer);
+      this.counts = resizer.resize(counts, 0); // resize() returns the resized copy; it must be stored
+    }
+  }
}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java b/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java
index e3740e7..2bd07a4 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/DocValuesAcc.java
@@ -18,7 +18,9 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Date;
import java.util.function.IntFunction;
import org.apache.lucene.index.DocValues;
@@ -28,6 +30,8 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.SchemaField;
/**
@@ -139,6 +143,118 @@ abstract class LongSortedNumericDVAcc extends SortedNumericDVAcc {
}
+abstract class DoubleSortedNumericDVAcc extends SortedNumericDVAcc {
+  double[] result; // one accumulated double per slot
+  double initialValue; // value each slot starts from and is reset to
+
+  public DoubleSortedNumericDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
+    super(fcontext, sf, numSlots);
+    this.result = new double[numSlots];
+    this.initialValue = initialValue;
+    if (initialValue != 0) { // a new double[] is already zero-filled
+      Arrays.fill(result, initialValue);
+    }
+  }
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(result[slotA], result[slotB]);
+  }
+
+  @Override
+  public Object getValue(int slotNum) throws IOException {
+    return result[slotNum];
+  }
+
+  @Override
+  public void reset() throws IOException {
+    Arrays.fill(result, initialValue);
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    this.result = resizer.resize(result, initialValue); // resize() returns the resized copy; it must be stored
+  }
+
+  /**
+   * converts given long value to double based on field type
+   */
+  protected double getDouble(long val) {
+    switch (sf.getType().getNumberType()) {
+      case INTEGER:
+      case LONG:
+      case DATE:
+        return val;
+      case FLOAT:
+        return NumericUtils.sortableIntToFloat((int) val);
+      case DOUBLE:
+        return NumericUtils.sortableLongToDouble(val);
+      default:
+        // this would never happen
+        return 0.0d;
+    }
+  }
+
+}
+
+/**
+ * Base class for standard deviation and variance computation for fields with {@link SortedNumericDocValues}
+ */
+abstract class SDVSortedNumericAcc extends DoubleSortedNumericDVAcc {
+  int[] counts; // number of values seen per slot
+  double[] sum; // sum of values per slot; the inherited result[] holds the sum of squares
+
+  public SDVSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+    super(fcontext, sf, numSlots, 0);
+    this.counts = new int[numSlots];
+    this.sum = new double[numSlots];
+  }
+
+  @Override
+  protected void collectValues(int doc, int slot) throws IOException {
+    for (int i = 0, count = values.docValueCount(); i < count; i++) {
+      double val = getDouble(values.nextValue());
+      result[slot] += val * val;
+      sum[slot] += val;
+      counts[slot]++;
+    }
+  }
+
+  protected abstract double computeVal(int slot);
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(computeVal(slotA), computeVal(slotB));
+  }
+
+  @Override
+  public Object getValue(int slot) {
+    if (!fcontext.isShard()) {
+      return computeVal(slot);
+    }
+    // shard response: [count, sum of squares, sum]
+    ArrayList<Number> lst = new ArrayList<>(3);
+    lst.add(counts[slot]);
+    lst.add(result[slot]);
+    lst.add(sum[slot]);
+    return lst;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    Arrays.fill(counts, 0);
+    Arrays.fill(sum, 0);
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    super.resize(resizer);
+    this.counts = resizer.resize(counts, 0); // resize() returns the resized copy; it must be stored
+    this.sum = resizer.resize(sum, 0);
+  }
+}
+
/**
* Accumulator for {@link SortedDocValues}
*/
@@ -216,3 +332,98 @@ abstract class LongSortedSetDVAcc extends SortedSetDVAcc {
resizer.resize(result, initialValue);
}
}
+
+abstract class DoubleSortedSetDVAcc extends SortedSetDVAcc {
+  double[] result; // one accumulated double per slot
+  double initialValue; // value each slot starts from and is reset to
+
+  public DoubleSortedSetDVAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
+    super(fcontext, sf, numSlots);
+    this.result = new double[numSlots];
+    this.initialValue = initialValue; // declared double (was long) to match the field and DoubleSortedNumericDVAcc
+    if (initialValue != 0) { // a new double[] is already zero-filled
+      Arrays.fill(result, initialValue);
+    }
+  }
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(result[slotA], result[slotB]);
+  }
+
+  @Override
+  public Object getValue(int slotNum) throws IOException {
+    return result[slotNum];
+  }
+
+  @Override
+  public void reset() throws IOException {
+    Arrays.fill(result, initialValue);
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    this.result = resizer.resize(result, initialValue); // resize() returns the resized copy; it must be stored
+  }
+}
+
+/**
+ * Base class for standard deviation and variance computation for fields with {@link SortedSetDocValues}
+ */
+abstract class SDVSortedSetAcc extends DoubleSortedSetDVAcc {
+  int[] counts; // number of values seen per slot
+  double[] sum; // sum of values per slot; the inherited result[] holds the sum of squares
+
+  public SDVSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+    super(fcontext, sf, numSlots, 0);
+    this.counts = new int[numSlots];
+    this.sum = new double[numSlots];
+  }
+
+  @Override
+  protected void collectValues(int doc, int slot) throws IOException {
+    long ord;
+    while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+      BytesRef term = values.lookupOrd(ord);
+      Object obj = sf.getType().toObject(sf, term);
+      double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue(); // dates count as epoch millis
+      result[slot] += val * val;
+      sum[slot] += val;
+      counts[slot]++;
+    }
+  }
+
+  protected abstract double computeVal(int slot);
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(computeVal(slotA), computeVal(slotB));
+  }
+
+  @Override
+  public Object getValue(int slot) {
+    if (!fcontext.isShard()) {
+      return computeVal(slot);
+    }
+    // shard response: [count, sum of squares, sum]
+    ArrayList<Number> lst = new ArrayList<>(3);
+    lst.add(counts[slot]);
+    lst.add(result[slot]);
+    lst.add(sum[slot]);
+    return lst;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    Arrays.fill(counts, 0);
+    Arrays.fill(sum, 0);
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    super.resize(resizer);
+    this.counts = resizer.resize(counts, 0); // resize() returns the resized copy; it must be stored
+    this.sum = resizer.resize(sum, 0);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
index 1c961e0..0d7d863 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
@@ -25,11 +25,15 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrFieldSource;
import org.apache.solr.search.function.FieldNameValueSource;
@@ -53,8 +57,21 @@ public class MinMaxAgg extends SimpleAggValueSource {
sf = fcontext.qcontext.searcher().getSchema().getField(field);
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
- vs = null;
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field);
+ if (sf.hasDocValues()) {
+ if(sf.getType().getNumberType() != null) {
+ FieldType.MultiValueSelector choice = minmax == 1 ? FieldType.MultiValueSelector.MIN : FieldType.MultiValueSelector.MAX;
+ vs = sf.getType().getSingleValueSource(choice, sf, null);
+ } else {
+ // multi-valued strings
+ return new MinMaxSortedSetDVAcc(fcontext, sf, numSlots);
+ }
+ } else {
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "min/max aggregations can't be used on PointField w/o DocValues");
+ }
+ return new MinMaxUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
} else {
vs = sf.getType().getValueSource(sf, null);
}
@@ -137,6 +154,80 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
}
+  class MinMaxUnInvertedFieldAcc extends UnInvertedFieldAcc {
+    static final int MISSING = -1; // marks a slot that has not seen any term yet
+    private int currentSlot; // slot being collected; read by call() while terms are visited
+    int[] result; // per slot: ord of the smallest (min) or largest (max) term seen so far
+
+    public MinMaxUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+      result = new int[numSlots];
+      Arrays.fill(result, MISSING);
+    }
+
+    @Override
+    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
+      this.currentSlot = slot;
+      docToTerm.getBigTerms(doc + currentDocBase, this);
+      docToTerm.getSmallTerms(doc + currentDocBase, this);
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      int a = result[slotA];
+      int b = result[slotB];
+      return a == MISSING ? -1: (b == MISSING? 1: Integer.compare(a, b));
+    }
+
+    @Override
+    public Object getValue(int slotNum) throws IOException {
+      int ord = result[slotNum];
+      if (ord == MISSING) return null;
+      BytesRef term = docToTerm.lookupOrd(ord);
+      return getObject(term);
+    }
+
+    /**
+     * Wrapper to convert stored format to external format.
+     * <p>
+     * This ensures consistent behavior like other accumulators where
+     * long is returned for integer field types; float field types are returned as Float (via floatValue())
+     * </p>
+     */
+    private Object getObject(BytesRef term) {
+      Object obj = sf.getType().toObject(sf, term);
+      NumberType type = sf.getType().getNumberType();
+      if (type == null) {
+        return obj;
+      } else if (type == NumberType.INTEGER) {
+        // this is to ensure consistent behavior with other accumulators
+        // where long is returned for integer field types
+        return ((Number)obj).longValue();
+      } else if (type == NumberType.FLOAT) {
+        return ((Number)obj).floatValue();
+      }
+      return obj;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      Arrays.fill(result, MISSING);
+    }
+
+    @Override
+    public void resize(Resizer resizer) {
+      this.result = resizer.resize(result, MISSING); // resize() returns the resized copy; it must be stored
+    }
+
+    @Override
+    public void call(int termNum) {
+      int currOrd = result[currentSlot];
+      if (currOrd == MISSING || Integer.compare(termNum, currOrd) * minmax < 0) { // minmax sign flips the comparison
+        result[currentSlot] = termNum;
+      }
+    }
+  }
+
class DFuncAcc extends DoubleFuncSlotAcc {
public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots, Double.NaN);
@@ -291,7 +382,6 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
}
-
class SingleValuedOrdAcc extends OrdAcc {
SortedDocValues topLevel;
SortedDocValues[] subDvs;
@@ -346,5 +436,94 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
}
+  class MinMaxSortedSetDVAcc extends DocValuesAcc {
+    static final int MISSING = -1; // marks a slot that has not seen any value yet
+    SortedSetDocValues topLevel; // top-level doc values; used to look up the winning ord in getValue()
+    SortedSetDocValues[] subDvs; // per-segment doc values, or null when topLevel is not a multi-segment view
+    OrdinalMap ordMap; // segment ord to global ord mapping, or null together with subDvs
+    LongValues toGlobal; // ord mapping for the current segment
+    SortedSetDocValues subDv; // doc values being iterated for the current segment
+    long[] slotOrd; // per slot: smallest (min) or largest (max) ord seen so far
+
+    public MinMaxSortedSetDVAcc(FacetContext fcontext, SchemaField field, int numSlots) throws IOException {
+      super(fcontext, field);
+      this.slotOrd = new long[numSlots];
+      Arrays.fill(slotOrd, MISSING);
+    }
+
+    @Override
+    public void resetIterators() throws IOException {
+      super.resetIterators();
+      topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
+      if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
+        ordMap = ((MultiDocValues.MultiSortedSetDocValues)topLevel).mapping;
+        subDvs = ((MultiDocValues.MultiSortedSetDocValues)topLevel).values;
+      } else {
+        ordMap = null;
+        subDvs = null;
+      }
+    }
+
+    @Override
+    public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
+      if (subDvs != null) {
+        subDv = subDvs[readerContext.ord];
+        toGlobal = ordMap.getGlobalOrds(readerContext.ord);
+        assert toGlobal != null;
+      } else {
+        assert readerContext.ord==0 || topLevel.getValueCount() == 0;
+        subDv = topLevel;
+      }
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      long a = slotOrd[slotA];
+      long b = slotOrd[slotB];
+      return a == MISSING ? -1: (b == MISSING? 1: Long.compare(a, b));
+    }
+
+    @Override
+    public Object getValue(int slotNum) throws IOException {
+      long ord = slotOrd[slotNum];
+      if (ord == MISSING) return null;
+      BytesRef term = topLevel.lookupOrd(ord);
+      return sf.getType().toObject(sf, term);
+    }
+
+    @Override
+    public void reset() throws IOException {
+      Arrays.fill(slotOrd, MISSING);
+    }
+
+    @Override
+    public void resize(Resizer resizer) {
+      this.slotOrd = resizer.resize(slotOrd, MISSING); // resize() returns the resized copy; it must be stored
+    }
+
+    @Override
+    public void collectValues(int doc, int slotNum) throws IOException {
+      long newOrd = MISSING;
+      if (minmax == 1) {// min
+        newOrd = subDv.nextOrd();
+      } else { // max
+        long ord;
+        while ((ord = subDv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          newOrd = ord;
+        }
+      }
+      long currOrd = slotOrd[slotNum];
+      long finalOrd = toGlobal==null ? newOrd : toGlobal.get(newOrd); // compare in global ord space when a mapping exists
+      if (currOrd == MISSING || Long.compare(finalOrd, currOrd) * minmax < 0) {
+        slotOrd[slotNum] = finalOrd;
+      }
+    }
+
+    @Override
+    protected DocIdSetIterator docIdSetIterator() {
+      return subDv;
+    }
+  }
}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
index efdef55..c298fd1 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
@@ -17,17 +17,29 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Date;
import java.util.List;
import java.util.function.IntFunction;
import com.tdunning.math.stats.AVLTreeDigest;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.FunctionQParser;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.ValueSourceParser;
+import org.apache.solr.search.function.FieldNameValueSource;
public class PercentileAgg extends SimpleAggValueSource {
List<Double> percentiles;
@@ -39,7 +51,31 @@ public class PercentileAgg extends SimpleAggValueSource {
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new Acc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource) vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new PercentileSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new PercentileSortedSetAcc(fcontext, sf, numSlots);
+ }
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new PercentileUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new Acc(vs, fcontext, numSlots);
}
@Override
@@ -80,7 +116,6 @@ public class PercentileAgg extends SimpleAggValueSource {
}
}
-
protected Object getValueFromDigest(AVLTreeDigest digest) {
if (digest == null) {
return null;
@@ -90,7 +125,7 @@ public class PercentileAgg extends SimpleAggValueSource {
return digest.quantile( percentiles.get(0) * 0.01 );
}
- List<Double> lst = new ArrayList(percentiles.size());
+ List<Double> lst = new ArrayList<>(percentiles.size());
for (Double percentile : percentiles) {
double val = digest.quantile( percentile * 0.01 );
lst.add( val );
@@ -98,8 +133,6 @@ public class PercentileAgg extends SimpleAggValueSource {
return lst;
}
-
-
class Acc extends FuncSlotAcc {
protected AVLTreeDigest[] digests;
protected ByteBuffer buf;
@@ -155,7 +188,6 @@ public class PercentileAgg extends SimpleAggValueSource {
return getValueFromDigest( digests[slotNum] );
}
-
public Object getShardValue(int slot) throws IOException {
AVLTreeDigest digest = digests[slot];
if (digest == null) return null; // no values for this slot
@@ -172,6 +204,76 @@ public class PercentileAgg extends SimpleAggValueSource {
return arr;
}
+ @Override
+ public void reset() {
+ digests = new AVLTreeDigest[digests.length];
+ sortvals = null;
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ digests = resizer.resize(digests, null);
+ }
+ }
+
+ abstract class BasePercentileDVAcc extends DocValuesAcc {
+ AVLTreeDigest[] digests;
+ protected ByteBuffer buf;
+ double[] sortvals;
+
+ public BasePercentileDVAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf);
+ digests = new AVLTreeDigest[numSlots];
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ if (sortvals == null) {
+ fillSortVals();
+ }
+ return Double.compare(sortvals[slotA], sortvals[slotB]);
+ }
+
+ private void fillSortVals() {
+ sortvals = new double[ digests.length ];
+ double sortp = percentiles.get(0) * 0.01;
+ for (int i=0; i<digests.length; i++) {
+ AVLTreeDigest digest = digests[i];
+ if (digest == null) {
+ sortvals[i] = Double.NEGATIVE_INFINITY;
+ } else {
+ sortvals[i] = digest.quantile(sortp);
+ }
+ }
+ }
+
+ @Override
+ public Object getValue(int slotNum) throws IOException {
+ if (fcontext.isShard()) {
+ return getShardValue(slotNum);
+ }
+ if (sortvals != null && percentiles.size()==1) {
+ // we've already calculated everything we need
+ return digests[slotNum] != null ? sortvals[slotNum] : null;
+ }
+ return getValueFromDigest( digests[slotNum] );
+ }
+
+ public Object getShardValue(int slot) throws IOException {
+ AVLTreeDigest digest = digests[slot];
+ if (digest == null) return null; // no values for this slot
+
+ digest.compress();
+ int sz = digest.byteSize();
+ if (buf == null || buf.capacity() < sz) {
+ buf = ByteBuffer.allocate(sz+(sz>>1)); // oversize by 50%
+ } else {
+ buf.clear();
+ }
+ digest.asSmallBytes(buf);
+ byte[] arr = Arrays.copyOf(buf.array(), buf.position());
+ return arr;
+ }
@Override
public void reset() {
@@ -185,6 +287,184 @@ public class PercentileAgg extends SimpleAggValueSource {
}
}
+  /**
+   * Percentile accumulator that reads a field's values from {@link SortedNumericDocValues} and
+   * adds every value of a collected document to the t-digest of the target slot.
+   */
+  class PercentileSortedNumericAcc extends BasePercentileDVAcc {
+    SortedNumericDocValues values;
+
+    public PercentileSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+    }
+
+    /** Switches to the given segment and fetches its sorted-numeric doc values for the field. */
+    @Override
+    public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
+      values = DocValues.getSortedNumeric(readerContext.reader(), sf.getName());
+    }
+
+    @Override
+    protected DocIdSetIterator docIdSetIterator() {
+      return values;
+    }
+
+    /** Adds all values of the current document to the slot's digest, creating the digest on first use. */
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      AVLTreeDigest slotDigest = digests[slot];
+      if (slotDigest == null) {
+        slotDigest = new AVLTreeDigest(100);
+        digests[slot] = slotDigest;
+      }
+      int remaining = values.docValueCount();
+      while (remaining-- > 0) {
+        slotDigest.add(getDouble(values.nextValue()));
+      }
+    }
+
+    /**
+     * Turns a raw doc-values long into a double according to the field's number type:
+     * INTEGER, LONG and DATE are used as-is, FLOAT and DOUBLE are decoded from their sortable form.
+     */
+    protected double getDouble(long val) {
+      switch (sf.getType().getNumberType()) {
+        case FLOAT:
+          return NumericUtils.sortableIntToFloat((int) val);
+        case DOUBLE:
+          return NumericUtils.sortableLongToDouble(val);
+        case INTEGER:
+        case LONG:
+        case DATE:
+          return val;
+        default:
+          // this would never happen
+          return 0.0d;
+      }
+    }
+  }
+
+  /**
+   * Percentile accumulator that reads a field's values as terms from {@link SortedSetDocValues};
+   * each term is decoded through the field type before it is added to the slot's digest.
+   */
+  class PercentileSortedSetAcc extends BasePercentileDVAcc {
+    SortedSetDocValues values;
+
+    public PercentileSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+    }
+
+    /** Switches to the given segment and fetches its sorted-set doc values for the field. */
+    @Override
+    public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
+      values = DocValues.getSortedSet(readerContext.reader(), sf.getName());
+    }
+
+    @Override
+    protected DocIdSetIterator docIdSetIterator() {
+      return values;
+    }
+
+    /** Adds every term of the current document to the slot's digest, creating the digest on first use. */
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      AVLTreeDigest slotDigest = digests[slot];
+      if (slotDigest == null) {
+        slotDigest = new AVLTreeDigest(100);
+        digests[slot] = slotDigest;
+      }
+      for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
+        BytesRef term = values.lookupOrd(ord);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        slotDigest.add(val);
+      }
+    }
+  }
+
+  /**
+   * Percentile accumulator for fields read through {@link UnInvertedField}. Each term ordinal
+   * reported for a document is decoded with the field type and added to the t-digest of the slot
+   * that is currently being collected.
+   */
+  class PercentileUnInvertedFieldAcc extends UnInvertedFieldAcc {
+    protected AVLTreeDigest[] digests;
+    protected ByteBuffer buf;
+    protected double[] sortvals;
+    private int currentSlot;
+
+    public PercentileUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+      digests = new AVLTreeDigest[numSlots];
+    }
+
+    /** Remembers the target slot, then has docToTerm report the document's big and small terms to this object. */
+    @Override
+    public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
+      currentSlot = slot;
+      docToTerm.getBigTerms(doc + currentDocBase, this);
+      docToTerm.getSmallTerms(doc + currentDocBase, this);
+    }
+
+    /** Decodes the term behind {@code ord} and adds it to the digest of the slot set by the last collect call. */
+    @Override
+    public void call(int ord) {
+      AVLTreeDigest slotDigest = digests[currentSlot];
+      if (slotDigest == null) {
+        slotDigest = new AVLTreeDigest(100);
+        digests[currentSlot] = slotDigest;
+      }
+      try {
+        BytesRef term = docToTerm.lookupOrd(ord);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        slotDigest.add(val);
+      } catch (IOException e) {
+        // call(int) declares no checked exceptions, so the IOException is rethrown unchecked
+        throw new UncheckedIOException(e);
+      }
+    }
+
+    /** Orders two slots by their cached sort value; the cache is built on the first call. */
+    @Override
+    public int compare(int slotA, int slotB) {
+      if (null == sortvals) {
+        fillSortVals();
+      }
+      final double left = sortvals[slotA];
+      final double right = sortvals[slotB];
+      return Double.compare(left, right);
+    }
+
+    /** Sort key per slot: the first requested percentile, or negative infinity when the slot has no digest. */
+    private void fillSortVals() {
+      sortvals = new double[ digests.length ];
+      final double quantile = percentiles.get(0) * 0.01;
+      int slot = 0;
+      for (AVLTreeDigest slotDigest : digests) {
+        sortvals[slot++] = (slotDigest != null) ? slotDigest.quantile(quantile) : Double.NEGATIVE_INFINITY;
+      }
+    }
+
+    /**
+     * Returns the serialized digest when running as a shard, the cached sort value when exactly
+     * one percentile was requested and the cache exists, otherwise the result of getValueFromDigest.
+     */
+    @Override
+    public Object getValue(int slotNum) throws IOException {
+      if (fcontext.isShard()) {
+        return getShardValue(slotNum);
+      }
+      final boolean cacheUsable = sortvals != null && percentiles.size()==1;
+      if (!cacheUsable) {
+        return getValueFromDigest( digests[slotNum] );
+      }
+      // we've already calculated everything we need
+      if (digests[slotNum] == null) {
+        return null;
+      }
+      return sortvals[slotNum];
+    }
+
+    /** Serializes the slot's digest into a byte array, or returns null when the slot has no digest. */
+    public Object getShardValue(int slot) throws IOException {
+      final AVLTreeDigest slotDigest = digests[slot];
+      if (slotDigest == null) {
+        return null;
+      }
+
+      slotDigest.compress();
+      final int needed = slotDigest.byteSize();
+      if (buf != null && buf.capacity() >= needed) {
+        buf.clear(); // existing scratch buffer is large enough, reuse it
+      } else {
+        buf = ByteBuffer.allocate(needed+(needed>>1)); // oversize by 50%
+      }
+      slotDigest.asSmallBytes(buf);
+      return Arrays.copyOf(buf.array(), buf.position());
+    }
+
+    /** Drops all digests (same array length) and the cached sort values. */
+    @Override
+    public void reset() {
+      sortvals = null;
+      digests = new AVLTreeDigest[digests.length];
+    }
+
+    /** Replaces the digest array with the one returned by the resizer. */
+    @Override
+    public void resize(Resizer resizer) {
+      digests = resizer.resize(digests, null);
+    }
+  }
class Merger extends FacetSortableMerger {
protected AVLTreeDigest digest;
@@ -221,4 +501,3 @@ public class PercentileAgg extends SimpleAggValueSource {
}
}
}
-
diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
index 244485e..2a92348 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
@@ -431,13 +431,8 @@ class AvgSlotAcc extends DoubleFuncSlotAcc {
}
}
- private double avg(double tot, int count) {
- return count == 0 ? 0 : tot / count; // returns 0 instead of NaN.. todo - make configurable? if NaN, we need to
- // handle comparisons though...
- }
-
+ /** Average for {@code slot}: delegates to AggUtil.avg with result[slot] and counts[slot]. */
private double avg(int slot) {
- return avg(result[slot], counts[slot]); // calc once and cache in result?
+ return AggUtil.avg(result[slot], counts[slot]); // calc once and cache in result?
}
@Override
@@ -488,13 +483,8 @@ class VarianceSlotAcc extends DoubleFuncSlotAcc {
this.sum = resizer.resize(this.sum, 0);
}
- private double variance(double sumSq, double sum, int count) {
- double val = count == 0 ? 0 : (sumSq / count) - Math.pow(sum / count, 2);
- return val;
- }
-
+ /** Variance for {@code slot}: delegates to AggUtil.variance with result[slot] (passed in the sum-of-squares position), sum[slot] and counts[slot]. */
private double variance(int slot) {
- return variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override
@@ -550,13 +540,8 @@ class StddevSlotAcc extends DoubleFuncSlotAcc {
this.result = resizer.resize(this.result, 0);
}
- private double stdDev(double sumSq, double sum, int count) {
- double val = count == 0 ? 0 : Math.sqrt((sumSq / count) - Math.pow(sum / count, 2));
- return val;
- }
-
+ /** Standard deviation for {@code slot}: delegates to AggUtil.stdDev with result[slot] (passed in the sum-of-squares position), sum[slot] and counts[slot]. */
private double stdDev(int slot) {
- return stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
}
@Override
diff --git a/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java b/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java
index 917df6e..d7237d3 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/StddevAgg.java
@@ -21,6 +21,9 @@ import java.io.IOException;
import java.util.List;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.function.FieldNameValueSource;
public class StddevAgg extends SimpleAggValueSource {
@@ -30,7 +33,31 @@ public class StddevAgg extends SimpleAggValueSource {
+ /**
+ * Chooses the accumulator for this stddev aggregation.
+ * <p>
+ * When the argument is a bare field name the schema field is looked up and must be numeric.
+ * A field that is multiValued (or whose type reports multiValuedFieldCache) gets a dedicated
+ * accumulator: SortedNumeric for point fields with docValues, SortedSet for other fields with
+ * docValues, UnInvertedField for non-point fields without docValues. Any other field is turned
+ * into its value source and handled by StddevSlotAcc, as is every non-field argument.
+ *
+ * @throws SolrException BAD_REQUEST when the field type has no number type, or when the field
+ *         is a multi-valued PointField without docValues
+ */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new StddevSlotAcc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource) vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ // only numeric field types (getNumberType() != null) are accepted
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new StddevSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new StddevSortedSetAcc(fcontext, sf, numSlots);
+ }
+ // no docValues from here on: point fields are rejected, other fields go through UnInvertedField
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new StddevUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ // not multi-valued: swap the field name for the field type's value source
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new StddevSlotAcc(vs, fcontext, numSlots);
}
@Override
@@ -58,9 +85,44 @@ public class StddevAgg extends SimpleAggValueSource {
}
+ /** Merged standard deviation, computed by AggUtil.stdDev from this merger's sumSq, sum and count. */
@Override
- protected double getDouble() {
- double val = count == 0 ? 0.0d : Math.sqrt((sumSq/count)-Math.pow(sum/count, 2));
- return val;
+ protected double getDouble() {
+ return AggUtil.stdDev(sumSq, sum, count);
}
- };
+ }
+
+ /**
+ * Standard-deviation accumulator built on SDVSortedNumericAcc; createSlotAcc selects it for
+ * multi-valued point fields with docValues. Only the final computation is defined here.
+ */
+ class StddevSortedNumericAcc extends SDVSortedNumericAcc {
+
+ public StddevSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf, numSlots);
+ }
+
+ /** Standard deviation for the slot: AggUtil.stdDev over the inherited result, sum and counts entries. */
+ @Override
+ protected double computeVal(int slot) {
+ return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ }
+ }
+
+ /**
+ * Standard-deviation accumulator built on SDVSortedSetAcc; createSlotAcc selects it for
+ * multi-valued non-point fields with docValues. Only the final computation is defined here.
+ */
+ class StddevSortedSetAcc extends SDVSortedSetAcc {
+
+ public StddevSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf, numSlots);
+ }
+
+ /** Standard deviation for the slot: AggUtil.stdDev over the inherited result, sum and counts entries. */
+ @Override
+ protected double computeVal(int slot) {
+ return AggUtil.stdDev(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ }
+ }
+
+  /**
+   * Standard-deviation accumulator built on SDVUnInvertedFieldAcc; createSlotAcc selects it for
+   * multi-valued non-point fields without docValues.
+   */
+  class StddevUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
+
+    public StddevUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+    }
+
+    /** Standard deviation for the slot from its sum of squares, sum and value count. */
+    @Override
+    protected double computeVal(int slot) {
+      final double sumOfSquares = result[slot];
+      final double total = sum[slot];
+      final int n = counts[slot];
+      return AggUtil.stdDev(sumOfSquares, total, n); // calc once and cache in result?
+    }
+  }
}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java
index 7b7f34b..7cd4b9d 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/SumAgg.java
@@ -17,8 +17,15 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Date;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.function.FieldNameValueSource;
public class SumAgg extends SimpleAggValueSource {
@@ -28,7 +35,31 @@ public class SumAgg extends SimpleAggValueSource {
+ /**
+ * Chooses the accumulator for this sum aggregation.
+ * <p>
+ * When the argument is a bare field name the schema field is looked up and must be numeric.
+ * A field that is multiValued (or whose type reports multiValuedFieldCache) gets a dedicated
+ * accumulator: SortedNumeric for point fields with docValues, SortedSet for other fields with
+ * docValues, UnInvertedField for non-point fields without docValues. Any other field is turned
+ * into its value source and handled by SumSlotAcc, as is every non-field argument.
+ *
+ * @throws SolrException BAD_REQUEST when the field type has no number type, or when the field
+ *         is a multi-valued PointField without docValues
+ */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new SumSlotAcc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource)vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ // only numeric field types (getNumberType() != null) are accepted
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new SumSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new SumSortedSetAcc(fcontext, sf, numSlots);
+ }
+ // no docValues from here on: point fields are rejected, other fields go through UnInvertedField
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new SumUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ // not multi-valued: swap the field name for the field type's value source
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new SumSlotAcc(vs, fcontext, numSlots);
}
@Override
@@ -48,5 +79,58 @@ public class SumAgg extends SimpleAggValueSource {
return val;
}
}
+
+ class SumSortedNumericAcc extends DoubleSortedNumericDVAcc {
+
+ public SumSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf, numSlots, 0);
+ }
+
+ @Override
+ protected void collectValues(int doc, int slot) throws IOException {
+ for (int i = 0, count = values.docValueCount(); i < count; i++) {
+ result[slot]+=getDouble(values.nextValue());
+ }
+ }
+
+ }
+
+  /**
+   * Sum accumulator for multi-valued non-point fields with docValues: every term of a collected
+   * document is decoded through the field type and added to the slot's running total.
+   */
+  class SumSortedSetAcc extends DoubleSortedSetDVAcc {
+
+    public SumSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+    }
+
+    /** Walks the current document's ordinals and adds each decoded value to result[slot]. */
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
+        BytesRef term = values.lookupOrd(ord);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        result[slot] += val;
+      }
+    }
+  }
+
+  /**
+   * Sum accumulator for multi-valued non-point fields without docValues; terms arrive one
+   * ordinal at a time through {@link #call(int)}.
+   */
+  class SumUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
+
+    public SumUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+    }
+
+    /** Decodes the term behind {@code termNum} and adds it to the total of the slot being collected. */
+    @Override
+    public void call(int termNum) {
+      try {
+        BytesRef term = docToTerm.lookupOrd(termNum);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        result[currentSlot] += val;
+      } catch (IOException e) {
+        // find a better way to do it
+        throw new UncheckedIOException(e);
+      }
+    }
+  }
}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java
index 732ab14..133e39c 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/SumsqAgg.java
@@ -17,8 +17,15 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Date;
+import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.function.FieldNameValueSource;
public class SumsqAgg extends SimpleAggValueSource {
public SumsqAgg(ValueSource vs) {
@@ -27,11 +34,88 @@ public class SumsqAgg extends SimpleAggValueSource {
+ /**
+ * Chooses the accumulator for this sumsq aggregation.
+ * <p>
+ * When the argument is a bare field name the schema field is looked up and must be numeric.
+ * A field that is multiValued (or whose type reports multiValuedFieldCache) gets a dedicated
+ * accumulator: SortedNumeric for point fields with docValues, SortedSet for other fields with
+ * docValues, UnInvertedField for non-point fields without docValues. Any other field is turned
+ * into its value source and handled by SumsqSlotAcc, as is every non-field argument.
+ *
+ * @throws SolrException BAD_REQUEST when the field type has no number type, or when the field
+ *         is a multi-valued PointField without docValues
+ */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new SumsqSlotAcc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource)vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ // only numeric field types (getNumberType() != null) are accepted
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new SumSqSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new SumSqSortedSetAcc(fcontext, sf, numSlots);
+ }
+ // no docValues from here on: point fields are rejected, other fields go through UnInvertedField
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new SumSqUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ // not multi-valued: swap the field name for the field type's value source
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new SumsqSlotAcc(vs, fcontext, numSlots);
}
@Override
public FacetMerger createFacetMerger(Object prototype) {
return new SumAgg.Merger();
}
+
+  /**
+   * Sum-of-squares accumulator for multi-valued point fields with docValues: every value of a
+   * collected document is converted with getDouble, squared and added to the slot's total.
+   */
+  class SumSqSortedNumericAcc extends DoubleSortedNumericDVAcc {
+
+    public SumSqSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+    }
+
+    /** Adds the square of each of the current document's values to result[slot], one at a time. */
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      int remaining = values.docValueCount();
+      while (remaining-- > 0) {
+        double val = getDouble(values.nextValue());
+        result[slot] += val * val;
+      }
+    }
+  }
+
+  /**
+   * Sum-of-squares accumulator for multi-valued non-point fields with docValues: every term of
+   * a collected document is decoded through the field type, squared and added to the slot's total.
+   */
+  class SumSqSortedSetAcc extends DoubleSortedSetDVAcc {
+
+    public SumSqSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+    }
+
+    /** Walks the current document's ordinals and adds each decoded value squared to result[slot]. */
+    @Override
+    protected void collectValues(int doc, int slot) throws IOException {
+      for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
+        BytesRef term = values.lookupOrd(ord);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        result[slot] += val * val;
+      }
+    }
+  }
+
+  /**
+   * Sum-of-squares accumulator for multi-valued non-point fields without docValues; terms
+   * arrive one ordinal at a time through {@link #call(int)}.
+   */
+  class SumSqUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
+
+    public SumSqUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots, 0);
+    }
+
+    /** Decodes the term behind {@code termNum} and adds its square to the total of the slot being collected. */
+    @Override
+    public void call(int termNum) {
+      try {
+        BytesRef term = docToTerm.lookupOrd(termNum);
+        Object decoded = sf.getType().toObject(sf, term);
+        final double val;
+        if (decoded instanceof Date) {
+          val = ((Date) decoded).getTime(); // dates contribute their epoch-millisecond value
+        } else {
+          val = ((Number) decoded).doubleValue();
+        }
+        result[currentSlot] += val * val;
+      } catch (IOException e) {
+        // find a better way to do it
+        throw new UncheckedIOException(e);
+      }
+    }
+  }
}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java
index 3230d38..7f2d9eb 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedFieldAcc.java
@@ -18,7 +18,13 @@
package org.apache.solr.search.facet;
import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.function.IntFunction;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
/**
@@ -28,9 +34,11 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
UnInvertedField uif;
UnInvertedField.DocToTerm docToTerm;
+ SchemaField sf;
public UnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
super(fcontext);
+ this.sf = sf;
uif = UnInvertedField.getUnInvertedField(sf.getName(), fcontext.qcontext.searcher());
docToTerm = uif.new DocToTerm();
fcontext.qcontext.addCloseHook(this);
@@ -44,3 +52,108 @@ public abstract class UnInvertedFieldAcc extends SlotAcc implements UnInvertedFi
}
}
}
+
+/**
+ * Base accumulator that keeps one {@code double} per slot for fields read through
+ * {@link UnInvertedField}. {@code collect} records the target slot in {@code currentSlot} and
+ * then has {@code docToTerm} report the document's terms to this object; what happens for each
+ * reported term ({@code call(int)}) is left to subclasses.
+ */
+abstract class DoubleUnInvertedFieldAcc extends UnInvertedFieldAcc {
+  double[] result;
+  int currentSlot;
+  double initialValue;
+
+  /**
+   * @param numSlots     number of slots to allocate
+   * @param initialValue value every slot starts from and is reset to
+   */
+  public DoubleUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots, double initialValue) throws IOException {
+    super(fcontext, sf, numSlots);
+    result = new double[numSlots];
+    if (initialValue != 0) {
+      // a fresh array is already zero-filled, so only a non-zero start value needs work
+      this.initialValue = initialValue;
+      Arrays.fill(result, initialValue);
+    }
+  }
+
+  @Override
+  public void collect(int doc, int slot, IntFunction<SlotContext> slotContext) throws IOException {
+    this.currentSlot = slot;
+    docToTerm.getBigTerms(doc + currentDocBase, this);
+    docToTerm.getSmallTerms(doc + currentDocBase, this);
+  }
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(result[slotA], result[slotB]);
+  }
+
+  @Override
+  public Object getValue(int slotNum) throws IOException {
+    return result[slotNum];
+  }
+
+  @Override
+  public void reset() throws IOException {
+    Arrays.fill(result, initialValue);
+  }
+
+  /**
+   * Resizes the per-slot values. The resizer hands back the resized array, so the return value
+   * must be stored; previously it was discarded and {@code result} was never actually resized.
+   */
+  @Override
+  public void resize(Resizer resizer) {
+    this.result = resizer.resize(result, initialValue);
+  }
+}
+
+/**
+ * Base accumulator to compute standard deviation and variance for uninvertible fields.
+ * Per slot it tracks the sum of squares (in {@code result}), the plain sum ({@code sum}) and
+ * the number of values seen ({@code counts}); subclasses turn those into the final statistic
+ * in {@link #computeVal(int)}.
+ */
+abstract class SDVUnInvertedFieldAcc extends DoubleUnInvertedFieldAcc {
+  int[] counts;
+  double[] sum;
+
+  public SDVUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+    super(fcontext, sf, numSlots, 0);
+    this.counts = new int[numSlots];
+    this.sum = new double[numSlots];
+  }
+
+  /** Decodes the term behind {@code termNum} and folds it into the current slot's sum of squares, sum and count. */
+  @Override
+  public void call(int termNum) {
+    try {
+      BytesRef term = docToTerm.lookupOrd(termNum);
+      Object obj = sf.getType().toObject(sf, term);
+      double val = obj instanceof Date ? ((Date)obj).getTime(): ((Number)obj).doubleValue();
+      result[currentSlot] += val * val;
+      sum[currentSlot]+= val;
+      counts[currentSlot]++;
+    } catch (IOException e) {
+      // find a better way to do it
+      throw new UncheckedIOException(e);
+    }
+  }
+
+  /** Final statistic for the slot, derived from {@code result}, {@code sum} and {@code counts}. */
+  protected abstract double computeVal(int slot);
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    return Double.compare(computeVal(slotA), computeVal(slotB));
+  }
+
+  /**
+   * On a shard returns the raw triple {@code [count, sumOfSquares, sum]}; otherwise returns the
+   * computed statistic.
+   */
+  @Override
+  public Object getValue(int slot) {
+    if (fcontext.isShard()) {
+      ArrayList<Object> lst = new ArrayList<>(3);
+      lst.add(counts[slot]);
+      lst.add(result[slot]);
+      lst.add(sum[slot]);
+      return lst;
+    } else {
+      return computeVal(slot);
+    }
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    Arrays.fill(counts, 0);
+    Arrays.fill(sum, 0);
+  }
+
+  /**
+   * Resizes all three per-slot arrays. As in the superclass, the arrays returned by the resizer
+   * must be stored; previously they were discarded and {@code counts} and {@code sum} kept
+   * their old size.
+   */
+  @Override
+  public void resize(Resizer resizer) {
+    super.resize(resizer);
+    this.counts = resizer.resize(counts, 0);
+    this.sum = resizer.resize(sum, 0);
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java b/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java
index ec6955f..f04a073 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/VarianceAgg.java
@@ -20,6 +20,9 @@ import java.io.IOException;
import java.util.List;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.function.FieldNameValueSource;
public class VarianceAgg extends SimpleAggValueSource {
@@ -29,7 +32,31 @@ public class VarianceAgg extends SimpleAggValueSource {
+ /**
+ * Chooses the accumulator for this variance aggregation.
+ * <p>
+ * When the argument is a bare field name the schema field is looked up and must be numeric.
+ * A field that is multiValued (or whose type reports multiValuedFieldCache) gets a dedicated
+ * accumulator: SortedNumeric for point fields with docValues, SortedSet for other fields with
+ * docValues, UnInvertedField for non-point fields without docValues. Any other field is turned
+ * into its value source and handled by VarianceSlotAcc, as is every non-field argument.
+ *
+ * @throws SolrException BAD_REQUEST when the field type has no number type, or when the field
+ *         is a multi-valued PointField without docValues
+ */
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
- return new VarianceSlotAcc(getArg(), fcontext, numSlots);
+ ValueSource vs = getArg();
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource) vs).getFieldName();
+ SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ // only numeric field types (getNumberType() != null) are accepted
+ if (sf.getType().getNumberType() == null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for " + sf.getType().getTypeName());
+ }
+ if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
+ if (sf.hasDocValues()) {
+ if (sf.getType().isPointField()) {
+ return new VarianceSortedNumericAcc(fcontext, sf, numSlots);
+ }
+ return new VarianceSortedSetAcc(fcontext, sf, numSlots);
+ }
+ // no docValues from here on: point fields are rejected, other fields go through UnInvertedField
+ if (sf.getType().isPointField()) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ name() + " aggregation not supported for PointField w/o docValues");
+ }
+ return new VarianceUnInvertedFieldAcc(fcontext, sf, numSlots);
+ }
+ // not multi-valued: swap the field name for the field type's value source
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ return new VarianceSlotAcc(vs, fcontext, numSlots);
}
@Override
@@ -57,9 +84,44 @@ public class VarianceAgg extends SimpleAggValueSource {
}
+ /** Merged variance, computed by AggUtil.variance from this merger's sumSq, sum and count. */
@Override
- protected double getDouble() {
- double val = count == 0 ? 0.0d : (sumSq/count)-Math.pow(sum/count, 2);
- return val;
+ protected double getDouble() {
+ return AggUtil.variance(sumSq, sum, count);
}
- };
+ }
+
+ /**
+ * Variance accumulator built on SDVSortedNumericAcc; createSlotAcc selects it for
+ * multi-valued point fields with docValues. Only the final computation is defined here.
+ */
+ class VarianceSortedNumericAcc extends SDVSortedNumericAcc {
+
+ public VarianceSortedNumericAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf, numSlots);
+ }
+
+ /** Variance for the slot: AggUtil.variance over the inherited result, sum and counts entries. */
+ @Override
+ protected double computeVal(int slot) {
+ return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ }
+ }
+
+ /**
+ * Variance accumulator built on SDVSortedSetAcc; createSlotAcc selects it for
+ * multi-valued non-point fields with docValues. Only the final computation is defined here.
+ */
+ class VarianceSortedSetAcc extends SDVSortedSetAcc {
+
+ public VarianceSortedSetAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+ super(fcontext, sf, numSlots);
+ }
+
+ /** Variance for the slot: AggUtil.variance over the inherited result, sum and counts entries. */
+ @Override
+ protected double computeVal(int slot) {
+ return AggUtil.variance(result[slot], sum[slot], counts[slot]); // calc once and cache in result?
+ }
+ }
+
+  /**
+   * Variance accumulator built on SDVUnInvertedFieldAcc; createSlotAcc selects it for
+   * multi-valued non-point fields without docValues.
+   */
+  class VarianceUnInvertedFieldAcc extends SDVUnInvertedFieldAcc {
+
+    public VarianceUnInvertedFieldAcc(FacetContext fcontext, SchemaField sf, int numSlots) throws IOException {
+      super(fcontext, sf, numSlots);
+    }
+
+    /** Variance for the slot from its sum of squares, sum and value count. */
+    @Override
+    protected double computeVal(int slot) {
+      final double sumOfSquares = result[slot];
+      final double total = sum[slot];
+      final int n = counts[slot];
+      return AggUtil.variance(sumOfSquares, total, n); // calc once and cache in result?
+    }
+  }
}
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml
index d5cf090..5c3d483 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml
@@ -694,6 +694,7 @@
-->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_i1" type="int" indexed="true" stored="true" multiValued="false" sortMissingLast="true"/>
+ <dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true" sortMissingLast="true"/>
<dynamicField name="*_idv" type="int" indexed="true" stored="true" docValues="true" multiValued="false"/>
diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
index 3fffc30..4df839b 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java
@@ -18,22 +18,26 @@ package org.apache.solr.handler.component;
import java.nio.ByteBuffer;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
-import java.util.Arrays;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
-import java.util.Iterator;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
+import com.google.common.hash.HashFunction;
+import com.tdunning.math.stats.AVLTreeDigest;
+import org.apache.commons.math3.util.Combinations;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.TermQuery;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
+import org.apache.lucene.search.TermQuery;
+import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
@@ -44,23 +48,17 @@ import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.core.SolrCore;
-import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.handler.component.StatsField.HllOptions;
+import org.apache.solr.handler.component.StatsField.Stat;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.SolrTestCaseJ4;
-
-import org.apache.commons.math3.util.Combinations;
-import com.tdunning.math.stats.AVLTreeDigest;
-import com.google.common.hash.HashFunction;
import org.apache.solr.util.hll.HLL;
-
import org.junit.BeforeClass;
/**
- * Statistics Component Test
+ * Statistics Component Test (which also checks some equivalent json.facet functionality)
*/
public class StatsComponentTest extends SolrTestCaseJ4 {
@@ -116,7 +114,7 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
"stats_tis_ni_dv","stats_tfs_ni_dv","stats_tls_ni_dv","stats_tds_ni_dv", // Doc Values Not indexed
"stats_is_p", "stats_fs_p", "stats_ls_p", "stats_ds_p", // Point Fields
"stats_is_ni_p","stats_fs_ni_p","stats_ls_ni_p" // Point Doc Values Not indexed
- }) {
+ }) {
doTestMVFieldStatisticsResult(f);
clearIndex();
@@ -298,87 +296,90 @@ public class StatsComponentTest extends SolrTestCaseJ4 {
"fq", "{!tag=fq1}id:1"),
params("stats.field", "{!ex=fq1,fq2}"+f, "stats", "true",
"fq", "{!tag=fq1}-id_i:[0 TO 2]",
- "fq", "{!tag=fq2}-id_i:[2 TO 1000]") }) {
-
-
+ "fq", "{!tag=fq2}-id_i:[2 TO 1000]"),
+ params("json.facet", // note: no distinctValues support and not comparing min/max values
+ "{min:'min("+f+")',count:'countvals("+f+")',missing:'missing("+f+")',max:'max("+f+")', sum:'sum("+f+")', " +
+ " countDistinct:'unique("+f+")', sumOfSquares:'sumsq("+f+")', mean:'avg("+f+")', stddev:'stddev("+f+")' }")
+ }) {
+ // easy switch to know if/when we are using json.facet which doesn't support some options
+ final boolean json = (null != baseParams.get("json.facet"));
assertQ("test statistics values",
req(baseParams, "q", "*:*", "stats.calcdistinct", "true")
- , "//double[@name='min'][.='-100.0']"
- , "//double[@name='max'][.='200.0']"
+ , json ? "//*" : "//double[@name='min'][.='-100.0']"
+ , json ? "//*" : "//double[@name='max'][.='200.0']"
, "//double[@name='sum'][.='9.0']"
, "//long[@name='count'][.='8']"
, "//long[@name='missing'][.='3']"
- , "//long[@name='countDistinct'][.='8']"
- , "count(//arr[@name='distinctValues']/*)=8"
+ , json ? "//int[@name='countDistinct'][.='8']": "//long[@name='countDistinct'][.='8']" // SOLR-11775
+ , json ? "//*" : "count(//arr[@name='distinctValues']/*)=8"
, "//double[@name='sumOfSquares'][.='53101.0']"
, "//double[@name='mean'][.='1.125']"
- , "//double[@name='stddev'][.='87.08852228787508']"
+ ,json ? "//*" : "//double[@name='stddev'][.='87.08852228787508']" // SOLR-11725
);
assertQ("test statistics values w/fq",
req(baseParams, "fq", "-id:1",
"q", "*:*", "stats.calcdistinct", "true")
- , "//double[@name='min'][.='-40.0']"
- , "//double[@name='max'][.='200.0']"
+ , json ? "//*" : "//double[@name='min'][.='-40.0']"
+ , json ? "//*" : "//double[@name='max'][.='200.0']"
, "//double[@name='sum'][.='119.0']"
, "//long[@name='count'][.='6']"
, "//long[@name='missing'][.='3']"
- , "//long[@name='countDistinct'][.='6']"
- , "count(//arr[@name='distinctValues']/*)=6"
+ , json? "//int[@name='countDistinct'][.='6']" :"//long[@name='countDistinct'][.='6']" // SOLR-11775
+ , json ? "//*" : "count(//arr[@name='distinctValues']/*)=6"
, "//double[@name='sumOfSquares'][.='43001.0']"
, "//double[@name='mean'][.='19.833333333333332']"
- , "//double[@name='stddev'][.='90.15634568163611']"
- );
-
- // TODO: why are there 3 identical requests below?
-
- assertQ("test statistics values",
- req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
- , "//double[@name='min'][.='-100.0']"
- , "//double[@name='max'][.='200.0']"
- , "//double[@name='sum'][.='9.0']"
- , "//long[@name='count'][.='8']"
- , "//long[@name='missing'][.='3']"
- , "//long[@name='countDistinct'][.='8']"
- , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8"
- , "//double[@name='sumOfSquares'][.='53101.0']"
- , "//double[@name='mean'][.='1.125']"
- , "//double[@name='stddev'][.='87.08852228787508']"
+ , json ? "//*" : "//double[@name='stddev'][.='90.15634568163611']" // SOLR-11725
);
- assertQ("test value for active_s=true",
- req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
- , "//lst[@name='true']/double[@name='min'][.='-100.0']"
- , "//lst[@name='true']/double[@name='max'][.='200.0']"
- , "//lst[@name='true']/double[@name='sum'][.='70.0']"
- , "//lst[@name='true']/long[@name='count'][.='4']"
- , "//lst[@name='true']/long[@name='missing'][.='1']"
- , "//lst[@name='true']//long[@name='countDistinct'][.='4']"
- , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
- , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
- , "//lst[@name='true']/double[@name='mean'][.='17.5']"
- , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
- );
-
- assertQ("test value for active_s=false",
- req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
- , "//lst[@name='false']/double[@name='min'][.='-40.0']"
- , "//lst[@name='false']/double[@name='max'][.='10.0']"
- , "//lst[@name='false']/double[@name='sum'][.='-61.0']"
- , "//lst[@name='false']/long[@name='count'][.='4']"
- , "//lst[@name='false']/long[@name='missing'][.='2']"
- , "//lst[@name='true']//long[@name='countDistinct'][.='4']"
- , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
- , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']"
- , "//lst[@name='false']/double[@name='mean'][.='-15.25']"
- , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
- );
+ if (!json) { // checking stats.facet makes no sense for json faceting (stats.facet is only passed to the stats.field param sets)
+ assertQ("test stats.facet (using boolean facet field)",
+ req(baseParams, "q", "*:*", "stats.calcdistinct", "true", "stats.facet", "active_s")
+ // baseline: stats for the whole field, scoped to the lst named after the field
+ , "//lst[@name='"+f+"']/double[@name='min'][.='-100.0']"
+ , "//lst[@name='"+f+"']/double[@name='max'][.='200.0']"
+ , "//lst[@name='"+f+"']/double[@name='sum'][.='9.0']"
+ , "//lst[@name='"+f+"']/long[@name='count'][.='8']"
+ , "//lst[@name='"+f+"']/long[@name='missing'][.='3']"
+ , "//lst[@name='"+f+"']/long[@name='countDistinct'][.='8']"
+ , "count(//lst[@name='" + f + "']/arr[@name='distinctValues']/*)=8"
+ , "//lst[@name='"+f+"']/double[@name='sumOfSquares'][.='53101.0']"
+ , "//lst[@name='"+f+"']/double[@name='mean'][.='1.125']"
+ , "//lst[@name='"+f+"']/double[@name='stddev'][.='87.08852228787508']"
+ // facet 'true': stats restricted to the active_s=true bucket
+ , "//lst[@name='true']/double[@name='min'][.='-100.0']"
+ , "//lst[@name='true']/double[@name='max'][.='200.0']"
+ , "//lst[@name='true']/double[@name='sum'][.='70.0']"
+ , "//lst[@name='true']/long[@name='count'][.='4']"
+ , "//lst[@name='true']/long[@name='missing'][.='1']"
+ , "//lst[@name='true']//long[@name='countDistinct'][.='4']"
+ , "count(//lst[@name='true']/arr[@name='distinctValues']/*)=4"
+ , "//lst[@name='true']/double[@name='sumOfSquares'][.='50500.0']"
+ , "//lst[@name='true']/double[@name='mean'][.='17.5']"
+ , "//lst[@name='true']/double[@name='stddev'][.='128.16005617976296']"
+ // facet 'false': every check below targets the active_s=false bucket (the distinct checks used to re-test 'true')
+ , "//lst[@name='false']/double[@name='min'][.='-40.0']"
+ , "//lst[@name='false']/double[@name='max'][.='10.0']"
+ , "//lst[@name='false']/double[@name='sum'][.='-61.0']"
+ , "//lst[@name='false']/long[@name='count'][.='4']"
+ , "//lst[@name='false']/long[@name='missing'][.='2']"
+ , "//lst[@name='false']//long[@name='countDistinct'][.='4']"
+ , "count(//lst[@name='false']/arr[@name='distinctValues']/*)=4"
+ , "//lst[@name='false']/double[@name='sumOfSquares'][.='2601.0']"
+ , "//lst[@name='false']/double[@name='mean'][.='-15.25']"
+ , "//lst[@name='false']/double[@name='stddev'][.='23.59908190304586']"
+ );
+ }
}
assertQ("cardinality"
- , req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f)
- , "//long[@name='cardinality'][.='8']"
- );
+ , req("q", "*:*", "rows", "0", "stats", "true", "stats.field", "{!cardinality=true}" + f)
+ , "//long[@name='cardinality'][.='8']"
+ );
+ assertQ("json cardinality"
+ , req("q", "*:*", "rows", "0", "json.facet", "{cardinality:'hll("+f+")'}")
+ , "//int[@name='cardinality'][.='8']" // SOLR-11775
+ );
}
public void testFieldStatisticsResultsStringField() throws Exception {
diff --git a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java
index 450745b..6c019b7 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/DistributedFacetSimpleRefinementLongTailTest.java
@@ -43,20 +43,21 @@ import org.junit.Test;
public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistributedSearchTestCase {
// TODO: add hll & variance - update all assertions to test their values (right after any mention of 'stddev')
- private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique", "missing", "countvals");
+ private static List<String> ALL_STATS = Arrays.asList("min", "max", "sum", "stddev", "avg", "sumsq", "unique",
+ "missing", "countvals", "percentile");
- private String STAT_FIELD = "stat_i1";
+ private final String STAT_FIELD;
private String ALL_STATS_JSON = "";
public DistributedFacetSimpleRefinementLongTailTest() {
// we need DVs on point fields to compute stats & facets
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP)) System.setProperty(NUMERIC_DOCVALUES_SYSPROP,"true");
- // TODO: randomizing STAT_FIELD to be multiValued=true blocked by SOLR-11706
- // STAT_FIELD = random().nextBoolean() ? "stat_i1" : "stat_i";
+ STAT_FIELD = random().nextBoolean() ? "stat_is" : "stat_i"; // chosen at random per instance; presumably multi-valued vs single-valued int field -- TODO confirm against the test schema
for (String stat : ALL_STATS) {
- ALL_STATS_JSON += stat + ":'" + stat + "(" + STAT_FIELD + ")',";
+ String val = stat.equals("percentile")? STAT_FIELD+",90": STAT_FIELD; // percentile gets an extra ",90" argument after the field name
+ ALL_STATS_JSON += stat + ":'" + stat + "(" + val + ")',"; // appends one  name:'name(args)',  entry per stat in ALL_STATS
}
}
@@ -232,6 +233,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(101L, bucket.get("countvals"));
assertEquals(0L, bucket.get("missing"));
assertEquals(48.0D, bucket.get("sum"));
+ assertEquals(1.0D, bucket.get("percentile"));
assertEquals(0.475247524752475D, (double) bucket.get("avg"), 0.1E-7);
assertEquals(54.0D, (double) bucket.get("sumsq"), 0.1E-7);
// assertEquals(0.55846323792D, bucket.getStddev(), 0.1E-7); // TODO: SOLR-11725
@@ -391,6 +393,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(300L, aaa0_Bucket.get("countvals"));
assertEquals(0L, aaa0_Bucket.get("missing"));
assertEquals(34650.0D, aaa0_Bucket.get("sum"));
+ assertEquals(483.70000000000016D, (double)aaa0_Bucket.get("percentile"), 0.1E-7);
assertEquals(115.5D, (double) aaa0_Bucket.get("avg"), 0.1E-7);
assertEquals(1.674585E7D, (double) aaa0_Bucket.get("sumsq"), 0.1E-7);
// assertEquals(206.4493184076D, (double) aaa0_Bucket.get("stddev"), 0.1E-7); // TODO: SOLR-11725
@@ -403,6 +406,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(0L, tail_Bucket.get("min"));
assertEquals(44L, tail_Bucket.get("max"));
assertEquals(90L, tail_Bucket.get("countvals"));
+ assertEquals(40.0D, tail_Bucket.get("percentile"));
assertEquals(45L, tail_Bucket.get("missing"));
assertEquals(1980.0D, tail_Bucket.get("sum"));
assertEquals(22.0D, (double) tail_Bucket.get("avg"), 0.1E-7);
@@ -419,6 +423,7 @@ public class DistributedFacetSimpleRefinementLongTailTest extends BaseDistribute
assertEquals(35L, tailB_Bucket.get("min"));
assertEquals(40L, tailB_Bucket.get("max"));
assertEquals(12L, tailB_Bucket.get("countvals"));
+ assertEquals(39.9D, tailB_Bucket.get("percentile"));
assertEquals(5L, tailB_Bucket.get("missing"));
assertEquals(450.0D, tailB_Bucket.get("sum"));
assertEquals(37.5D, (double) tailB_Bucket.get("avg"), 0.1E-7);
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 9f0f7bf..06c13be 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -217,14 +217,22 @@ public class TestJsonFacets extends SolrTestCaseHS {
public void indexSimple(Client client) throws Exception {
client.deleteByQuery("*:*", null);
- client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s", "one"), null);
- client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5", "val_b", "false"), null);
+ client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2",
+ "num_is", "4", "num_is", "2", // num_is is passed twice: two values for this doc
+ "val_b", "true", "sparse_s", "one"), null);
+ client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ", "num_d", "-9", "num_i", "-5",
+ "num_is", "-9", "num_is", "-5", // two num_is values
+ "val_b", "false"), null);
client.add(sdoc("id", "3"), null);
client.commit();
- client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3"), null);
- client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7", "sparse_s", "two"),null);
+ client.add(sdoc("id", "4", "cat_s", "A", "where_s", "NJ", "num_d", "2", "num_i", "3",
+ "num_is", "2", "num_is", "3"), null); // two num_is values
+ client.add(sdoc("id", "5", "cat_s", "B", "where_s", "NJ", "num_d", "11", "num_i", "7",
+ "num_is", "11", "num_is", "7", // two num_is values
+ "sparse_s", "two"),null);
client.commit();
- client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5"),null);
+ client.add(sdoc("id", "6", "cat_s", "B", "where_s", "NY", "num_d", "-5", "num_i", "-5",
+ "num_is", "-5"),null); // only a single num_is value for this doc
client.commit();
}
@@ -874,12 +882,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
Client client = Client.localClient();
indexSimple(client);
+ assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'sum(num_is)'}")
+ , "facets=={count:6 , x:10.0}" // 4+2-9-5+2+3+11+7-5: every num_is value added by indexSimple
+ );
+ assertJQ(req("q", "*:*", "rows", "0", "json.facet", "{x:'min(num_is)'}")
+ , "facets=={count:6 , x:-9}" // smallest num_is value added by indexSimple
+ );
+
// test multiple json.facet commands
assertJQ(req("q", "*:*", "rows", "0"
- , "json.facet", "{x:'sum(num_d)'}"
- , "json.facet", "{y:'min(num_d)'}"
+ , "json.facet", "{x:'sum(num_d)'}"
+ , "json.facet", "{y:'min(num_d)'}"
+ , "json.facet", "{z:'min(num_is)'}"
)
- , "facets=={count:6 , x:3.0, y:-9.0 }"
+ , "facets=={count:6 , x:3.0, y:-9.0, z:-9 }"
);
@@ -922,10 +938,11 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test nested streaming with stats under streaming
assertJQ(req("q", "*:*", "rows", "0"
- , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)'} }}} }}}"
+ , "json.facet", "{ cat:{terms:{field:'cat_s', method:stream,sort:'index asc', facet:{ where:{terms:{field:where_s,method:stream,sort:'index asc',sort:'index asc', facet:{x:'max(num_d)', y:'sum(num_is)'} }}} }}}"
)
, "facets=={count:6 " +
- ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0},{val:NY,count:1,x:4.0}]} },{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0},{val:NY,count:1,x:-5.0}]} }]}"
+ ", cat :{buckets:[{val:A, count:2, where:{buckets:[{val:NJ,count:1,x:2.0,y:5.0},{val:NY,count:1,x:4.0,y:6.0}]} }," +
+ "{val:B, count:3, where:{buckets:[{val:NJ,count:2,x:11.0,y:4.0},{val:NY,count:1,x:-5.0,y:-5.0}]} }]}"
+ "}"
);
@@ -1340,7 +1357,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
", f2:{ 'buckets':[{ val:'A', count:2, n1:2}, { val:'B', count:3, n1:0 }]} }"
);
- // test sorting by missing stat with domain query
+ // test sorting by countvals stat with domain query
client.testJQ(params(p, "q", "-id:*"
, "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'countvals(field(${sparse_num_d}))'} }}" +
" , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'countvals(field(${sparse_num_d}))'} }} }"
@@ -1779,8 +1796,6 @@ public class TestJsonFacets extends SolrTestCaseHS {
);
}
-
-
// stats at top level
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
@@ -1799,6 +1814,73 @@ public class TestJsonFacets extends SolrTestCaseHS {
"}"
);
+ // stats at top level on multi-valued fields
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{ sum1:'sum(${num_fs})', sumsq1:'sumsq(${num_fs})', avg1:'avg(${num_fs})', mind:'min(${num_fs})', maxd:'max(${num_fs})'" +
+ ", mini:'min(${num_is})', maxi:'max(${num_is})', mins:'min(${multi_ss})', maxs:'max(${multi_ss})'" +
+ ", stddev:'stddev(${num_fs})', variance:'variance(${num_fs})', median:'percentile(${num_fs}, 50)'" +
+ ", perc:'percentile(${num_fs}, 0,75,100)'" +
+ " }"
+ )
+ , "facets=={ 'count':6, " +
+ "sum1:0.0, sumsq1:51.5, avg1:0.0, mind:-5.0, maxd:3.0" +
+ ", mini:-5, maxi:3, mins:'a', maxs:'b'" +
+ ", stddev:2.537222891273055, variance:6.4375, median:0.0, perc:[-5.0,2.25,3.0]" +
+ "}"
+ );
+
+ // test sorting by multi-valued
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'avg(${num_is})'} }}" +
+ " , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'avg(${num_is})'} }} }"
+ )
+ , "facets=={ 'count':6, " +
+ " f1:{ 'buckets':[{ val:'B', count:3, n1: 0.25}, { val:'A', count:2, n1:0.0}]}" +
+ ", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:0.25 }]} }"
+ );
+
+ // test sorting by percentile
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', sort:'n1 asc', facet:{n1:'percentile(${num_is}, 50)'} }}" +
+ " , f2:{terms:{${terms} field:'${cat_s}', sort:'n1 desc', facet:{n1:'percentile(${num_is}, 50)'} }} }"
+ )
+ , "facets=={ 'count':6, " +
+ " f1:{ 'buckets':[{ val:'B', count:3, n1: -0.50}, { val:'A', count:2, n1:1.0}]}" +
+ ", f2:{ 'buckets':[{ val:'A', count:2, n1:1.0}, { val:'B', count:3, n1:-0.50 }]} }"
+ );
+
+ // test sorting by multi-valued field with domain query
+ client.testJQ(params(p, "q", "-id:*"
+ , "json.facet", "{f1:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 desc', facet:{n1:'sum(${num_is})'} }}" +
+ " , f2:{terms:{${terms} field:'${cat_s}', domain:{query:'*:*'}, sort:'n1 asc', facet:{n1:'sum(${num_is})'} }} }"
+ )
+ , "facets=={ 'count':0, " +
+ " f1:{ 'buckets':[{ val:'B', count:3, n1:1.0 }, { val:'A', count:2, n1:0.0}]}" +
+ ", f2:{ 'buckets':[{ val:'A', count:2, n1:0.0}, { val:'B', count:3, n1:1.0 }]} }"
+ );
+
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
+ "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
+ "facet:{n1:'min(${multi_ss})'}}}}}}}"
+ )
+ , "facets=={ 'count':6, " +
+ " f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'a'},{val:'NY', count:1, n1:'a'}]} }," +
+ " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
+ "}"
+ );
+
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", " {f1:{terms:{${terms}, field:'${cat_s}', " +
+ "facet:{f2:{terms:{${terms}, field:${where_s}, sort:'index asc', " +
+ "facet:{n1:'max(${multi_ss})'}}}}}}}"
+ )
+ , "facets=={ 'count':6, " +
+ " f1:{ 'buckets':[{ val:'B', count:3, f2:{'buckets':[{val:'NJ', count:2, n1:'b'},{val:'NY', count:1, n1:'b'}]} }," +
+ " { val:'A', count:2, f2:{'buckets':[{val:'NJ', count:1, n1:'b'},{val:'NY', count:1}]}}]}" +
+ "}"
+ );
+
// stats at top level, no matches
client.testJQ(params(p, "q", "id:DOESNOTEXIST"
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +