You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/04/22 19:23:56 UTC
svn commit: r1675428 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/CHANGES.txt solr/core/
solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
Author: yonik
Date: Wed Apr 22 17:23:55 2015
New Revision: 1675428
URL: http://svn.apache.org/r1675428
Log:
SOLR-7417: implement unique() for numeric fields
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed Apr 22 17:23:55 2015
@@ -56,6 +56,9 @@ New Features
* SOLR-7176: zkcli script can perform the CLUSTERPROP command without a running Solr cluster
(Hrishikesh Gadre, Per Steffensen, Noble Paul)
+* SOLR-7417: JSON Facet API - unique() is now implemented for numeric and date fields.
+ (yonik)
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java Wed Apr 22 17:23:55 2015
@@ -18,10 +18,18 @@ package org.apache.solr.search.facet;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
@@ -45,7 +53,11 @@ public class UniqueAgg extends StrAggVal
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots);
}
} else {
- return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots);
+ if (sf.getType().getNumericType() != null) {
+ return new NumericAcc(fcontext, getArg(), numSlots);
+ } else {
+ return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots);
+ }
}
}
@@ -86,4 +98,163 @@ public class UniqueAgg extends StrAggVal
}
};
}
+
+
+ /**
+  * Minimal open-addressing hash set of primitive longs using linear probing.
+  * The value 0 marks an empty slot in {@code vals}, so a collected 0 cannot be stored
+  * in the table and is tracked separately through {@code zeroCount}.
+  */
+ static class LongSet {
+
+   static final float LOAD_FACTOR = 0.7f;
+
+   long[] vals;       // backing table; a 0 entry means "empty slot"
+   int cardinality;   // number of distinct non-zero values stored in vals
+   int mask;          // vals.length - 1, used to wrap slot indexes
+   int threshold;     // add() rehashes once cardinality reaches this
+   int zeroCount;     // 1 if a 0 was collected
+
+   /**
+    * @param sz initial table size; must be a positive power of two
+    * @throws IllegalArgumentException if sz is not a positive power of two
+    */
+   LongSet(int sz) {
+     // Wrapping with "& mask" only reaches every slot when the size is a power of two;
+     // any other size leaves slots unreachable and can make add() probe forever.
+     if (sz <= 0 || (sz & (sz - 1)) != 0) {
+       throw new IllegalArgumentException("sz must be a positive power of two: " + sz);
+     }
+     vals = new long[sz];
+     mask = sz - 1;
+     threshold = (int) (sz * LOAD_FACTOR);
+   }
+
+   /** Adds val to the set; adding a value that is already present is a no-op. */
+   void add(long val) {
+     if (val == 0) {
+       // 0 is the empty-slot marker, so it is remembered out of band
+       zeroCount = 1;
+       return;
+     }
+     if (cardinality >= threshold) {
+       rehash();
+     }
+
+     // For floats: exponent bits start at bit 23 for single precision,
+     // and bit 52 for double precision.
+     // Many values will only have significant bits just to the right of that,
+     // and the leftmost bits will all be zero.
+
+     // For now, let's just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
+     // The upper bits of our hash will be irrelevant.
+     int h = (int) (val + (val >>> 44) + (val >>> 15));
+     // linear probe until we hit an empty slot or find the value itself
+     for (int slot = h & mask; ;slot = (slot + 1) & mask) {
+       long v = vals[slot];
+       if (v == 0) {
+         vals[slot] = val;
+         cardinality++;
+         break;
+       } else if (v == val) {
+         // val is already in the set
+         break;
+       }
+     }
+   }
+
+   /** Doubles the table and re-inserts every stored (non-zero) value. */
+   private void rehash() {
+     long[] oldVals = vals;
+     int newCapacity = vals.length << 1;
+     vals = new long[newCapacity];
+     mask = newCapacity - 1;
+     threshold = (int) (newCapacity * LOAD_FACTOR);
+     // add() below counts each re-inserted value again
+     cardinality = 0;
+
+     for (long val : oldVals) {
+       if (val != 0) {
+         add(val);
+       }
+     }
+   }
+
+   /** @return the number of distinct values collected, including 0 if it was seen */
+   int cardinality() {
+     return cardinality + zeroCount;
+   }
+ }
+
+
+ /**
+  * Slot accumulator that counts the distinct values of a numeric field,
+  * keeping one {@link LongSet} per slot (allocated when the slot first collects a value).
+  */
+ class NumericAcc extends SlotAcc {
+   SchemaField sf;            // schema entry of the field being counted
+   LongSet[] sets;            // per-slot distinct values; null until the slot collects something
+   NumericDocValues values;   // values for the current segment, set in setNextReader
+   Bits exists;               // result of DocValues.getDocsWithField for the current segment
+
+   /**
+    * @param fcontext facet context; its searcher's schema is used to resolve the field
+    * @param field name of the field to count distinct values of
+    * @param numSlots number of slots to track
+    */
+   public NumericAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
+     super(fcontext);
+     sf = fcontext.searcher.getSchema().getField(field);
+     sets = new LongSet[numSlots];
+   }
+
+   /** Discards everything collected so far while keeping the same number of slots. */
+   @Override
+   public void reset() {
+     sets = new LongSet[sets.length];
+   }
+
+   @Override
+   public void setNextReader(LeafReaderContext readerContext) throws IOException {
+     values = DocValues.getNumeric(readerContext.reader(), sf.getName());
+     exists = DocValues.getDocsWithField(readerContext.reader(), sf.getName());
+   }
+
+   @Override
+   public void collect(int doc, int slot) throws IOException {
+     long val = values.get(doc);
+     // A 0 is only collected when the exists bit is set for the doc
+     // (presumably 0 is also what values returns for docs without a value).
+     if (val == 0 && !exists.get(doc)) {
+       return;
+     }
+
+     LongSet set = sets[slot];
+     if (set == null) {
+       set = sets[slot] = new LongSet(16);
+     }
+     // TODO: could handle 0s at this level too
+     set.add(val);
+   }
+
+   /** Returns the shard-level map when running as a shard request, otherwise the distinct count. */
+   @Override
+   public Object getValue(int slot) throws IOException {
+     if (fcontext.isShard()) {
+       return getShardValue(slot);
+     }
+     return getCardinality(slot);
+   }
+
+   /** Distinct count for the slot; 0 if nothing was collected into it. */
+   private int getCardinality(int slot) {
+     LongSet set = sets[slot];
+     return set==null ? 0 : set.cardinality();
+   }
+
+   /**
+    * Builds the per-shard result: a map with "unique" (the local distinct count) and,
+    * when that count does not exceed maxExplicit, "vals" listing the distinct values.
+    */
+   public Object getShardValue(int slot) throws IOException {
+     LongSet set = sets[slot];
+     int unique = getCardinality(slot);
+
+     SimpleOrderedMap<Object> map = new SimpleOrderedMap<Object>();
+     map.add("unique", unique);
+
+     int maxExplicit=100;
+     // TODO: make configurable
+     // TODO: share values across buckets
+     if (unique <= maxExplicit) {
+       List<Long> lst = new ArrayList<Long>(unique);
+       if (set != null) {
+         if (set.zeroCount > 0) {
+           // 0L (not 0) so every element of the list is a Long, like the values added below
+           lst.add(0L);
+         }
+         for (long val : set.vals) {
+           // 0 entries in the table are empty slots, not values
+           if (val != 0) {
+             lst.add(val);
+           }
+         }
+       }
+
+       map.add("vals", lst);
+     }
+
+     return map;
+   }
+
+
+   /** Orders slots by their distinct count. */
+   @Override
+   public int compare(int slotA, int slotB) {
+     return Integer.compare(getCardinality(slotA), getCardinality(slotB));
+   }
+
+ }
+
+
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Wed Apr 22 17:23:55 2015
@@ -611,15 +611,15 @@ public class TestJsonFacets extends Solr
// stats at top level
client.testJQ(params(p, "q", "*:*"
- , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+ , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
)
, "facets=={ 'count':6, " +
- "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2, med:2.0, perc:[-9.0,2.0,11.0] }"
+ "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5, med:2.0, perc:[-9.0,2.0,11.0] }"
);
// stats at top level, no matches
client.testJQ(params(p, "q", "id:DOESNOTEXIST"
- , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+ , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
)
, "facets=={count:0 " +
"/* ,sum1:0.0, sumsq1:0.0, avg1:0.0, min1:'NaN', max1:'NaN', numwhere:0 */ }"