You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/04/22 19:23:56 UTC

svn commit: r1675428 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/CHANGES.txt solr/core/ solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java

Author: yonik
Date: Wed Apr 22 17:23:55 2015
New Revision: 1675428

URL: http://svn.apache.org/r1675428
Log:
SOLR-7417: implement unique() for numeric fields

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed Apr 22 17:23:55 2015
@@ -56,6 +56,9 @@ New Features
 * SOLR-7176: zkcli script can perfrom the CLUSTERPROP command without a running Solr cluster
   (Hrishikesh Gadre, Per Steffensen, Noble Paul)
 
+* SOLR-7417: JSON Facet API - unique() is now implemented for numeric and date fields.
+  (yonik)
+
 
 Bug Fixes
 ----------------------

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java Wed Apr 22 17:23:55 2015
@@ -18,10 +18,18 @@ package org.apache.solr.search.facet;
  */
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 
@@ -45,7 +53,11 @@ public class UniqueAgg extends StrAggVal
         return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots);
       }
     } else {
-      return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots);
+      if (sf.getType().getNumericType() != null) {
+        return new NumericAcc(fcontext, getArg(), numSlots);
+      } else {
+        return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots);
+      }
     }
   }
 
@@ -86,4 +98,163 @@ public class UniqueAgg extends StrAggVal
       }
     };
   }
+
+
+  /** Open-addressing (linear probing) set of longs; 0 marks an empty slot, so a collected 0 is tracked in zeroCount. */
+  static class LongSet {
+    static final float LOAD_FACTOR = 0.7f;
+
+    long[] vals;      // probe table; a 0 entry means the slot is unused
+    int cardinality;  // number of non-zero values stored in vals
+    int mask;         // vals.length - 1; wraps slot indexes
+    int threshold;    // add() grows the table before inserting once cardinality has reached this
+    int zeroCount;    // 1 if a 0 was collected
+
+    /** @param sz initial table size; must be a positive power of two, else IllegalArgumentException is thrown */
+    LongSet(int sz) {
+      // Any other size leaves slots unreachable through "& mask", so add() could probe forever.
+      if (sz <= 0 || (sz & (sz - 1)) != 0) {
+        throw new IllegalArgumentException("sz must be a positive power of two: " + sz);
+      }
+      vals = new long[sz];
+      mask = sz - 1;
+      threshold = (int) (sz * LOAD_FACTOR);
+    }
+
+    /** Adds val to the set; a value that is already present is left as is. */
+    void add(long val) {
+      if (val == 0) {
+        zeroCount = 1;
+        return;
+      }
+      if (cardinality >= threshold) {
+        rehash();
+      }
+      // For floats: exponent bits start at bit 23 for single precision, and bit 52 for double precision.
+      // Many values will only have significant bits just to the right of that, with the leftmost bits all zero.
+      // For now, let's just get the first 8 significant mantissa bits of a double or float into the low hash bits.
+      int h = (int) (val + (val >>> 44) + (val >>> 15));
+      for (int slot = h & mask; ; slot = (slot + 1) & mask) {
+        long v = vals[slot];
+        if (v == 0) {
+          vals[slot] = val;
+          cardinality++;
+          break;
+        } else if (v == val) {
+          break;  // val is already in the set
+        }
+      }
+    }
+
+    /** Doubles the table and re-inserts every stored value. */
+    private void rehash() {
+      long[] oldVals = vals;
+      int newCapacity = vals.length << 1;
+      vals = new long[newCapacity];
+      mask = newCapacity - 1;
+      threshold = (int) (newCapacity * LOAD_FACTOR);
+      cardinality = 0;
+      for (long val : oldVals) {
+        if (val != 0) {
+          add(val);
+        }
+      }
+    }
+
+    /** Returns the number of distinct values added, counting 0 if it was collected. */
+    int cardinality() {
+      return cardinality + zeroCount;
+    }
+  }
+
+
+  /** Per-slot distinct-value counter for a numeric docvalues field; each slot lazily gets its own LongSet. */
+  class NumericAcc extends SlotAcc {
+    SchemaField sf;
+    LongSet[] sets;           // indexed by slot; an entry stays null until that slot collects a value
+    NumericDocValues values;  // assigned per segment in setNextReader
+    Bits exists;              // assigned per segment in setNextReader
+
+    public NumericAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
+      super(fcontext);
+      sf = fcontext.searcher.getSchema().getField(field);
+      sets = new LongSet[numSlots];
+    }
+
+    @Override
+    public void reset() {
+      sets = new LongSet[sets.length];  // drop every per-slot set, keep the slot count
+    }
+
+    @Override
+    public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      String fieldName = sf.getName();
+      values = DocValues.getNumeric(readerContext.reader(), fieldName);
+      exists = DocValues.getDocsWithField(readerContext.reader(), fieldName);
+    }
+
+    /** Adds the value of doc to the set of slot; a 0 is only counted when exists reports a value for doc. */
+    @Override
+    public void collect(int doc, int slot) throws IOException {
+      long val = values.get(doc);
+      boolean missing = val == 0 && !exists.get(doc);
+      if (!missing) {
+        LongSet target = sets[slot];
+        if (target == null) {
+          target = new LongSet(16);
+          sets[slot] = target;
+        }
+        // TODO: could handle 0s at this level too
+        target.add(val);
+      }
+    }
+
+    /** Returns the shard-level map when fcontext.isShard(), otherwise the boxed cardinality of the slot. */
+    @Override
+    public Object getValue(int slot) throws IOException {
+      return fcontext.isShard() ? getShardValue(slot) : (Object) getCardinality(slot);
+    }
+
+    private int getCardinality(int slot) {
+      LongSet set = sets[slot];
+      if (set == null) {
+        return 0;
+      }
+      return set.cardinality();
+    }
+
+    /** Builds a map holding "unique" (the count) and, when the count is at most 100, "vals" (the values). */
+    public Object getShardValue(int slot) throws IOException {
+      final int maxExplicit = 100;  // TODO: make configurable
+      LongSet set = sets[slot];
+      int unique = getCardinality(slot);
+      SimpleOrderedMap<Object> map = new SimpleOrderedMap<Object>();
+      map.add("unique", unique);
+      if (unique > maxExplicit) {
+        return map;  // too many values to list explicitly
+      }
+
+      // TODO: share values across buckets
+      List<Object> lst = new ArrayList<Object>(unique);
+      if (set != null) {
+        if (set.zeroCount > 0) {
+          lst.add(0);
+        }
+        for (long val : set.vals) {
+          if (val != 0) {
+            lst.add(val);
+          }
+        }
+      }
+      map.add("vals", lst);
+      return map;
+    }
+
+    @Override
+    public int compare(int slotA, int slotB) {
+      return getCardinality(slotA) - getCardinality(slotB);  // orders slots by distinct count
+    }
+  }
+
+
 }

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1675428&r1=1675427&r2=1675428&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Wed Apr 22 17:23:55 2015
@@ -611,15 +611,15 @@ public class TestJsonFacets extends Solr
 
     // stats at top level
     client.testJQ(params(p, "q", "*:*"
-            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})',  med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
         )
         , "facets=={ 'count':6, " +
-            "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2, med:2.0, perc:[-9.0,2.0,11.0]  }"
+            "sum1:3.0, sumsq1:247.0, avg1:0.5, min1:-9.0, max1:11.0, numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5, med:2.0, perc:[-9.0,2.0,11.0]  }"
     );
 
     // stats at top level, no matches
     client.testJQ(params(p, "q", "id:DOESNOTEXIST"
-            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+            , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})', numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})',  med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
         )
         , "facets=={count:0 " +
             "/* ,sum1:0.0, sumsq1:0.0, avg1:0.0, min1:'NaN', max1:'NaN', numwhere:0 */ }"