You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2017/01/05 22:15:13 UTC

lucene-solr:branch_6x: SOLR-9931: return 0 for hll on field with no values in bucket

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 6a5895c0e -> dd06a0b90


SOLR-9931: return 0 for hll on field with no values in bucket


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dd06a0b9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dd06a0b9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dd06a0b9

Branch: refs/heads/branch_6x
Commit: dd06a0b9041eb42dd308a51e6337bbbe4b3057fc
Parents: 6a5895c
Author: yonik <yo...@apache.org>
Authored: Thu Jan 5 17:02:24 2017 -0500
Committer: yonik <yo...@apache.org>
Committed: Thu Jan 5 17:02:51 2017 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +++
 .../org/apache/solr/search/facet/HLLAgg.java    | 13 +++++++---
 .../apache/solr/search/facet/UniqueSlotAcc.java |  2 +-
 .../solr/search/facet/TestJsonFacets.java       | 27 ++++++++++++++++++++
 4 files changed, 41 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dd06a0b9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8287d94..60a2836 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -274,6 +274,9 @@ Bug Fixes
 * SOLR-9917: JSON Facet API percentile function caused a NullPointerException in distributed mode when
   there were no values in a bucket from a shard. (yonik)
 
+* SOLR-9931: JSON Facet API hll (hyper-log-log) function returned 0 for non-empty buckets with no field values
+  in local mode, but nothing for distributed mode.  Both modes now return 0.  (yonik)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dd06a0b9/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
index 89e2386..c446fc9 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
@@ -28,6 +28,8 @@ import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 
 public class HLLAgg extends StrAggValueSource {
+  public static Integer NO_VALUES = 0;
+
   protected HLLFactory factory;
 
   public HLLAgg(String field) {
@@ -72,10 +74,15 @@ public class HLLAgg extends StrAggValueSource {
 
   private static class Merger extends FacetSortableMerger {
     HLL aggregate = null;
-    long answer = -1;
+    long answer = -1; // -1 means unset
 
     @Override
     public void merge(Object facetResult, Context mcontext) {
+      if (facetResult instanceof Number) {
+        assert NO_VALUES.equals(facetResult);
+        return;
+      }
+
       SimpleOrderedMap map = (SimpleOrderedMap)facetResult;
       byte[] serialized = ((byte[])map.get("hll"));
       HLL subHLL = HLL.fromBytes(serialized);
@@ -88,7 +95,7 @@ public class HLLAgg extends StrAggValueSource {
 
     private long getLong() {
       if (answer < 0) {
-        answer = aggregate.cardinality();
+        answer = aggregate == null ? 0 : aggregate.cardinality();
       }
       return answer;
     }
@@ -167,7 +174,7 @@ public class HLLAgg extends StrAggValueSource {
 
     public Object getShardValue(int slot) throws IOException {
       HLL hll = sets[slot];
-      if (hll == null) return null;
+      if (hll == null) return NO_VALUES;
       SimpleOrderedMap map = new SimpleOrderedMap();
       map.add("hll", hll.toBytes());
       // optionally use explicit values

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dd06a0b9/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
index 9f9e9b1..ae542ac 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
@@ -73,7 +73,7 @@ abstract class UniqueSlotAcc extends SlotAcc {
 
   private Object getShardHLL(int slot) throws IOException {
     FixedBitSet ords = arr[slot];
-    if (ords == null) return null;  // TODO: when we get to refinements, may be useful to return something???
+    if (ords == null) return HLLAgg.NO_VALUES;
 
     HLL hll = factory.getHLL();
     long maxOrd = ords.length();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dd06a0b9/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index d7e1cc0..d8f3ae5 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -870,6 +870,33 @@ public class TestJsonFacets extends SolrTestCaseHS {
             " }"
     );
 
+
+    // stats at top level, matching documents, but no values in the field
+    // NOTE: this represents the current state of what is returned, not the ultimate desired state.
+    client.testJQ(params(p, "q", "id:3"
+        , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', min1:'min(${num_d})', max1:'max(${num_d})'" +
+            ", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" +
+            ", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" +
+            ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+        )
+        , "facets=={count:1 " +
+            ",sum1:0.0," +
+            " sumsq1:0.0," +
+            " avg1:0.0," +   // TODO: undesirable. omit?
+            " min1:'NaN'," + // TODO: undesirable. omit?
+            " max1:'NaN'," +
+            " numwhere:0," +
+            " unique_num_i:0," +
+            " unique_num_d:0," +
+            " unique_date:0," +
+            " where_hll:0," +
+            " hll_num_i:0," +
+            " hll_num_d:0," +
+            " hll_date:0" +
+            " }"
+    );
+
+
     //
     // tests on a multi-valued field with actual multiple values, just to ensure that we are
     // using a multi-valued method for the rest of the tests when appropriate.