You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2017/12/06 01:15:27 UTC

lucene-solr:master: SOLR-11664: fix range facet issues with sub-aggregations on string fields, adds resetIterator to SlotAcc

Repository: lucene-solr
Updated Branches:
  refs/heads/master 2c14b9141 -> e84cce8ea


SOLR-11664: fix range facet issues with sub-aggregations on string fields, adds resetIterator to SlotAcc


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e84cce8e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e84cce8e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e84cce8e

Branch: refs/heads/master
Commit: e84cce8ea15d28aa1d261441bebdef48b8baf9a6
Parents: 2c14b91
Author: yonik <yo...@apache.org>
Authored: Tue Dec 5 20:14:57 2017 -0500
Committer: yonik <yo...@apache.org>
Committed: Tue Dec 5 20:14:57 2017 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 +++
 .../org/apache/solr/search/facet/HLLAgg.java    |  2 ++
 .../org/apache/solr/search/facet/MinMaxAgg.java |  8 ++----
 .../org/apache/solr/search/facet/SlotAcc.java   | 22 ++++++++++++++-
 .../solr/search/facet/UniqueMultiDvSlotAcc.java |  6 +---
 .../search/facet/UniqueSinglevaluedSlotAcc.java | 11 +++-----
 .../apache/solr/search/facet/UniqueSlotAcc.java |  6 ----
 .../org/apache/solr/search/facet/DebugAgg.java  |  5 ++++
 .../solr/search/facet/TestJsonFacets.java       | 29 +++++++++++++++++++-
 9 files changed, 68 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2f09b53..a6e146c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -208,6 +208,10 @@ Bug Fixes
 * SOLR-11590: Synchronize ZK connect/disconnect handling so that they are processed in linear order
   (noble, Varun Thacker)
 
+* SOLR-11664: JSON Facet API: range facets containing unique, hll, min, max aggregations over string fields
+  produced incorrect results since 7.0 (Volodymyr Rudniev, yonik)
+
+
 Optimizations
 ----------------------
 * SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
index 85964f7..897dceb 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
@@ -199,6 +199,7 @@ public class HLLAgg extends StrAggValueSource {
 
     @Override
     public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
       values = DocValues.getNumeric(readerContext.reader(),  sf.getName());
     }
 
@@ -224,6 +225,7 @@ public class HLLAgg extends StrAggValueSource {
 
     @Override
     public void setNextReader(LeafReaderContext readerContext) throws IOException {
+      super.setNextReader(readerContext);
       values = DocValues.getSortedNumeric(readerContext.reader(),  sf.getName());
     }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
index 008d0fd..ac8bf0b 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
@@ -303,8 +303,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
     }
 
     @Override
-    public void reset() throws IOException {
-      super.reset();
+    public void resetIterators() throws IOException {
+      super.resetIterators();
       topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
       if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
         ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
@@ -322,13 +322,11 @@ public class MinMaxAgg extends SimpleAggValueSource {
 
     @Override
     public void setNextReader(LeafReaderContext readerContext) throws IOException {
-      if (topLevel == null) {
-        reset();
-      }
       super.setNextReader(readerContext);
       if (subDvs != null) {
         subDv = subDvs[readerContext.ord];
         toGlobal = ordMap.getGlobalOrds(readerContext.ord);
+        assert toGlobal != null;
       } else {
         assert readerContext.ord==0 || topLevel.getValueCount() == 0;
         subDv = topLevel;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
index 578ef17..9165799 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
@@ -43,12 +43,27 @@ import org.apache.solr.search.SolrIndexSearcher;
 public abstract class SlotAcc implements Closeable {
   String key; // todo...
   protected final FacetContext fcontext;
+  protected LeafReaderContext currentReaderContext;
+  protected int currentDocBase;
 
   public SlotAcc(FacetContext fcontext) {
     this.fcontext = fcontext;
   }
 
-  public void setNextReader(LeafReaderContext readerContext) throws IOException {}
+  /**
+   * NOTE: this currently detects when it is being reused and calls resetIterators by comparing reader ords
+   * with previous calls to setNextReader.  For this reason, current users must call setNextReader
+   * in segment order.  Failure to do so will cause worse performance.
+   */
+  public void setNextReader(LeafReaderContext readerContext) throws IOException {
+    LeafReaderContext lastReaderContext = currentReaderContext;
+    currentReaderContext = readerContext;
+    currentDocBase = currentReaderContext.docBase;
+    if (lastReaderContext == null || lastReaderContext.ord >= currentReaderContext.ord) {
+      // if we went backwards (or reused) a reader, we need to reset iterators that can be used only once.
+      resetIterators();
+    }
+  }
 
   public abstract void collect(int doc, int slot) throws IOException;
 
@@ -96,8 +111,12 @@ public abstract class SlotAcc implements Closeable {
     }
   }
 
+  /** Called to reset the acc to a fresh state, ready for reuse */
   public abstract void reset() throws IOException;
 
+  /** Typically called from setNextReader to reset docValue iterators */
+  protected void resetIterators() throws IOException {};
+
   public abstract void resize(Resizer resizer);
 
   @Override
@@ -208,6 +227,7 @@ abstract class FuncSlotAcc extends SlotAcc {
 
   @Override
   public void setNextReader(LeafReaderContext readerContext) throws IOException {
+    super.setNextReader(readerContext);
     values = valueSource.getValues(fcontext.qcontext, readerContext);
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java
index 0a6eb22..02d457f 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java
@@ -40,8 +40,7 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
   }
 
   @Override
-  public void reset() throws IOException {
-    super.reset();
+  public void resetIterators() throws IOException {
     topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, field, null);
     nTerms = (int) topLevel.getValueCount();
     if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
@@ -60,9 +59,6 @@ class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
 
   @Override
   public void setNextReader(LeafReaderContext readerContext) throws IOException {
-    if (topLevel == null) {
-      reset();
-    }
     super.setNextReader(readerContext);
     if (subDvs != null) {
       subDv = subDvs[readerContext.ord];

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
index 434e680..ca261ea 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java
@@ -38,13 +38,11 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
 
   public UniqueSinglevaluedSlotAcc(FacetContext fcontext, SchemaField field, int numSlots, HLLAgg.HLLFactory factory) throws IOException {
     super(fcontext, field, numSlots, factory);
-    // let setNextReader lazily call reset(), that way an extra call to reset() after creation won't matter
   }
 
   @Override
-  public void reset() throws IOException {
-    super.reset();
-    SolrIndexSearcher searcher = fcontext.qcontext.searcher();
+  public void resetIterators() throws IOException {
+    super.resetIterators();
     topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, field, null);
     nTerms = topLevel.getValueCount();
     if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
@@ -63,17 +61,16 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
 
   @Override
   public void setNextReader(LeafReaderContext readerContext) throws IOException {
-    if (topLevel == null) {
-      reset();
-    }
     super.setNextReader(readerContext);
     if (subDvs != null) {
       subDv = subDvs[readerContext.ord];
       toGlobal = ordMap.getGlobalOrds(readerContext.ord);
+      assert toGlobal != null;
     } else {
       assert readerContext.ord==0 || topLevel.getValueCount() == 0;
       subDv = topLevel;
     }
+    assert subDv.docID() == -1; // make sure we haven't used this iterator before
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
index 607b067..ca95d8f 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
@@ -33,7 +33,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
   HLLAgg.HLLFactory factory;
   SchemaField field;
   FixedBitSet[] arr;
-  int currentDocBase;
   int[] counts;  // populated with the cardinality once
   int nTerms;
 
@@ -54,11 +53,6 @@ abstract class UniqueSlotAcc extends SlotAcc {
   }
 
   @Override
-  public void setNextReader(LeafReaderContext readerContext) throws IOException {
-    currentDocBase = readerContext.docBase;
-  }
-
-  @Override
   public Object getValue(int slot) throws IOException {
     if (fcontext.isShard()) {
       return getShardValue(slot);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
index d1be4fe..8eed4c7 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
@@ -109,6 +109,11 @@ public class DebugAgg extends AggValueSource {
     }
 
     @Override
+    protected void resetIterators() throws IOException {
+      sub.resetIterators();
+    }
+
+    @Override
     public void resize(Resizer resizer) {
       resizes.addAndGet(1);
       this.numSlots = resizer.getNewSize();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e84cce8e/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index a1c8cf7..bf709cf 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -591,6 +591,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
     iclient.commit();
     client.commit();
 
+
+
+
     // test for presence of debugging info
     ModifiableSolrParams debugP = params(p);
     debugP.set("debugQuery","true");
@@ -729,7 +732,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
 
     // Same thing for dates
     // test min/max of string field
-    if (date.equals("date_dt") || date.equals("date_dtd")) {  // supports only single valued currently...
+    if (date.equals("date_dt") || date.equals("date_dtd")) {  // supports only single valued currently... see SOLR-11706
       client.testJQ(params(p, "q", "*:*"
           , "json.facet", "{" +
               " f3:{${terms}  type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" +
@@ -1037,6 +1040,30 @@ public class TestJsonFacets extends SolrTestCaseHS {
             " } }"
     );
 
+    // range facet with stats on string fields
+    client.testJQ(params(p, "q", "*:*"
+        , "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all,   facet:{ wn:'unique(${where_s})',wh:'hll(${where_s})'     }   }}"
+        )
+        , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wn:1,wh:1}, {val:0.0,count:2,wn:2,wh:2}, {val:5.0,count:0}]" +
+            " ,before:{count:1,wn:1,wh:1}" +
+            " ,after:{count:1,wn:1,wh:1} " +
+            " ,between:{count:3,wn:2,wh:2} " +
+            " } }"
+    );
+
+    if (where_s.equals("where_s") || where_s.equals("where_sd")) {  // min/max only supports only single valued currently... see SOLR-11706
+      client.testJQ(params(p, "q", "*:*"
+          , "json.facet", "{f:{type:range, field:${num_d}, start:-5, end:10, gap:5, other:all,   facet:{ wmin:'min(${where_s})', wmax:'max(${where_s})'    }   }}"
+          )
+          , "facets=={count:6, f:{buckets:[ {val:-5.0,count:1,wmin:NY,wmax:NY}, {val:0.0,count:2,wmin:NJ,wmax:NY}, {val:5.0,count:0}]" +
+              " ,before:{count:1,wmin:NJ,wmax:NJ}" +
+              " ,after:{count:1,wmin:NJ,wmax:NJ} " +
+              " ,between:{count:3,wmin:NJ,wmax:NY} " +
+              " } }"
+      );
+    }
+
+
 
     // stats at top level
     client.testJQ(params(p, "q", "*:*"