You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2017/06/07 00:57:46 UTC

lucene-solr:master: SOLR-7452: fix facet refinement for range queries

Repository: lucene-solr
Updated Branches:
  refs/heads/master a03c3369e -> a1692c160


SOLR-7452: fix facet refinement for range queries


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a1692c16
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a1692c16
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a1692c16

Branch: refs/heads/master
Commit: a1692c160ac24746c17eae8d6cc5d40771c38e61
Parents: a03c336
Author: yonik <yo...@apache.org>
Authored: Tue Jun 6 20:54:29 2017 -0400
Committer: yonik <yo...@apache.org>
Committed: Tue Jun 6 20:54:41 2017 -0400

----------------------------------------------------------------------
 .../apache/solr/search/facet/FacetRange.java    | 127 ++++++++++++++++++-
 .../org/apache/solr/search/facet/DebugAgg.java  |   3 +-
 .../search/facet/TestJsonFacetRefinement.java   |  29 ++++-
 3 files changed, 155 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a1692c16/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
index 682dc19..398fa63 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@@ -37,6 +37,8 @@ import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.util.DateMathParser;
 
+import static org.apache.solr.search.facet.FacetContext.SKIP_FACET;
+
 public class FacetRange extends FacetRequestSorted {
   String field;
   Object start;
@@ -203,6 +205,10 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
               "Unable to range facet on field:" + sf);
     }
 
+    if (fcontext.facetInfo != null) {
+      return refineFacets();
+    }
+
     createRangeList();
     return getRangeCountsIndexed();
   }
@@ -322,7 +328,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     }
 
     for (int idx = 0; idx<otherList.size(); idx++) {
-      // we dont' skip these buckets based on mincount
+      // we don't skip these buckets based on mincount
       Range range = otherList.get(idx);
       SimpleOrderedMap bucket = new SimpleOrderedMap();
       res.add(range.label.toString(), bucket);
@@ -583,4 +589,123 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     }
   }
 
+
+  // this refineFacets method is patterned after FacetFieldProcessor.refineFacets and should
+  // probably be merged when range facet becomes more like field facet in it's ability to sort and limit
+  protected SimpleOrderedMap<Object> refineFacets() throws IOException {
+    boolean skipThisFacet = (fcontext.flags & SKIP_FACET) != 0;
+
+    List leaves = FacetFieldProcessor.asList(fcontext.facetInfo.get("_l"));        // We have not seen this bucket: do full faceting for this bucket, including all sub-facets
+    List<List> skip = FacetFieldProcessor.asList(fcontext.facetInfo.get("_s"));    // We have seen this bucket, so skip stats on it, and skip sub-facets except for the specified sub-facets that should calculate specified buckets.
+    List<List> partial = FacetFieldProcessor.asList(fcontext.facetInfo.get("_p")); // We have not seen this bucket, do full faceting for this bucket, and most sub-facets... but some sub-facets are partial and should only visit specified buckets.
+
+    // currently, only _s should be present for range facets.  In the future, range facets will
+    // be more like field facets and will have the same refinement cases.  When that happens, we should try to unify the refinement code more
+    assert leaves.size() == 0;
+    assert partial.size() == 0;
+
+    // For leaf refinements, we do full faceting for each leaf bucket.  Any sub-facets of these buckets will be fully evaluated.  Because of this, we should never
+    // encounter leaf refinements that have sub-facets that return partial results.
+
+    SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
+    List<SimpleOrderedMap> bucketList = new ArrayList<>( leaves.size() + skip.size() + partial.size() );
+    res.add("buckets", bucketList);
+
+    // TODO: an alternate implementations can fill all accs at once
+    createAccs(-1, 1);
+
+    for (Object bucketVal : leaves) {
+      bucketList.add( refineBucket(bucketVal, false, null) );
+    }
+
+    for (List bucketAndFacetInfo : skip) {
+      assert bucketAndFacetInfo.size() == 2;
+      Object bucketVal = bucketAndFacetInfo.get(0);
+      Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
+
+      bucketList.add( refineBucket(bucketVal, true, facetInfo ) );
+    }
+
+    // The only difference between skip and missing is the value of "skip" passed to refineBucket
+    for (List bucketAndFacetInfo : partial) {
+      assert bucketAndFacetInfo.size() == 2;
+      Object bucketVal = bucketAndFacetInfo.get(0);
+      Map<String,Object> facetInfo = (Map<String, Object>) bucketAndFacetInfo.get(1);
+
+      bucketList.add( refineBucket(bucketVal, false, facetInfo ) );
+    }
+
+    /*** special buckets
+    if (freq.missing) {
+      Map<String,Object> bucketFacetInfo = (Map<String,Object>)fcontext.facetInfo.get("missing");
+
+      if (bucketFacetInfo != null || !skipThisFacet) {
+        SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
+        fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null, skipThisFacet, bucketFacetInfo);
+        res.add("missing", missingBucket);
+      }
+    }
+     **********/
+
+
+    // If there are just a couple of leaves, and if the domain is large, then
+    // going by term is likely the most efficient?
+    // If the domain is small, or if the number of leaves is large, then doing
+    // the normal collection method may be best.
+
+    return res;
+  }
+
+  private SimpleOrderedMap<Object> refineBucket(Object bucketVal, boolean skip, Map<String,Object> facetInfo) throws IOException {
+    // TODO: refactor this repeated code from above
+    Comparable start = calc.getValue(bucketVal.toString());
+    Comparable end = calc.getValue(freq.end.toString());
+    EnumSet<FacetParams.FacetRangeInclude> include = freq.include;
+
+    String gap = freq.gap.toString();
+
+    Comparable low = calc.getValue(bucketVal.toString());
+    Comparable high = calc.addGap(low, gap);
+    if (end.compareTo(high) < 0) {
+      if (freq.hardend) {
+        high = end;
+      } else {
+        end = high;
+      }
+    }
+    if (high.compareTo(low) < 0) {
+      throw new SolrException
+          (SolrException.ErrorCode.BAD_REQUEST,
+              "range facet infinite loop (is gap negative? did the math overflow?)");
+    }
+    if (high.compareTo(low) == 0) {
+      throw new SolrException
+          (SolrException.ErrorCode.BAD_REQUEST,
+              "range facet infinite loop: gap is either zero, or too small relative start/end and caused underflow: " + low + " + " + gap + " = " + high );
+    }
+
+    boolean incLower =
+        (include.contains(FacetParams.FacetRangeInclude.LOWER) ||
+            (include.contains(FacetParams.FacetRangeInclude.EDGE) &&
+                0 == low.compareTo(start)));
+    boolean incUpper =
+        (include.contains(FacetParams.FacetRangeInclude.UPPER) ||
+            (include.contains(FacetParams.FacetRangeInclude.EDGE) &&
+                0 == high.compareTo(end)));
+
+    Range range = new Range(low, low, high, incLower, incUpper);
+
+
+    // now refine this range
+
+    SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+    FieldType ft = sf.getType();
+    bucket.add("val", bucketVal);
+    // String internal = ft.toInternal( tobj.toString() );  // TODO - we need a better way to get from object to query...
+
+    Query domainQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
+    fillBucket(bucket, domainQ, null, skip, facetInfo);
+
+    return bucket;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a1692c16/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
index ad4fabf..b8dbf2a 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/DebugAgg.java
@@ -84,8 +84,7 @@ public class DebugAgg extends AggValueSource {
       this.numSlots = numSlots;
       creates.addAndGet(1);
       sub = new CountSlotArrAcc(fcontext, numSlots);
-
-      new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
+//      new RuntimeException("DEBUG Acc numSlots=" + numSlots).printStackTrace();
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a1692c16/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
index 9087d30..7cf1428 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java
@@ -22,6 +22,7 @@ import java.util.List;
 
 import org.apache.solr.JSONTestUtil;
 import org.apache.solr.SolrTestCaseHS;
+import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.SimpleOrderedMap;
@@ -32,7 +33,7 @@ import org.junit.Test;
 import org.noggit.JSONParser;
 import org.noggit.ObjectBuilder;
 
-
+@SolrTestCaseJ4.SuppressPointFields
 public class TestJsonFacetRefinement extends SolrTestCaseHS {
 
   private static SolrInstances servers;  // for distributed testing
@@ -314,6 +315,32 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS {
             "}"
     );
 
+    // basic refining test through/under a query facet
+    client.testJQ(params(p, "q", "*:*",
+        "json.facet", "{" +
+            "q1 : { type:query, q:'*:*', facet:{" +
+            "cat0:{${terms} type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" +
+            "}}" +
+            "}"
+        )
+        , "facets=={ count:8" +
+            ", q1:{ count:8, cat0:{ buckets:[ {val:A,count:4} ] }   }" +
+            "}"
+    );
+
+    // basic refining test through/under a range facet
+    client.testJQ(params(p, "q", "*:*",
+        "json.facet", "{" +
+            "r1 : { type:range, field:${num_d} start:-20, end:20, gap:40   , facet:{" +
+            "cat0:{${terms} type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" +
+            "}}" +
+            "}"
+        )
+        , "facets=={ count:8" +
+            ", r1:{ buckets:[{val:-20.0,count:8,  cat0:{buckets:[{val:A,count:4}]}  }]   }" +
+            "}"
+    );
+
     // test that basic stats work for refinement
     client.testJQ(params(p, "q", "*:*",
         "json.facet", "{" +