You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2018/11/05 18:56:39 UTC
[21/26] lucene-solr:jira/solr-12730: SOLR-12954: fix facet.pivot
refinement bugs when using facet.sort=index and facet.mincount>1
SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c5ff4a44
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c5ff4a44
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c5ff4a44
Branch: refs/heads/jira/solr-12730
Commit: c5ff4a4444fe95001bc1baf29f64fb2406fd2ca3
Parents: 31d7dfe
Author: Chris Hostetter <ho...@apache.org>
Authored: Fri Nov 2 20:32:32 2018 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Fri Nov 2 20:32:32 2018 -0700
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../solr/handler/component/FacetComponent.java | 3 +-
.../PivotFacetFieldValueCollection.java | 34 ++++++--
.../DistributedFacetPivotLargeTest.java | 88 ++++++++++++++++++++
4 files changed, 119 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c5ff4a44/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 6f8c449..90cce5b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -233,6 +233,8 @@ Bug Fixes
* SOLR-12875: fix ArrayIndexOutOfBoundsException when unique(field) or uniqueBlock(_root_) is
used with DVHASH method in json.facet. (Tim Underwood via Mikhail Khludnev)
+* SOLR-12954: fix facet.pivot refinement bugs when using facet.sort=index and facet.mincount>1 (hossman)
+
Improvements
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c5ff4a44/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
index 01f8875..e01c958 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@@ -650,7 +650,7 @@ public class FacetComponent extends SearchComponent {
(fieldToOverRequest, FacetParams.FACET_SORT, defaultSort);
int shardLimit = requestedLimit + offset;
- int shardMinCount = requestedMinCount;
+ int shardMinCount = Math.min(requestedMinCount, 1);
// per-shard mincount & overrequest
if ( FacetParams.FACET_SORT_INDEX.equals(sort) &&
@@ -670,7 +670,6 @@ public class FacetComponent extends SearchComponent {
if ( 0 < requestedLimit ) {
shardLimit = doOverRequestMath(shardLimit, overRequestRatio, overRequestCount);
}
- shardMinCount = Math.min(requestedMinCount, 1);
}
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, shardLimit);
sreq.params.set(paramStart + FacetParams.FACET_PIVOT_MINCOUNT, shardMinCount);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c5ff4a44/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java
index 5c2b07f..d9aeaea 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/PivotFacetFieldValueCollection.java
@@ -118,14 +118,36 @@ public class PivotFacetFieldValueCollection implements Iterable<PivotFacetValue>
*/
public List<PivotFacetValue> getNextLevelValuesToRefine() {
final int numRefinableValues = getExplicitValuesListSize();
- if (facetFieldOffset < numRefinableValues) {
- final int offsetPlusCount = (facetFieldLimit >= 0)
- ? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
- : numRefinableValues;
- return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
- } else {
+ if (numRefinableValues < facetFieldOffset) {
return Collections.<PivotFacetValue>emptyList();
}
+
+ final int offsetPlusCount = (facetFieldLimit >= 0)
+ ? Math.min(facetFieldLimit + facetFieldOffset, numRefinableValues)
+ : numRefinableValues;
+
+ if (1 < facetFieldMinimumCount && facetFieldSort.equals(FacetParams.FACET_SORT_INDEX)) {
+ // we have to skip any values that (still) don't meet the mincount
+ //
+ // TODO: in theory we could avoid this extra check by trimming sooner (SOLR-6331)
+ // but since that's a destructive op that blows away the `valuesMap` which we (might?) still need
+ // (and pre-emptively skips the offsets) we're avoiding re-working that optimization
+ // for now until/unless someone gives it more careful thought...
+ final List<PivotFacetValue> results = new ArrayList<>(numRefinableValues);
+ for (PivotFacetValue pivotValue : explicitValues) {
+ if (pivotValue.getCount() >= facetFieldMinimumCount) {
+ results.add(pivotValue);
+ if (numRefinableValues <= results.size()) {
+ break;
+ }
+ }
+ }
+ return results;
+ }
+
+ // in the non "sort==count OR mincount==1" situation, we can just return the first N values
+ // because any viable candidate is already in the top N
+ return getExplicitValuesList().subList(facetFieldOffset, offsetPlusCount);
}
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c5ff4a44/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
index eb6f54d..cc31135 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedFacetPivotLargeTest.java
@@ -17,6 +17,7 @@
package org.apache.solr.handler.component;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Date;
import java.util.List;
@@ -264,6 +265,93 @@ public class DistributedFacetPivotLargeTest extends BaseDistributedSearchTestCas
}
}
+ // check of a single level pivot using sort=index w/mincount big enough
+ // to triggers per-shard mincount > num docs on one shard
+ // (beefed up test of same with nested pivot below)
+ for (int limit : Arrays.asList(4, 444444, -1)) {
+ SolrParams p = params("q", "*:*",
+ "rows", "0",
+ // skip place_s:Nplaceholder buckets
+ "fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
+ // skip company_t:compHolderN buckets from twoShard
+ "fq","-(+company_t:compHolder* +real_b:true)",
+ "facet","true",
+ "facet.pivot","place_s",
+ FacetParams.FACET_PIVOT_MINCOUNT, "50",
+ FacetParams.FACET_LIMIT, ""+limit,
+ "facet.sort", "index");
+ rsp = null;
+ try {
+ rsp = query( p );
+ assertPivot("place_s", "cardiff", 107, rsp.getFacetPivot().get("place_s").get(0));
+ // - zeroShard = 50 ... above per-shard min of 50/(numShards=4)
+ // - oneShard = 5 ... below per-shard min of 50/(numShards=4) .. should be refined
+ // - twoShard = 52 ... above per-shard min of 50/(numShards=4)
+ // = threeShard = 0 ... should be refined and still match nothing
+ } catch (AssertionError ae) {
+ throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
+ }
+ }
+
+ // test permutations of mincount & limit with sort=index
+ // (there is a per-shard optimization on mincount when sort=index is used)
+ for (int limit : Arrays.asList(4, 444444, -1)) {
+ SolrParams p = params("q", "*:*",
+ "rows", "0",
+ // skip place_s:Nplaceholder buckets
+ "fq","-hiredate_dt:\"2012-10-01T12:30:00Z\"",
+ // skip company_t:compHolderN buckets from twoShard
+ "fq","-(+company_t:compHolder* +real_b:true)",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ FacetParams.FACET_PIVOT_MINCOUNT, "50",
+ FacetParams.FACET_LIMIT, ""+limit,
+ "facet.sort", "index");
+ rsp = null;
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 107, firstPlace);
+ //
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
+ assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
+ } catch (AssertionError ae) {
+ throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
+ }
+ }
+
+ { // similar to the test above, but now force a restriction on the over request and allow
+ // terms that are early in index sort -- but don't meet the mincount overall -- to be considered
+ // in the first phase. (SOLR-12954)
+ SolrParams p = params("q", "*:*",
+ "rows", "0",
+ // skip company_t:compHolderN buckets from twoShard
+ "fq","-(+company_t:compHolder* +real_b:true)",
+ "facet","true",
+ "facet.pivot","place_s,company_t",
+ // the (50) Nplaceholder place_s values exist in 6 each on oneShard
+ FacetParams.FACET_PIVOT_MINCOUNT, ""+(6 * shardsArr.length),
+ FacetParams.FACET_LIMIT, "4",
+ "facet.sort", "index");
+ rsp = null;
+ try {
+ rsp = query( p );
+ pivots = rsp.getFacetPivot().get("place_s,company_t");
+ firstPlace = pivots.get(0);
+ assertPivot("place_s", "cardiff", 107, firstPlace);
+ //
+ assertPivot("company_t", "bbc", 101, firstPlace.getPivot().get(0));
+ assertPivot("company_t", "honda", 50, firstPlace.getPivot().get(1));
+ assertPivot("company_t", "microsoft", 56, firstPlace.getPivot().get(2));
+ assertPivot("company_t", "polecat", 52, firstPlace.getPivot().get(3));
+ } catch (AssertionError ae) {
+ throw new AssertionError(ae.getMessage() + ": " + p.toString() + " ==> " + rsp, ae);
+ }
+ }
+
// Pivot Faceting (combined wtih Field Faceting)
for (SolrParams facetParams :
// with and w/o an excluded fq