You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/10/24 18:24:52 UTC
lucene-solr:master: SOLR-9654: add overrequest param to JSON Facet API
Repository: lucene-solr
Updated Branches:
refs/heads/master 37871de29 -> 4a8516375
SOLR-9654: add overrequest param to JSON Facet API
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4a851637
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4a851637
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4a851637
Branch: refs/heads/master
Commit: 4a85163754e16b466cb4ef3dd0de92fe7d5b87d1
Parents: 37871de
Author: yonik <yo...@apache.org>
Authored: Mon Oct 24 14:23:12 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Mon Oct 24 14:23:12 2016 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 3 ++
.../apache/solr/search/facet/FacetField.java | 1 +
.../solr/search/facet/FacetFieldMerger.java | 8 ++---
.../solr/search/facet/FacetFieldProcessor.java | 22 +++++++++---
.../apache/solr/search/facet/FacetRequest.java | 1 +
.../solr/search/facet/TestJsonFacets.java | 37 ++++++++++++++++++++
6 files changed, 63 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3bb28c4..4355b80 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -157,6 +157,9 @@ New Features
* SOLR-9662: New parameter -u <user:pass> in bin/post to pass basicauth credentials (janhoy)
+* SOLR-9654: Add "overrequest" parameter to JSON Facet API to control amount of overrequest
+ on a distributed terms facet. (yonik)
+
Bug Fixes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index 3f8cb0b..c2cf0c2 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -29,6 +29,7 @@ import org.apache.solr.schema.SchemaField;
abstract class FacetRequestSorted extends FacetRequest {
long offset;
long limit;
+ int overrequest = -1; // Number of buckets to request beyond the limit to do internally during distributed search. -1 means default.
long mincount;
String sortVariable;
SortDirection sortDirection;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
index 432e1a7..9f99919 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
@@ -110,11 +110,11 @@ public class FacetFieldMerger extends FacetRequestSortedMerger<FacetField> {
sortBuckets();
- int first = (int)freq.offset;
- int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
- int last = Math.min(sortedBuckets.size(), end);
+ long first = freq.offset;
+ long end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
+ long last = Math.min(sortedBuckets.size(), end);
- List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (last - first)));
+ List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (int)(last - first)));
/** this only works if there are no filters (like mincount)
for (int i=first; i<last; i++) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
index 3c1a40c..bbc782c 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
@@ -212,12 +212,24 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
}
final int off = fcontext.isShard() ? 0 : (int) freq.offset;
- // add a modest amount of over-request if this is a shard request
- final int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
+
+ long effectiveLimit = Integer.MAX_VALUE; // use max-int instead of max-long to avoid overflow
+ if (freq.limit >= 0) {
+ effectiveLimit = freq.limit;
+ if (fcontext.isShard()) {
+ // add over-request if this is a shard request
+ if (freq.overrequest == -1) {
+ effectiveLimit = (long) (effectiveLimit*1.1+4); // default: add 10% plus 4 (to overrequest for very small limits)
+ } else {
+ effectiveLimit += freq.overrequest;
+ }
+ }
+ }
+
final int sortMul = freq.sortDirection.getMultiplier();
- int maxTopVals = (int) (lim >= 0 ? (long) off + lim : Integer.MAX_VALUE - 1);
+ int maxTopVals = (int) (effectiveLimit >= 0 ? Math.min(off + effectiveLimit, Integer.MAX_VALUE - 1) : Integer.MAX_VALUE - 1);
maxTopVals = Math.min(maxTopVals, slotCardinality);
final SlotAcc sortAcc = this.sortAcc, indexOrderAcc = this.indexOrderAcc;
final BiPredicate<Slot,Slot> orderPredicate;
@@ -258,7 +270,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
bottom.slot = slotNum;
bottom = queue.updateTop();
}
- } else if (lim > 0) {
+ } else if (effectiveLimit > 0) {
// queue not full
Slot s = new Slot();
s.slot = slotNum;
@@ -304,7 +316,7 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
// if we are deep paging, we don't have to order the highest "offset" counts.
int collectCount = Math.max(0, queue.size() - off);
- assert collectCount <= lim;
+ assert collectCount <= effectiveLimit;
int[] sortedSlots = new int[collectCount];
for (int i = collectCount - 1; i >= 0; i--) {
sortedSlots[i] = queue.pop().slot;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index 76d7d2a..40ca686 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -586,6 +586,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
facet.field = getField(m);
facet.offset = getLong(m, "offset", facet.offset);
facet.limit = getLong(m, "limit", facet.limit);
+ facet.overrequest = (int) getLong(m, "overrequest", facet.overrequest);
if (facet.limit == 0) facet.offset = 0; // normalize. an offset with a limit of non-zero isn't useful.
facet.mincount = getLong(m, "mincount", facet.mincount);
facet.missing = getBoolean(m, "missing", facet.missing);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4a851637/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index c83d308..0ec0be4 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -1147,6 +1147,43 @@ public class TestJsonFacets extends SolrTestCaseHS {
);
+
+ if (!client.local()) {
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ "cat0:{type:terms, field:${cat_s}, limit:1, overrequest:0}" +
+ ",cat1:{type:terms, field:${cat_s}, limit:1, overrequest:1}" +
+ ",catDef:{type:terms, field:${cat_s}, limit:1, overrequest:-1}" + // -1 is default overrequest
+ ",catBig:{type:terms, field:${cat_s}, offset:1, limit:2147483647, overrequest:2147483647}" + // make sure overflows don't mess us up
+ "}"
+ )
+ , "facets=={ count:6" +
+ ", cat0:{ buckets:[ {val:A,count:2} ] }" + // with no overrequest, we incorrectly conclude that A is the top bucket
+ ", cat1:{ buckets:[ {val:B,count:3} ] }" +
+ ", catDef:{ buckets:[ {val:B,count:3} ] }" +
+ ", catBig:{ buckets:[ {val:A,count:2} ] }" +
+ "}"
+ );
+ } else {
+ // In non-distrib mode, should still be able to specify overrequest, but it shouldn't matter.
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ "cat0:{type:terms, field:${cat_s}, limit:1, overrequest:0}" +
+ ",cat1:{type:terms, field:${cat_s}, limit:1, overrequest:1}" +
+ ",catDef:{type:terms, field:${cat_s}, limit:1, overrequest:-1}" + // -1 is default overrequest
+ ",catBig:{type:terms, field:${cat_s}, offset:1, limit:2147483647, overrequest:2147483647}" + // make sure overflows don't mess us up
+ "}"
+ )
+ , "facets=={ count:6" +
+ ", cat0:{ buckets:[ {val:B,count:3} ] }" + // only change from distrib-mode test above
+ ", cat1:{ buckets:[ {val:B,count:3} ] }" +
+ ", catDef:{ buckets:[ {val:B,count:3} ] }" +
+ ", catBig:{ buckets:[ {val:A,count:2} ] }" +
+ "}"
+ );
+ }
+
+
}