You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2014/09/08 18:34:24 UTC
svn commit: r1623429 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/handler/component/FacetComponent.java
core/src/test/org/apache/solr/TestDistributedSearch.java
Author: erick
Date: Mon Sep 8 16:34:23 2014
New Revision: 1623429
URL: http://svn.apache.org/r1623429
Log:
SOLR-6187: facet.mincount ignored in range faceting using distributed search
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Sep 8 16:34:23 2014
@@ -179,6 +179,10 @@ Bug Fixes
* SOLR-6467: bin/solr script should direct stdout/stderr when starting in the background
to the solr-PORT-console.log in the logs directory instead of bin. (Timothy Potter)
+* SOLR-6187: SOLR-6154: facet.mincount ignored in range faceting using distributed search
+ NOTE: This is NOT fixed for the (deprecated) facet.date idiom; use facet.range
+ instead. (Erick Erickson, Zacchio Bagnati, Ronald Matamoros, Vamsee Yalargadda)
+
Other Changes
---------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java Mon Sep 8 16:34:23 2014
@@ -135,7 +135,7 @@ public class FacetComponent extends Sear
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
-
+
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
List<String> distribFieldFacetRefinements = null;
@@ -164,7 +164,7 @@ public class FacetComponent extends Sear
if (distribFieldFacetRefinements == null) {
distribFieldFacetRefinements = new ArrayList<>();
}
-
+
distribFieldFacetRefinements.add(facetCommand);
distribFieldFacetRefinements.add(termsKey);
distribFieldFacetRefinements.add(termsVal);
@@ -175,7 +175,7 @@ public class FacetComponent extends Sear
if (distribFieldFacetRefinements == null
&& !pivotFacetRefinementRequestsExistForShard) {
- // nothing to refine, short circut out
+ // nothing to refine, short circuit out
continue;
}
@@ -214,22 +214,22 @@ public class FacetComponent extends Sear
shardsRefineRequest.params.set(FacetParams.FACET, "true");
shardsRefineRequest.params.remove(FacetParams.FACET_FIELD);
shardsRefineRequest.params.remove(FacetParams.FACET_QUERY);
-
+
for (int i = 0; i < distribFieldFacetRefinements.size();) {
String facetCommand = distribFieldFacetRefinements.get(i++);
String termsKey = distribFieldFacetRefinements.get(i++);
String termsVal = distribFieldFacetRefinements.get(i++);
-
+
shardsRefineRequest.params.add(FacetParams.FACET_FIELD,
facetCommand);
shardsRefineRequest.params.set(termsKey, termsVal);
}
}
-
+
if (newRequest) {
rb.addRequest(this, shardsRefineRequest);
}
-
+
// PivotFacetAdditions
if (pivotFacetRefinementRequestsExistForShard) {
if (newRequest) {
@@ -314,6 +314,8 @@ public class FacetComponent extends Sear
}
modifyRequestForFieldFacets(rb, sreq, fi);
+
+ modifyRequestForRangeFacets(sreq, fi);
modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
@@ -326,7 +328,24 @@ public class FacetComponent extends Sear
// we could optionally remove faceting params
}
}
-
+
+ /**
+  * Forces a per-field mincount of 0 for every range facet on an outgoing shard request.
+  * <p>
+  * We must get all the range buckets back from each shard in order to have coherent
+  * lists to merge at the end, see SOLR-6154.
+  * <p>
+  * NOTE(review): the raw {@code facet.range} value is used as the field name when
+  * building the {@code f.<field>.facet.mincount} key; values carrying local params
+  * would not produce a matching key -- confirm whether that case matters here.
+  *
+  * @param sreq the shard request whose params are modified in place
+  * @param fi   not read by this method; kept so the signature matches its call site
+  */
+ private void modifyRequestForRangeFacets(ShardRequest sreq, FacetInfo fi) {
+   // Look the range fields up once; null means no range faceting was requested.
+   final String[] rangeFields = sreq.params.getParams(FacetParams.FACET_RANGE);
+   if (rangeFields == null) {
+     return;
+   }
+
+   for (String field : rangeFields) {
+     // set() replaces whatever per-field mincount the request carried.
+     sreq.params.set("f." + field + ".facet.mincount", "0");
+   }
+ }
+
private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
for (DistribFieldFacet dff : fi.facets.values()) {
@@ -372,7 +391,7 @@ public class FacetComponent extends Sear
dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
}
}
-
+
// Currently this is for testing only and allows overriding of the
// facet.limit set to the shards
dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
@@ -517,7 +536,7 @@ public class FacetComponent extends Sear
dff.add(shardNum, (NamedList) facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
-
+
// Distributed facet_dates
doDistribDates(fi, facet_counts);
@@ -546,14 +565,14 @@ public class FacetComponent extends Sear
for (DistribFieldFacet dff : fi.facets.values()) {
// no need to check these facets for refinement
if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue;
-
+
// only other case where index-sort doesn't need refinement is if minCount==0
if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
-
+
@SuppressWarnings("unchecked") // generic array's are annoying
List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
dff._toRefine = tmp;
-
+
ShardFacetCount[] counts = dff.getCountSorted();
int ntop = Math.min(counts.length,
dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
@@ -562,7 +581,7 @@ public class FacetComponent extends Sear
for (int i = 0; i < counts.length; i++) {
ShardFacetCount sfc = counts[i];
boolean needRefinement = false;
-
+
if (i < ntop) {
// automatically flag the top values for refinement
// this should always be true for facet.sort=index
@@ -586,14 +605,14 @@ public class FacetComponent extends Sear
needRefinement = true;
}
}
-
+
if (needRefinement) {
// add a query for each shard missing the term that needs refinement
for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
FixedBitSet fbs = dff.counted[shardNum];
// fbs can be null if a shard request failed
- if (fbs != null &&
- (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
+ if (fbs != null &&
+ (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
dff.maxPossible(sfc, shardNum) > 0) {
dff.needRefinements = true;
@@ -607,6 +626,100 @@ public class FacetComponent extends Sear
}
}
}
+ removeFieldFacetsUnderLimits(rb);
+ removeRangeFacetsUnderLimits(rb);
+ removeQueryFacetsUnderLimits(rb);
+
+ }
+
+ /**
+  * Drops merged facet.query entries whose count is below the request-level
+  * {@code facet.mincount} (read with a default of 0).
+  * <p>
+  * Does nothing unless {@code rb.stage} is {@code STAGE_EXECUTE_QUERY} and query
+  * facets are present. The facet map on the {@code FacetInfo} is only swapped for
+  * the filtered copy when at least one entry was actually removed.
+  *
+  * @param rb the response builder holding the merged facet info and the request
+  */
+ private void removeQueryFacetsUnderLimits(ResponseBuilder rb) {
+   if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY || rb._facetInfo.queryFacets == null) {
+     return;
+   }
+   final FacetInfo facetInfo = rb._facetInfo;
+   final Map<String, QueryFacet> merged = facetInfo.queryFacets;
+   final LinkedHashMap<String, QueryFacet> kept = new LinkedHashMap<>();
+
+   // Query facets only honour the global mincount; there is no per-field lookup here.
+   final int threshold = rb.req.getParams().getInt(FacetParams.FACET_MINCOUNT, 0);
+   for (Map.Entry<String, QueryFacet> candidate : merged.entrySet()) {
+     final QueryFacet facet = candidate.getValue();
+     if (facet.count < threshold) {
+       log.trace("Removing facetQuery/key: " + candidate.getKey() + "/" + facet.toString() + " mincount=" + threshold);
+       continue;
+     }
+     kept.put(candidate.getKey(), facet);
+   }
+
+   // A size difference means something was filtered out, so publish the smaller map.
+   if (kept.size() != merged.size()) {
+     facetInfo.queryFacets = kept;
+   }
+ }
+
+ /**
+  * Filters the merged range facets in place, removing every bucket from a range's
+  * {@code "counts"} list whose count is below the mincount resolved by
+  * {@code getFieldInt(field, FacetParams.FACET_MINCOUNT, 0)}.
+  * <p>
+  * Does nothing unless {@code rb.stage} is {@code STAGE_EXECUTE_QUERY} and range
+  * facets are present. Ranges whose resolved mincount is 0, or that carry no
+  * {@code "counts"} entry, are left untouched.
+  *
+  * @param rb the response builder holding the merged facet info and the request
+  */
+ private void removeRangeFacetsUnderLimits(ResponseBuilder rb) {
+   if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
+     return;
+   }
+
+   FacetInfo fi = rb._facetInfo;
+
+   @SuppressWarnings("unchecked")
+   SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
+       (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+           fi.rangeFacets;
+
+   if (facet_ranges == null) {
+     return;
+   }
+
+   // go through each facet_range
+   for (Map.Entry<String, SimpleOrderedMap<Object>> entry : facet_ranges) {
+     final String field = entry.getKey();
+     int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
+     if (minCount == 0) {
+       continue;
+     }
+
+     // Use the entry's own value rather than looking the range up again by name.
+     @SuppressWarnings("unchecked")
+     NamedList<Integer> vals
+         = (NamedList<Integer>) entry.getValue().get("counts");
+     if (vals == null) {
+       continue; // nothing to filter for this range
+     }
+
+     boolean replace = false;
+     NamedList<Integer> newList = new NamedList<>();
+     for (Map.Entry<String, Integer> pair : vals) {
+       if (pair.getValue() >= minCount) {
+         newList.add(pair.getKey(), pair.getValue());
+       } else {
+         log.trace("Removing facet/key: " + pair.getKey() + "/" + pair.getValue().toString() + " mincount=" + minCount);
+         replace = true;
+       }
+     }
+     // Rewrite the shared list only when a bucket was actually dropped.
+     if (replace) {
+       vals.clear();
+       vals.addAll(newList);
+     }
+   }
+ }
+ /**
+  * Asks each distributed field facet to discard the counts that fall below the
+  * mincount resolved by {@code getFieldInt(key, FacetParams.FACET_MINCOUNT, 0)}.
+  * <p>
+  * Does nothing unless {@code rb.stage} is {@code STAGE_DONE} and field facets are
+  * present. Facets whose resolved mincount is 0 keep all of their counts.
+  *
+  * @param rb the response builder holding the merged facet info and the request
+  */
+ private void removeFieldFacetsUnderLimits(ResponseBuilder rb) {
+   if (rb.stage != ResponseBuilder.STAGE_DONE || rb._facetInfo.facets == null) {
+     return;
+   }
+
+   for (Entry<String, DistribFieldFacet> facetEntry : rb._facetInfo.facets.entrySet()) {
+     final int threshold =
+         rb.req.getParams().getFieldInt(facetEntry.getKey(), FacetParams.FACET_MINCOUNT, 0);
+     // A threshold of 0 means "return them all", so only filter for non-zero values.
+     if (threshold != 0) {
+       facetEntry.getValue().respectMinCount(threshold);
+     }
+   }
}
// The implementation below uses the first encountered shard's
@@ -763,7 +876,7 @@ public class FacetComponent extends Sear
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
-
+
for (ShardResponse srsp : sreq.responses) {
// int shardNum = rb.getShardNum(srsp.shard);
NamedList facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
@@ -775,7 +888,7 @@ public class FacetComponent extends Sear
String key = facet_fields.getName(i);
DistribFieldFacet dff = fi.facets.get(key);
if (dff == null) continue;
-
+
NamedList shardCounts = (NamedList) facet_fields.getVal(i);
for (int j = 0; j < shardCounts.size(); j++) {
@@ -1179,7 +1292,7 @@ public class FacetComponent extends Sear
int numReceived = sz;
FixedBitSet terms = new FixedBitSet(termNum + sz);
-
+
long last = 0;
for (int i = 0; i < sz; i++) {
String name = shardCounts.getName(i);
@@ -1248,6 +1361,22 @@ public class FacetComponent extends Sear
// TODO: could store the last term in the shard to tell if this term
// comes before or after it. If it comes before, we could subtract 1
}
+
+ /**
+  * Discards every entry of {@code counts} whose count is below {@code minCount}.
+  * <p>
+  * The surviving entries are gathered into a fresh map, which replaces
+  * {@code counts} only if at least one entry was dropped.
+  *
+  * @param minCount the smallest count an entry may have and still be kept
+  */
+ public void respectMinCount(long minCount) {
+   final HashMap<String, ShardFacetCount> survivors = new HashMap<>();
+   for (Map.Entry<String, ShardFacetCount> candidate : counts.entrySet()) {
+     final ShardFacetCount sfc = candidate.getValue();
+     if (sfc.count < minCount) {
+       log.trace("Removing facet/key: " + candidate.getKey() + "/" + sfc.toString() + " mincount=" + minCount);
+     } else {
+       survivors.put(candidate.getKey(), sfc);
+     }
+   }
+   // Fewer survivors than originals means something was filtered out.
+   if (survivors.size() < counts.size()) {
+     counts = survivors;
+   }
+ }
}
/**
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Mon Sep 8 16:34:23 2014
@@ -28,7 +28,9 @@ import org.apache.solr.client.solrj.Solr
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.cloud.ChaosMonkey;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
@@ -66,14 +68,14 @@ public class TestDistributedSearch exten
del("*:*");
- indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
+ indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
tdate_b, "2009-08-20T11:00:00Z",
"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
- indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
+ indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
tdate_a, "2010-05-02T11:00:00Z",
tdate_b, "2009-11-02T11:00:00Z");
- indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
+ indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
tdate_a, "2010-05-03T11:00:00Z");
indexr(id,4, i1, -100 ,tlong, 101,
t1,"the quick fox jumped over the lazy dog",
@@ -175,13 +177,13 @@ public class TestDistributedSearch exten
// a facet query to test out chars out of the ascii range
query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333");
-
+
// simple field facet on date fields
- rsp = query("q","*:*", "rows", 0,
+ rsp = query("q","*:*", "rows", 0,
"facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
"facet.field", tdate_a);
assertEquals(1, rsp.getFacetFields().size());
- rsp = query("q","*:*", "rows", 0,
+ rsp = query("q","*:*", "rows", 0,
"facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
"facet.field", tdate_b, "facet.field", tdate_a);
assertEquals(2, rsp.getFacetFields().size());
@@ -225,7 +227,85 @@ public class TestDistributedSearch exten
"facet.range.start",200,
"facet.range.gap",100,
"f."+tlong+".facet.range.end",900);
-
+
+ // Test mincounts. We do NOT want to go through all the validateControlData checking done in the query() method.
+ // Purposely packing a _bunch_ of stuff together here to ensure that the proper level of mincount is used for
+ // each
+ ModifiableSolrParams minParams = new ModifiableSolrParams();
+ minParams.set("q","*:*");
+ minParams.set("rows", 1);
+ minParams.set("facet", "true");
+ minParams.set("facet.missing", "true");
+ minParams.set("facet.field", i1);
+ minParams.set("facet.missing", "true");
+ minParams.set("facet.mincount", 2);
+
+ // Return a separate section of ranges over i1. Should respect global range mincount
+ minParams.set("facet.range", i1);
+ minParams.set("f." + i1 + ".facet.range.start", 0);
+ minParams.set("f." + i1 + ".facet.range.gap", 200);
+ minParams.set("f." + i1 + ".facet.range.end", 1200);
+ minParams.set("f." + i1 + ".facet.mincount", 4);
+
+
+ // Return a separate section of ranges over tlong. Should respect facet.mincount
+ minParams.add("facet.range", tlong);
+ minParams.set("f." + tlong + ".facet.range.start", 0);
+ minParams.set("f." + tlong + ".facet.range.gap", 100);
+ minParams.set("f." + tlong + ".facet.range.end", 1200);
+ // Repeat with a range type of date
+ minParams.add("facet.range", tdate_b);
+ minParams.set("f." + tdate_b + ".facet.range.start", "2009-02-01T00:00:00Z");
+ minParams.set("f." + tdate_b + ".facet.range.gap", "+1YEAR");
+ minParams.set("f." + tdate_b + ".facet.range.end", "2011-01-01T00:00:00Z");
+ minParams.set("f." + tdate_b + ".facet.mincount", 3);
+
+ // Ensure that global mincount is respected for facet queries
+ minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"); // Should return some counts
+ //minParams.set("facet.query", tdate_a + ":[* TO *]"); // Should be removed
+ minParams.add("facet.query", tdate_b + ":[2008-01-01T00:00:00Z TO 2009-09-01T00:00:00Z]"); // Should be removed from response
+
+
+ setDistributedParams(minParams);
+ QueryResponse minResp = queryServer(minParams);
+
+ ModifiableSolrParams eParams = new ModifiableSolrParams();
+ eParams.set("q",tdate_b + ":[* TO *]");
+ eParams.set("rows", 1000);
+ eParams.set("fl", tdate_b);
+ setDistributedParams(eParams);
+ QueryResponse eResp = queryServer(eParams);
+
+ // Check that exactly the right numbers of counts came through
+ assertEquals("Should be exactly 2 range facets returned after minCounts taken into account ", 3, minResp.getFacetRanges().size());
+ assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
+
+ checkMinCountsField(minResp.getFacetField(i1).getValues(), new Object[]{null, 55L}); // Should just be the null entries for field
+
+ checkMinCountsRange(minResp.getFacetRanges().get(0).getCounts(), new Object[]{"0", 5L}); // range on i1
+ checkMinCountsRange(minResp.getFacetRanges().get(1).getCounts(), new Object[]{"0", 3L, "100", 3L}); // range on tlong
+ checkMinCountsRange(minResp.getFacetRanges().get(2).getCounts(), new Object[]{"2009-02-01T00:00:00Z", 3L}); // date (range) on tvh
+
+ assertTrue("Should have a facet for tdate_a", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"));
+ int qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]");
+ assertEquals("tdate_a should be 5", qCount, 5);
+
+ // Now let's do some queries, the above is getting too complex
+ minParams = new ModifiableSolrParams();
+ minParams.set("q","*:*");
+ minParams.set("rows", 1);
+ minParams.set("facet", "true");
+ minParams.set("facet.mincount", 3);
+
+ minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
+ minParams.add("facet.query", tdate_b + ":[2009-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"); // Should be removed
+ setDistributedParams(minParams);
+ minResp = queryServer(minParams);
+
+ assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
+ assertTrue("Should be an entry for a_n_tdt", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]"));
+ qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
+ assertEquals("a_n_tdt should have a count of 4 ", qCount, 4);
// variations of fl
query("q","*:*", "fl","score","sort",i1 + " desc");
query("q","*:*", "fl",i1 + ",score","sort",i1 + " desc");
@@ -454,7 +534,33 @@ public class TestDistributedSearch exten
}
}
}
-
+
+ /**
+  * Asserts that the returned field facet counts match the expected (name, count)
+  * pairs exactly, in order.
+  *
+  * @param counts the field facet counts from the response
+  * @param pairs  flattened expectations: a name (cast to {@code String}, may be
+  *               {@code null}) followed by its count (cast to {@code long}), repeated
+  */
+ protected void checkMinCountsField(List<FacetField.Count> counts, Object[] pairs) {
+   // Validate the expectation array before deriving a pair count from it.
+   assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
+   // Expected value goes first so a failure reports expected/actual the right way round.
+   assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), pairs.length / 2, counts.size());
+   for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
+     String act_name = counts.get(counts_idx).getName();
+     long act_count = counts.get(counts_idx).getCount();
+     String exp_name = (String) pairs[pairs_idx];
+     long exp_count = (long) pairs[pairs_idx + 1];
+     assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, exp_name, act_name);
+     assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, exp_count, act_count);
+   }
+ }
+
+ /**
+  * Asserts that the returned range facet counts match the expected (value, count)
+  * pairs exactly, in order.
+  *
+  * @param counts the range facet counts from the response
+  * @param pairs  flattened expectations: a range value (cast to {@code String})
+  *               followed by its count (cast to {@code long}), repeated
+  */
+ protected void checkMinCountsRange(List<RangeFacet.Count> counts, Object[] pairs) {
+   // Validate the expectation array before deriving a pair count from it.
+   assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
+   // Expected value goes first so a failure reports expected/actual the right way round.
+   assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), pairs.length / 2, counts.size());
+   for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
+     String act_name = counts.get(counts_idx).getValue();
+     long act_count = counts.get(counts_idx).getCount();
+     String exp_name = (String) pairs[pairs_idx];
+     long exp_count = (long) pairs[pairs_idx + 1];
+     assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + act_name, exp_name, act_name);
+     assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + act_count, exp_count, act_count);
+   }
+ }
+
protected void queryPartialResults(final List<String> upShards,
final List<SolrServer> upClients,
Object... q) throws Exception {