You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by er...@apache.org on 2014/09/08 18:34:24 UTC

svn commit: r1623429 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/handler/component/FacetComponent.java core/src/test/org/apache/solr/TestDistributedSearch.java

Author: erick
Date: Mon Sep  8 16:34:23 2014
New Revision: 1623429

URL: http://svn.apache.org/r1623429
Log:
SOLR-6187: facet.mincount ignored in range faceting using distributed search

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Sep  8 16:34:23 2014
@@ -179,6 +179,10 @@ Bug Fixes
 * SOLR-6467: bin/solr script should direct stdout/stderr when starting in the background
   to the solr-PORT-console.log in the logs directory instead of bin. (Timothy Potter)
 
+* SOLR-6187: SOLR-6154: facet.mincount ignored in range faceting using distributed search
+  NOTE: This is NOT fixed for the (deprecated) facet.date idiom; use facet.range
+  instead. (Erick Erickson, Zacchio Bagnati, Ronald Matamoros, Vamsee Yalargadda)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java Mon Sep  8 16:34:23 2014
@@ -135,7 +135,7 @@ public class FacetComponent extends Sear
       // only other required phase).
       // We do this in distributedProcess so we can look at all of the
       // requests in the outgoing queue at once.
-      
+
       for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
         List<String> distribFieldFacetRefinements = null;
         
@@ -164,7 +164,7 @@ public class FacetComponent extends Sear
           if (distribFieldFacetRefinements == null) {
             distribFieldFacetRefinements = new ArrayList<>();
           }
-          
+
           distribFieldFacetRefinements.add(facetCommand);
           distribFieldFacetRefinements.add(termsKey);
           distribFieldFacetRefinements.add(termsVal);
@@ -175,7 +175,7 @@ public class FacetComponent extends Sear
 
         if (distribFieldFacetRefinements == null
             && !pivotFacetRefinementRequestsExistForShard) {
-          // nothing to refine, short circut out
+          // nothing to refine, short circuit out
           continue;
         }
         
@@ -214,22 +214,22 @@ public class FacetComponent extends Sear
           shardsRefineRequest.params.set(FacetParams.FACET, "true");
           shardsRefineRequest.params.remove(FacetParams.FACET_FIELD);
           shardsRefineRequest.params.remove(FacetParams.FACET_QUERY);
-          
+
           for (int i = 0; i < distribFieldFacetRefinements.size();) {
             String facetCommand = distribFieldFacetRefinements.get(i++);
             String termsKey = distribFieldFacetRefinements.get(i++);
             String termsVal = distribFieldFacetRefinements.get(i++);
-            
+
             shardsRefineRequest.params.add(FacetParams.FACET_FIELD,
                 facetCommand);
             shardsRefineRequest.params.set(termsKey, termsVal);
           }
         }
-        
+
         if (newRequest) {
           rb.addRequest(this, shardsRefineRequest);
         }
-        
+
         // PivotFacetAdditions
         if (pivotFacetRefinementRequestsExistForShard) {
           if (newRequest) {
@@ -314,6 +314,8 @@ public class FacetComponent extends Sear
       }
       
       modifyRequestForFieldFacets(rb, sreq, fi);
+
+      modifyRequestForRangeFacets(sreq, fi);
       
       modifyRequestForPivotFacets(rb, sreq, fi.pivotFacets);
       
@@ -326,7 +328,24 @@ public class FacetComponent extends Sear
       // we could optionally remove faceting params
     }
   }
-  
+
+  // Shards must return every range bucket so the merged lists stay coherent at the end,
+  // see SOLR-6154. Forces f.<field>.facet.mincount=0 for each facet.range field on sreq.
+  private void modifyRequestForRangeFacets(ShardRequest sreq, FacetInfo fi) {
+    final String[] requestedRanges = sreq.params.getParams(FacetParams.FACET_RANGE);
+    if (requestedRanges == null) {
+      // no range faceting on this request, nothing to override
+      return;
+    }
+
+    for (String rangeField : requestedRanges) {
+      // drop whatever per-field mincount was set, then pin it to "0"
+      final String perFieldMinCount = "f." + rangeField + ".facet.mincount";
+      sreq.params.remove(perFieldMinCount);
+      sreq.params.add(perFieldMinCount, "0");
+    }
+  }
+
   private void modifyRequestForFieldFacets(ResponseBuilder rb, ShardRequest sreq, FacetInfo fi) {
     for (DistribFieldFacet dff : fi.facets.values()) {
       
@@ -372,7 +391,7 @@ public class FacetComponent extends Sear
           dff.initialMincount = (int) Math.ceil((double) dff.minCount / rb.slices.length);
         }
       }
-      
+
       // Currently this is for testing only and allows overriding of the
       // facet.limit set to the shards
       dff.initialLimit = rb.req.getParams().getInt("facet.shard.limit", dff.initialLimit);
@@ -517,7 +536,7 @@ public class FacetComponent extends Sear
           dff.add(shardNum, (NamedList) facet_fields.get(dff.getKey()), dff.initialLimit);
         }
       }
-      
+
       // Distributed facet_dates
       doDistribDates(fi, facet_counts);
 
@@ -546,14 +565,14 @@ public class FacetComponent extends Sear
     for (DistribFieldFacet dff : fi.facets.values()) {
       // no need to check these facets for refinement
       if (dff.initialLimit <= 0 && dff.initialMincount <= 1) continue;
-      
+
       // only other case where index-sort doesn't need refinement is if minCount==0
       if (dff.minCount <= 1 && dff.sort.equals(FacetParams.FACET_SORT_INDEX)) continue;
-      
+
       @SuppressWarnings("unchecked") // generic array's are annoying
       List<String>[] tmp = (List<String>[]) new List[rb.shards.length];
       dff._toRefine = tmp;
-      
+
       ShardFacetCount[] counts = dff.getCountSorted();
       int ntop = Math.min(counts.length, 
                           dff.limit >= 0 ? dff.offset + dff.limit : Integer.MAX_VALUE);
@@ -562,7 +581,7 @@ public class FacetComponent extends Sear
       for (int i = 0; i < counts.length; i++) {
         ShardFacetCount sfc = counts[i];
         boolean needRefinement = false;
-        
+
         if (i < ntop) {
           // automatically flag the top values for refinement
           // this should always be true for facet.sort=index
@@ -586,14 +605,14 @@ public class FacetComponent extends Sear
             needRefinement = true;
           }
         }
-        
+
         if (needRefinement) {
           // add a query for each shard missing the term that needs refinement
           for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
             FixedBitSet fbs = dff.counted[shardNum];
             // fbs can be null if a shard request failed
-            if (fbs != null && 
-                (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) && 
+            if (fbs != null &&
+                (sfc.termNum >= fbs.length() || !fbs.get(sfc.termNum)) &&
                 dff.maxPossible(sfc, shardNum) > 0) {
 
               dff.needRefinements = true;
@@ -607,6 +626,100 @@ public class FacetComponent extends Sear
         }
       }
     }
+    removeFieldFacetsUnderLimits(rb);
+    removeRangeFacetsUnderLimits(rb);
+    removeQueryFacetsUnderLimits(rb);
+
+  }
+
+  // Drops merged facet.query entries whose count is below the request-wide facet.mincount.
+  // fi.queryFacets is replaced (never mutated in place), and only if an entry was removed.
+  private void removeQueryFacetsUnderLimits(ResponseBuilder rb) {
+    if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
+      return;
+    }
+    FacetInfo fi = rb._facetInfo;
+    Map<String, QueryFacet> query_facets = fi.queryFacets;
+    if (query_facets == null) {
+      return;
+    }
+
+    // only the request-wide mincount is consulted for facet.query entries
+    int minCount = rb.req.getParams().getInt(FacetParams.FACET_MINCOUNT, 0);
+    LinkedHashMap<String, QueryFacet> kept = new LinkedHashMap<>();
+    for (Map.Entry<String, QueryFacet> ent : query_facets.entrySet()) {
+      if (ent.getValue().count >= minCount) {
+        kept.put(ent.getKey(), ent.getValue());
+      } else if (log.isTraceEnabled()) { // skip building the message when trace is off
+        log.trace("Removing facetQuery/key: " + ent.getKey() + "/" + ent.getValue().toString() + " mincount=" + minCount);
+      }
+    }
+    if (kept.size() != query_facets.size()) { // at least one entry was filtered out
+      fi.queryFacets = kept;
+    }
+  }
+
+  // Filters the merged facet.range buckets against the facet.mincount that getFieldInt
+  // resolves for each range field. Only acts while rb.stage is STAGE_EXECUTE_QUERY.
+  private void removeRangeFacetsUnderLimits(ResponseBuilder rb) {
+    if (rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
+      return;
+    }
+
+    FacetInfo fi = rb._facetInfo;
+
+    @SuppressWarnings("unchecked")
+    SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
+        (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+            fi.rangeFacets;
+
+    if (facet_ranges == null) {
+      return;
+    }
+
+    for (Map.Entry<String, SimpleOrderedMap<Object>> entry : facet_ranges) {
+      final String field = entry.getKey();
+      int minCount = rb.req.getParams().getFieldInt(field, FacetParams.FACET_MINCOUNT, 0);
+      if (minCount == 0) { // mincount of 0: leave this field's list untouched
+        continue;
+      }
+
+      // the visited entry already holds this field's range data
+      @SuppressWarnings("unchecked")
+      NamedList<Integer> vals = (NamedList<Integer>) entry.getValue().get("counts");
+      NamedList<Integer> kept = new NamedList<>();
+      for (Map.Entry<String, Integer> pair : vals) {
+        if (pair.getValue() >= minCount) {
+          kept.add(pair.getKey(), pair.getValue());
+        } else if (log.isTraceEnabled()) { // skip building the message when trace is off
+          log.trace("Removing facet/key: " + pair.getKey() + "/" + pair.getValue().toString() + " mincount=" + minCount);
+        }
+      }
+      if (kept.size() != vals.size()) { // rewrite in place only when a bucket was dropped
+        vals.clear();
+        vals.addAll(kept);
+      }
+    }
+  }
+
+  // Applies facet.mincount to the merged facet.field results once the request has
+  // reached STAGE_DONE; fields whose resolved mincount is 0 are left untouched.
+  private void removeFieldFacetsUnderLimits(ResponseBuilder rb) {
+    final FacetInfo facetInfo = rb._facetInfo;
+    // short-circuit keeps facetInfo from being dereferenced in any other stage
+    if (rb.stage != ResponseBuilder.STAGE_DONE || facetInfo.facets == null) {
+      return;
+    }
+
+    for (Entry<String, DistribFieldFacet> fieldFacet : facetInfo.facets.entrySet()) {
+      // mincount is resolved per field through getFieldInt, defaulting to 0
+      final int threshold = rb.req.getParams()
+          .getFieldInt(fieldFacet.getKey(), FacetParams.FACET_MINCOUNT, 0);
+      if (threshold != 0) {
+        // prune this field's merged terms whose count is below the threshold
+        fieldFacet.getValue().respectMinCount(threshold);
+      }
+    }
   }
 
   // The implementation below uses the first encountered shard's
@@ -763,7 +876,7 @@ public class FacetComponent extends Sear
 
   private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
     FacetInfo fi = rb._facetInfo;
-    
+
     for (ShardResponse srsp : sreq.responses) {
       // int shardNum = rb.getShardNum(srsp.shard);
       NamedList facet_counts = (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
@@ -775,7 +888,7 @@ public class FacetComponent extends Sear
         String key = facet_fields.getName(i);
         DistribFieldFacet dff = fi.facets.get(key);
         if (dff == null) continue;
-        
+
         NamedList shardCounts = (NamedList) facet_fields.getVal(i);
         
         for (int j = 0; j < shardCounts.size(); j++) {
@@ -1179,7 +1292,7 @@ public class FacetComponent extends Sear
       int numReceived = sz;
       
       FixedBitSet terms = new FixedBitSet(termNum + sz);
-      
+
       long last = 0;
       for (int i = 0; i < sz; i++) {
         String name = shardCounts.getName(i);
@@ -1248,6 +1361,22 @@ public class FacetComponent extends Sear
       // TODO: could store the last term in the shard to tell if this term
       // comes before or after it. If it comes before, we could subtract 1
     }
+
+    /** Swaps {@code counts} for a copy without the entries whose count is below minCount. */
+    public void respectMinCount(long minCount) {
+      final HashMap<String, ShardFacetCount> survivors = new HashMap<>();
+      for (Map.Entry<String, ShardFacetCount> candidate : counts.entrySet()) {
+        final ShardFacetCount sfc = candidate.getValue();
+        if (sfc.count < minCount) {
+          log.trace("Removing facet/key: " + candidate.getKey() + "/" + sfc.toString() + " mincount=" + minCount);
+          continue;
+        }
+        survivors.put(candidate.getKey(), sfc);
+      }
+      if (survivors.size() != counts.size()) { // keep the original map when nothing was dropped
+        counts = survivors;
+      }
+    }
   }
   
   /**

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1623429&r1=1623428&r2=1623429&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Mon Sep  8 16:34:23 2014
@@ -28,7 +28,9 @@ import org.apache.solr.client.solrj.Solr
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.response.FacetField;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.client.solrj.response.RangeFacet;
 import org.apache.solr.cloud.ChaosMonkey;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
@@ -66,14 +68,14 @@ public class TestDistributedSearch exten
 
 
     del("*:*");
-    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", 
+    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
            tdate_a, "2010-04-20T11:00:00Z",
            tdate_b, "2009-08-20T11:00:00Z",
            "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
-    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.", 
+    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
            tdate_a, "2010-05-02T11:00:00Z",
            tdate_b, "2009-11-02T11:00:00Z");
-    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow", 
+    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
            tdate_a, "2010-05-03T11:00:00Z");
     indexr(id,4, i1, -100 ,tlong, 101,
            t1,"the quick fox jumped over the lazy dog", 
@@ -175,13 +177,13 @@ public class TestDistributedSearch exten
 
     // a facet query to test out chars out of the ascii range
     query("q","*:*", "rows",0, "facet","true", "facet.query","{!term f=foo_s}international\u00ff\u01ff\u2222\u3333");
-    
+
     // simple field facet on date fields
-    rsp = query("q","*:*", "rows", 0, 
+    rsp = query("q","*:*", "rows", 0,
                 "facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
                 "facet.field", tdate_a);
     assertEquals(1, rsp.getFacetFields().size());
-    rsp = query("q","*:*", "rows", 0, 
+    rsp = query("q","*:*", "rows", 0,
                 "facet","true", "facet.limit", 1, // TODO: limit shouldn't be needed: SOLR-6386
                 "facet.field", tdate_b, "facet.field", tdate_a);
     assertEquals(2, rsp.getFacetFields().size());
@@ -225,7 +227,85 @@ public class TestDistributedSearch exten
           "facet.range.start",200, 
           "facet.range.gap",100, 
           "f."+tlong+".facet.range.end",900);
-    
+
+    // Test mincounts. Do NOT want to go through all the stuff where with validateControlData in query() method
+    // Purposely packing a _bunch_ of stuff together here to insure that the proper level of mincount is used for
+    // each
+    ModifiableSolrParams minParams = new ModifiableSolrParams();
+    minParams.set("q","*:*");
+    minParams.set("rows", 1);
+    minParams.set("facet", "true");
+    minParams.set("facet.missing", "true");
+    minParams.set("facet.field", i1);
+    minParams.set("facet.missing", "true");
+    minParams.set("facet.mincount", 2);
+
+    // Return a separate section of ranges over i1. Should respect global range mincount
+    minParams.set("facet.range", i1);
+    minParams.set("f." + i1 + ".facet.range.start", 0);
+    minParams.set("f." + i1 + ".facet.range.gap", 200);
+    minParams.set("f." + i1 + ".facet.range.end", 1200);
+    minParams.set("f." + i1 + ".facet.mincount", 4);
+
+
+    // Return a separate section of ranges over tlong Should respect facet.mincount
+    minParams.add("facet.range", tlong);
+    minParams.set("f." + tlong + ".facet.range.start", 0);
+    minParams.set("f." + tlong + ".facet.range.gap", 100);
+    minParams.set("f." + tlong + ".facet.range.end", 1200);
+    // Repeat with a range type of date
+    minParams.add("facet.range", tdate_b);
+    minParams.set("f." + tdate_b + ".facet.range.start", "2009-02-01T00:00:00Z");
+    minParams.set("f." + tdate_b + ".facet.range.gap", "+1YEAR");
+    minParams.set("f." + tdate_b + ".facet.range.end", "2011-01-01T00:00:00Z");
+    minParams.set("f." + tdate_b + ".facet.mincount", 3);
+
+    // Insure that global mincount is respected for facet queries
+    minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"); // Should return some counts
+    //minParams.set("facet.query", tdate_a + ":[* TO *]"); // Should be removed
+    minParams.add("facet.query", tdate_b + ":[2008-01-01T00:00:00Z TO 2009-09-01T00:00:00Z]"); // Should be removed from response
+
+
+    setDistributedParams(minParams);
+    QueryResponse minResp = queryServer(minParams);
+
+    ModifiableSolrParams eParams = new ModifiableSolrParams();
+    eParams.set("q",tdate_b + ":[* TO *]");
+    eParams.set("rows", 1000);
+    eParams.set("fl", tdate_b);
+    setDistributedParams(eParams);
+    QueryResponse eResp = queryServer(eParams);
+
+    // Check that exactly the right numbers of counts came through
+    assertEquals("Should be exactly 2 range facets returned after minCounts taken into account ", 3, minResp.getFacetRanges().size());
+    assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
+
+    checkMinCountsField(minResp.getFacetField(i1).getValues(), new Object[]{null, 55L}); // Should just be the null entries for field
+
+    checkMinCountsRange(minResp.getFacetRanges().get(0).getCounts(), new Object[]{"0", 5L}); // range on i1
+    checkMinCountsRange(minResp.getFacetRanges().get(1).getCounts(), new Object[]{"0", 3L, "100", 3L}); // range on tlong
+    checkMinCountsRange(minResp.getFacetRanges().get(2).getCounts(), new Object[]{"2009-02-01T00:00:00Z",  3L}); // date (range) on tvh
+
+    assertTrue("Should have a facet for tdate_a", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]"));
+    int qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2011-01-01T00:00:00Z]");
+    assertEquals("tdate_a should be 5", qCount, 5);
+
+    // Now let's do some queries, the above is getting too complex
+    minParams = new ModifiableSolrParams();
+    minParams.set("q","*:*");
+    minParams.set("rows", 1);
+    minParams.set("facet", "true");
+    minParams.set("facet.mincount", 3);
+
+    minParams.set("facet.query", tdate_a + ":[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
+    minParams.add("facet.query", tdate_b + ":[2009-01-01T00:00:00Z TO 2010-01-01T00:00:00Z]"); // Should be removed
+    setDistributedParams(minParams);
+    minResp = queryServer(minParams);
+
+    assertEquals("Should only be 1 query facets returned after minCounts taken into account ", 1, minResp.getFacetQuery().size());
+    assertTrue("Should be an entry for a_n_tdt", minResp.getFacetQuery().containsKey("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]"));
+    qCount = minResp.getFacetQuery().get("a_n_tdt:[2010-01-01T00:00:00Z TO 2010-05-04T00:00:00Z]");
+    assertEquals("a_n_tdt should have a count of 4 ", qCount, 4);
     //  variations of fl
     query("q","*:*", "fl","score","sort",i1 + " desc");
     query("q","*:*", "fl",i1 + ",score","sort",i1 + " desc");
@@ -454,7 +534,33 @@ public class TestDistributedSearch exten
       }
     }
   }
-  
+
+  // pairs holds the expected (name, count) tuples in order: {name0, count0, name1, count1, ...}
+  protected void checkMinCountsField(List<FacetField.Count> counts, Object[] pairs) {
+    assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
+    assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), pairs.length / 2, counts.size());
+    for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
+      FacetField.Count actual = counts.get(counts_idx);
+      String exp_name = (String) pairs[pairs_idx];
+      long exp_count = (long) pairs[pairs_idx + 1];
+      assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + actual.getName(), exp_name, actual.getName());
+      assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + actual.getCount(), exp_count, actual.getCount());
+    }
+  }
+
+  // pairs holds the expected (value, count) tuples in order: {value0, count0, value1, count1, ...}
+  protected void checkMinCountsRange(List<RangeFacet.Count> counts, Object[] pairs) {
+    assertTrue("Variable len param must be an even number, it was: " + pairs.length, (pairs.length % 2) == 0);
+    assertEquals("There should be exactly " + pairs.length / 2 + " returned counts. There were: " + counts.size(), pairs.length / 2, counts.size());
+    for (int pairs_idx = 0, counts_idx = 0; pairs_idx < pairs.length; pairs_idx += 2, counts_idx++) {
+      RangeFacet.Count actual = counts.get(counts_idx);
+      String exp_name = (String) pairs[pairs_idx];
+      long exp_count = (long) pairs[pairs_idx + 1];
+      assertEquals("Expected ordered entry " + exp_name + " at position " + counts_idx + " got " + actual.getValue(), exp_name, actual.getValue());
+      assertEquals("Expected count for entry: " + exp_name + " at position " + counts_idx + " got " + actual.getCount(), exp_count, actual.getCount());
+    }
+  }
+
   protected void queryPartialResults(final List<String> upShards,
                                      final List<SolrServer> upClients, 
                                      Object... q) throws Exception {