You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/17 10:33:46 UTC

svn commit: r1232359 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/analyzers/ lucene/contrib/analyzers/common/ lucene/contrib/analyzers/kuromoji/ solr/ solr/core/ solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/test...

Author: simonw
Date: Tue Jan 17 09:33:46 2012
New Revision: 1232359

URL: http://svn.apache.org/viewvc?rev=1232359&view=rev
Log:
SOLR-1709: Distributed support for Date and Numeric Range Faceting

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/core/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
    lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Tue Jan 17 09:33:46 2012
@@ -65,6 +65,9 @@ New Features
   consistency, and is also propagated to shards in distributed search.
   Adding a parameter NOW=<time_in_ms> to the request will override the
   current time.  (Peter Sturge, yonik, Simon Willnauer)
+  
+* SOLR-1709: Distributed support for Date and Numeric Range Faceting
+  (Peter Sturge, David Smiley, hossman, Simon Willnauer)
 
 Optimizations
 ----------------------

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java Tue Jan 17 09:33:46 2012
@@ -17,23 +17,23 @@
 
 package org.apache.solr.handler.component;
 
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SimpleFacets;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.search.QueryParsing;
 import org.apache.solr.schema.FieldType;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.solr.search.QueryParsing;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.*;
 
 /**
  * TODO!
@@ -289,6 +289,94 @@ public class  FacetComponent extends Sea
           dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
         }
       }
+
+      // Distributed facet_dates
+      //
+      // The implementation below uses the first encountered shard's 
+      // facet_dates as the basis for subsequent shards' data to be merged.
+      // (the "NOW" param should ensure consistency)
+      @SuppressWarnings("unchecked")
+      SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates = 
+        (SimpleOrderedMap<SimpleOrderedMap<Object>>) 
+        facet_counts.get("facet_dates");
+      
+      if (facet_dates != null) {
+
+        // go through each facet_date
+        for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
+          final String field = entry.getKey();
+          if (fi.dateFacets.get(field) == null) { 
+            // first time we've seen this field, no merging
+            fi.dateFacets.add(field, entry.getValue());
+
+          } else { 
+            // not the first time, merge current field
+
+            SimpleOrderedMap<Object> shardFieldValues 
+              = entry.getValue();
+            SimpleOrderedMap<Object> existFieldValues 
+              = fi.dateFacets.get(field);
+
+            for (Map.Entry<String,Object> existPair : existFieldValues) {
+              final String key = existPair.getKey();
+              if (key.equals("gap") || 
+                  key.equals("end") || 
+                  key.equals("start")) {
+                // we can skip these, must all be the same across shards
+                continue; 
+              }
+              // can be null if inconsistencies in shards responses
+              Integer newValue = (Integer) shardFieldValues.get(key);
+              if  (null != newValue) {
+                Integer oldValue = ((Integer) existPair.getValue());
+                existPair.setValue(oldValue + newValue);
+              }
+            }
+          }
+        }
+      }
+
+      // Distributed facet_ranges
+      //
+      // The implementation below uses the first encountered shard's 
+      // facet_ranges as the basis for subsequent shards' data to be merged.
+      @SuppressWarnings("unchecked")
+      SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges = 
+        (SimpleOrderedMap<SimpleOrderedMap<Object>>) 
+        facet_counts.get("facet_ranges");
+      
+      if (facet_ranges != null) {
+
+        // go through each facet_range
+        for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
+          final String field = entry.getKey();
+          if (fi.rangeFacets.get(field) == null) { 
+            // first time we've seen this field, no merging
+            fi.rangeFacets.add(field, entry.getValue());
+
+          } else { 
+            // not the first time, merge current field counts
+
+            @SuppressWarnings("unchecked")
+            NamedList<Integer> shardFieldValues 
+              = (NamedList<Integer>) entry.getValue().get("counts");
+
+            @SuppressWarnings("unchecked")
+            NamedList<Integer> existFieldValues 
+              = (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
+
+            for (Map.Entry<String,Integer> existPair : existFieldValues) {
+              final String key = existPair.getKey();
+              // can be null if inconsistencies in shards responses
+              Integer newValue = shardFieldValues.get(key);
+              if  (null != newValue) {
+                Integer oldValue = existPair.getValue();
+                existPair.setValue(oldValue + newValue);
+              }
+            }
+          }
+        }
+      }
     }
 
 
@@ -395,7 +483,7 @@ public class  FacetComponent extends Sea
 
     FacetInfo fi = rb._facetInfo;
 
-    NamedList facet_counts = new SimpleOrderedMap();
+    NamedList<Object> facet_counts = new SimpleOrderedMap<Object>();
 
     NamedList facet_queries = new SimpleOrderedMap();
     facet_counts.add("facet_queries",facet_queries);
@@ -403,11 +491,11 @@ public class  FacetComponent extends Sea
       facet_queries.add(qf.getKey(), num(qf.count));
     }
 
-    NamedList facet_fields = new SimpleOrderedMap();
+    NamedList<Object> facet_fields = new SimpleOrderedMap<Object>();
     facet_counts.add("facet_fields", facet_fields);
 
     for (DistribFieldFacet dff : fi.facets.values()) {
-      NamedList fieldCounts = new NamedList(); // order is more important for facets
+      NamedList<Object> fieldCounts = new NamedList<Object>(); // order is more important for facets
       facet_fields.add(dff.getKey(), fieldCounts);
 
       ShardFacetCount[] counts;
@@ -456,9 +544,8 @@ public class  FacetComponent extends Sea
       }
     }
 
-    // TODO: facet dates & numbers
-    facet_counts.add("facet_dates", new SimpleOrderedMap());
-    facet_counts.add("facet_ranges", new SimpleOrderedMap());
+    facet_counts.add("facet_dates", fi.dateFacets);
+    facet_counts.add("facet_ranges", fi.rangeFacets);
 
     rb.rsp.add("facet_counts", facet_counts);
 
@@ -512,6 +599,11 @@ public class  FacetComponent extends Sea
   public static class FacetInfo {
     public LinkedHashMap<String,QueryFacet> queryFacets;
     public LinkedHashMap<String,DistribFieldFacet> facets;
+    public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
+      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+    public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
+      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+
     public List exceptionList;
 
     void parse(SolrParams params, ResponseBuilder rb) {

Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Tue Jan 17 09:33:46 2012
@@ -39,7 +39,8 @@ public class TestDistributedSearch exten
   String nlong = "n_l";
   String tlong = "n_tl";
   String ndate = "n_dt";
-  String tdate = "n_tdt";
+  String tdate_a = "a_n_tdt";
+  String tdate_b = "b_n_tdt";
   
   String oddField="oddField_s";
   String missingField="ignore_exception__missing_but_valid_field_t";
@@ -49,24 +50,36 @@ public class TestDistributedSearch exten
   public void doTest() throws Exception {
     int backupStress = stress; // make a copy so we can restore
     del("*:*");
-    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
-            ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
-    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
-    );
-    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
-    );
-    indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
-    );
-    indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
-    );
+    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", 
+           tdate_a, "2010-04-20T11:00:00Z",
+           tdate_b, "2009-08-20T11:00:00Z",
+           "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
+    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.", 
+           tdate_a, "2010-05-02T11:00:00Z",
+           tdate_b, "2009-11-02T11:00:00Z");
+    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow", 
+           tdate_a, "2010-05-03T11:00:00Z");
+    indexr(id,4, i1, -100 ,tlong, 101,
+           t1,"the quick fox jumped over the lazy dog", 
+           tdate_a, "2010-05-03T11:00:00Z",
+           tdate_b, "2010-05-03T11:00:00Z");
+    indexr(id,5, i1, 500, tlong, 500 ,
+           t1,"the quick fox jumped way over the lazy dog", 
+           tdate_a, "2010-05-05T11:00:00Z");
     indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
     indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
-    indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
+    indexr(id,8, i1, 876, tlong, 876,
+           tdate_b, "2010-01-05T11:00:00Z",
+           t1,"all the kings horses and all the kings men");
     indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
     indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
-    indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
-    indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
-    indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
+    indexr(id,11, i1, -987, tlong, 987,
+           t1,"An eye for eye only ends up making the whole world blind.");
+    indexr(id,12, i1, 379, tlong, 379,
+           t1,"Great works are performed, not by strength, but by perseverance.");
+    indexr(id,13, i1, 232, tlong, 232,
+           t1,"no eggs on wall, lesson learned", 
+           oddField, "odd man out");
 
     indexr(id, 14, "SubjectTerms_mfacet", new String[]  {"mathematical models", "mathematical analysis"});
     indexr(id, 15, "SubjectTerms_mfacet", new String[]  {"test 1", "test 2", "test3"});
@@ -155,6 +168,43 @@ public class TestDistributedSearch exten
     query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1);
     query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.mincount",2);
 
+    // simple date facet on one field
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.date",tdate_a, 
+          "facet.date.other", "all", 
+          "facet.date.start","2010-05-01T11:00:00Z", 
+          "facet.date.gap","+1DAY", 
+          "facet.date.end","2010-05-20T11:00:00Z");
+
+    // date facet on multiple fields
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.date",tdate_a, 
+          "facet.date",tdate_b, 
+          "facet.date.other", "all", 
+          "f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z", 
+          "f."+tdate_b+".facet.date.gap","+3MONTHS", 
+          "facet.date.start","2010-05-01T11:00:00Z", 
+          "facet.date.gap","+1DAY", 
+          "facet.date.end","2010-05-20T11:00:00Z");
+
+    // simple range facet on one field
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.range",tlong, 
+          "facet.range.start",200, 
+          "facet.range.gap",100, 
+          "facet.range.end",900);
+
+    // range facet on multiple fields
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.range",tlong, 
+          "facet.range",i1, 
+          "f."+i1+".facet.range.start",300, 
+          "f."+i1+".facet.range.gap",87, 
+          "facet.range.end",900,
+          "facet.range.start",200, 
+          "facet.range.gap",100, 
+          "f."+tlong+".facet.range.end",900);
+
     stress=0;  // turn off stress... we want to tex max combos in min time
     for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
       String f = fieldNames[random.nextInt(fieldNames.length)];