You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2011/04/20 23:29:14 UTC

svn commit: r1095517 - in /lucene/dev/trunk/solr: CHANGES.txt src/java/org/apache/solr/handler/component/FacetComponent.java src/test/org/apache/solr/TestDistributedSearch.java

Author: hossman
Date: Wed Apr 20 21:29:13 2011
New Revision: 1095517

URL: http://svn.apache.org/viewvc?rev=1095517&view=rev
Log:
SOLR-1709: Distributed support for Date and Numeric Range Faceting

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/FacetComponent.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/TestDistributedSearch.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1095517&r1=1095516&r2=1095517&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Apr 20 21:29:13 2011
@@ -131,6 +131,9 @@ New Features
 * SOLR-2335: New 'field("...")' function syntax for refering to complex 
   field names (containing whitespace or special characters) in functions.
 
+* SOLR-1709: Distributed support for Date and Numeric Range Faceting
+  (Peter Sturge, David Smiley, hossman)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1095517&r1=1095516&r2=1095517&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/FacetComponent.java Wed Apr 20 21:29:13 2011
@@ -17,23 +17,23 @@
 
 package org.apache.solr.handler.component;
 
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SimpleFacets;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.search.QueryParsing;
 import org.apache.solr.schema.FieldType;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.solr.search.QueryParsing;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.*;
 
 /**
  * TODO!
@@ -312,8 +312,95 @@ public class FacetComponent extends Sear
           dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
         }
       }
-    }
 
+      // Distributed facet_dates
+      //
+      // The implementation below uses the first encountered shard's 
+      // facet_dates as the basis for subsequent shards' data to be merged.
+      // (the "NOW" param should ensure consistency)
+      @SuppressWarnings("unchecked")
+      SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates = 
+        (SimpleOrderedMap<SimpleOrderedMap<Object>>) 
+        facet_counts.get("facet_dates");
+      
+      if (facet_dates != null) {
+
+        // go through each facet_date
+        for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
+          final String field = entry.getKey();
+          if (fi.dateFacets.get(field) == null) { 
+            // first time we've seen this field, no merging
+            fi.dateFacets.add(field, entry.getValue());
+
+          } else { 
+            // not the first time, merge current field
+
+            SimpleOrderedMap<Object> shardFieldValues 
+              = entry.getValue();
+            SimpleOrderedMap<Object> existFieldValues 
+              = fi.dateFacets.get(field);
+
+            for (Map.Entry<String,Object> existPair : existFieldValues) {
+              final String key = existPair.getKey();
+              if (key.equals("gap") || 
+                  key.equals("end") || 
+                  key.equals("start")) {
+                // we can skip these, must all be the same across shards
+                continue; 
+              }
+              // can be null if inconsistencies in shards responses
+              Integer newValue = (Integer) shardFieldValues.get(key);
+              if  (null != newValue) {
+                Integer oldValue = ((Integer) existPair.getValue());
+                existPair.setValue(oldValue + newValue);
+              }
+            }
+          }
+        }
+      }
+
+      // Distributed facet_ranges
+      //
+      // The implementation below uses the first encountered shard's 
+      // facet_ranges as the basis for subsequent shards' data to be merged.
+      @SuppressWarnings("unchecked")
+      SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges = 
+        (SimpleOrderedMap<SimpleOrderedMap<Object>>) 
+        facet_counts.get("facet_ranges");
+      
+      if (facet_ranges != null) {
+
+        // go through each facet_range
+        for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
+          final String field = entry.getKey();
+          if (fi.rangeFacets.get(field) == null) { 
+            // first time we've seen this field, no merging
+            fi.rangeFacets.add(field, entry.getValue());
+
+          } else { 
+            // not the first time, merge current field counts
+
+            @SuppressWarnings("unchecked")
+            NamedList<Integer> shardFieldValues 
+              = (NamedList<Integer>) entry.getValue().get("counts");
+
+            @SuppressWarnings("unchecked")
+            NamedList<Integer> existFieldValues 
+              = (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
+
+            for (Map.Entry<String,Integer> existPair : existFieldValues) {
+              final String key = existPair.getKey();
+              // can be null if inconsistencies in shards responses
+              Integer newValue = shardFieldValues.get(key);
+              if  (null != newValue) {
+                Integer oldValue = existPair.getValue();
+                existPair.setValue(oldValue + newValue);
+              }
+            }
+          }
+        }
+      }
+    }
 
     //
     // This code currently assumes that there will be only a single
@@ -487,9 +574,8 @@ public class FacetComponent extends Sear
       }
     }
 
-    // TODO: facet dates & numbers
-    facet_counts.add("facet_dates", new SimpleOrderedMap());
-    facet_counts.add("facet_ranges", new SimpleOrderedMap());
+    facet_counts.add("facet_dates", fi.dateFacets);
+    facet_counts.add("facet_ranges", fi.rangeFacets);
 
     rb.rsp.add("facet_counts", facet_counts);
 
@@ -541,8 +627,14 @@ public class FacetComponent extends Sear
    * <b>This API is experimental and subject to change</b>
    */
   public static class FacetInfo {
+
     public LinkedHashMap<String,QueryFacet> queryFacets;
     public LinkedHashMap<String,DistribFieldFacet> facets;
+    public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
+      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+    public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
+      = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+
     public List<String> exceptionList;
 
     void parse(SolrParams params, ResponseBuilder rb) {

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/TestDistributedSearch.java?rev=1095517&r1=1095516&r2=1095517&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/TestDistributedSearch.java Wed Apr 20 21:29:13 2011
@@ -40,7 +40,8 @@ public class TestDistributedSearch exten
   String nlong = "n_l";
   String tlong = "other_tl1";
   String ndate = "n_dt";
-  String tdate = "n_tdt";
+  String tdate_a = "a_n_tdt";
+  String tdate_b = "b_n_tdt";
   
   String oddField="oddField_s";
   String missingField="ignore_exception__missing_but_valid_field_t";
@@ -52,24 +53,36 @@ public class TestDistributedSearch exten
 
 
     del("*:*");
-    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
-            ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
-    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
-    );
-    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
-    );
-    indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
-    );
-    indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
-    );
+    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men", 
+           tdate_a, "2010-04-20T11:00:00Z",
+           tdate_b, "2009-08-20T11:00:00Z",
+           "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
+    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.", 
+           tdate_a, "2010-05-02T11:00:00Z",
+           tdate_b, "2009-11-02T11:00:00Z");
+    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow", 
+           tdate_a, "2010-05-03T11:00:00Z");
+    indexr(id,4, i1, -100 ,tlong, 101,
+           t1,"the quick fox jumped over the lazy dog", 
+           tdate_a, "2010-05-03T11:00:00Z",
+           tdate_b, "2010-05-03T11:00:00Z");
+    indexr(id,5, i1, 500, tlong, 500 ,
+           t1,"the quick fox jumped way over the lazy dog", 
+           tdate_a, "2010-05-05T11:00:00Z");
     indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
     indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
-    indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
+    indexr(id,8, i1, 876, tlong, 876,
+           tdate_b, "2010-01-05T11:00:00Z",
+           t1,"all the kings horses and all the kings men");
     indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
     indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
-    indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
-    indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
-    indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
+    indexr(id,11, i1, -987, tlong, 987,
+           t1,"An eye for eye only ends up making the whole world blind.");
+    indexr(id,12, i1, 379, tlong, 379,
+           t1,"Great works are performed, not by strength, but by perseverance.");
+    indexr(id,13, i1, 232, tlong, 232,
+           t1,"no eggs on wall, lesson learned", 
+           oddField, "odd man out");
 
     indexr(id, 14, "SubjectTerms_mfacet", new String[]  {"mathematical models", "mathematical analysis"});
     indexr(id, 15, "SubjectTerms_mfacet", new String[]  {"test 1", "test 2", "test3"});
@@ -140,6 +153,43 @@ public class TestDistributedSearch exten
     query("q","*:*", "rows",0, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*");
     query("q","*:*", "rows",0, "facet","true", "facet.field",t1, "facet.mincount",2);
 
+    // simple date facet on one field
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.date",tdate_a, 
+          "facet.date.other", "all", 
+          "facet.date.start","2010-05-01T11:00:00Z", 
+          "facet.date.gap","+1DAY", 
+          "facet.date.end","2010-05-20T11:00:00Z");
+
+    // date facet on multiple fields
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.date",tdate_a, 
+          "facet.date",tdate_b, 
+          "facet.date.other", "all", 
+          "f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z", 
+          "f."+tdate_b+".facet.date.gap","+3MONTHS", 
+          "facet.date.start","2010-05-01T11:00:00Z", 
+          "facet.date.gap","+1DAY", 
+          "facet.date.end","2010-05-20T11:00:00Z");
+
+    // simple range facet on one field
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.range",tlong, 
+          "facet.range.start",200, 
+          "facet.range.gap",100, 
+          "facet.range.end",900);
+
+    // range facet on multiple fields
+    query("q","*:*", "rows",100, "facet","true", 
+          "facet.range",tlong, 
+          "facet.range",i1, 
+          "f."+i1+".facet.range.start",300, 
+          "f."+i1+".facet.range.gap",87, 
+          "facet.range.end",900,
+          "facet.range.start",200, 
+          "facet.range.gap",100, 
+          "f."+tlong+".facet.range.end",900);
+
     stress=0;  // turn off stress... we want to tex max combos in min time
     for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
       String f = fieldNames[random.nextInt(fieldNames.length)];