You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/17 10:33:46 UTC
svn commit: r1232359 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/analyzers/ lucene/contrib/analyzers/common/
lucene/contrib/analyzers/kuromoji/ solr/ solr/core/
solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/test...
Author: simonw
Date: Tue Jan 17 09:33:46 2012
New Revision: 1232359
URL: http://svn.apache.org/viewvc?rev=1232359&view=rev
Log:
SOLR-1709: Distributed support for Date and Numeric Range Faceting
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/kuromoji/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/core/ (props changed)
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Tue Jan 17 09:33:46 2012
@@ -65,6 +65,9 @@ New Features
consistency, and is also propagated to shards in distributed search.
Adding a parameter NOW=<time_in_ms> to the request will override the
current time. (Peter Sturge, yonik, Simon Willnauer)
+
+* SOLR-1709: Distributed support for Date and Numeric Range Faceting
+ (Peter Sturge, David Smiley, hossman, Simon Willnauer)
Optimizations
----------------------
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java Tue Jan 17 09:33:46 2012
@@ -17,23 +17,23 @@
package org.apache.solr.handler.component;
-import java.io.IOException;
-import java.net.URL;
-import java.util.*;
-
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.common.SolrException;
import org.apache.solr.request.SimpleFacets;
-import org.apache.lucene.util.OpenBitSet;
-import org.apache.solr.search.QueryParsing;
import org.apache.solr.schema.FieldType;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.solr.search.QueryParsing;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.*;
/**
* TODO!
@@ -289,6 +289,94 @@ public class FacetComponent extends Sea
dff.add(shardNum, (NamedList)facet_fields.get(dff.getKey()), dff.initialLimit);
}
}
+
+ // Distributed facet_dates
+ //
+ // The implementation below uses the first encountered shard's
+ // facet_dates as the basis for subsequent shards' data to be merged.
+ // (the "NOW" param should ensure consistency)
+ @SuppressWarnings("unchecked")
+ SimpleOrderedMap<SimpleOrderedMap<Object>> facet_dates =
+ (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+ facet_counts.get("facet_dates");
+
+ if (facet_dates != null) {
+
+ // go through each facet_date
+ for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_dates) {
+ final String field = entry.getKey();
+ if (fi.dateFacets.get(field) == null) {
+ // first time we've seen this field, no merging
+ fi.dateFacets.add(field, entry.getValue());
+
+ } else {
+ // not the first time, merge current field
+
+ SimpleOrderedMap<Object> shardFieldValues
+ = entry.getValue();
+ SimpleOrderedMap<Object> existFieldValues
+ = fi.dateFacets.get(field);
+
+ for (Map.Entry<String,Object> existPair : existFieldValues) {
+ final String key = existPair.getKey();
+ if (key.equals("gap") ||
+ key.equals("end") ||
+ key.equals("start")) {
+ // we can skip these, must all be the same across shards
+ continue;
+ }
+ // can be null if the shards' responses are inconsistent
+ Integer newValue = (Integer) shardFieldValues.get(key);
+ if (null != newValue) {
+ Integer oldValue = ((Integer) existPair.getValue());
+ existPair.setValue(oldValue + newValue);
+ }
+ }
+ }
+ }
+ }
+
+ // Distributed facet_ranges
+ //
+ // The implementation below uses the first encountered shard's
+ // facet_ranges as the basis for subsequent shards' data to be merged.
+ @SuppressWarnings("unchecked")
+ SimpleOrderedMap<SimpleOrderedMap<Object>> facet_ranges =
+ (SimpleOrderedMap<SimpleOrderedMap<Object>>)
+ facet_counts.get("facet_ranges");
+
+ if (facet_ranges != null) {
+
+ // go through each facet_range
+ for (Map.Entry<String,SimpleOrderedMap<Object>> entry : facet_ranges) {
+ final String field = entry.getKey();
+ if (fi.rangeFacets.get(field) == null) {
+ // first time we've seen this field, no merging
+ fi.rangeFacets.add(field, entry.getValue());
+
+ } else {
+ // not the first time, merge current field counts
+
+ @SuppressWarnings("unchecked")
+ NamedList<Integer> shardFieldValues
+ = (NamedList<Integer>) entry.getValue().get("counts");
+
+ @SuppressWarnings("unchecked")
+ NamedList<Integer> existFieldValues
+ = (NamedList<Integer>) fi.rangeFacets.get(field).get("counts");
+
+ for (Map.Entry<String,Integer> existPair : existFieldValues) {
+ final String key = existPair.getKey();
+ // can be null if the shards' responses are inconsistent
+ Integer newValue = shardFieldValues.get(key);
+ if (null != newValue) {
+ Integer oldValue = existPair.getValue();
+ existPair.setValue(oldValue + newValue);
+ }
+ }
+ }
+ }
+ }
}
@@ -395,7 +483,7 @@ public class FacetComponent extends Sea
FacetInfo fi = rb._facetInfo;
- NamedList facet_counts = new SimpleOrderedMap();
+ NamedList<Object> facet_counts = new SimpleOrderedMap<Object>();
NamedList facet_queries = new SimpleOrderedMap();
facet_counts.add("facet_queries",facet_queries);
@@ -403,11 +491,11 @@ public class FacetComponent extends Sea
facet_queries.add(qf.getKey(), num(qf.count));
}
- NamedList facet_fields = new SimpleOrderedMap();
+ NamedList<Object> facet_fields = new SimpleOrderedMap<Object>();
facet_counts.add("facet_fields", facet_fields);
for (DistribFieldFacet dff : fi.facets.values()) {
- NamedList fieldCounts = new NamedList(); // order is more important for facets
+ NamedList<Object> fieldCounts = new NamedList<Object>(); // order is more important for facets
facet_fields.add(dff.getKey(), fieldCounts);
ShardFacetCount[] counts;
@@ -456,9 +544,8 @@ public class FacetComponent extends Sea
}
}
- // TODO: facet dates & numbers
- facet_counts.add("facet_dates", new SimpleOrderedMap());
- facet_counts.add("facet_ranges", new SimpleOrderedMap());
+ facet_counts.add("facet_dates", fi.dateFacets);
+ facet_counts.add("facet_ranges", fi.rangeFacets);
rb.rsp.add("facet_counts", facet_counts);
@@ -512,6 +599,11 @@ public class FacetComponent extends Sea
public static class FacetInfo {
public LinkedHashMap<String,QueryFacet> queryFacets;
public LinkedHashMap<String,DistribFieldFacet> facets;
+ public SimpleOrderedMap<SimpleOrderedMap<Object>> dateFacets
+ = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+ public SimpleOrderedMap<SimpleOrderedMap<Object>> rangeFacets
+ = new SimpleOrderedMap<SimpleOrderedMap<Object>>();
+
public List exceptionList;
void parse(SolrParams params, ResponseBuilder rb) {
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java?rev=1232359&r1=1232358&r2=1232359&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedSearch.java Tue Jan 17 09:33:46 2012
@@ -39,7 +39,8 @@ public class TestDistributedSearch exten
String nlong = "n_l";
String tlong = "n_tl";
String ndate = "n_dt";
- String tdate = "n_tdt";
+ String tdate_a = "a_n_tdt";
+ String tdate_b = "b_n_tdt";
String oddField="oddField_s";
String missingField="ignore_exception__missing_but_valid_field_t";
@@ -49,24 +50,36 @@ public class TestDistributedSearch exten
public void doTest() throws Exception {
int backupStress = stress; // make a copy so we can restore
del("*:*");
- indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
- ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
- indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
- );
- indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
- );
- indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
- );
- indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
- );
+ indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men",
+ tdate_a, "2010-04-20T11:00:00Z",
+ tdate_b, "2009-08-20T11:00:00Z",
+ "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
+ indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country.",
+ tdate_a, "2010-05-02T11:00:00Z",
+ tdate_b, "2009-11-02T11:00:00Z");
+ indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow",
+ tdate_a, "2010-05-03T11:00:00Z");
+ indexr(id,4, i1, -100 ,tlong, 101,
+ t1,"the quick fox jumped over the lazy dog",
+ tdate_a, "2010-05-03T11:00:00Z",
+ tdate_b, "2010-05-03T11:00:00Z");
+ indexr(id,5, i1, 500, tlong, 500 ,
+ t1,"the quick fox jumped way over the lazy dog",
+ tdate_a, "2010-05-05T11:00:00Z");
indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
- indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
+ indexr(id,8, i1, 876, tlong, 876,
+ tdate_b, "2010-01-05T11:00:00Z",
+ t1,"all the kings horses and all the kings men");
indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
- indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
- indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
- indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
+ indexr(id,11, i1, -987, tlong, 987,
+ t1,"An eye for eye only ends up making the whole world blind.");
+ indexr(id,12, i1, 379, tlong, 379,
+ t1,"Great works are performed, not by strength, but by perseverance.");
+ indexr(id,13, i1, 232, tlong, 232,
+ t1,"no eggs on wall, lesson learned",
+ oddField, "odd man out");
indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models", "mathematical analysis"});
indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2", "test3"});
@@ -155,6 +168,43 @@ public class TestDistributedSearch exten
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1);
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.mincount",2);
+ // simple date facet on one field
+ query("q","*:*", "rows",100, "facet","true",
+ "facet.date",tdate_a,
+ "facet.date.other", "all",
+ "facet.date.start","2010-05-01T11:00:00Z",
+ "facet.date.gap","+1DAY",
+ "facet.date.end","2010-05-20T11:00:00Z");
+
+ // date facet on multiple fields
+ query("q","*:*", "rows",100, "facet","true",
+ "facet.date",tdate_a,
+ "facet.date",tdate_b,
+ "facet.date.other", "all",
+ "f."+tdate_b+".facet.date.start","2009-05-01T11:00:00Z",
+ "f."+tdate_b+".facet.date.gap","+3MONTHS",
+ "facet.date.start","2010-05-01T11:00:00Z",
+ "facet.date.gap","+1DAY",
+ "facet.date.end","2010-05-20T11:00:00Z");
+
+ // simple range facet on one field
+ query("q","*:*", "rows",100, "facet","true",
+ "facet.range",tlong,
+ "facet.range.start",200,
+ "facet.range.gap",100,
+ "facet.range.end",900);
+
+ // range facet on multiple fields
+ query("q","*:*", "rows",100, "facet","true",
+ "facet.range",tlong,
+ "facet.range",i1,
+ "f."+i1+".facet.range.start",300,
+ "f."+i1+".facet.range.gap",87,
+ "facet.range.end",900,
+ "facet.range.start",200,
+ "facet.range.gap",100,
+ "f."+tlong+".facet.range.end",900);
+
stress=0; // turn off stress... we want to tex max combos in min time
for (int i=0; i<25*RANDOM_MULTIPLIER; i++) {
String f = fieldNames[random.nextInt(fieldNames.length)];