You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2012/03/07 23:51:18 UTC

svn commit: r1298186 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/request/SimpleFacets.java core/src/test/org/apache/solr/request/SimpleFacetsTest.java solrj/src/java/org/apache/solr/common/params/GroupParams.java

Author: mvg
Date: Wed Mar  7 22:51:18 2012
New Revision: 1298186

URL: http://svn.apache.org/viewvc?rev=1298186&view=rev
Log:
SOLR-2898: Support grouped faceting.

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1298186&r1=1298185&r2=1298186&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Mar  7 22:51:18 2012
@@ -232,6 +232,7 @@ New Features
 * SOLR-3134: Include shard info in distributed response when shards.info=true 
   (Russell Black, ryan)
 
+* SOLR-2898: Support grouped faceting. (Martijn van Groningen)
 
 Optimizations
 ----------------------

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java?rev=1298186&r1=1298185&r2=1298186&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/request/SimpleFacets.java Wed Mar  7 22:51:18 2012
@@ -21,22 +21,20 @@ import org.apache.lucene.index.*;
 import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.search.*;
 import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
 import org.apache.lucene.util.*;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.params.FacetParams;
-import org.apache.solr.common.params.RequiredSolrParams;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.*;
 import org.apache.solr.common.params.FacetParams.FacetRangeOther;
 import org.apache.solr.common.params.FacetParams.FacetRangeInclude;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.*;
 import org.apache.solr.search.*;
+import org.apache.solr.search.grouping.GroupingSpecification;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.util.DateMathParser;
 import org.apache.solr.util.DefaultSolrThreadFactory;
@@ -290,32 +288,74 @@ public class SimpleFacets {
       multiToken = true;
     }
 
-    // unless the enum method is explicitly specified, use a counting method.
-    if (enumMethod) {
-      counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount,missing,sort,prefix);
+    if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
+      counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix);
     } else {
-      if (multiToken) {
-        UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
-        counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix);
+      // unless the enum method is explicitly specified, use a counting method.
+      if (enumMethod) {
+        counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount,missing,sort,prefix);
       } else {
-        // TODO: future logic could use filters instead of the fieldcache if
-        // the number of terms in the field is small enough.
-
-        if (per_segment) {
-          PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
-          Executor executor = threads==0 ? directExecutor : facetExecutor;
-          ps.setNumThreads(threads);
-          counts = ps.getFacetCounts(executor);
+        if (multiToken) {
+          UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
+          counts = uif.getCounts(searcher, base, offset, limit, mincount,missing,sort,prefix);
         } else {
-          counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);         
-        }
+          // TODO: future logic could use filters instead of the fieldcache if
+          // the number of terms in the field is small enough.
+          if (per_segment) {
+            PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
+            Executor executor = threads == 0 ? directExecutor : facetExecutor;
+            ps.setNumThreads(threads);
+            counts = ps.getFacetCounts(executor);
+          } else {
+            counts = getFieldCacheCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
+          }
 
+        }
       }
     }
 
     return counts;
   }
 
+  /**
+   * Computes grouped facet counts for a field: the count of a facet value is the number of
+   * groups (keyed on the first group field of the request) having a matching document with it.
+   *
+   * @param multiToken multi-token flag computed by the caller, forwarded to the collector factory
+   * @param limit maximum number of facet entries to return; a negative value means unlimited
+   * @return facet value to count, followed by a null-keyed missing count when missing is true
+   * @throws SolrException with code BAD_REQUEST when the request defines no group field
+   */
+  public NamedList<Integer> getGroupedCounts(SolrIndexSearcher searcher, DocSet base, String field,
+                                             boolean multiToken, int offset, int limit, int mincount,
+                                             boolean missing, String sort, String prefix) throws IOException {
+    GroupingSpecification groupingSpecification = rb.getGroupingSpec();
+    String[] groupFields = groupingSpecification != null ? groupingSpecification.getFields() : null;
+    // Guard the array access: a grouping spec without fields must yield a 400, not an index error.
+    String groupField = groupFields != null && groupFields.length > 0 ? groupFields[0] : null;
+    if (groupField == null) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Specify the group.field as parameter or local parameter");
+    }
+
+    BytesRef prefixBR = prefix != null ? new BytesRef(prefix) : null;
+    TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBR, 128);
+    searcher.search(new MatchAllDocsQuery(), base.getTopFilter(), collector);
+    boolean orderByCount = sort.equals(FacetParams.FACET_SORT_COUNT) || sort.equals(FacetParams.FACET_SORT_COUNT_LEGACY);
+    // A negative limit means "no limit"; using it arithmetically would truncate or empty the result.
+    int maxEntries = limit < 0 ? Integer.MAX_VALUE : limit;
+    int topN = limit < 0 ? Integer.MAX_VALUE : offset + limit;
+    TermGroupFacetCollector.GroupedFacetResult result = collector.mergeSegmentResults(topN, mincount, orderByCount);
+
+    NamedList<Integer> facetCounts = new NamedList<Integer>();
+    for (TermGroupFacetCollector.FacetEntry facetEntry : result.getFacetEntries(offset, maxEntries)) {
+      facetCounts.add(facetEntry.getValue().utf8ToString(), facetEntry.getCount());
+    }
+    if (missing) {
+      facetCounts.add(null, result.getTotalMissingCount());
+    }
+    return facetCounts;
+  }
+
 
   static final Executor directExecutor = new Executor() {
     public void execute(Runnable r) {

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java?rev=1298186&r1=1298185&r2=1298186&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java Wed Mar  7 22:51:18 2012
@@ -19,6 +19,7 @@ package org.apache.solr.request;
 
 import org.apache.noggit.ObjectBuilder;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.schema.SchemaField;
 import org.junit.BeforeClass;
@@ -66,8 +67,9 @@ public class SimpleFacetsTest extends So
     indexFacetSingleValued();
     indexFacetPrefixMultiValued();
     indexFacetPrefixSingleValued();
-    
-   Collections.shuffle(pendingDocs, random);
+    indexSimpleGroupedFacetCounts();
+
+    Collections.shuffle(pendingDocs, random);
     for (String[] doc : pendingDocs) {
       assertU(adoc(doc));
       randomCommit(random_commit_percent);
@@ -102,6 +104,88 @@ public class SimpleFacetsTest extends So
             "zerolen_s","");   
   }
 
+  static void indexSimpleGroupedFacetCounts() {
+    // Each row: id, hotel_s1 (group field in the tests), airport_s1 (facet field), duration_i1.
+    String[][] rows = {{"2000", "a", "ams", "5"}, {"2001", "a", "dus", "10"}, {"2002", "b", "ams", "10"}, {"2003", "b", "ams", "5"}, {"2004", "b", "ams", "5"}};
+    for (String[] row : rows) {
+      add_doc("id", row[0], "hotel_s1", row[1], "airport_s1", row[2], "duration_i1", row[3]);
+    }
+  }
+
+  /** Grouped facets on airport_s1 count distinct hotel_s1 groups for docs 2000-2004 (see indexSimpleGroupedFacetCounts). */
+  @Test
+  public void testSimpleGroupedFacets() throws Exception {
+    assertQ(
+        "Return 5 docs with id range 2000 till 2004",
+         req("id:[2000 TO 2004]"),
+        "*[count(//doc)=5]"
+    );
+    assertQ(
+        "Return two facet counts for field airport_s1",
+         req(
+             "q", "*:*",
+             "fq", "id:[2000 TO 2004]",
+             "group", "true",
+             "group.facet", "true",
+             "group.field", "hotel_s1",
+             "facet", "true",
+             "facet.field", "airport_s1"
+         ),
+        "//lst[@name='facet_fields']/lst[@name='airport_s1']",
+        "*[count(//lst[@name='airport_s1']/int)=2]",
+        "//lst[@name='airport_s1']/int[@name='ams'][.='2']",
+        "//lst[@name='airport_s1']/int[@name='dus'][.='1']"
+    );
+    // With duration_i1:5 no remaining doc has airport dus, yet dus is still expected with a zero count.
+    assertQ(
+        "Return two facet counts for field airport_s1 with fq",
+         req(
+             "q", "*:*",
+             "fq", "id:[2000 TO 2004]",
+             "fq", "duration_i1:5",
+             "group", "true",
+             "group.facet", "true",
+             "group.field", "hotel_s1",
+             "facet", "true",
+             "facet.field", "airport_s1"
+         ),
+        "//lst[@name='facet_fields']/lst[@name='airport_s1']",
+        "*[count(//lst[@name='airport_s1']/int)=2]",
+        "//lst[@name='airport_s1']/int[@name='ams'][.='2']",
+        "//lst[@name='airport_s1']/int[@name='dus'][.='0']"
+    );
+    assertQ(
+        "Return one facet count for field airport_s1 with prefix a",
+         req(
+             "q", "*:*",
+             "fq", "id:[2000 TO 2004]",
+             "group", "true",
+             "group.facet", "true",
+             "group.field", "hotel_s1",
+             "facet", "true",
+             "facet.field", "airport_s1",
+             "facet.prefix", "a"
+         ),
+        "//lst[@name='facet_fields']/lst[@name='airport_s1']",
+        "*[count(//lst[@name='airport_s1']/int)=1]",
+        "//lst[@name='airport_s1']/int[@name='ams'][.='2']"
+    );
+    // No group.field is given here, so the request is expected to fail with BAD_REQUEST.
+    try {
+      h.query(req(
+          "q", "*:*",
+          "fq", "id:[2000 TO 2004]",
+          "group.facet", "true",
+          "facet", "true",
+          "facet.field", "airport_s1",
+          "facet.prefix", "a"
+      ));
+      fail("Exception should have been thrown");
+    } catch (SolrException e) {
+      assertEquals(SolrException.ErrorCode.BAD_REQUEST.code, e.code());
+    }
+  }
+
   @Test
   public void testEmptyFacetCounts() throws Exception {
     doEmptyFacetCounts();

Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java?rev=1298186&r1=1298185&r2=1298186&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/params/GroupParams.java Wed Mar  7 22:51:18 2012
@@ -54,5 +54,8 @@ public interface GroupParams {
 
   /** Whether the group count should be included in the response. */
   public static final String GROUP_TOTAL_COUNT = GROUP + ".ngroups";
+
+  /** Whether to compute grouped facets (facet counts per group rather than per document) based on the first specified group field. */
+  public static final String GROUP_FACET = GROUP + ".facet";
 }