You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2013/02/23 17:34:43 UTC

svn commit: r1449365 - in /lucene/dev/branches/branch_4x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/java/org/apache/solr/request/ solr/core/src/test/org/apache/solr/request/

Author: jpountz
Date: Sat Feb 23 16:34:42 2013
New Revision: 1449365

URL: http://svn.apache.org/r1449365
Log:
SOLR-3855: Fix faceting on numeric fields with precisionStep < Integer.MAX_VALUE, facet.mincount=0 and facet.method=fcs.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/NumericFacets.java
    lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
    lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/request/TestFaceting.java

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1449365&r1=1449364&r2=1449365&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/StatsComponent.java Sat Feb 23 16:34:42 2013
@@ -35,7 +35,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.request.UnInvertedField;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -214,10 +213,7 @@ class SimpleStats {
         FieldType ft = sf.getType();
         NamedList<?> stv;
 
-        // Currently, only UnInvertedField can deal with multi-part trie fields
-        String prefix = TrieField.getMainValuePrefix(ft);
-
-        if (sf.multiValued() || ft.multiValuedFieldCache() || prefix!=null) {
+        if (sf.multiValued() || ft.multiValuedFieldCache()) {
           //use UnInvertedField for multivalued fields
           UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher);
           stv = uif.getStats(searcher, docs, facets).getStatsValues();

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/NumericFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/NumericFacets.java?rev=1449365&r1=1449364&r2=1449365&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/NumericFacets.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/NumericFacets.java Sat Feb 23 16:34:42 2013
@@ -40,10 +40,12 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.StringHelper;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -266,10 +268,28 @@ final class NumericFacets {
         }
         final Terms terms = searcher.getAtomicReader().terms(fieldName);
         if (terms != null) {
+          final String prefixStr = TrieField.getMainValuePrefix(ft);
+          final BytesRef prefix;
+          if (prefixStr != null) {
+            prefix = new BytesRef(prefixStr);
+          } else {
+            prefix = new BytesRef();
+          }
           final TermsEnum termsEnum = terms.iterator(null);
-          BytesRef term = termsEnum.next();
+          BytesRef term;
+          switch (termsEnum.seekCeil(prefix)) {
+            case FOUND:
+            case NOT_FOUND:
+              term = termsEnum.term();
+              break;
+            case END:
+              term = null;
+              break;
+            default:
+              throw new AssertionError();
+          }
           final CharsRef spare = new CharsRef();
-          for (int skipped = hashTable.size; skipped < offset && term != null; ) {
+          for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
             ft.indexedToReadable(term, spare);
             final String termStr = spare.toString();
             if (!alreadySeen.contains(termStr)) {
@@ -277,7 +297,7 @@ final class NumericFacets {
             }
             term = termsEnum.next();
           }
-          for ( ; term != null && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
+          for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
             ft.indexedToReadable(term, spare);
             final String termStr = spare.toString();
             if (!alreadySeen.contains(termStr)) {
@@ -301,13 +321,31 @@ final class NumericFacets {
       }
       final Terms terms = searcher.getAtomicReader().terms(fieldName);
       if (terms != null) {
+        final String prefixStr = TrieField.getMainValuePrefix(ft);
+        final BytesRef prefix;
+        if (prefixStr != null) {
+          prefix = new BytesRef(prefixStr);
+        } else {
+          prefix = new BytesRef();
+        }
         final TermsEnum termsEnum = terms.iterator(null);
+        BytesRef term;
+        switch (termsEnum.seekCeil(prefix)) {
+          case FOUND:
+          case NOT_FOUND:
+            term = termsEnum.term();
+            break;
+          case END:
+            term = null;
+            break;
+          default:
+            throw new AssertionError();
+        }
         final CharsRef spare = new CharsRef();
-        BytesRef term = termsEnum.next();
-        for (int i = 0; i < offset && term != null; ++i) {
+        for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
           term = termsEnum.next();
         }
-        for ( ; term != null && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
+        for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
           ft.indexedToReadable(term, spare);
           final String termStr = spare.toString();
           Integer count = counts.get(termStr);

Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/SimpleFacets.java?rev=1449365&r1=1449364&r2=1449365&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/request/SimpleFacets.java Sat Feb 23 16:34:42 2013
@@ -320,6 +320,10 @@ public class SimpleFacets {
     return collector.getGroupCount();
   }
 
+  enum FacetMethod {
+    ENUM, FC, FCS;
+  }
+
   public NamedList<Integer> getTermCounts(String field) throws IOException {
     int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
     int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
@@ -342,57 +346,84 @@ public class SimpleFacets {
     FieldType ft = sf.getType();
 
     // determine what type of faceting method to use
-    String method = params.getFieldParam(field, FacetParams.FACET_METHOD);
-    boolean enumMethod = FacetParams.FACET_METHOD_enum.equals(method);
+    final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
+    FacetMethod method = null;
+    if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
+      method = FacetMethod.ENUM;
+    } else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
+      method = FacetMethod.FCS;
+    } else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
+      method = FacetMethod.FC;
+    }
 
-    // TODO: default to per-segment or not?
-    boolean per_segment = FacetParams.FACET_METHOD_fcs.equals(method) // explicit
-        || (ft.getNumericType() != null && sf.hasDocValues()); // numeric doc values are per-segment by default
+    if (method == FacetMethod.ENUM && TrieField.getMainValuePrefix(ft) != null) {
+      // enum can't deal with trie fields that index several terms per value
+      method = sf.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
+    }
 
     if (method == null && ft instanceof BoolField) {
       // Always use filters for booleans... we know the number of values is very small.
-      enumMethod = true;
+      method = FacetMethod.ENUM;
+    }
+
+    final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
+    
+    if (method == null && ft.getNumericType() != null && !sf.multiValued()) {
+      // the per-segment approach is optimal for numeric field types since there
+      // are no global ords to merge and no need to create an expensive
+      // top-level reader
+      method = FacetMethod.FCS;
+    }
+
+    if (ft.getNumericType() != null && sf.hasDocValues()) {
+      // only fcs is able to leverage the numeric field caches
+      method = FacetMethod.FCS;
     }
-    boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
 
-    if (TrieField.getMainValuePrefix(ft) != null) {
-      // A TrieField with multiple parts indexed per value... currently only
-      // UnInvertedField can handle this case, so force it's use.
-      enumMethod = false;
-      multiToken = true;
+    if (method == null) {
+      // TODO: default to per-segment or not?
+      method = FacetMethod.FC;
+    }
+
+    if (method == FacetMethod.FCS && multiToken) {
+      // only fc knows how to deal with multi-token fields
+      method = FacetMethod.FC;
     }
 
     if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
       counts = getGroupedCounts(searcher, docs, field, multiToken, offset,limit, mincount, missing, sort, prefix);
     } else {
-      // unless the enum method is explicitly specified, use a counting method.
-      if (enumMethod) {
-        counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix);
-      } else {
-        if (multiToken) {
-          UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
-          counts = uif.getCounts(searcher, docs, offset, limit, mincount,missing,sort,prefix);
-        } else {
-          // TODO: future logic could use filters instead of the fieldcache if
-          // the number of terms in the field is small enough.
-          if (per_segment) {
-            if (ft.getNumericType() != null && !sf.multiValued()) {
-              // force numeric faceting
-              if (prefix != null && !prefix.isEmpty()) {
-                throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
-              }
-              counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
-            } else {
-              PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
-              Executor executor = threads == 0 ? directExecutor : facetExecutor;
-              ps.setNumThreads(threads);
-              counts = ps.getFacetCounts(executor);
+      assert method != null;
+      switch (method) {
+        case ENUM:
+          assert TrieField.getMainValuePrefix(ft) == null;
+          counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix);
+          break;
+        case FCS:
+          assert !multiToken;
+          if (ft.getNumericType() != null && !sf.multiValued()) {
+            // force numeric faceting
+            if (prefix != null && !prefix.isEmpty()) {
+              throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
             }
+            counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
+          } else {
+            PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
+            Executor executor = threads == 0 ? directExecutor : facetExecutor;
+            ps.setNumThreads(threads);
+            counts = ps.getFacetCounts(executor);
+          }
+          break;
+        case FC:
+          if (multiToken || TrieField.getMainValuePrefix(ft) != null) {
+            UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
+            counts = uif.getCounts(searcher, docs, offset, limit, mincount,missing,sort,prefix);
           } else {
             counts = getFieldCacheCounts(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
           }
-
-        }
+          break;
+        default:
+          throw new AssertionError();
       }
     }
 

Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/request/TestFaceting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/request/TestFaceting.java?rev=1449365&r1=1449364&r2=1449365&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/request/TestFaceting.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/request/TestFaceting.java Sat Feb 23 16:34:42 2013
@@ -17,6 +17,8 @@
 
 package org.apache.solr.request;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Locale;
 import java.util.Random;
 
@@ -25,6 +27,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.FacetParams;
 import org.junit.After;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -263,5 +266,32 @@ public class TestFaceting extends SolrTe
             );
   }
 
+  @Test
+  public void testTrieFields() {
+    // make sure that terms are correctly filtered even for trie fields that index several
+    // terms for a single value
+    List<String> fields = new ArrayList<String>();
+    fields.add("id");
+    fields.add("7");
+    final String[] suffixes = new String[] {"ti", "tis", "tf", "tfs", "tl", "tls", "td", "tds"};
+    for (String suffix : suffixes) {
+      fields.add("f_" + suffix);
+      fields.add("42");
+    }
+    assertU(adoc(fields.toArray(new String[0])));
+    assertU(commit());
+    for (String suffix : suffixes) {
+      for (String facetMethod : new String[] {FacetParams.FACET_METHOD_enum, FacetParams.FACET_METHOD_fc, FacetParams.FACET_METHOD_fcs}) {
+        for (String facetSort : new String[] {FacetParams.FACET_SORT_COUNT, FacetParams.FACET_SORT_INDEX}) {
+          for (String value : new String[] {"42", "43"}) { // match or not
+            final String field = "f_" + suffix;
+            assertQ("field=" + field + ",method=" + facetMethod + ",sort=" + facetSort,
+                req("q", field + ":" + value, FacetParams.FACET, "true", FacetParams.FACET_FIELD, field, FacetParams.FACET_MINCOUNT, "0", FacetParams.FACET_SORT, facetSort, FacetParams.FACET_METHOD, facetMethod),
+                "*[count(//lst[@name='" + field + "']/int)=1]"); // exactly 1 facet count
+          }
+        }
+      }
+    }
+  }
 
 }
\ No newline at end of file