You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/05/10 05:16:05 UTC

svn commit: r1678536 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/search/facet/ solr/core/src/test/org/apache/solr/search/facet/

Author: yonik
Date: Sun May 10 03:16:04 2015
New Revision: 1678536

URL: http://svn.apache.org/r1678536
Log:
SOLR-7522: single valued numeric field faceting

Added:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
      - copied unchanged from r1678535, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Sun May 10 03:16:04 2015
@@ -120,6 +120,10 @@ New Features
 * SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues'.
   'calcdistinct' is still supported as an alias for both options (hossman)
 
+* SOLR-7522: Facet Module - Implement field/terms faceting over single-valued
+  numeric fields. (yonik)
+
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Sun May 10 03:16:04 2015
@@ -48,6 +48,7 @@ import org.apache.solr.common.SolrExcept
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.HashDocSet;
@@ -120,16 +121,26 @@ public class FacetField extends FacetReq
       return new FacetFieldProcessorStream(fcontext, this, sf);
     }
 
-    if (!multiToken || sf.hasDocValues()) {
+    org.apache.lucene.document.FieldType.NumericType ntype = ft.getNumericType();
+
+    if (sf.hasDocValues() && ntype==null) {
+      // single and multi-valued string docValues
       return new FacetFieldProcessorDV(fcontext, this, sf);
     }
 
-    if (multiToken) {
-      return new FacetFieldProcessorUIF(fcontext, this, sf);
-    } else {
-      // single valued string
-      return new FacetFieldProcessorFC(fcontext, this, sf);
+    if (!multiToken) {
+      if (sf.getType().getNumericType() != null) {
+        // single valued numeric (docvalues or fieldcache)
+        return new FacetFieldProcessorNumeric(fcontext, this, sf);
+      } else {
+        // single valued string...
+        return new FacetFieldProcessorDV(fcontext, this, sf);
+        // what about FacetFieldProcessorFC?
+      }
     }
+
+    // Multi-valued field cache (UIF)
+    return new FacetFieldProcessorUIF(fcontext, this, sf);
   }
 
   @Override
@@ -143,6 +154,7 @@ public class FacetField extends FacetReq
 abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   SchemaField sf;
   SlotAcc sortAcc;
+  SlotAcc indexOrderAcc;
   int effectiveMincount;
 
   FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@@ -157,6 +169,12 @@ abstract class FacetFieldProcessor exten
   }
 
   void setSortAcc(int numSlots) {
+    if (indexOrderAcc == null) {
+      // This sorting accumulator just goes by the slot number, so does not need to be collected
+      // and hence does not need to find its way into the accMap or accs array.
+      indexOrderAcc = new SortSlotAcc(fcontext);
+    }
+
     String sortKey = freq.sortVariable;
     sortAcc = accMap.get(sortKey);
 
@@ -164,15 +182,16 @@ abstract class FacetFieldProcessor exten
       if ("count".equals(sortKey)) {
         sortAcc = countAcc;
       } else if ("index".equals(sortKey)) {
-        sortAcc = new SortSlotAcc(fcontext);
-        // This sorting accumulator just goes by the slot number, so does not need to be collected
-        // and hence does not need to find it's way into the accMap or accs array.
+        sortAcc = indexOrderAcc;
       }
     }
   }
 
   static class Slot {
     int slot;
+    public int tiebreakCompare(int slotA, int slotB) {
+      return slotB - slotA;
+    }
   }
 }
 
@@ -249,7 +268,7 @@ abstract class FacetFieldProcessorFCBase
     // add a modest amount of over-request if this is a shard request
     int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
 
-    int maxsize = freq.limit > 0 ?  off + lim : Integer.MAX_VALUE - 1;
+    int maxsize = (int)(freq.limit > 0 ?  freq.offset + lim : Integer.MAX_VALUE - 1);
     maxsize = Math.min(maxsize, nTerms);
 
     final int sortMul = freq.sortDirection.getMultiplier();
@@ -612,6 +631,11 @@ class FacetFieldProcessorStream extends
     if (freq.prefix != null) {
       String indexedPrefix = sf.getType().toInternal(freq.prefix);
       startTermBytes = new BytesRef(indexedPrefix);
+    } else if (sf.getType().getNumericType() != null) {
+      String triePrefix = TrieField.getMainValuePrefix(sf.getType());
+      if (triePrefix != null) {
+        startTermBytes = new BytesRef(triePrefix);
+      }
     }
 
     Fields fields = fcontext.searcher.getLeafReader().fields();
@@ -644,8 +668,6 @@ class FacetFieldProcessorStream extends
 
     List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
     leaves = leafList.toArray( new LeafReaderContext[ leafList.size() ]);
-
-
   }
 
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java Sun May 10 03:16:04 2015
@@ -24,6 +24,7 @@ import java.util.EnumSet;
 import java.util.List;
 
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.SimpleOrderedMap;
@@ -101,6 +102,41 @@ class FacetRangeProcessor extends FacetP
     }
   }
 
+  public static Calc getNumericCalc(SchemaField sf) {
+    Calc calc;
+    final FieldType ft = sf.getType();
+
+    if (ft instanceof TrieField) {
+      final TrieField trie = (TrieField)ft;
+
+      switch (trie.getType()) {
+        case FLOAT:
+          calc = new FloatCalc(sf);
+          break;
+        case DOUBLE:
+          calc = new DoubleCalc(sf);
+          break;
+        case INTEGER:
+          calc = new IntCalc(sf);
+          break;
+        case LONG:
+          calc = new LongCalc(sf);
+          break;
+        case DATE:
+          calc = new DateCalc(sf, null);
+          break;
+        default:
+          throw new SolrException
+              (SolrException.ErrorCode.BAD_REQUEST,
+                  "Expected numeric field type :" + sf);
+      }
+    } else {
+      throw new SolrException
+          (SolrException.ErrorCode.BAD_REQUEST,
+              "Expected numeric field type :" + sf);
+    }
+    return calc;
+  }
 
   private SimpleOrderedMap<Object> getRangeCounts() throws IOException {
     final FieldType ft = sf.getType();
@@ -317,14 +353,22 @@ class FacetRangeProcessor extends FacetP
    * directly from some method -- but until then, keep this locked down
    * and private.
    */
-  private static abstract class Calc {
+  static abstract class Calc {
     protected final SchemaField field;
     public Calc(final SchemaField field) {
       this.field = field;
     }
 
+    public Comparable bitsToValue(long bits) {
+      return bits;
+    }
+
+    public long bitsToSortableBits(long bits) {
+      return bits;
+    }
+
     /**
-     * Formats a Range endpoint for use as a range label name in the response.
+     * Formats a value into a label used in a response.
      * Default Impl just uses toString()
      */
     public String formatValue(final Comparable val) {
@@ -332,7 +376,7 @@ class FacetRangeProcessor extends FacetP
     }
 
     /**
-     * Parses a String param into an Range endpoint value throwing
+     * Parses a String param into a value, throwing
      * an exception if not possible
      */
     public final Comparable getValue(final String rawval) {
@@ -346,7 +390,7 @@ class FacetRangeProcessor extends FacetP
     }
 
     /**
-     * Parses a String param into an Range endpoint.
+     * Parses a String param into a value.
      * Can throw a low level format exception as needed.
      */
     protected abstract Comparable parseStr(final String rawval)
@@ -407,6 +451,16 @@ class FacetRangeProcessor extends FacetP
 
   private static class FloatCalc extends Calc {
 
+    @Override
+    public Comparable bitsToValue(long bits) {
+      return Float.intBitsToFloat( (int)bits );
+    }
+
+    @Override
+    public long bitsToSortableBits(long bits) {
+      return NumericUtils.sortableDoubleBits(bits);
+    }
+
     public FloatCalc(final SchemaField f) { super(f); }
     @Override
     protected Float parseStr(String rawval) {
@@ -418,6 +472,15 @@ class FacetRangeProcessor extends FacetP
     }
   }
   private static class DoubleCalc extends Calc {
+    @Override
+    public Comparable bitsToValue(long bits) {
+      return Double.longBitsToDouble(bits);
+    }
+
+    @Override
+    public long bitsToSortableBits(long bits) {
+      return NumericUtils.sortableDoubleBits(bits);
+    }
 
     public DoubleCalc(final SchemaField f) { super(f); }
     @Override
@@ -463,6 +526,12 @@ class FacetRangeProcessor extends FacetP
         throw new IllegalArgumentException("SchemaField must use field type extending TrieDateField or DateRangeField");
       }
     }
+
+    @Override
+    public Comparable bitsToValue(long bits) {
+      return new Date(bits);
+    }
+
     @Override
     public String formatValue(Comparable val) {
       return ((TrieDateField)field.getType()).toExternal( (Date)val );

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Sun May 10 03:16:04 2015
@@ -206,8 +206,13 @@ class FacetProcessor<FacetRequestT exten
 
   protected void createAccs(int docCount, int slotCount) throws IOException {
     accMap = new LinkedHashMap<String,SlotAcc>();
-    countAcc = new CountSlotArrAcc(fcontext, slotCount);
-    countAcc.key = "count";
+
+    // allow a custom count acc to be used
+    if (countAcc == null) {
+      countAcc = new CountSlotArrAcc(fcontext, slotCount);
+      countAcc.key = "count";
+    }
+
     for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
       SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
       acc.key = entry.getKey();

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Sun May 10 03:16:04 2015
@@ -28,6 +28,7 @@ import java.util.Map;
 import java.util.Random;
 
 import com.tdunning.math.stats.AVLTreeDigest;
+import org.apache.lucene.queryparser.flexible.standard.processors.NumericQueryNodeProcessor;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.packed.GrowableWriter;
 import org.apache.lucene.util.packed.PackedInts;
@@ -44,10 +45,13 @@ import org.junit.Test;
 public class TestJsonFacets extends SolrTestCaseHS {
 
   private static SolrInstances servers;  // for distributed testing
+  private static int origTableSize;
 
   @BeforeClass
   public static void beforeTests() throws Exception {
     JSONTestUtil.failRepeatedKeys = true;
+    origTableSize = FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE;
+    FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
     initCore("solrconfig-tlog.xml","schema_latest.xml");
   }
 
@@ -60,6 +64,7 @@ public class TestJsonFacets extends Solr
   @AfterClass
   public static void afterTests() throws Exception {
     JSONTestUtil.failRepeatedKeys = false;
+    FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
     if (servers != null) {
       servers.stop();
       servers = null;
@@ -368,7 +373,8 @@ public class TestJsonFacets extends Solr
     client.commit();
 
 
-    // straight query facets
+
+        // straight query facets
     client.testJQ(params(p, "q", "*:*"
             , "json.facet", "{catA:{query:{q:'${cat_s}:A'}},  catA2:{query:{query:'${cat_s}:A'}},  catA3:{query:'${cat_s}:A'}    }"
         )
@@ -883,6 +889,50 @@ public class TestJsonFacets extends Solr
     );
 
 
+    //
+    // facet on numbers
+    //
+    client.testJQ(params(p, "q", "*:*"
+            , "json.facet", "{" +
+                " f1:{ type:field, field:${num_i} }" +
+                ",f2:{ type:field, field:${num_i}, sort:'count asc' }" +
+                ",f3:{ type:field, field:${num_i}, sort:'index asc' }" +
+                ",f4:{ type:field, field:${num_i}, sort:'index desc' }" +
+                ",f5:{ type:field, field:${num_i}, sort:'index desc', limit:1, missing:true, allBuckets:true, numBuckets:true }" +
+                ",f6:{ type:field, field:${num_i}, sort:'index desc', mincount:2, numBuckets:true }" +   // mincount should lower numbuckets
+                ",f7:{ type:field, field:${num_i}, sort:'index desc', offset:2, numBuckets:true }" +     // test offset
+                ",f8:{ type:field, field:${num_i}, sort:'index desc', offset:100, numBuckets:true }" +   // test high offset
+                ",f9:{ type:field, field:${num_i}, sort:'x desc', facet:{x:'avg(${num_d})'}, missing:true, allBuckets:true, numBuckets:true }" +            // test stats
+                ",f10:{ type:field, field:${num_i}, facet:{a:{query:'${cat_s}:A'}}, missing:true, allBuckets:true, numBuckets:true }" +     // test subfacets
+                "}"
+        )
+        , "facets=={count:6 " +
+            ",f1:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " +
+            ",f2:{ buckets:[{val:2,count:1},{val:3,count:1},{val:7,count:1},{val:-5,count:2} ] } " +
+            ",f3:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " +
+            ",f4:{ buckets:[{val:7,count:1},{val:3,count:1},{val:2,count:1},{val:-5,count:2} ] } " +
+            ",f5:{ buckets:[{val:7,count:1}]   , numBuckets:4, allBuckets:{count:5}, missing:{count:1}  } " +
+            ",f6:{ buckets:[{val:-5,count:2}]  , numBuckets:1  } " +
+            ",f7:{ buckets:[{val:2,count:1},{val:-5,count:2}] , numBuckets:4 } " +
+            ",f8:{ buckets:[] , numBuckets:4 } " +
+            ",f9:{ buckets:[{val:7,count:1,x:11.0},{val:2,count:1,x:4.0},{val:3,count:1,x:2.0},{val:-5,count:2,x:-7.0} ],  numBuckets:4, allBuckets:{count:5,x:0.6},missing:{count:1,x:0.0} } " +  // TODO: should missing exclude "x" because no values were collected?
+            ",f10:{ buckets:[{val:-5,count:2,a:{count:0}},{val:2,count:1,a:{count:1}},{val:3,count:1,a:{count:1}},{val:7,count:1,a:{count:0}} ],  numBuckets:4, allBuckets:{count:5},missing:{count:1,a:{count:0}} } " +
+            "}"
+    );
+
+
+    // facet on a float field - shares same code with integers/longs currently, so we only need to test labels/sorting
+    client.testJQ(params(p, "q", "*:*"
+            , "json.facet", "{" +
+                " f1:{ type:field, field:${num_d} }" +
+                ",f2:{ type:field, field:${num_d}, sort:'index desc' }" +
+                "}"
+        )
+        , "facets=={count:6 " +
+            ",f1:{ buckets:[{val:-9.0,count:1},{val:-5.0,count:1},{val:2.0,count:1},{val:4.0,count:1},{val:11.0,count:1} ] } " +
+            ",f2:{ buckets:[{val:11.0,count:1},{val:4.0,count:1},{val:2.0,count:1},{val:-5.0,count:1},{val:-9.0,count:1} ] } " +
+            "}"
+    );
 
   }