You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/05/10 05:16:05 UTC
svn commit: r1678536 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/search/facet/
solr/core/src/test/org/apache/solr/search/facet/
Author: yonik
Date: Sun May 10 03:16:04 2015
New Revision: 1678536
URL: http://svn.apache.org/r1678536
Log:
SOLR-7522: single valued numeric field faceting
Added:
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
- copied unchanged from r1678535, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Sun May 10 03:16:04 2015
@@ -120,6 +120,10 @@ New Features
* SOLR-7461: stats.field now supports individual local params for 'countDistinct' and 'distinctValues'.
'calcdistinct' is still supported as an alias for both options (hossman)
+* SOLR-7522: Facet Module - Implement field/terms faceting over single-valued
+ numeric fields. (yonik)
+
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Sun May 10 03:16:04 2015
@@ -48,6 +48,7 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet;
@@ -120,16 +121,26 @@ public class FacetField extends FacetReq
return new FacetFieldProcessorStream(fcontext, this, sf);
}
- if (!multiToken || sf.hasDocValues()) {
+ org.apache.lucene.document.FieldType.NumericType ntype = ft.getNumericType();
+
+ if (sf.hasDocValues() && ntype==null) {
+ // single and multi-valued string docValues
return new FacetFieldProcessorDV(fcontext, this, sf);
}
- if (multiToken) {
- return new FacetFieldProcessorUIF(fcontext, this, sf);
- } else {
- // single valued string
- return new FacetFieldProcessorFC(fcontext, this, sf);
+ if (!multiToken) {
+ if (sf.getType().getNumericType() != null) {
+ // single valued numeric (docvalues or fieldcache)
+ return new FacetFieldProcessorNumeric(fcontext, this, sf);
+ } else {
+ // single valued string...
+ return new FacetFieldProcessorDV(fcontext, this, sf);
+ // TODO: the previous code returned FacetFieldProcessorFC for single valued strings; decide whether it is still needed here.
+ }
}
+
+ // Multi-valued field cache (UIF)
+ return new FacetFieldProcessorUIF(fcontext, this, sf);
}
@Override
@@ -143,6 +154,7 @@ public class FacetField extends FacetReq
abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
SchemaField sf;
SlotAcc sortAcc;
+ SlotAcc indexOrderAcc;
int effectiveMincount;
FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@@ -157,6 +169,12 @@ abstract class FacetFieldProcessor exten
}
void setSortAcc(int numSlots) {
+ if (indexOrderAcc == null) {
+ // This sorting accumulator just goes by the slot number, so does not need to be collected
+ // and hence does not need to find its way into the accMap or accs array.
+ indexOrderAcc = new SortSlotAcc(fcontext);
+ }
+
String sortKey = freq.sortVariable;
sortAcc = accMap.get(sortKey);
@@ -164,15 +182,16 @@ abstract class FacetFieldProcessor exten
if ("count".equals(sortKey)) {
sortAcc = countAcc;
} else if ("index".equals(sortKey)) {
- sortAcc = new SortSlotAcc(fcontext);
- // This sorting accumulator just goes by the slot number, so does not need to be collected
- // and hence does not need to find it's way into the accMap or accs array.
+ sortAcc = indexOrderAcc;
}
}
}
static class Slot {
int slot;
+ public int tiebreakCompare(int slotA, int slotB) {
+ return slotB - slotA;
+ }
}
}
@@ -249,7 +268,7 @@ abstract class FacetFieldProcessorFCBase
// add a modest amount of over-request if this is a shard request
int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
- int maxsize = freq.limit > 0 ? off + lim : Integer.MAX_VALUE - 1;
+ int maxsize = (int)(freq.limit > 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
maxsize = Math.min(maxsize, nTerms);
final int sortMul = freq.sortDirection.getMultiplier();
@@ -612,6 +631,11 @@ class FacetFieldProcessorStream extends
if (freq.prefix != null) {
String indexedPrefix = sf.getType().toInternal(freq.prefix);
startTermBytes = new BytesRef(indexedPrefix);
+ } else if (sf.getType().getNumericType() != null) {
+ String triePrefix = TrieField.getMainValuePrefix(sf.getType());
+ if (triePrefix != null) {
+ startTermBytes = new BytesRef(triePrefix);
+ }
}
Fields fields = fcontext.searcher.getLeafReader().fields();
@@ -644,8 +668,6 @@ class FacetFieldProcessorStream extends
List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
leaves = leafList.toArray( new LeafReaderContext[ leafList.size() ]);
-
-
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java Sun May 10 03:16:04 2015
@@ -24,6 +24,7 @@ import java.util.EnumSet;
import java.util.List;
import org.apache.lucene.search.Query;
+import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.SimpleOrderedMap;
@@ -101,6 +102,41 @@ class FacetRangeProcessor extends FacetP
}
}
+ public static Calc getNumericCalc(SchemaField sf) {
+ Calc calc;
+ final FieldType ft = sf.getType();
+
+ if (ft instanceof TrieField) {
+ final TrieField trie = (TrieField)ft;
+
+ switch (trie.getType()) {
+ case FLOAT:
+ calc = new FloatCalc(sf);
+ break;
+ case DOUBLE:
+ calc = new DoubleCalc(sf);
+ break;
+ case INTEGER:
+ calc = new IntCalc(sf);
+ break;
+ case LONG:
+ calc = new LongCalc(sf);
+ break;
+ case DATE:
+ calc = new DateCalc(sf, null);
+ break;
+ default:
+ throw new SolrException
+ (SolrException.ErrorCode.BAD_REQUEST,
+ "Expected numeric field type :" + sf);
+ }
+ } else {
+ throw new SolrException
+ (SolrException.ErrorCode.BAD_REQUEST,
+ "Expected numeric field type :" + sf);
+ }
+ return calc;
+ }
private SimpleOrderedMap<Object> getRangeCounts() throws IOException {
final FieldType ft = sf.getType();
@@ -317,14 +353,22 @@ class FacetRangeProcessor extends FacetP
* directly from some method -- but until then, keep this locked down
* and private.
*/
- private static abstract class Calc {
+ static abstract class Calc {
protected final SchemaField field;
public Calc(final SchemaField field) {
this.field = field;
}
+ public Comparable bitsToValue(long bits) {
+ return bits;
+ }
+
+ public long bitsToSortableBits(long bits) {
+ return bits;
+ }
+
/**
- * Formats a Range endpoint for use as a range label name in the response.
+ * Formats a value into a label used in a response
* Default Impl just uses toString()
*/
public String formatValue(final Comparable val) {
@@ -332,7 +376,7 @@ class FacetRangeProcessor extends FacetP
}
/**
- * Parses a String param into an Range endpoint value throwing
+ * Parses a String param into a value throwing
* an exception if not possible
*/
public final Comparable getValue(final String rawval) {
@@ -346,7 +390,7 @@ class FacetRangeProcessor extends FacetP
}
/**
- * Parses a String param into an Range endpoint.
+ * Parses a String param into a value.
* Can throw a low level format exception as needed.
*/
protected abstract Comparable parseStr(final String rawval)
@@ -407,6 +451,16 @@ class FacetRangeProcessor extends FacetP
private static class FloatCalc extends Calc {
+ @Override
+ public Comparable bitsToValue(long bits) {
+ return Float.intBitsToFloat( (int)bits );
+ }
+
+ @Override
+ public long bitsToSortableBits(long bits) {
+ return NumericUtils.sortableDoubleBits(bits);
+ }
+
public FloatCalc(final SchemaField f) { super(f); }
@Override
protected Float parseStr(String rawval) {
@@ -418,6 +472,15 @@ class FacetRangeProcessor extends FacetP
}
}
private static class DoubleCalc extends Calc {
+ @Override
+ public Comparable bitsToValue(long bits) {
+ return Double.longBitsToDouble(bits);
+ }
+
+ @Override
+ public long bitsToSortableBits(long bits) {
+ return NumericUtils.sortableDoubleBits(bits);
+ }
public DoubleCalc(final SchemaField f) { super(f); }
@Override
@@ -463,6 +526,12 @@ class FacetRangeProcessor extends FacetP
throw new IllegalArgumentException("SchemaField must use field type extending TrieDateField or DateRangeField");
}
}
+
+ @Override
+ public Comparable bitsToValue(long bits) {
+ return new Date(bits);
+ }
+
@Override
public String formatValue(Comparable val) {
return ((TrieDateField)field.getType()).toExternal( (Date)val );
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Sun May 10 03:16:04 2015
@@ -206,8 +206,13 @@ class FacetProcessor<FacetRequestT exten
protected void createAccs(int docCount, int slotCount) throws IOException {
accMap = new LinkedHashMap<String,SlotAcc>();
- countAcc = new CountSlotArrAcc(fcontext, slotCount);
- countAcc.key = "count";
+
+ // allow a custom count acc to be used
+ if (countAcc == null) {
+ countAcc = new CountSlotArrAcc(fcontext, slotCount);
+ countAcc.key = "count";
+ }
+
for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
acc.key = entry.getKey();
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1678536&r1=1678535&r2=1678536&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Sun May 10 03:16:04 2015
@@ -28,6 +28,7 @@ import java.util.Map;
import java.util.Random;
import com.tdunning.math.stats.AVLTreeDigest;
+import org.apache.lucene.queryparser.flexible.standard.processors.NumericQueryNodeProcessor;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
@@ -44,10 +45,13 @@ import org.junit.Test;
public class TestJsonFacets extends SolrTestCaseHS {
private static SolrInstances servers; // for distributed testing
+ private static int origTableSize;
@BeforeClass
public static void beforeTests() throws Exception {
JSONTestUtil.failRepeatedKeys = true;
+ origTableSize = FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE;
+ FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
initCore("solrconfig-tlog.xml","schema_latest.xml");
}
@@ -60,6 +64,7 @@ public class TestJsonFacets extends Solr
@AfterClass
public static void afterTests() throws Exception {
JSONTestUtil.failRepeatedKeys = false;
+ FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
if (servers != null) {
servers.stop();
servers = null;
@@ -368,7 +373,8 @@ public class TestJsonFacets extends Solr
client.commit();
- // straight query facets
+
+ // straight query facets
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{catA:{query:{q:'${cat_s}:A'}}, catA2:{query:{query:'${cat_s}:A'}}, catA3:{query:'${cat_s}:A'} }"
)
@@ -883,6 +889,50 @@ public class TestJsonFacets extends Solr
);
+ //
+ // facet on numbers
+ //
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ " f1:{ type:field, field:${num_i} }" +
+ ",f2:{ type:field, field:${num_i}, sort:'count asc' }" +
+ ",f3:{ type:field, field:${num_i}, sort:'index asc' }" +
+ ",f4:{ type:field, field:${num_i}, sort:'index desc' }" +
+ ",f5:{ type:field, field:${num_i}, sort:'index desc', limit:1, missing:true, allBuckets:true, numBuckets:true }" +
+ ",f6:{ type:field, field:${num_i}, sort:'index desc', mincount:2, numBuckets:true }" + // mincount should lower numbuckets
+ ",f7:{ type:field, field:${num_i}, sort:'index desc', offset:2, numBuckets:true }" + // test offset
+ ",f8:{ type:field, field:${num_i}, sort:'index desc', offset:100, numBuckets:true }" + // test high offset
+ ",f9:{ type:field, field:${num_i}, sort:'x desc', facet:{x:'avg(${num_d})'}, missing:true, allBuckets:true, numBuckets:true }" + // test stats
+ ",f10:{ type:field, field:${num_i}, facet:{a:{query:'${cat_s}:A'}}, missing:true, allBuckets:true, numBuckets:true }" + // test subfacets
+ "}"
+ )
+ , "facets=={count:6 " +
+ ",f1:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " +
+ ",f2:{ buckets:[{val:2,count:1},{val:3,count:1},{val:7,count:1},{val:-5,count:2} ] } " +
+ ",f3:{ buckets:[{val:-5,count:2},{val:2,count:1},{val:3,count:1},{val:7,count:1} ] } " +
+ ",f4:{ buckets:[{val:7,count:1},{val:3,count:1},{val:2,count:1},{val:-5,count:2} ] } " +
+ ",f5:{ buckets:[{val:7,count:1}] , numBuckets:4, allBuckets:{count:5}, missing:{count:1} } " +
+ ",f6:{ buckets:[{val:-5,count:2}] , numBuckets:1 } " +
+ ",f7:{ buckets:[{val:2,count:1},{val:-5,count:2}] , numBuckets:4 } " +
+ ",f8:{ buckets:[] , numBuckets:4 } " +
+ ",f9:{ buckets:[{val:7,count:1,x:11.0},{val:2,count:1,x:4.0},{val:3,count:1,x:2.0},{val:-5,count:2,x:-7.0} ], numBuckets:4, allBuckets:{count:5,x:0.6},missing:{count:1,x:0.0} } " + // TODO: should missing exclude "x" because no values were collected?
+ ",f10:{ buckets:[{val:-5,count:2,a:{count:0}},{val:2,count:1,a:{count:1}},{val:3,count:1,a:{count:1}},{val:7,count:1,a:{count:0}} ], numBuckets:4, allBuckets:{count:5},missing:{count:1,a:{count:0}} } " +
+ "}"
+ );
+
+
+ // facet on a float field - shares same code with integers/longs currently, so we only need to test labels/sorting
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ " f1:{ type:field, field:${num_d} }" +
+ ",f2:{ type:field, field:${num_d}, sort:'index desc' }" +
+ "}"
+ )
+ , "facets=={count:6 " +
+ ",f1:{ buckets:[{val:-9.0,count:1},{val:-5.0,count:1},{val:2.0,count:1},{val:4.0,count:1},{val:11.0,count:1} ] } " +
+ ",f2:{ buckets:[{val:11.0,count:1},{val:4.0,count:1},{val:2.0,count:1},{val:-5.0,count:1},{val:-9.0,count:1} ] } " +
+ "}"
+ );
}