You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/11/04 19:53:39 UTC
svn commit: r1712611 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/search/
solr/core/src/java/org/apache/solr/search/facet/
Author: yonik
Date: Wed Nov 4 18:53:39 2015
New Revision: 1712611
URL: http://svn.apache.org/viewvc?rev=1712611&view=rev
Log:
SOLR-8222: optimize method=dv faceting for counts
Added:
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
- copied, changed from r1712608, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed Nov 4 18:53:39 2015
@@ -229,6 +229,12 @@ Optimizations
* SOLR-7983: Utils.toUTF8 uses larger buffer than necessary for holding UTF8 data. (shalin)
+* SOLR-8222: JSON Facet API optimization to faceting by count on docvalue fields (or indexed fields
+ with method=dv) when there are multiple hits expected for enough buckets. For example, this
+ more than doubled the performance of faceting 5M documents over a field with 1M unique values.
+ (yonik)
+
+
Other Changes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Wed Nov 4 18:53:39 2015
@@ -73,6 +73,9 @@ public class FacetField extends FacetReq
FacetMethod method;
int cacheDf; // 0 means "default", -1 means "never cache"
+ // experimental - force perSeg collection when using dv method, currently for testing purposes only.
+ Boolean perSeg;
+
// TODO: put this somewhere more generic?
public static enum SortDirection {
asc(-1) ,
@@ -692,158 +695,6 @@ abstract class FacetFieldProcessorFCBase
}
-class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
- static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage
-
- boolean multiValuedField;
- SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
- MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
-
-
- public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
- }
-
- protected BytesRef lookupOrd(int ord) throws IOException {
- return si.lookupOrd(ord);
- }
-
- protected void findStartAndEndOrds() throws IOException {
- if (multiValuedField) {
- si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
- if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
- ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
- }
- } else {
- SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
- si = DocValues.singleton(single); // multi-valued view
- if (single instanceof MultiDocValues.MultiSortedDocValues) {
- ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
- }
- }
-
- if (si.getValueCount() >= Integer.MAX_VALUE) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
- }
-
- if (prefixRef != null) {
- startTermIndex = (int)si.lookupTerm(prefixRef.get());
- if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
- prefixRef.append(UnicodeUtil.BIG_TERM);
- endTermIndex = (int)si.lookupTerm(prefixRef.get());
- assert endTermIndex < 0;
- endTermIndex = -endTermIndex - 1;
- } else {
- startTermIndex = 0;
- endTermIndex = (int)si.getValueCount();
- }
-
- nTerms = endTermIndex - startTermIndex;
- }
-
- @Override
- protected void collectDocs() throws IOException {
- if (nTerms <= 0 || fcontext.base.size() < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
- return;
- }
-
- final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
- Filter filter = fcontext.base.getTopFilter();
-
- for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
- LeafReaderContext subCtx = leaves.get(subIdx);
-
- setNextReaderFirstPhase(subCtx);
-
- DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
- DocIdSetIterator disi = dis.iterator();
-
- SortedDocValues singleDv = null;
- SortedSetDocValues multiDv = null;
- if (multiValuedField) {
- // TODO: get sub from multi?
- multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
- if (multiDv == null) {
- multiDv = DocValues.emptySortedSet();
- }
- // some codecs may optimize SortedSet storage for single-valued fields
- // this will be null if this is not a wrapped single valued docvalues.
- if (unwrap_singleValued_multiDv) {
- singleDv = DocValues.unwrapSingleton(multiDv);
- }
- } else {
- singleDv = subCtx.reader().getSortedDocValues(sf.getName());
- if (singleDv == null) {
- singleDv = DocValues.emptySorted();
- }
- }
-
- LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
-
- if (singleDv != null) {
- collectDocs(singleDv, disi, toGlobal);
- } else {
- collectDocs(multiDv, disi, toGlobal);
- }
- }
-
- }
-
- protected void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int segOrd = singleDv.getOrd(doc);
- if (segOrd < 0) continue;
- collect(doc, segOrd, toGlobal);
- }
- }
-
- protected void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- multiDv.setDocument(doc);
- for(;;) {
- int segOrd = (int)multiDv.nextOrd();
- if (segOrd < 0) break;
- collect(doc, segOrd, toGlobal);
- }
- }
- }
-
- private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
- int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
-
- int arrIdx = ord - startTermIndex;
- if (arrIdx >= 0 && arrIdx < nTerms) {
- countAcc.incrementCount(arrIdx, 1);
- if (collectAcc != null) {
- collectAcc.collect(doc, arrIdx);
- }
- if (allBucketsAcc != null) {
- allBucketsAcc.collect(doc, arrIdx);
- }
- }
- }
-
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Copied: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java (from r1712608, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java?p2=lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java&r1=1712608&r2=1712611&rev=1712611&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java Wed Nov 4 18:53:39 2015
@@ -28,12 +28,12 @@ import org.apache.lucene.index.SortedDoc
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.Filter;
class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Wed Nov 4 18:53:39 2015
@@ -532,6 +532,8 @@ class FacetFieldParser extends FacetPars
facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null));
facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf);
+ facet.perSeg = (Boolean)m.get("perSeg");
+
// facet.sort may depend on a facet stat...
// should we be parsing / validating this here, or in the execution environment?
Object o = m.get("facet");