You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/11/04 19:53:39 UTC

svn commit: r1712611 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/search/ solr/core/src/java/org/apache/solr/search/facet/

Author: yonik
Date: Wed Nov  4 18:53:39 2015
New Revision: 1712611

URL: http://svn.apache.org/viewvc?rev=1712611&view=rev
Log:
SOLR-8222: optimize method=dv faceting for counts

Added:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
      - copied, changed from r1712608, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Wed Nov  4 18:53:39 2015
@@ -229,6 +229,12 @@ Optimizations
 
 * SOLR-7983: Utils.toUTF8 uses larger buffer than necessary for holding UTF8 data. (shalin)
 
+* SOLR-8222: JSON Facet API optimization to faceting by count on docvalue fields (or indexed fields
+  with method=dv) when there are multiple hits expected for enough buckets. For example, this
+  more than doubled the performance of faceting 5M documents over a field with 1M unique values.
+  (yonik)
+
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Wed Nov  4 18:53:39 2015
@@ -73,6 +73,9 @@ public class FacetField extends FacetReq
   FacetMethod method;
   int cacheDf;  // 0 means "default", -1 means "never cache"
 
+  // experimental - force perSeg collection when using dv method, currently for testing purposes only.
+  Boolean perSeg;
+
   // TODO: put this somewhere more generic?
   public static enum SortDirection {
     asc(-1) ,
@@ -692,158 +695,6 @@ abstract class FacetFieldProcessorFCBase
 }
 
 
-class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
-  static boolean unwrap_singleValued_multiDv = true;  // only set to false for test coverage
-
-  boolean multiValuedField;
-  SortedSetDocValues si;  // only used for term lookups (for both single and multi-valued)
-  MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
-
-
-  public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
-    super(fcontext, freq, sf);
-    multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
-  }
-
-  protected BytesRef lookupOrd(int ord) throws IOException {
-    return si.lookupOrd(ord);
-  }
-
-  protected void findStartAndEndOrds() throws IOException {
-    if (multiValuedField) {
-      si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
-      if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
-        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
-      }
-    } else {
-      SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
-      si = DocValues.singleton(single);  // multi-valued view
-      if (single instanceof MultiDocValues.MultiSortedDocValues) {
-        ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
-      }
-    }
-
-    if (si.getValueCount() >= Integer.MAX_VALUE) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
-    }
-
-    if (prefixRef != null) {
-      startTermIndex = (int)si.lookupTerm(prefixRef.get());
-      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
-      prefixRef.append(UnicodeUtil.BIG_TERM);
-      endTermIndex = (int)si.lookupTerm(prefixRef.get());
-      assert endTermIndex < 0;
-      endTermIndex = -endTermIndex - 1;
-    } else {
-      startTermIndex = 0;
-      endTermIndex = (int)si.getValueCount();
-    }
-
-    nTerms = endTermIndex - startTermIndex;
-  }
-
-  @Override
-  protected void collectDocs() throws IOException {
-    if (nTerms <= 0 || fcontext.base.size() < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
-      return;
-    }
-
-    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
-    Filter filter = fcontext.base.getTopFilter();
-
-    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
-      LeafReaderContext subCtx = leaves.get(subIdx);
-
-      setNextReaderFirstPhase(subCtx);
-
-      DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
-      DocIdSetIterator disi = dis.iterator();
-
-      SortedDocValues singleDv = null;
-      SortedSetDocValues multiDv = null;
-      if (multiValuedField) {
-        // TODO: get sub from multi?
-        multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
-        if (multiDv == null) {
-          multiDv = DocValues.emptySortedSet();
-        }
-        // some codecs may optimize SortedSet storage for single-valued fields
-        // this will be null if this is not a wrapped single valued docvalues.
-        if (unwrap_singleValued_multiDv) {
-          singleDv = DocValues.unwrapSingleton(multiDv);
-        }
-      } else {
-        singleDv = subCtx.reader().getSortedDocValues(sf.getName());
-        if (singleDv == null) {
-          singleDv = DocValues.emptySorted();
-        }
-      }
-
-      LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
-
-      if (singleDv != null) {
-        collectDocs(singleDv, disi, toGlobal);
-      } else {
-        collectDocs(multiDv, disi, toGlobal);
-      }
-    }
-
-  }
-
-  protected void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
-    int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      int segOrd = singleDv.getOrd(doc);
-      if (segOrd < 0) continue;
-      collect(doc, segOrd, toGlobal);
-    }
-  }
-
-  protected void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
-    int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      multiDv.setDocument(doc);
-      for(;;) {
-        int segOrd = (int)multiDv.nextOrd();
-        if (segOrd < 0) break;
-        collect(doc, segOrd, toGlobal);
-      }
-    }
-  }
-
-  private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
-    int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
-
-    int arrIdx = ord - startTermIndex;
-    if (arrIdx >= 0 && arrIdx < nTerms) {
-      countAcc.incrementCount(arrIdx, 1);
-      if (collectAcc != null) {
-        collectAcc.collect(doc, arrIdx);
-      }
-      if (allBucketsAcc != null) {
-        allBucketsAcc.collect(doc, arrIdx);
-      }
-    }
-  }
-
-}
-
-
-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 
 
 

Copied: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java (from r1712608, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java?p2=lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java&r1=1712608&r2=1712611&rev=1712611&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java Wed Nov  4 18:53:39 2015
@@ -28,12 +28,12 @@ import org.apache.lucene.index.SortedDoc
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LongValues;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.Filter;
 
 class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
   static boolean unwrap_singleValued_multiDv = true;  // only set to false for test coverage

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1712611&r1=1712610&r2=1712611&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Wed Nov  4 18:53:39 2015
@@ -532,6 +532,8 @@ class FacetFieldParser extends FacetPars
       facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null));
       facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf);
 
+      facet.perSeg = (Boolean)m.get("perSeg");
+
       // facet.sort may depend on a facet stat...
       // should we be parsing / validating this here, or in the execution environment?
       Object o = m.get("facet");