You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/07/10 05:47:54 UTC
svn commit: r1690199 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/search/facet/

Author: yonik
Date: Fri Jul 10 03:47:54 2015
New Revision: 1690199

URL: http://svn.apache.org/r1690199
Log:
SOLR-7455: defer non-sorting facet stats

Added:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
      - copied, changed from r1690189, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Fri Jul 10 03:47:54 2015
@@ -173,6 +173,16 @@ Optimizations
 * SOLR-7751: Minor optimizations to QueryComponent.process (reduce eager instantiations,
   cache method calls) (Christine Poerschke via Ramkumar Aiyengar)
 
+* SOLR-7455: Terms facets with the JSON Facet API now defer calculating non-sorting stats
+  until a second phase, after the top N facets are found. This improves performance
+  proportional to the number of non-sorting statistics being calculated in addition to
+  the number of buckets and domain documents.
+  For Example: The facet request  {type:terms, field:field1, facet:{x:"unique(field2)"}}
+  saw a 7x improvement when field1 and 1M unique terms and field2 had 1000 unique terms.
+  (yonik)
+  
+
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Fri Jul 10 03:47:54 2015
@@ -20,8 +20,11 @@ package org.apache.solr.search.facet;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.Fields;
@@ -37,6 +40,7 @@ import org.apache.lucene.index.TermsEnum
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -62,12 +66,12 @@ public class FacetField extends FacetReq
   long limit = 10;
   long mincount = 1;
   boolean missing;
+  boolean allBuckets;   // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
   boolean numBuckets;
   String prefix;
   String sortVariable;
   SortDirection sortDirection;
   FacetMethod method;
-  boolean allBuckets;   // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
   int cacheDf;  // 0 means "default", -1 means "never cache"
 
   // TODO: put this somewhere more generic?
@@ -134,6 +138,7 @@ public class FacetField extends FacetReq
         return new FacetFieldProcessorNumeric(fcontext, this, sf);
       } else {
         // single valued string...
+//        return new FacetFieldProcessorDV(fcontext, this, sf);
         return new FacetFieldProcessorDV(fcontext, this, sf);
         // what about FacetFieldProcessorFC?
       }
@@ -153,10 +158,25 @@ public class FacetField extends FacetReq
 
 abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   SchemaField sf;
-  SlotAcc sortAcc;
   SlotAcc indexOrderAcc;
   int effectiveMincount;
 
+  Map<String,AggValueSource> deferredAggs;  // null if none
+
+  // TODO: push any of this down to base class?
+
+  //
+  // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x".
+  // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those buckets.
+  //
+  SlotAcc collectAcc;  // Accumulator to collect across entire domain (in addition to the countAcc).  May be null.
+  SlotAcc sortAcc;     // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of countAcc, collectAcc, or indexOrderAcc
+  SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
+
+  SpecialSlotAcc allBucketsAcc;  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
+  SpecialSlotAcc missingAcc;     // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
+
+
   FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
     super(fcontext, freq);
     this.sf = sf;
@@ -168,21 +188,221 @@ abstract class FacetFieldProcessor exten
     return response;
   }
 
-  void setSortAcc(int numSlots) {
-    if (indexOrderAcc == null) {
-      // This sorting accumulator just goes by the slot number, so does not need to be collected
-      // and hence does not need to find it's way into the accMap or accs array.
-      indexOrderAcc = new SortSlotAcc(fcontext);
+  // This is used to create accs for second phase (or to create accs for all aggs)
+  @Override
+  protected void createAccs(int docCount, int slotCount) throws IOException {
+    if (accMap == null) {
+      accMap = new LinkedHashMap<>();
+    }
+
+    // allow a custom count acc to be used
+    if (countAcc == null) {
+      countAcc = new CountSlotArrAcc(fcontext, slotCount);
+      countAcc.key = "count";
+    }
+
+    if (accs != null) {
+      // reuse these accs, but reset them first
+      for (SlotAcc acc : accs) {
+        acc.reset();
+      }
+      return;
+    } else {
+      accs = new SlotAcc[ freq.getFacetStats().size() ];
+    }
+
+    int accIdx = 0;
+    for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
+      SlotAcc acc = null;
+      if (slotCount == 1) {
+        acc = accMap.get(entry.getKey());
+        if (acc != null) {
+          acc.reset();
+        }
+      }
+      if (acc == null) {
+        acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
+        acc.key = entry.getKey();
+        accMap.put(acc.key, acc);
+      }
+      accs[accIdx++] = acc;
+    }
+  }
+
+  void createCollectAcc(int numDocs, int numSlots) throws IOException {
+    accMap = new LinkedHashMap<>();
+
+    // we always count...
+    // allow a subclass to set a custom counter.
+    if (countAcc == null) {
+      countAcc = new CountSlotArrAcc(fcontext, numSlots);
+    }
+
+    if ("count".equals(freq.sortVariable)) {
+      sortAcc = countAcc;
+      deferredAggs = freq.getFacetStats();
+    } else if ("index".equals(freq.sortVariable)) {
+      // allow subclass to set indexOrderAcc first
+      if (indexOrderAcc == null) {
+        // This sorting accumulator just goes by the slot number, so does not need to be collected
+        // and hence does not need to find it's way into the accMap or accs array.
+        indexOrderAcc = new SortSlotAcc(fcontext);
+      }
+      sortAcc = indexOrderAcc;
+      deferredAggs = freq.getFacetStats();
+    } else {
+      AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
+      if (sortAgg != null) {
+        collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
+        collectAcc.key = freq.sortVariable; // TODO: improve this
+      }
+      sortAcc = collectAcc;
+      deferredAggs = new HashMap<>(freq.getFacetStats());
+      deferredAggs.remove(freq.sortVariable);
     }
 
-    String sortKey = freq.sortVariable;
-    sortAcc = accMap.get(sortKey);
-
-    if (sortAcc == null) {
-      if ("count".equals(sortKey)) {
-        sortAcc = countAcc;
-      } else if ("index".equals(sortKey)) {
-        sortAcc = indexOrderAcc;
+    if (deferredAggs.size() == 0) {
+      deferredAggs = null;
+    }
+
+    boolean needOtherAccs = freq.allBuckets;  // TODO: use for missing too...
+
+    if (!needOtherAccs) {
+      // we may need them later, but we don't want to create them now
+      // otherwise we won't know if we need to call setNextReader on them.
+      return;
+    }
+
+    // create the deffered aggs up front for use by allBuckets
+    createOtherAccs(numDocs, 1);
+  }
+
+
+  void createOtherAccs(int numDocs, int numSlots) throws IOException {
+    if (otherAccs != null) {
+      // reuse existing accumulators
+      for (SlotAcc acc : otherAccs) {
+        acc.reset();  // todo - make reset take numDocs and numSlots?
+      }
+      return;
+    }
+
+    int numDeferred = deferredAggs == null ? 0 : deferredAggs.size();
+    if (numDeferred <= 0) return;
+
+    otherAccs = new SlotAcc[ numDeferred ];
+
+    int otherAccIdx = 0;
+    for (Map.Entry<String,AggValueSource> entry : deferredAggs.entrySet()) {
+      AggValueSource agg = entry.getValue();
+      SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
+      acc.key = entry.getKey();
+      accMap.put(acc.key, acc);
+      otherAccs[otherAccIdx++] = acc;
+    }
+
+    if (numDeferred == freq.getFacetStats().size()) {
+      // accs and otherAccs are the same...
+      accs = otherAccs;
+    }
+  }
+
+
+  int collectFirstPhase(DocSet docs, int slot) throws IOException {
+    int num = -1;
+    if (collectAcc != null) {
+      num = collectAcc.collect(docs, slot);
+    }
+    if (allBucketsAcc != null) {
+      num = allBucketsAcc.collect(docs, slot);
+    }
+    return num >= 0 ? num : docs.size();
+  }
+
+  void collectFirstPhase(int segDoc, int slot) throws IOException {
+    if (collectAcc != null) {
+      collectAcc.collect(segDoc, slot);
+    }
+    if (allBucketsAcc != null) {
+      allBucketsAcc.collect(segDoc, slot);
+    }
+  }
+
+
+  void fillBucket(SimpleOrderedMap<Object> target, int count, int slotNum, DocSet subDomain, Query filter) throws IOException {
+    target.add("count", count);
+    if (count <= 0 && !freq.processEmpty) return;
+
+    if (collectAcc != null && slotNum >= 0) {
+      collectAcc.setValues(target, slotNum);
+    }
+
+    createOtherAccs(-1, 1);
+
+    if (otherAccs == null && freq.subFacets.isEmpty()) return;
+
+    if (subDomain == null) {
+      subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
+    }
+
+    // if no subFacets, we only need a DocSet
+    // otherwise we need more?
+    // TODO: save something generic like "slotNum" in the context and use that to implement things like filter exclusion if necessary?
+    // Hmmm, but we need to look up some stuff anyway (for the label?)
+    // have a method like "DocSet applyConstraint(facet context, DocSet parent)"
+    // that's needed for domain changing things like joins anyway???
+
+    if (otherAccs != null) {
+      // do acc at a time (traversing domain each time) or do all accs for each doc?
+      for (SlotAcc acc : otherAccs) {
+        acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
+        acc.collect(subDomain, 0);
+        acc.setValues(target, 0);
+      }
+    }
+
+    processSubs(target, filter, subDomain);
+  }
+
+
+  @Override
+  protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
+    if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
+      bucket.add("count", docCount);
+      return;
+    }
+    createAccs(docCount, 1);
+    int collected = collect(docs, 0);
+
+    // countAcc.incrementCount(0, collected);  // should we set the counton the acc instead of just passing it?
+
+    assert collected == docCount;
+    addStats(bucket, collected, 0);
+  }
+
+  // overrides but with different signature!
+  void addStats(SimpleOrderedMap<Object> target, int count, int slotNum) throws IOException {
+    target.add("count", count);
+    if (count > 0 || freq.processEmpty) {
+      for (SlotAcc acc : accs) {
+        acc.setValues(target, slotNum);
+      }
+    }
+  }
+
+  @Override
+  void setNextReader(LeafReaderContext ctx) throws IOException {
+    // base class calls this (for missing bucket...) ...  go over accs[] in that case
+    super.setNextReader(ctx);
+  }
+
+  void setNextReaderFirstPhase(LeafReaderContext ctx) throws IOException {
+    if (collectAcc != null) {
+      collectAcc.setNextReader(ctx);
+    }
+    if (otherAccs != null) {
+      for (SlotAcc acc : otherAccs) {
+        acc.setNextReader(ctx);
       }
     }
   }
@@ -195,6 +415,82 @@ abstract class FacetFieldProcessor exten
   }
 }
 
+class SpecialSlotAcc extends SlotAcc {
+  SlotAcc collectAcc;
+  SlotAcc[] otherAccs;
+  int collectAccSlot;
+  int otherAccsSlot;
+  long count;
+
+  public SpecialSlotAcc(FacetContext fcontext, SlotAcc collectAcc, int collectAccSlot, SlotAcc[] otherAccs, int otherAccsSlot) {
+    super(fcontext);
+    this.collectAcc = collectAcc;
+    this.collectAccSlot = collectAccSlot;
+    this.otherAccs = otherAccs;
+    this.otherAccsSlot = otherAccsSlot;
+  }
+
+  public int getCollectAccSlot() { return collectAccSlot; }
+  public int getOtherAccSlot() { return otherAccsSlot; }
+
+  public long getSpecialCount() {
+    return count;
+  }
+
+  @Override
+  public void collect(int doc, int slot) throws IOException {
+    assert slot != collectAccSlot || slot < 0;
+    count++;
+    if (collectAcc != null) {
+      collectAcc.collect(doc, collectAccSlot);
+    }
+    if (otherAccs != null) {
+      for (SlotAcc otherAcc : otherAccs) {
+        otherAcc.collect(doc, otherAccsSlot);
+      }
+    }
+  }
+
+  @Override
+  public int compare(int slotA, int slotB) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Object getValue(int slotNum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
+    if (collectAcc != null) {
+      collectAcc.setValues(bucket, collectAccSlot);
+    }
+    if (otherAccs != null) {
+      for (SlotAcc otherAcc : otherAccs) {
+        otherAcc.setValues(bucket, otherAccsSlot);
+      }
+    }
+  }
+
+  @Override
+  public void reset() {
+    // reset should be called on underlying accs
+    // TODO: but in case something does need to be done here, should we require this method to be called but do nothing for now?
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void resize(Resizer resizer) {
+    // someone else will call resize on collectAcc directly
+    if (collectAccSlot >= 0) {
+      collectAccSlot = resizer.getNewSlot(collectAccSlot);
+    }
+  }
+}
+
+
+
 
 // base class for FC style of facet counting (single and multi-valued strings)
 abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
@@ -204,8 +500,9 @@ abstract class FacetFieldProcessorFCBase
   int nTerms;
   int nDocs;
   int maxSlots;
-  int allBucketsSlot;
 
+  int allBucketsSlot = -1;  // slot for the primary Accs (countAcc, collectAcc)
+  int missingSlot = -1;
 
   public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
     super(fcontext, freq, sf);
@@ -235,19 +532,25 @@ abstract class FacetFieldProcessorFCBase
 
     findStartAndEndOrds();
 
-    // if we need an extra slot for the "missing" bucket, and it wasn't able to be tacked onto the beginning,
-    // then lets add room for it at the end.
-    maxSlots = (freq.missing && startTermIndex != -1) ? nTerms + 1 : nTerms;
+    maxSlots = nTerms;
 
     if (freq.allBuckets) {
-      allBucketsSlot = maxSlots;
-      maxSlots++;
-    } else {
-      allBucketsSlot = -1;
+      allBucketsSlot = maxSlots++;
+    }
+    if (freq.missing) {
+      missingSlot = maxSlots++;
+    }
+
+    createCollectAcc(nDocs, maxSlots);
+
+    if (freq.allBuckets) {
+      allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
+    }
+
+    if (freq.missing) {
+      // TODO: optimize case when missingSlot can be contiguous with other slots
+      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
     }
-    createAccs(nDocs, maxSlots);
-    setSortAcc(maxSlots);
-    prepareForCollection();
 
     collectDocs();
 
@@ -284,7 +587,8 @@ abstract class FacetFieldProcessorFCBase
 
     Slot bottom = null;
     for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
-      if (countAcc.getCount(i) < effectiveMincount) {
+      // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
+      if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
         continue;
       }
 
@@ -334,9 +638,9 @@ abstract class FacetFieldProcessorFCBase
 
     if (freq.allBuckets) {
       SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
-      countAcc.setValues(allBuckets, allBucketsSlot);
-      for (SlotAcc acc : accs) {
-        acc.setValues(allBuckets, allBucketsSlot);
+      allBuckets.add("count", allBucketsAcc.getSpecialCount());
+      if (allBucketsAcc != null) {
+        allBucketsAcc.setValues(allBuckets, allBucketsSlot);
       }
       res.add("allBuckets", allBuckets);
     }
@@ -345,6 +649,9 @@ abstract class FacetFieldProcessorFCBase
     res.add("buckets", bucketList);
 
 
+    // TODO: do this with a callback instead?
+    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
+
     for (int slotNum : sortedSlots) {
       SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
 
@@ -355,52 +662,38 @@ abstract class FacetFieldProcessorFCBase
       Object val = sf.getType().toObject(sf, br);
 
       bucket.add("val", val);
-      // add stats for this bucket
-      addStats(bucket, slotNum);
 
-      // handle sub-facets for this bucket
-      if (freq.getSubFacets().size() > 0) {
-        TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone()));
-        try {
-          processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
-        } finally {
-          // subContext.base.decref();  // OFF-HEAP
-          // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
-        }
-      }
+      TermQuery filter = needFilter ? new TermQuery(new Term(sf.getName(), br.clone())) : null;
+      fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
 
       bucketList.add(bucket);
     }
 
     if (freq.missing) {
       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      DocSet missingDocSet = null;
-      try {
-        if (startTermIndex == -1) {
-          addStats(missingBucket, 0);
-        } else {
-          missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-          // an extra slot was added to the end for this missing bucket
-          countAcc.incrementCount(nTerms, missingDocSet.size());
-          collect(missingDocSet, nTerms);
-          addStats(missingBucket, nTerms);
-        }
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
+      res.add("missing", missingBucket);
 
-        if (freq.getSubFacets().size() > 0) {
-          // TODO: we can do better than this!
-          if (missingDocSet == null) {
-            missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-          }
-          processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
-        }
+      /*** TODO - OPTIMIZE
+      DocSet missingDocSet = null;
+      if (startTermIndex == -1) {
+        fillBucket(missingBucket, countAcc.getCount(0), null);
+      } else {
+        missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
+        // an extra slot was added to the end for this missing bucket
+        countAcc.incrementCount(nTerms, missingDocSet.size());
+        collect(missingDocSet, nTerms);
+        addStats(missingBucket, nTerms);
+      }
 
-        res.add("missing", missingBucket);
-      } finally {
-        if (missingDocSet != null) {
-          // missingDocSet.decref(); // OFF-HEAP
-          missingDocSet = null;
+      if (freq.getSubFacets().size() > 0) {
+        // TODO: we can do better than this!
+        if (missingDocSet == null) {
+          missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
         }
+        processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
       }
+       ***/
     }
 
     return res;
@@ -410,31 +703,51 @@ abstract class FacetFieldProcessorFCBase
 }
 
 
-class FacetFieldProcessorFC extends FacetFieldProcessorFCBase {
-  SortedDocValues sortedDocValues;
+class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
+  static boolean unwrap_singleValued_multiDv = true;  // only set to false for test coverage
+
+  boolean multiValuedField;
+  SortedSetDocValues si;  // only used for term lookups (for both single and multi-valued)
+  MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
 
 
-  public FacetFieldProcessorFC(FacetContext fcontext, FacetField freq, SchemaField sf) {
+  public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
     super(fcontext, freq, sf);
+    multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
   }
 
   protected BytesRef lookupOrd(int ord) throws IOException {
-    return sortedDocValues.lookupOrd(ord);
+    return si.lookupOrd(ord);
   }
 
   protected void findStartAndEndOrds() throws IOException {
-    sortedDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
+    if (multiValuedField) {
+      si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
+      if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
+        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
+      }
+    } else {
+      SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
+      si = DocValues.singleton(single);  // multi-valued view
+      if (single instanceof MultiDocValues.MultiSortedDocValues) {
+        ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
+      }
+    }
+
+    if (si.getValueCount() >= Integer.MAX_VALUE) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
+    }
 
     if (prefixRef != null) {
-      startTermIndex = sortedDocValues.lookupTerm(prefixRef.get());
+      startTermIndex = (int)si.lookupTerm(prefixRef.get());
       if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
       prefixRef.append(UnicodeUtil.BIG_TERM);
-      endTermIndex = sortedDocValues.lookupTerm(prefixRef.get());
+      endTermIndex = (int)si.lookupTerm(prefixRef.get());
       assert endTermIndex < 0;
       endTermIndex = -endTermIndex - 1;
     } else {
       startTermIndex = 0;
-      endTermIndex = sortedDocValues.getValueCount();
+      endTermIndex = (int)si.getValueCount();
     }
 
     // optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
@@ -443,43 +756,116 @@ class FacetFieldProcessorFC extends Face
     nTerms = endTermIndex - startTermIndex;
   }
 
+  @Override
   protected void collectDocs() throws IOException {
+    if (nTerms <= 0 || fcontext.base.size() < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
+      return;
+    }
+
     final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
-    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
-    LeafReaderContext ctx = null;
-    int segBase = 0;
-    int segMax;
-    int adjustedMax = 0;
-    for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext(); ) {
-      final int doc = docsIt.nextDoc();
-      if (doc >= adjustedMax) {
-        do {
-          ctx = ctxIt.next();
-          segBase = ctx.docBase;
-          segMax = ctx.reader().maxDoc();
-          adjustedMax = segBase + segMax;
-        } while (doc >= adjustedMax);
-        assert doc >= ctx.docBase;
-        setNextReader(ctx);
-      }
-
-      int term = sortedDocValues.getOrd( doc );
-      int arrIdx = term - startTermIndex;
-      if (arrIdx>=0 && arrIdx<nTerms) {
-        countAcc.incrementCount(arrIdx, 1);
-        collect(doc - segBase, arrIdx);  // per-seg collectors
-        if (allBucketsSlot >= 0 && term >= 0) {
-          countAcc.incrementCount(allBucketsSlot, 1);
-          collect(doc - segBase, allBucketsSlot);  // per-seg collectors
+    Filter filter = fcontext.base.getTopFilter();
+
+    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
+      LeafReaderContext subCtx = leaves.get(subIdx);
+
+      setNextReaderFirstPhase(subCtx);
+
+      DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
+      DocIdSetIterator disi = dis.iterator();
+
+      SortedDocValues singleDv = null;
+      SortedSetDocValues multiDv = null;
+      if (multiValuedField) {
+        // TODO: get sub from multi?
+        multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
+        if (multiDv == null) {
+          multiDv = DocValues.emptySortedSet();
+        }
+        // some codecs may optimize SortedSet storage for single-valued fields
+        // this will be null if this is not a wrapped single valued docvalues.
+        if (unwrap_singleValued_multiDv) {
+          singleDv = DocValues.unwrapSingleton(multiDv);
+        }
+      } else {
+        singleDv = subCtx.reader().getSortedDocValues(sf.getName());
+        if (singleDv == null) {
+          singleDv = DocValues.emptySorted();
         }
       }
+
+      LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
+
+      if (singleDv != null) {
+        collectDocs(singleDv, disi, toGlobal);
+      } else {
+        collectDocs(multiDv, disi, toGlobal);
+      }
+    }
+
+  }
+
+  protected void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+    int doc;
+    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      int segOrd = singleDv.getOrd(doc);
+      collect(doc, segOrd, toGlobal);
+    }
+  }
+
+  protected void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+    int doc;
+    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+      multiDv.setDocument(doc);
+      int segOrd = (int)multiDv.nextOrd();
+      collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
+      if (segOrd < 0) continue;
+      for(;;) {
+        segOrd = (int)multiDv.nextOrd();
+        if (segOrd < 0) break;
+        collect(doc, segOrd, toGlobal);
+      }
+    }
+  }
+
+  private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
+    int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
+
+    int arrIdx = ord - startTermIndex;
+    if (arrIdx >= 0 && arrIdx < nTerms) {
+      countAcc.incrementCount(arrIdx, 1);
+      if (collectAcc != null) {
+        collectAcc.collect(doc, arrIdx);
+      }
+      // since this can be called for missing, we need to ensure it's currently not.
+      if (allBucketsAcc != null && ord >= 0) {
+        allBucketsAcc.collect(doc, arrIdx);
+      }
     }
   }
 
 }
 
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 // UnInvertedField implementation of field faceting
-class FacetFieldProcessorUIF extends FacetFieldProcessorFC {
+class FacetFieldProcessorUIF extends FacetFieldProcessorFCBase {
   UnInvertedField uif;
   TermsEnum te;
 
@@ -615,10 +1001,7 @@ class FacetFieldProcessorStream extends
     hasSubFacets = freq.subFacets.size() > 0;
     bucketsToSkip = freq.offset;
 
-
-
     createAccs(-1, 1);
-    prepareForCollection();
 
     // Minimum term docFreq in order to use the filterCache for that term.
     int defaultMinDf = Math.max(fcontext.searcher.maxDoc() >> 4, 3);  // (minimum of 3 is for test coverage purposes)
@@ -846,143 +1229,3 @@ class FacetFieldProcessorStream extends
 }
 
 
-
-class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
-  static boolean unwrap_singleValued_multiDv = true;  // only set to false for test coverage
-
-  boolean multiValuedField;
-  SortedSetDocValues si;  // only used for term lookups (for both single and multi-valued)
-  MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
-
-
-  public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
-    super(fcontext, freq, sf);
-    multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
-  }
-
-  protected BytesRef lookupOrd(int ord) throws IOException {
-    return si.lookupOrd(ord);
-  }
-
-  protected void findStartAndEndOrds() throws IOException {
-    if (multiValuedField) {
-      si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
-      if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
-        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
-      }
-    } else {
-      SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
-      si = DocValues.singleton(single);  // multi-valued view
-      if (single instanceof MultiDocValues.MultiSortedDocValues) {
-        ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
-      }
-    }
-
-    if (si.getValueCount() >= Integer.MAX_VALUE) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
-    }
-
-    if (prefixRef != null) {
-      startTermIndex = (int)si.lookupTerm(prefixRef.get());
-      if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
-      prefixRef.append(UnicodeUtil.BIG_TERM);
-      endTermIndex = (int)si.lookupTerm(prefixRef.get());
-      assert endTermIndex < 0;
-      endTermIndex = -endTermIndex - 1;
-    } else {
-      startTermIndex = 0;
-      endTermIndex = (int)si.getValueCount();
-    }
-
-    // optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
-    startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
-
-    nTerms = endTermIndex - startTermIndex;
-  }
-
-  @Override
-  protected void collectDocs() throws IOException {
-     if (nTerms <= 0 || fcontext.base.size() < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
-       return;
-     }
-
-    final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
-    Filter filter = fcontext.base.getTopFilter();
-
-    for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
-      LeafReaderContext subCtx = leaves.get(subIdx);
-
-      setNextReader(subCtx);
-
-      DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
-      DocIdSetIterator disi = dis.iterator();
-
-      SortedDocValues singleDv = null;
-      SortedSetDocValues multiDv = null;
-      if (multiValuedField) {
-        // TODO: get sub from multi?
-        multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
-        if (multiDv == null) {
-          multiDv = DocValues.emptySortedSet();
-        }
-        // some codecs may optimize SortedSet storage for single-valued fields
-        // this will be null if this is not a wrapped single valued docvalues.
-        if (unwrap_singleValued_multiDv) {
-          singleDv = DocValues.unwrapSingleton(multiDv);
-        }
-      } else {
-        singleDv = subCtx.reader().getSortedDocValues(sf.getName());
-        if (singleDv == null) {
-          singleDv = DocValues.emptySorted();
-        }
-      }
-
-      LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
-
-      if (singleDv != null) {
-        collectDocs(singleDv, disi, toGlobal);
-      } else {
-        collectDocs(multiDv, disi, toGlobal);
-      }
-    }
-
-  }
-
-  protected void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
-    int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      int segOrd = singleDv.getOrd(doc);
-      collect(doc, segOrd, toGlobal);
-    }
-  }
-
-  protected void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
-    int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      multiDv.setDocument(doc);
-      int segOrd = (int)multiDv.nextOrd();
-      collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
-      if (segOrd < 0) continue;
-      for(;;) {
-        segOrd = (int)multiDv.nextOrd();
-        if (segOrd < 0) break;
-        collect(doc, segOrd, toGlobal);
-      }
-    }
-  }
-
-  private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
-    int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
-
-    int arrIdx = ord - startTermIndex;
-    if (arrIdx >= 0 && arrIdx < nTerms) {
-      countAcc.incrementCount(arrIdx, 1);
-      collect(doc, arrIdx);  // per-seg collectors
-      if (allBucketsSlot >= 0 && ord >= 0) {
-        countAcc.incrementCount(allBucketsSlot, 1);
-        collect(doc, allBucketsSlot);  // per-seg collectors
-      }
-    }
-  }
-
-}

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java Fri Jul 10 03:47:54 2015
@@ -25,17 +25,13 @@ import java.util.List;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BitUtil;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocSet;
 
 class FacetFieldProcessorNumeric extends FacetFieldProcessor {
   static int MAXIMUM_STARTING_TABLE_SIZE=1024;  // must be a power of two, non-final to support setting by tests
@@ -151,11 +147,11 @@ class FacetFieldProcessorNumeric extends
     response = calcFacets();
   }
 
-
   private void doRehash(LongCounts table) {
-    if (accs.length == 0) return;  // TODO: FUTURE: only need to resize acc we will sort on
+    if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return;
 
     // Our "count" acc is backed by the hash table and will already be rehashed
+    // otherAccs don't need to be rehashed
 
     int newTableSize = table.numSlots();
     int numSlots = newTableSize;
@@ -164,7 +160,7 @@ class FacetFieldProcessorNumeric extends
     if (oldMissingSlot >= 0) {
       missingSlot = numSlots++;
     }
-    if (allBucketsSlot >= 0) {
+    if (oldAllBucketsSlot >= 0) {
       allBucketsSlot = numSlots++;
     }
 
@@ -192,8 +188,15 @@ class FacetFieldProcessorNumeric extends
       }
     };
 
-    for (SlotAcc acc : accs) {
-      acc.resize( resizer );
+    // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new slot directly
+    if (collectAcc != null) {
+      collectAcc.resize(resizer);
+    }
+    if (missingAcc != null) {
+      missingAcc.resize(resizer);
+    }
+    if (allBucketsAcc != null) {
+      allBucketsAcc.resize(resizer);
     }
   }
 
@@ -292,12 +295,17 @@ class FacetFieldProcessorNumeric extends
       }
     };
 
+    // we set the countAcc & indexAcc first so generic ones won't be created for us.
+    createCollectAcc(fcontext.base.size(), numSlots);
 
-    // we set the countAcc first so it won't be created here
-    createAccs(fcontext.base.size(), numSlots);
-    setSortAcc(numSlots);
-    prepareForCollection();
+    if (freq.allBuckets) {
+      allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
+    }
 
+    if (freq.missing) {
+      // TODO: optimize case when missingSlot can be contiguous with other slots
+      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
+    }
 
     NumericDocValues values = null;
     Bits docsWithField = null;
@@ -319,7 +327,7 @@ class FacetFieldProcessorNumeric extends
           adjustedMax = segBase + segMax;
         } while (doc >= adjustedMax);
         assert doc >= ctx.docBase;
-        setNextReader(ctx);
+        setNextReaderFirstPhase(ctx);
 
         values = DocValues.getNumeric(ctx.reader(), sf.getName());
         docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
@@ -328,19 +336,16 @@ class FacetFieldProcessorNumeric extends
       int segDoc = doc - segBase;
       long val = values.get(segDoc);
       if (val == 0 && !docsWithField.get(segDoc)) {
-        // missing
-        if (missingSlot >= 0) {
-          numMissing++;
-          collect(segDoc, missingSlot);
+        if (missingAcc != null) {
+          missingAcc.collect(segDoc, -1);
         }
       } else {
         int slot = table.add(val);  // this can trigger a rehash rehash
 
-        collect(segDoc, slot);
+        // countAcc.incrementCount(slot, 1);
+        // our countAcc is virtual, so this is not needed
 
-        if (allBucketsSlot >= 0) {
-          collect(segDoc, allBucketsSlot);
-        }
+        collectFirstPhase(segDoc, slot);
       }
     }
 
@@ -414,29 +419,16 @@ class FacetFieldProcessorNumeric extends
       SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
       // countAcc.setValues(allBuckets, allBucketsSlot);
       allBuckets.add("count", table.numAdds);
-      for (SlotAcc acc : accs) {
-        acc.setValues(allBuckets, allBucketsSlot);
-      }
+      allBucketsAcc.setValues(allBuckets, -1);
       // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
       res.add("allBuckets", allBuckets);
     }
 
     if (freq.missing) {
-      SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      // countAcc.setValues(missingBucket, missingSlot);
-      missingBucket.add("count", numMissing);
-      for (SlotAcc acc : accs) {
-        acc.setValues(missingBucket, missingSlot);
-      }
+      // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
 
-      if (freq.getSubFacets().size() > 0) {
-        // TODO: we can do better than this!
-        DocSet missingDocSet = null;
-        if (missingDocSet == null) {
-          missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-        }
-        processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
-      }
+      SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
       res.add("missing", missingBucket);
     }
 
@@ -451,26 +443,16 @@ class FacetFieldProcessorNumeric extends
     ArrayList bucketList = new ArrayList(collectCount);
     res.add("buckets", bucketList);
 
+    boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
 
     for (int slotNum : sortedSlots) {
       SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
       Comparable val = calc.bitsToValue(table.vals[slotNum]);
       bucket.add("val", val);
 
-      // add stats for this bucket
-      // TODO: this gets count from countAcc
-      // addStats(bucket, slotNum);
-      bucket.add("count", table.counts[slotNum]);
-
-      for (SlotAcc acc : accs) {
-        acc.setValues(bucket, slotNum);
-      }
+      Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;
 
-      // handle sub-facets for this bucket
-      if (freq.getSubFacets().size() > 0) {
-        Query filter = sf.getType().getFieldQuery(null, sf, calc.formatValue(val));
-        processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
-      }
+      fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);
 
       bucketList.add(bucket);
     }

Copied: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java (from r1690189, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java?p2=lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java&p1=lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java&r1=1690189&r2=1690199&rev=1690199&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java Fri Jul 10 03:47:54 2015
@@ -329,9 +329,14 @@ public class FacetProcessor<FacetRequest
   public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
     SchemaField sf = searcher.getSchema().getField(fieldName);
     Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
+    BooleanQuery noVal = new BooleanQuery();
+    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
+    return noVal;
+    /*** Lucene 6 version... BooleanQuery.Builder is not yet part of Lucene 5
     BooleanQuery.Builder noVal = new BooleanQuery.Builder();
     noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
     return noVal.build();
+     ***/
   }
 
 }

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java Fri Jul 10 03:47:54 2015
@@ -265,7 +265,6 @@ class FacetRangeProcessor extends FacetP
 
 
     createAccs(fcontext.base.size(), slotCount);
-    prepareForCollection();
 
     for (int idx = 0; idx<rangeList.size(); idx++) {
       rangeStats(rangeList.get(idx), idx);

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Fri Jul 10 03:47:54 2015
@@ -132,302 +132,6 @@ class FacetContext {
 }
 
 
-class FacetProcessor<FacetRequestT extends FacetRequest>  {
-  protected SimpleOrderedMap<Object> response;
-  protected FacetContext fcontext;
-  protected FacetRequestT freq;
-
-  LinkedHashMap<String,SlotAcc> accMap;
-  protected SlotAcc[] accs;
-  protected CountSlotAcc countAcc;
-
-  FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
-    this.fcontext = fcontext;
-    this.freq = freq;
-  }
-
-  public void process() throws IOException {
-    handleDomainChanges();
-  }
-
-  protected void handleDomainChanges() throws IOException {
-    if (freq.domain == null) return;
-    handleFilterExclusions();
-    handleBlockJoin();
-  }
-
-  private void handleBlockJoin() throws IOException {
-    if (!(freq.domain.toChildren || freq.domain.toParent)) return;
-
-    // TODO: avoid query parsing per-bucket somehow...
-    String parentStr = freq.domain.parents;
-    Query parentQuery;
-    try {
-      QParser parser = QParser.getParser(parentStr, null, fcontext.req);
-      parentQuery = parser.getQuery();
-    } catch (SyntaxError err) {
-      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
-    }
-
-    BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
-    DocSet input = fcontext.base;
-    DocSet result;
-
-    if (freq.domain.toChildren) {
-      DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
-      result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
-    } else {
-      result = BlockJoin.toParents(input, parents, fcontext.qcontext);
-    }
-
-    fcontext.base = result;
-  }
-
-  private void handleFilterExclusions() throws IOException {
-    List<String> excludeTags = freq.domain.excludeTags;
-
-    if (excludeTags == null || excludeTags.size() == 0) {
-      return;
-    }
-
-    // TODO: somehow remove responsebuilder dependency
-    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
-    Map tagMap = (Map) rb.req.getContext().get("tags");
-    if (tagMap == null) {
-      // no filters were tagged
-      return;
-    }
-
-    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
-    for (String excludeTag : excludeTags) {
-      Object olst = tagMap.get(excludeTag);
-      // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
-      if (!(olst instanceof Collection)) continue;
-      for (Object o : (Collection<?>)olst) {
-        if (!(o instanceof QParser)) continue;
-        QParser qp = (QParser)o;
-        try {
-          excludeSet.put(qp.getQuery(), Boolean.TRUE);
-        } catch (SyntaxError syntaxError) {
-          // This should not happen since we should only be retrieving a previously parsed query
-          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
-        }
-      }
-    }
-    if (excludeSet.size() == 0) return;
-
-    List<Query> qlist = new ArrayList<>();
-
-    // add the base query
-    if (!excludeSet.containsKey(rb.getQuery())) {
-      qlist.add(rb.getQuery());
-    }
-
-    // add the filters
-    if (rb.getFilters() != null) {
-      for (Query q : rb.getFilters()) {
-        if (!excludeSet.containsKey(q)) {
-          qlist.add(q);
-        }
-      }
-    }
-
-    // now walk back up the context tree
-    // TODO: we lose parent exclusions...
-    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
-      if (curr.filter != null) {
-        qlist.add( curr.filter );
-      }
-    }
-
-    // recompute the base domain
-    fcontext.base = fcontext.searcher.getDocSet(qlist);
-  }
-
-
-  public Object getResponse() {
-    return null;
-  }
-
-
-  protected void createAccs(int docCount, int slotCount) throws IOException {
-    accMap = new LinkedHashMap<String,SlotAcc>();
-
-    // allow a custom count acc to be used
-    if (countAcc == null) {
-      countAcc = new CountSlotArrAcc(fcontext, slotCount);
-      countAcc.key = "count";
-    }
-
-    for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
-      SlotAcc acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
-      acc.key = entry.getKey();
-      accMap.put(acc.key, acc);
-    }
-  }
-
-  /** Create the actual accs array from accMap before starting to collect stats. */
-  protected void prepareForCollection() {
-    accs = new SlotAcc[accMap.size()];
-    int i=0;
-    for (SlotAcc acc : accMap.values()) {
-      accs[i++] = acc;
-    }
-  }
-
-  protected void resetStats() {
-    countAcc.reset();
-    for (SlotAcc acc : accs) {
-      acc.reset();
-    }
-  }
-
-  protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
-    if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
-      bucket.add("count", docCount);
-      return;
-    }
-    createAccs(docCount, 1);
-    prepareForCollection();
-    int collected = collect(docs, 0);
-    countAcc.incrementCount(0, collected);
-    assert collected == docCount;
-    addStats(bucket, 0);
-  }
-
-
-  protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
-
-    // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
-    // should we check for domain-altering exclusions, or even ask the sub-facet for
-    // it's domain and then only skip it if it's 0?
-
-    if (domain == null || domain.size() == 0 && !freq.processEmpty) {
-      return;
-    }
-
-    for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
-      // make a new context for each sub-facet since they can change the domain
-      FacetContext subContext = fcontext.sub(filter, domain);
-      FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
-      subProcessor.process();
-      response.add( sub.getKey(), subProcessor.getResponse() );
-    }
-  }
-
-  int collect(DocSet docs, int slot) throws IOException {
-    int count = 0;
-    SolrIndexSearcher searcher = fcontext.searcher;
-
-    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
-    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
-    LeafReaderContext ctx = null;
-    int segBase = 0;
-    int segMax;
-    int adjustedMax = 0;
-    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
-      final int doc = docsIt.nextDoc();
-      if (doc >= adjustedMax) {
-        do {
-          ctx = ctxIt.next();
-          if (ctx == null) {
-            // should be impossible
-            throw new RuntimeException("INTERNAL FACET ERROR");
-          }
-          segBase = ctx.docBase;
-          segMax = ctx.reader().maxDoc();
-          adjustedMax = segBase + segMax;
-        } while (doc >= adjustedMax);
-        assert doc >= ctx.docBase;
-        setNextReader(ctx);
-      }
-      count++;
-      collect(doc - segBase, slot);  // per-seg collectors
-    }
-    return count;
-  }
-
-  void collect(int segDoc, int slot) throws IOException {
-    for (SlotAcc acc : accs) {
-      acc.collect(segDoc, slot);
-    }
-  }
-
-  void setNextReader(LeafReaderContext ctx) throws IOException {
-    // countAcc.setNextReader is a no-op
-    for (SlotAcc acc : accs) {
-      acc.setNextReader(ctx);
-    }
-  }
-
-  void addStats(SimpleOrderedMap<Object> target, int slotNum) throws IOException {
-    int count = countAcc.getCount(slotNum);
-    target.add("count", count);
-    if (count > 0 || freq.processEmpty) {
-      for (SlotAcc acc : accs) {
-        acc.setValues(target, slotNum);
-      }
-    }
-  }
-
-
-  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
-    boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
-
-    // TODO: always collect counts or not???
-
-    DocSet result = null;
-    int count;
-
-    if (needDocSet) {
-      if (q == null) {
-        result = fcontext.base;
-        // result.incref(); // OFF-HEAP
-      } else {
-        result = fcontext.searcher.getDocSet(q, fcontext.base);
-      }
-      count = result.size();
-    } else {
-      if (q == null) {
-        count = fcontext.base.size();
-      } else {
-        count = fcontext.searcher.numDocs(q, fcontext.base);
-      }
-    }
-
-    try {
-      processStats(bucket, result, (int) count);
-      processSubs(bucket, q, result);
-    } finally {
-      if (result != null) {
-        // result.decref(); // OFF-HEAP
-        result = null;
-      }
-    }
-  }
-
-  public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
-    SchemaField sf = searcher.getSchema().getField(fieldName);
-    DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
-    DocSet answer = docs.andNot(hasVal);
-    // hasVal.decref(); // OFF-HEAP
-    return answer;
-  }
-
-  public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
-    SchemaField sf = searcher.getSchema().getField(fieldName);
-    Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
-    BooleanQuery noVal = new BooleanQuery();
-    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
-    return noVal;
-  }
-
-}
-
-
-
-
-
 abstract class FacetParser<FacetRequestT extends FacetRequest> {
   protected FacetRequestT facet;
   protected FacetParser parent;

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java Fri Jul 10 03:47:54 2015
@@ -29,6 +29,8 @@ import org.apache.lucene.util.FixedBitSe
 import org.apache.lucene.util.LongValues;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
 
 import java.io.Closeable;
@@ -36,6 +38,7 @@ import java.io.IOException;
 import java.lang.reflect.Array;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Iterator;
 import java.util.List;
 
 
@@ -52,6 +55,39 @@ public abstract class SlotAcc implements
 
   public abstract void collect(int doc, int slot) throws IOException;
 
+  public int collect(DocSet docs, int slot) throws IOException {
+    int count = 0;
+    SolrIndexSearcher searcher = fcontext.searcher;
+
+    final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
+    final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
+    LeafReaderContext ctx = null;
+    int segBase = 0;
+    int segMax;
+    int adjustedMax = 0;
+    for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
+      final int doc = docsIt.nextDoc();
+      if (doc >= adjustedMax) {
+        do {
+          ctx = ctxIt.next();
+          if (ctx == null) {
+            // should be impossible
+            throw new RuntimeException("INTERNAL FACET ERROR");
+          }
+          segBase = ctx.docBase;
+          segMax = ctx.reader().maxDoc();
+          adjustedMax = segBase + segMax;
+        } while (doc >= adjustedMax);
+        assert doc >= ctx.docBase;
+        setNextReader(ctx);
+      }
+      count++;
+      collect(doc - segBase, slot);  // per-seg collectors
+    }
+    return count;
+  }
+
+
   public abstract int compare(int slotA, int slotB);
 
   public abstract Object getValue(int slotNum) throws IOException;

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java Fri Jul 10 03:47:54 2015
@@ -240,7 +240,7 @@ public class UnInvertedField extends Doc
       return te;
     }
 
-    public void getTerms(int doc, Callback target) throws IOException {
+    public void getBigTerms(int doc, Callback target) throws IOException {
       if (bigTermSets != null) {
         for (int i=0; i<bigTermSets.length; i++) {
           if (bigTermSets[i].exists(doc)) {
@@ -248,12 +248,9 @@ public class UnInvertedField extends Doc
           }
         }
       }
-
-      getNonBigTerms(doc, target);
     }
 
-
-    public void getNonBigTerms(int doc, Callback target) {
+    public void getSmallTerms(int doc, Callback target) {
       if (termInstances > 0) {
         int code = index[doc];
 
@@ -309,6 +306,7 @@ public class UnInvertedField extends Doc
     int baseSize = docs.size();
     int maxDoc = searcher.maxDoc();
 
+    // what about allBuckets?
     if (baseSize < processor.effectiveMincount) {
       return;
     }
@@ -382,6 +380,7 @@ public class UnInvertedField extends Doc
       }
     }
 
+    /*** TODO - future optimization to handle allBuckets
     if (processor.allBucketsSlot >= 0) {
       int all = 0;  // overflow potential
       for (int i=0; i<numTermsInField; i++) {
@@ -389,33 +388,14 @@ public class UnInvertedField extends Doc
       }
       counts.incrementCount(processor.allBucketsSlot, all);
     }
+     ***/
   }
 
 
 
   public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
-    if (processor.accs.length == 0 && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField)
-    {
+    if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
       getCounts(processor, processor.countAcc);
-
-      /*** debugging
-      int sz = processor.countAcc.getCountArray().length;
-      CountSlotAcc acc = processor.countAcc;
-      CountSlotAcc acc2 = new CountSlotAcc(processor.fcontext, sz);
-      processor.countAcc = acc2;
-      collectDocsGeneric(processor); // hopefully we can call this again?
-
-      for (int i=0; i<sz; i++) {
-        if (acc.getCount(i) != acc2.getCount(i)) {
-          System.out.println("ERROR! ERROR! i=" + i + " counts=" + acc.getCount(i) + " " + acc2.getCount(i));
-          CountSlotAcc acc3 = new CountSlotAcc(processor.fcontext, sz);  // put breakpoint here and re-execute
-          processor.countAcc = acc3;
-          int[] arr3 = processor.countAcc.getCountArray();
-          getCountsInArray(processor, arr3);
-        }
-      }
-       ***/
-
       return;
     }
 
@@ -433,18 +413,15 @@ public class UnInvertedField extends Doc
     DocSet docs = processor.fcontext.base;
 
     int uniqueTerms = 0;
+    final CountSlotAcc countAcc = processor.countAcc;
 
     for (TopTerm tt : bigTerms.values()) {
       if (tt.termNum >= startTermIndex && tt.termNum < endTermIndex) {
         // handle the biggest terms
         try ( DocSet intersection = searcher.getDocSet(tt.termQuery, docs); )
         {
-          int collected = processor.collect(intersection, tt.termNum - startTermIndex);
-          processor.countAcc.incrementCount(tt.termNum - startTermIndex, collected);
-          if (processor.allBucketsSlot >= 0) {
-            processor.collect(intersection, processor.allBucketsSlot);
-            processor.countAcc.incrementCount(processor.allBucketsSlot, collected);
-          }
+          int collected = processor.collectFirstPhase(intersection, tt.termNum - startTermIndex);
+          countAcc.incrementCount(tt.termNum - startTermIndex, collected);
           if (collected > 0) {
             uniqueTerms++;
           }
@@ -452,6 +429,7 @@ public class UnInvertedField extends Doc
       }
     }
 
+
     if (termInstances > 0) {
 
       final List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();
@@ -480,7 +458,7 @@ public class UnInvertedField extends Doc
             adjustedMax = segBase + segMax;
           } while (doc >= adjustedMax);
           assert doc >= ctx.docBase;
-          processor.setNextReader(ctx);
+          processor.setNextReaderFirstPhase(ctx);
         }
         int segDoc = doc - segBase;
 
@@ -504,12 +482,8 @@ public class UnInvertedField extends Doc
             int arrIdx = tnum - startTermIndex;
             if (arrIdx < 0) continue;
             if (arrIdx >= nTerms) break;
-            processor.countAcc.incrementCount(arrIdx, 1);
-            processor.collect(segDoc, arrIdx);
-            if (processor.allBucketsSlot >= 0) {
-              processor.countAcc.incrementCount(processor.allBucketsSlot, 1);
-              processor.collect(segDoc, processor.allBucketsSlot);
-            }
+            countAcc.incrementCount(arrIdx, 1);
+            processor.collectFirstPhase(segDoc, arrIdx);
           }
         } else {
           int tnum = 0;
@@ -522,13 +496,8 @@ public class UnInvertedField extends Doc
               int arrIdx = tnum - startTermIndex;
               if (arrIdx < 0) continue;
               if (arrIdx >= nTerms) break;
-              processor.countAcc.incrementCount(arrIdx, 1);
-              processor.collect(segDoc, arrIdx);
-              if (processor.allBucketsSlot >= 0) {
-                processor.countAcc.incrementCount(processor.allBucketsSlot, 1);
-                processor.collect(segDoc, processor.allBucketsSlot);
-              }
-
+              countAcc.incrementCount(arrIdx, 1);
+              processor.collectFirstPhase(segDoc, arrIdx);
               delta = 0;
             }
             code >>>= 8;

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java?rev=1690199&r1=1690198&r2=1690199&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java Fri Jul 10 03:47:54 2015
@@ -311,7 +311,8 @@ class UniqueMultivaluedSlotAcc extends U
       bits = new FixedBitSet(nTerms);
       arr[slotNum] = bits;
     }
-    docToTerm.getTerms(doc + currentDocBase, this);  // this will call back to our Callback.call(int termNum)
+    docToTerm.getBigTerms(doc + currentDocBase, this);  // this will call back to our Callback.call(int termNum)
+    docToTerm.getSmallTerms(doc + currentDocBase, this);
   }
 
   @Override