Posted to commits@lucene.apache.org by ds...@apache.org on 2016/08/16 03:38:31 UTC
[1/2] lucene-solr:master: SOLR-9404: Refactor move/renames in JSON
FacetProcessor and FacetFieldProcessor.
Repository: lucene-solr
Updated Branches:
refs/heads/master 6d1f1f6c7 -> 7072458ea
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
new file mode 100644
index 0000000..842df20
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
@@ -0,0 +1,439 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocIterator;
+
+/**
+ * Facets numbers into a hash table.
+ * It currently only works with {@link NumericDocValues} (single-valued).
+ */
+class FacetFieldProcessorByHashNumeric extends FacetFieldProcessor {
+ static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
+
+ /** a hash table with long keys (what we're counting) and integer values (counts) */
+ private static class LongCounts {
+
+ static final float LOAD_FACTOR = 0.7f;
+
+ long numAdds;
+ long[] vals;
+ int[] counts; // maintain the counts here, since we need them anyway to tell whether a slot actually holds a value
+ int[] oldToNewMapping;
+
+ int cardinality;
+ int threshold;
+
+ /** sz must be a power of two */
+ LongCounts(int sz) {
+ vals = new long[sz];
+ counts = new int[sz];
+ threshold = (int) (sz * LOAD_FACTOR);
+ }
+
+ /** Current number of slots in the hash table */
+ int numSlots() {
+ return vals.length;
+ }
+
+ private int hash(long val) {
+ // For floats: exponent bits start at bit 23 for single precision,
+ // and bit 52 for double precision.
+ // Many values will only have significant bits just to the right of that,
+ // and the leftmost bits will all be zero.
+
+ // For now, let's just settle for getting the first 8 significant mantissa bits of a double or float into the lowest bits of our hash.
+ // The upper bits of our hash will be irrelevant.
+ int h = (int) (val + (val >>> 44) + (val >>> 15));
+ return h;
+ }
+
+ /** returns the slot */
+ int add(long val) {
+ if (cardinality >= threshold) {
+ rehash();
+ }
+
+ numAdds++;
+ int h = hash(val);
+ for (int slot = h & (vals.length-1); ;slot = (slot + ((h>>7)|1)) & (vals.length-1)) {
+ int count = counts[slot];
+ if (count == 0) {
+ counts[slot] = 1;
+ vals[slot] = val;
+ cardinality++;
+ return slot;
+ } else if (vals[slot] == val) {
+ // val is already in the set
+ counts[slot] = count + 1;
+ return slot;
+ }
+ }
+ }
+
+ protected void rehash() {
+ long[] oldVals = vals;
+ int[] oldCounts = counts; // after retrieving the count, this array is reused as a mapping to the new array
+ int newCapacity = vals.length << 1;
+ vals = new long[newCapacity];
+ counts = new int[newCapacity];
+ threshold = (int) (newCapacity * LOAD_FACTOR);
+
+ for (int i=0; i<oldVals.length; i++) {
+ int count = oldCounts[i];
+ if (count == 0) {
+ oldCounts[i] = -1;
+ continue;
+ }
+
+ long val = oldVals[i];
+
+ int h = hash(val);
+ int slot = h & (vals.length-1);
+ while (counts[slot] != 0) {
+ slot = (slot + ((h>>7)|1)) & (vals.length-1);
+ }
+ counts[slot] = count;
+ vals[slot] = val;
+ oldCounts[i] = slot;
+ }
+
+ oldToNewMapping = oldCounts;
+ }
+
+ int cardinality() {
+ return cardinality;
+ }
+
+ }
+
+ int allBucketsSlot = -1;
+
+ FacetFieldProcessorByHashNumeric(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ }
+
+ @Override
+ public void process() throws IOException {
+ super.process();
+ response = calcFacets();
+ }
+
+ private SimpleOrderedMap<Object> calcFacets() throws IOException {
+
+ final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);
+
+ // TODO: it would be really nice to know the number of unique values!!!!
+
+ int possibleValues = fcontext.base.size();
+ // size smaller tables so that no resize will be necessary
+ int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
+ currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
+ final LongCounts table = new LongCounts(currHashSize) {
+ @Override
+ protected void rehash() {
+ super.rehash();
+ doRehash(this);
+ oldToNewMapping = null; // allow for gc
+ }
+ };
+
+ int numSlots = currHashSize;
+
+ int numMissing = 0;
+
+ if (freq.allBuckets) {
+ allBucketsSlot = numSlots++;
+ }
+
+ indexOrderAcc = new SlotAcc(fcontext) {
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ long s1 = calc.bitsToSortableBits(table.vals[slotA]);
+ long s2 = calc.bitsToSortableBits(table.vals[slotB]);
+ return Long.compare(s1, s2);
+ }
+
+ @Override
+ public Object getValue(int slotNum) throws IOException {
+ return null;
+ }
+
+ @Override
+ public void reset() {
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ }
+ };
+
+ countAcc = new CountSlotAcc(fcontext) {
+ @Override
+ public void incrementCount(int slot, int count) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getCount(int slot) {
+ return table.counts[slot];
+ }
+
+ @Override
+ public Object getValue(int slotNum) {
+ return getCount(slotNum);
+ }
+
+ @Override
+ public void reset() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ return Integer.compare( table.counts[slotA], table.counts[slotB] );
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ throw new UnsupportedOperationException();
+ }
+ };
+
+ // we set the countAcc & indexAcc first so generic ones won't be created for us.
+ createCollectAcc(fcontext.base.size(), numSlots);
+
+ if (freq.allBuckets) {
+ allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
+ }
+
+ NumericDocValues values = null;
+ Bits docsWithField = null;
+
+ // TODO: factor this code out so it can be shared...
+ final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
+ final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
+ LeafReaderContext ctx = null;
+ int segBase = 0;
+ int segMax;
+ int adjustedMax = 0;
+ for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext(); ) {
+ final int doc = docsIt.nextDoc();
+ if (doc >= adjustedMax) {
+ do {
+ ctx = ctxIt.next();
+ segBase = ctx.docBase;
+ segMax = ctx.reader().maxDoc();
+ adjustedMax = segBase + segMax;
+ } while (doc >= adjustedMax);
+ assert doc >= ctx.docBase;
+ setNextReaderFirstPhase(ctx);
+
+ values = DocValues.getNumeric(ctx.reader(), sf.getName());
+ docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
+ }
+
+ int segDoc = doc - segBase;
+ long val = values.get(segDoc);
+ if (val != 0 || docsWithField.get(segDoc)) {
+ int slot = table.add(val); // this can trigger a rehash
+
+ // countAcc.incrementCount(slot, 1);
+ // our countAcc is virtual, so this is not needed
+
+ collectFirstPhase(segDoc, slot);
+ }
+ }
+
+ //
+ // collection done, time to find the top slots
+ //
+
+ int numBuckets = 0;
+ List<Object> bucketVals = null;
+ if (freq.numBuckets && fcontext.isShard()) {
+ bucketVals = new ArrayList<>(100);
+ }
+
+ int off = fcontext.isShard() ? 0 : (int) freq.offset;
+ // add a modest amount of over-request if this is a shard request
+ int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
+
+ int maxsize = (int)(freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
+ maxsize = Math.min(maxsize, table.cardinality);
+
+ final int sortMul = freq.sortDirection.getMultiplier();
+
+ PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
+ @Override
+ protected boolean lessThan(Slot a, Slot b) {
+ // TODO: sort-by-index-order
+ int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
+ return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
+ }
+ };
+
+ // TODO: create a countAcc that wraps the table so we can reuse more code?
+
+ Slot bottom = null;
+ for (int i=0; i<table.counts.length; i++) {
+ int count = table.counts[i];
+ if (count < effectiveMincount) {
+ // either not a valid slot, or count not high enough
+ continue;
+ }
+ numBuckets++; // can be different from the table cardinality if mincount > 1
+
+ long val = table.vals[i];
+ if (bucketVals != null && bucketVals.size()<100) {
+ bucketVals.add( calc.bitsToValue(val) );
+ }
+
+ if (bottom == null) {
+ bottom = new Slot();
+ }
+ bottom.slot = i;
+
+ bottom = queue.insertWithOverflow(bottom);
+ }
+
+ SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
+ if (freq.numBuckets) {
+ if (!fcontext.isShard()) {
+ res.add("numBuckets", numBuckets);
+ } else {
+ SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
+ map.add("numBuckets", numBuckets);
+ map.add("vals", bucketVals);
+ res.add("numBuckets", map);
+ }
+ }
+
+ FacetDebugInfo fdebug = fcontext.getDebugInfo();
+ if (fdebug != null) fdebug.putInfoItem("numBuckets", (long) numBuckets);
+
+ if (freq.allBuckets) {
+ SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
+ // countAcc.setValues(allBuckets, allBucketsSlot);
+ allBuckets.add("count", table.numAdds);
+ allBucketsAcc.setValues(allBuckets, -1);
+ // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
+ res.add("allBuckets", allBuckets);
+ }
+
+ if (freq.missing) {
+ // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
+
+ SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
+ fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
+ res.add("missing", missingBucket);
+ }
+
+ // if we are deep paging, we don't have to order the highest "offset" counts.
+ int collectCount = Math.max(0, queue.size() - off);
+ assert collectCount <= lim;
+ int[] sortedSlots = new int[collectCount];
+ for (int i = collectCount - 1; i >= 0; i--) {
+ sortedSlots[i] = queue.pop().slot;
+ }
+
+ ArrayList<SimpleOrderedMap> bucketList = new ArrayList<>(collectCount);
+ res.add("buckets", bucketList);
+
+ boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
+
+ for (int slotNum : sortedSlots) {
+ SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+ Comparable val = calc.bitsToValue(table.vals[slotNum]);
+ bucket.add("val", val);
+
+ Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;
+
+ fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);
+
+ bucketList.add(bucket);
+ }
+
+ return res;
+ }
+
+ private void doRehash(LongCounts table) {
+ if (collectAcc == null && allBucketsAcc == null) return;
+
+ // Our "count" acc is backed by the hash table and will already be rehashed
+ // otherAccs don't need to be rehashed
+
+ int newTableSize = table.numSlots();
+ int numSlots = newTableSize;
+ final int oldAllBucketsSlot = allBucketsSlot;
+ if (oldAllBucketsSlot >= 0) {
+ allBucketsSlot = numSlots++;
+ }
+
+ final int finalNumSlots = numSlots;
+ final int[] mapping = table.oldToNewMapping;
+
+ SlotAcc.Resizer resizer = new SlotAcc.Resizer() {
+ @Override
+ public int getNewSize() {
+ return finalNumSlots;
+ }
+
+ @Override
+ public int getNewSlot(int oldSlot) {
+ if (oldSlot < mapping.length) {
+ return mapping[oldSlot];
+ }
+ if (oldSlot == oldAllBucketsSlot) {
+ return allBucketsSlot;
+ }
+ return -1;
+ }
+ };
+
+ // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new slot directly
+ if (collectAcc != null) {
+ collectAcc.resize(resizer);
+ }
+ if (allBucketsAcc != null) {
+ allBucketsAcc.resize(resizer);
+ }
+ }
+}
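
[Editor's note on the file above] The heart of the new FacetFieldProcessorByHashNumeric is the LongCounts table: an open-addressed hash table with a power-of-two capacity, a count of 0 marking an empty slot, and double hashing with an odd probe step ((h >> 7) | 1) so the probe cycles through every slot. A minimal, self-contained sketch of that scheme follows (class and field names simplified; an illustration of the technique, not the actual Solr class):

// Sketch of the LongCounts idea: an open-addressed table mapping long values
// to counts. Assumptions: capacity is a power of two, count == 0 marks an
// empty slot, and the probe step ((h >> 7) | 1) is odd, so it visits every
// slot of a power-of-two table before repeating.
class LongCountsSketch {
  static final float LOAD_FACTOR = 0.7f;
  long[] vals = new long[16];
  int[] counts = new int[16];
  int cardinality;
  int threshold = (int) (16 * LOAD_FACTOR);

  int hash(long val) {
    // fold high bits down so nearby doubles/floats don't all collide
    return (int) (val + (val >>> 44) + (val >>> 15));
  }

  int add(long val) {
    if (cardinality >= threshold) rehash();
    int h = hash(val);
    for (int slot = h & (vals.length - 1); ; slot = (slot + ((h >> 7) | 1)) & (vals.length - 1)) {
      if (counts[slot] == 0) {        // empty slot: claim it
        vals[slot] = val;
        counts[slot] = 1;
        cardinality++;
        return slot;
      } else if (vals[slot] == val) { // value already present: bump the count
        counts[slot]++;
        return slot;
      }
    }
  }

  void rehash() {
    long[] oldVals = vals;
    int[] oldCounts = counts;
    vals = new long[oldVals.length << 1];
    counts = new int[oldCounts.length << 1];
    threshold = (int) (vals.length * LOAD_FACTOR);
    for (int i = 0; i < oldVals.length; i++) {
      if (oldCounts[i] == 0) continue;
      int h = hash(oldVals[i]);
      int slot = h & (vals.length - 1);
      while (counts[slot] != 0) slot = (slot + ((h >> 7) | 1)) & (vals.length - 1);
      vals[slot] = oldVals[i];
      counts[slot] = oldCounts[i];
    }
  }
}
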
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
deleted file mode 100644
index 12056aa..0000000
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorDV.java
+++ /dev/null
@@ -1,291 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.search.facet;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LongValues;
-import org.apache.lucene.util.UnicodeUtil;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.Filter;
-
-class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
- static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage
-
- boolean multiValuedField;
- SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
- MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
-
-
- public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
- }
-
- protected BytesRef lookupOrd(int ord) throws IOException {
- return si.lookupOrd(ord);
- }
-
- protected void findStartAndEndOrds() throws IOException {
- if (multiValuedField) {
- si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
- if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
- ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
- }
- } else {
- SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
- si = DocValues.singleton(single); // multi-valued view
- if (single instanceof MultiDocValues.MultiSortedDocValues) {
- ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
- }
- }
-
- if (si.getValueCount() >= Integer.MAX_VALUE) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
- }
-
- if (prefixRef != null) {
- startTermIndex = (int)si.lookupTerm(prefixRef.get());
- if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
- prefixRef.append(UnicodeUtil.BIG_TERM);
- endTermIndex = (int)si.lookupTerm(prefixRef.get());
- assert endTermIndex < 0;
- endTermIndex = -endTermIndex - 1;
- } else {
- startTermIndex = 0;
- endTermIndex = (int)si.getValueCount();
- }
-
- nTerms = endTermIndex - startTermIndex;
- }
-
- @Override
- protected void collectDocs() throws IOException {
- int domainSize = fcontext.base.size();
-
- if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
- return;
- }
-
- // TODO: refactor some of this logic into a base class
- boolean countOnly = collectAcc==null && allBucketsAcc==null;
- boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
-
- // Are we expecting many hits per bucket?
- // FUTURE: pro-rate for nTerms?
- // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
- // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
- // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
- // than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each.
- // this was for heap docvalues produced by UninvertingReader
- // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
- long domainMultiplier = multiValuedField ? 4L : 2L;
- boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
-
- // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
- // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
- // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
- // the docid is not used)
- boolean canDoPerSeg = countOnly && fullRange;
- boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
-
- if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
-
- final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
- Filter filter = fcontext.base.getTopFilter();
-
- for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
- LeafReaderContext subCtx = leaves.get(subIdx);
-
- setNextReaderFirstPhase(subCtx);
-
- DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
- DocIdSetIterator disi = dis.iterator();
-
- SortedDocValues singleDv = null;
- SortedSetDocValues multiDv = null;
- if (multiValuedField) {
- // TODO: get sub from multi?
- multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
- if (multiDv == null) {
- multiDv = DocValues.emptySortedSet();
- }
- // some codecs may optimize SortedSet storage for single-valued fields
- // this will be null if this is not a wrapped single valued docvalues.
- if (unwrap_singleValued_multiDv) {
- singleDv = DocValues.unwrapSingleton(multiDv);
- }
- } else {
- singleDv = subCtx.reader().getSortedDocValues(sf.getName());
- if (singleDv == null) {
- singleDv = DocValues.emptySorted();
- }
- }
-
- LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
-
- if (singleDv != null) {
- if (accumSeg) {
- collectPerSeg(singleDv, disi, toGlobal);
- } else {
- if (canDoPerSeg && toGlobal != null) {
- collectCounts(singleDv, disi, toGlobal);
- } else {
- collectDocs(singleDv, disi, toGlobal);
- }
- }
- } else {
- if (accumSeg) {
- collectPerSeg(multiDv, disi, toGlobal);
- } else {
- if (canDoPerSeg && toGlobal != null) {
- collectCounts(multiDv, disi, toGlobal);
- } else {
- collectDocs(multiDv, disi, toGlobal);
- }
- }
- }
- }
-
- reuse = null; // better GC
- }
-
- private int[] reuse;
- private int[] getCountArr(int maxNeeded) {
- if (reuse == null) {
- // make the count array large enough for any segment
- // FUTURE: (optionally) directly use the array of the CountAcc for an optimized index..
- reuse = new int[(int) si.getValueCount() + 1];
- } else {
- Arrays.fill(reuse, 0, maxNeeded, 0);
- }
- return reuse;
- }
-
- private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int segMax = singleDv.getValueCount() + 1;
- final int[] counts = getCountArr( segMax );
-
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- counts[ singleDv.getOrd(doc) + 1 ]++;
- }
-
- for (int i=1; i<segMax; i++) {
- int segCount = counts[i];
- if (segCount > 0) {
- int slot = toGlobal == null ? (i - 1) : (int) toGlobal.get(i - 1);
- countAcc.incrementCount(slot, segCount);
- }
- }
- }
-
-
- private void collectPerSeg(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int segMax = (int)multiDv.getValueCount();
- final int[] counts = getCountArr( segMax );
-
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- multiDv.setDocument(doc);
- for(;;) {
- int segOrd = (int)multiDv.nextOrd();
- if (segOrd < 0) break;
- counts[segOrd]++;
- }
- }
-
- for (int i=0; i<segMax; i++) {
- int segCount = counts[i];
- if (segCount > 0) {
- int slot = toGlobal == null ? (i) : (int) toGlobal.get(i);
- countAcc.incrementCount(slot, segCount);
- }
- }
- }
-
- private void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int segOrd = singleDv.getOrd(doc);
- if (segOrd < 0) continue;
- collect(doc, segOrd, toGlobal);
- }
- }
-
- private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int segOrd = singleDv.getOrd(doc);
- if (segOrd < 0) continue;
- int ord = (int)toGlobal.get(segOrd);
- countAcc.incrementCount(ord, 1);
- }
- }
-
- private void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- multiDv.setDocument(doc);
- for(;;) {
- int segOrd = (int)multiDv.nextOrd();
- if (segOrd < 0) break;
- collect(doc, segOrd, toGlobal);
- }
- }
- }
-
- private void collectCounts(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
- int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- multiDv.setDocument(doc);
- for(;;) {
- int segOrd = (int)multiDv.nextOrd();
- if (segOrd < 0) break;
- int ord = (int)toGlobal.get(segOrd);
- countAcc.incrementCount(ord, 1);
- }
- }
- }
-
- private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
- int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
-
- int arrIdx = ord - startTermIndex;
- if (arrIdx >= 0 && arrIdx < nTerms) {
- countAcc.incrementCount(arrIdx, 1);
- if (collectAcc != null) {
- collectAcc.collect(doc, arrIdx);
- }
- if (allBucketsAcc != null) {
- allBucketsAcc.collect(doc, arrIdx);
- }
- }
- }
-
-}
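
[Editor's note] One idea worth noting from the deleted file (it survives in the renamed FacetFieldProcessorByArrayDV): when many hits per bucket are expected, collectPerSeg counts by per-segment ordinal into a small array first and performs only one segment-to-global ordinal translation per distinct value, instead of one per document. A compressed sketch of that pattern, using java.util.function interfaces as hypothetical stand-ins for Lucene's doc values and LongValues ord map:

import java.util.function.IntUnaryOperator;
import java.util.function.LongUnaryOperator;

class PerSegCountingSketch {
  // segOrdOf returns the segment ordinal for a doc, or -1 if the doc has no value.
  static void countSegment(int segValueCount, int[] docs, IntUnaryOperator segOrdOf,
                           LongUnaryOperator toGlobal, int[] globalCounts) {
    int[] segCounts = new int[segValueCount + 1]; // index 0 absorbs "no value" (segOrd -1)
    for (int doc : docs) {
      segCounts[segOrdOf.applyAsInt(doc) + 1]++;  // cheap per-document work, no ord mapping
    }
    for (int i = 1; i <= segValueCount; i++) {    // one ord translation per distinct value
      if (segCounts[i] > 0) {
        globalCounts[(int) toGlobal.applyAsLong(i - 1)] += segCounts[i];
      }
    }
  }
}
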
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
deleted file mode 100644
index 6ab4c26..0000000
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorNumeric.java
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.search.facet;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.DocIterator;
-
-class FacetFieldProcessorNumeric extends FacetFieldProcessor {
- static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
-
- static class LongCounts {
-
- static final float LOAD_FACTOR = 0.7f;
-
- long numAdds;
- long[] vals;
- int[] counts; // maintain the counts here since we need them to tell if there was actually a value anyway
- int[] oldToNewMapping;
-
- int cardinality;
- int threshold;
-
- /** sz must be a power of two */
- LongCounts(int sz) {
- vals = new long[sz];
- counts = new int[sz];
- threshold = (int) (sz * LOAD_FACTOR);
- }
-
- /** Current number of slots in the hash table */
- public int numSlots() {
- return vals.length;
- }
-
- private int hash(long val) {
- // For floats: exponent bits start at bit 23 for single precision,
- // and bit 52 for double precision.
- // Many values will only have significant bits just to the right of that,
- // and the leftmost bits will all be zero.
-
- // For now, lets just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
- // The upper bits of our hash will be irrelevant.
- int h = (int) (val + (val >>> 44) + (val >>> 15));
- return h;
- }
-
- /** returns the slot */
- int add(long val) {
- if (cardinality >= threshold) {
- rehash();
- }
-
- numAdds++;
- int h = hash(val);
- for (int slot = h & (vals.length-1); ;slot = (slot + ((h>>7)|1)) & (vals.length-1)) {
- int count = counts[slot];
- if (count == 0) {
- counts[slot] = 1;
- vals[slot] = val;
- cardinality++;
- return slot;
- } else if (vals[slot] == val) {
- // val is already in the set
- counts[slot] = count + 1;
- return slot;
- }
- }
- }
-
- protected void rehash() {
- long[] oldVals = vals;
- int[] oldCounts = counts; // after retrieving the count, this array is reused as a mapping to new array
- int newCapacity = vals.length << 1;
- vals = new long[newCapacity];
- counts = new int[newCapacity];
- threshold = (int) (newCapacity * LOAD_FACTOR);
-
- for (int i=0; i<oldVals.length; i++) {
- int count = oldCounts[i];
- if (count == 0) {
- oldCounts[i] = -1;
- continue;
- }
-
- long val = oldVals[i];
-
- int h = hash(val);
- int slot = h & (vals.length-1);
- while (counts[slot] != 0) {
- slot = (slot + ((h>>7)|1)) & (vals.length-1);
- }
- counts[slot] = count;
- vals[slot] = val;
- oldCounts[i] = slot;
- }
-
- oldToNewMapping = oldCounts;
- }
-
- int cardinality() {
- return cardinality;
- }
-
- }
-
-
-
- FacetFieldProcessorNumeric(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- }
-
- int allBucketsSlot = -1;
-
- @Override
- public void process() throws IOException {
- super.process();
- response = calcFacets();
- }
-
- private void doRehash(LongCounts table) {
- if (collectAcc == null && allBucketsAcc == null) return;
-
- // Our "count" acc is backed by the hash table and will already be rehashed
- // otherAccs don't need to be rehashed
-
- int newTableSize = table.numSlots();
- int numSlots = newTableSize;
- final int oldAllBucketsSlot = allBucketsSlot;
- if (oldAllBucketsSlot >= 0) {
- allBucketsSlot = numSlots++;
- }
-
- final int finalNumSlots = numSlots;
- final int[] mapping = table.oldToNewMapping;
-
- SlotAcc.Resizer resizer = new SlotAcc.Resizer() {
- @Override
- public int getNewSize() {
- return finalNumSlots;
- }
-
- @Override
- public int getNewSlot(int oldSlot) {
- if (oldSlot < mapping.length) {
- return mapping[oldSlot];
- }
- if (oldSlot == oldAllBucketsSlot) {
- return allBucketsSlot;
- }
- return -1;
- }
- };
-
- // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new slot directly
- if (collectAcc != null) {
- collectAcc.resize(resizer);
- }
- if (allBucketsAcc != null) {
- allBucketsAcc.resize(resizer);
- }
- }
-
- public SimpleOrderedMap<Object> calcFacets() throws IOException {
-
-
- final FacetRangeProcessor.Calc calc = FacetRangeProcessor.getNumericCalc(sf);
-
-
- // TODO: it would be really nice to know the number of unique values!!!!
-
- int possibleValues = fcontext.base.size();
- // size smaller tables so that no resize will be necessary
- int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
- currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
- final LongCounts table = new LongCounts(currHashSize) {
- @Override
- protected void rehash() {
- super.rehash();
- doRehash(this);
- oldToNewMapping = null; // allow for gc
- }
- };
-
- int numSlots = currHashSize;
-
- int numMissing = 0;
-
-
- if (freq.allBuckets) {
- allBucketsSlot = numSlots++;
- }
-
- indexOrderAcc = new SlotAcc(fcontext) {
- @Override
- public void collect(int doc, int slot) throws IOException {
- }
-
- @Override
- public int compare(int slotA, int slotB) {
- long s1 = calc.bitsToSortableBits(table.vals[slotA]);
- long s2 = calc.bitsToSortableBits(table.vals[slotB]);
- return Long.compare(s1, s2);
- }
-
- @Override
- public Object getValue(int slotNum) throws IOException {
- return null;
- }
-
- @Override
- public void reset() {
- }
-
- @Override
- public void resize(Resizer resizer) {
- }
- };
-
- countAcc = new CountSlotAcc(fcontext) {
- @Override
- public void incrementCount(int slot, int count) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int getCount(int slot) {
- return table.counts[slot];
- }
-
- @Override
- public Object getValue(int slotNum) {
- return getCount(slotNum);
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void collect(int doc, int slot) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int compare(int slotA, int slotB) {
- return Integer.compare( table.counts[slotA], table.counts[slotB] );
- }
-
- @Override
- public void resize(Resizer resizer) {
- throw new UnsupportedOperationException();
- }
- };
-
- // we set the countAcc & indexAcc first so generic ones won't be created for us.
- createCollectAcc(fcontext.base.size(), numSlots);
-
- if (freq.allBuckets) {
- allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
- }
-
- NumericDocValues values = null;
- Bits docsWithField = null;
-
- // TODO: factor this code out so it can be shared...
- final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
- final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
- LeafReaderContext ctx = null;
- int segBase = 0;
- int segMax;
- int adjustedMax = 0;
- for (DocIterator docsIt = fcontext.base.iterator(); docsIt.hasNext(); ) {
- final int doc = docsIt.nextDoc();
- if (doc >= adjustedMax) {
- do {
- ctx = ctxIt.next();
- segBase = ctx.docBase;
- segMax = ctx.reader().maxDoc();
- adjustedMax = segBase + segMax;
- } while (doc >= adjustedMax);
- assert doc >= ctx.docBase;
- setNextReaderFirstPhase(ctx);
-
- values = DocValues.getNumeric(ctx.reader(), sf.getName());
- docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
- }
-
- int segDoc = doc - segBase;
- long val = values.get(segDoc);
- if (val != 0 || docsWithField.get(segDoc)) {
- int slot = table.add(val); // this can trigger a rehash rehash
-
- // countAcc.incrementCount(slot, 1);
- // our countAcc is virtual, so this is not needed
-
- collectFirstPhase(segDoc, slot);
- }
- }
-
-
- //
- // collection done, time to find the top slots
- //
-
- int numBuckets = 0;
- List<Object> bucketVals = null;
- if (freq.numBuckets && fcontext.isShard()) {
- bucketVals = new ArrayList(100);
- }
-
- int off = fcontext.isShard() ? 0 : (int) freq.offset;
- // add a modest amount of over-request if this is a shard request
- int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
-
- int maxsize = (int)(freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
- maxsize = Math.min(maxsize, table.cardinality);
-
- final int sortMul = freq.sortDirection.getMultiplier();
-
- PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
- @Override
- protected boolean lessThan(Slot a, Slot b) {
- // TODO: sort-by-index-order
- int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
- return cmp == 0 ? (indexOrderAcc.compare(a.slot, b.slot) > 0) : cmp < 0;
- }
- };
-
- // TODO: create a countAcc that wrapps the table so we can reuse more code?
-
- Slot bottom = null;
- for (int i=0; i<table.counts.length; i++) {
- int count = table.counts[i];
- if (count < effectiveMincount) {
- // either not a valid slot, or count not high enough
- continue;
- }
- numBuckets++; // can be different from the table cardinality if mincount > 1
-
- long val = table.vals[i];
- if (bucketVals != null && bucketVals.size()<100) {
- bucketVals.add( calc.bitsToValue(val) );
- }
-
- if (bottom == null) {
- bottom = new Slot();
- }
- bottom.slot = i;
-
- bottom = queue.insertWithOverflow(bottom);
- }
-
-
- SimpleOrderedMap res = new SimpleOrderedMap();
- if (freq.numBuckets) {
- if (!fcontext.isShard()) {
- res.add("numBuckets", numBuckets);
- } else {
- SimpleOrderedMap map = new SimpleOrderedMap(2);
- map.add("numBuckets", numBuckets);
- map.add("vals", bucketVals);
- res.add("numBuckets", map);
- }
- }
-
- FacetDebugInfo fdebug = fcontext.getDebugInfo();
- if (fdebug != null) fdebug.putInfoItem("numBuckets", new Long(numBuckets));
-
- if (freq.allBuckets) {
- SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
- // countAcc.setValues(allBuckets, allBucketsSlot);
- allBuckets.add("count", table.numAdds);
- allBucketsAcc.setValues(allBuckets, -1);
- // allBuckets currently doesn't execute sub-facets (because it doesn't change the domain?)
- res.add("allBuckets", allBuckets);
- }
-
- if (freq.missing) {
- // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
-
- SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
- fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
- res.add("missing", missingBucket);
- }
-
- // if we are deep paging, we don't have to order the highest "offset" counts.
- int collectCount = Math.max(0, queue.size() - off);
- assert collectCount <= lim;
- int[] sortedSlots = new int[collectCount];
- for (int i = collectCount - 1; i >= 0; i--) {
- sortedSlots[i] = queue.pop().slot;
- }
-
- ArrayList bucketList = new ArrayList(collectCount);
- res.add("buckets", bucketList);
-
- boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
-
- for (int slotNum : sortedSlots) {
- SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
- Comparable val = calc.bitsToValue(table.vals[slotNum]);
- bucket.add("val", val);
-
- Query filter = needFilter ? sf.getType().getFieldQuery(null, sf, calc.formatValue(val)) : null;
-
- fillBucket(bucket, table.counts[slotNum], slotNum, null, filter);
-
- bucketList.add(bucket);
- }
-
-
-
- return res;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
index b1281f4..fa26319 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetProcessor.java
@@ -45,27 +45,18 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.RTimer;
-public class FacetProcessor<FacetRequestT extends FacetRequest> {
- protected SimpleOrderedMap<Object> response;
- protected FacetContext fcontext;
- protected FacetRequestT freq;
+public abstract class FacetProcessor<FacetRequestT extends FacetRequest> {
+ SimpleOrderedMap<Object> response;
+ FacetContext fcontext;
+ FacetRequestT freq;
LinkedHashMap<String,SlotAcc> accMap;
- protected SlotAcc[] accs;
- protected CountSlotAcc countAcc;
+ SlotAcc[] accs;
+ CountSlotAcc countAcc;
- FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
- this.fcontext = fcontext;
- this.freq = freq;
- }
-
- public void process() throws IOException {
- handleDomainChanges();
- }
-
/** factory method for invoking json facet framework as whole */
- public static FacetProcessor<?> createProcessor(SolrQueryRequest req,
- Map<String, Object> params, DocSet docs){
+ public static FacetProcessor<?> createProcessor(SolrQueryRequest req,
+ Map<String, Object> params, DocSet docs){
FacetParser parser = new FacetTopParser(req);
FacetRequest facetRequest = null;
try {
@@ -83,37 +74,23 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
return facetRequest.createFacetProcessor(fcontext);
}
- protected void handleDomainChanges() throws IOException {
- if (freq.domain == null) return;
- handleFilterExclusions();
- handleBlockJoin();
+ FacetProcessor(FacetContext fcontext, FacetRequestT freq) {
+ this.fcontext = fcontext;
+ this.freq = freq;
}
- private void handleBlockJoin() throws IOException {
- if (!(freq.domain.toChildren || freq.domain.toParent)) return;
-
- // TODO: avoid query parsing per-bucket somehow...
- String parentStr = freq.domain.parents;
- Query parentQuery;
- try {
- QParser parser = QParser.getParser(parentStr, fcontext.req);
- parentQuery = parser.getQuery();
- } catch (SyntaxError err) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
- }
-
- BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
- DocSet input = fcontext.base;
- DocSet result;
+ public Object getResponse() {
+ return response;
+ }
- if (freq.domain.toChildren) {
- DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
- result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
- } else {
- result = BlockJoin.toParents(input, parents, fcontext.qcontext);
- }
+ public void process() throws IOException {
+ handleDomainChanges();
+ }
- fcontext.base = result;
+ private void handleDomainChanges() throws IOException {
+ if (freq.domain == null) return;
+ handleFilterExclusions();
+ handleBlockJoin();
}
private void handleFilterExclusions() throws IOException {
@@ -177,11 +154,44 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
fcontext.base = fcontext.searcher.getDocSet(qlist);
}
+ private void handleBlockJoin() throws IOException {
+ if (!(freq.domain.toChildren || freq.domain.toParent)) return;
- public Object getResponse() {
- return null;
+ // TODO: avoid query parsing per-bucket somehow...
+ String parentStr = freq.domain.parents;
+ Query parentQuery;
+ try {
+ QParser parser = QParser.getParser(parentStr, fcontext.req);
+ parentQuery = parser.getQuery();
+ } catch (SyntaxError err) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing block join parent specification: " + parentStr);
+ }
+
+ BitDocSet parents = fcontext.searcher.getDocSetBits(parentQuery);
+ DocSet input = fcontext.base;
+ DocSet result;
+
+ if (freq.domain.toChildren) {
+ DocSet filt = fcontext.searcher.getDocSetBits( new MatchAllDocsQuery() );
+ result = BlockJoin.toChildren(input, parents, filt, fcontext.qcontext);
+ } else {
+ result = BlockJoin.toParents(input, parents, fcontext.qcontext);
+ }
+
+ fcontext.base = result;
}
+ protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
+ if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
+ bucket.add("count", docCount);
+ return;
+ }
+ createAccs(docCount, 1);
+ int collected = collect(docs, 0);
+ countAcc.incrementCount(0, collected);
+ assert collected == docCount;
+ addStats(bucket, 0);
+ }
protected void createAccs(int docCount, int slotCount) throws IOException {
accMap = new LinkedHashMap<>();
@@ -198,7 +208,6 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
accMap.put(acc.key, acc);
}
-
accs = new SlotAcc[accMap.size()];
int i=0;
for (SlotAcc acc : accMap.values()) {
@@ -206,63 +215,14 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
}
-
- protected void resetStats() {
+ // note: only called by enum/stream prior to collect
+ void resetStats() {
countAcc.reset();
for (SlotAcc acc : accs) {
acc.reset();
}
}
- protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
- if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
- bucket.add("count", docCount);
- return;
- }
- createAccs(docCount, 1);
- int collected = collect(docs, 0);
- countAcc.incrementCount(0, collected);
- assert collected == docCount;
- addStats(bucket, 0);
- }
-
-
- protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
-
- // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
- // should we check for domain-altering exclusions, or even ask the sub-facet for
- // it's domain and then only skip it if it's 0?
-
- if (domain == null || domain.size() == 0 && !freq.processEmpty) {
- return;
- }
-
- for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
- // make a new context for each sub-facet since they can change the domain
- FacetContext subContext = fcontext.sub(filter, domain);
- FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
- if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
- FacetDebugInfo fdebug = new FacetDebugInfo();
- subContext.setDebugInfo(fdebug);
- fcontext.getDebugInfo().addChild(fdebug);
-
- fdebug.setReqDescription(sub.getValue().getFacetDescription());
- fdebug.setProcessor(subProcessor.getClass().getSimpleName());
- if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());
-
- final RTimer timer = new RTimer();
- subProcessor.process();
- long timeElapsed = (long) timer.getTime();
- fdebug.setElapse(timeElapsed);
- fdebug.putInfoItem("domainSize", (long)subContext.base.size());
- } else {
- subProcessor.process();
- }
-
- response.add( sub.getKey(), subProcessor.getResponse() );
- }
- }
-
int collect(DocSet docs, int slot) throws IOException {
int count = 0;
SolrIndexSearcher searcher = fcontext.searcher;
@@ -310,7 +270,6 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
}
-
void addStats(SimpleOrderedMap<Object> target, int slotNum) throws IOException {
int count = countAcc.getCount(slotNum);
target.add("count", count);
@@ -321,8 +280,7 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
}
-
- public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
+ void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
// TODO: always collect counts or not???
@@ -348,7 +306,7 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
try {
- processStats(bucket, result, (int) count);
+ processStats(bucket, result, count);
processSubs(bucket, q, result);
} finally {
if (result != null) {
@@ -358,7 +316,44 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
}
- public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
+ void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
+
+ // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
+ // should we check for domain-altering exclusions, or even ask the sub-facet for
+ // its domain and then only skip it if it's 0?
+
+ if (domain == null || domain.size() == 0 && !freq.processEmpty) {
+ return;
+ }
+
+ for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
+ // make a new context for each sub-facet since they can change the domain
+ FacetContext subContext = fcontext.sub(filter, domain);
+ FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
+ if (fcontext.getDebugInfo() != null) { // if fcontext.debugInfo != null, it means rb.debug() == true
+ FacetDebugInfo fdebug = new FacetDebugInfo();
+ subContext.setDebugInfo(fdebug);
+ fcontext.getDebugInfo().addChild(fdebug);
+
+ fdebug.setReqDescription(sub.getValue().getFacetDescription());
+ fdebug.setProcessor(subProcessor.getClass().getSimpleName());
+ if (subContext.filter != null) fdebug.setFilter(subContext.filter.toString());
+
+ final RTimer timer = new RTimer();
+ subProcessor.process();
+ long timeElapsed = (long) timer.getTime();
+ fdebug.setElapse(timeElapsed);
+ fdebug.putInfoItem("domainSize", (long)subContext.base.size());
+ } else {
+ subProcessor.process();
+ }
+
+ response.add( sub.getKey(), subProcessor.getResponse() );
+ }
+ }
+
+ @SuppressWarnings("unused")
+ static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
SchemaField sf = searcher.getSchema().getField(fieldName);
DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
DocSet answer = docs.andNot(hasVal);
@@ -366,7 +361,7 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
return answer;
}
- public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
+ static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
SchemaField sf = searcher.getSchema().getField(fieldName);
Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
BooleanQuery.Builder noVal = new BooleanQuery.Builder();
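
[Editor's note] The hunk ends mid-method here, so the remainder of getFieldMissingQuery is not shown. As a hedged sketch only, one common way to finish such a "missing field" query with Lucene's boolean API is to subtract the has-value query from a match-all clause (the actual body past the truncation may differ):

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;

class FieldMissingSketch {
  // "documents missing the field" = all documents minus those with any value
  static Query missing(Query hasVal) {
    return new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST)
        .add(hasVal, BooleanClause.Occur.MUST_NOT)
        .build();
  }
}
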
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
index ac6d7f1..174b832 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
@@ -54,11 +54,6 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
}
@Override
- public Object getResponse() {
- return response;
- }
-
- @Override
public void process() throws IOException {
super.process();
response = new SimpleOrderedMap<>();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
index 8d3d0f5..1b98de0 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@@ -93,11 +93,6 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
response = getRangeCounts();
}
- @Override
- public Object getResponse() {
- return response;
- }
-
private static class Range {
Object label;
Comparable low;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
index aa8f395..ad3baf0 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UnInvertedField.java
@@ -305,7 +305,7 @@ public class UnInvertedField extends DocTermOrds {
- private void getCounts(FacetFieldProcessorUIF processor, CountSlotAcc counts) throws IOException {
+ private void getCounts(FacetFieldProcessorByArrayUIF processor, CountSlotAcc counts) throws IOException {
DocSet docs = processor.fcontext.base;
int baseSize = docs.size();
int maxDoc = searcher.maxDoc();
@@ -397,7 +397,7 @@ public class UnInvertedField extends DocTermOrds {
- public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
+ public void collectDocs(FacetFieldProcessorByArrayUIF processor) throws IOException {
if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
getCounts(processor, processor.countAcc);
return;
@@ -408,7 +408,7 @@ public class UnInvertedField extends DocTermOrds {
// called from FieldFacetProcessor
// TODO: do a callback version that can be specialized!
- public void collectDocsGeneric(FacetFieldProcessorUIF processor) throws IOException {
+ public void collectDocsGeneric(FacetFieldProcessorByArrayUIF processor) throws IOException {
use.incrementAndGet();
int startTermIndex = processor.startTermIndex;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 93369be..7b5a561 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -47,8 +47,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
@BeforeClass
public static void beforeTests() throws Exception {
JSONTestUtil.failRepeatedKeys = true;
- origTableSize = FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE;
- FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
+ origTableSize = FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE;
+ FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
initCore("solrconfig-tlog.xml","schema_latest.xml");
}
@@ -61,7 +61,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
@AfterClass
public static void afterTests() throws Exception {
JSONTestUtil.failRepeatedKeys = false;
- FacetFieldProcessorNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
+ FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
if (servers != null) {
servers.stop();
servers = null;
@@ -349,11 +349,11 @@ public class TestJsonFacets extends SolrTestCaseHS {
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
// multi-valued docvalues
- FacetFieldProcessorDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage
+ FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
// multi-valued docvalues
- FacetFieldProcessorDV.unwrap_singleValued_multiDv = true;
+ FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = true;
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
}
[2/2] lucene-solr:master: SOLR-9404: Refactor move/renames in JSON
FacetProcessor and FacetFieldProcessor.
Posted by ds...@apache.org.
SOLR-9404: Refactor move/renames in JSON FacetProcessor and FacetFieldProcessor.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7072458e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7072458e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7072458e
Branch: refs/heads/master
Commit: 7072458ea44be181195882fd366ce6a545af55df
Parents: 6d1f1f6
Author: David Smiley <ds...@apache.org>
Authored: Mon Aug 15 23:34:03 2016 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Mon Aug 15 23:34:03 2016 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../apache/solr/search/facet/FacetField.java | 952 +------------------
.../solr/search/facet/FacetFieldProcessor.java | 369 +++++++
.../facet/FacetFieldProcessorByArray.java | 213 +++++
.../facet/FacetFieldProcessorByArrayDV.java | 294 ++++++
.../facet/FacetFieldProcessorByArrayUIF.java | 71 ++
.../FacetFieldProcessorByEnumTermsStream.java | 356 +++++++
.../facet/FacetFieldProcessorByHashNumeric.java | 439 +++++++++
.../search/facet/FacetFieldProcessorDV.java | 291 ------
.../facet/FacetFieldProcessorNumeric.java | 443 ---------
.../solr/search/facet/FacetProcessor.java | 203 ++--
.../apache/solr/search/facet/FacetQuery.java | 5 -
.../apache/solr/search/facet/FacetRange.java | 5 -
.../solr/search/facet/UnInvertedField.java | 6 +-
.../solr/search/facet/TestJsonFacets.java | 10 +-
15 files changed, 1860 insertions(+), 1799 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fc0bde0..7458f46 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -280,6 +280,8 @@ Other Changes
* SOLR-9410: Make ReRankQParserPlugin's private ReRankWeight a public class of its own. (Christine Poerschke)
+* SOLR-9404: Refactor move/renames in JSON FacetProcessor and FacetFieldProcessor. (David Smiley)
+
================== 6.1.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index a5ec1db..c06e182 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -16,39 +16,12 @@
*/
package org.apache.solr.search.facet;
-import java.io.Closeable;
-import java.io.IOException;
-import java.util.ArrayList;
import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
import java.util.Map;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.MultiPostingsEnum;
-import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
-import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.TrieField;
-import org.apache.solr.search.DocSet;
-import org.apache.solr.search.HashDocSet;
-import org.apache.solr.search.SolrIndexSearcher;
-import org.apache.solr.search.SortedIntDocSet;
public class FacetField extends FacetRequest {
@@ -69,7 +42,7 @@ public class FacetField extends FacetRequest {
Boolean perSeg;
// TODO: put this somewhere more generic?
- public static enum SortDirection {
+ public enum SortDirection {
asc(-1) ,
desc(1);
@@ -84,7 +57,7 @@ public class FacetField extends FacetRequest {
}
}
- public static enum FacetMethod {
+ public enum FacetMethod {
DV, // DocValues
UIF, // UnInvertedField
ENUM,
@@ -109,7 +82,6 @@ public class FacetField extends FacetRequest {
}
}
-
@Override
public FacetProcessor createFacetProcessor(FacetContext fcontext) {
SchemaField sf = fcontext.searcher.getSchema().getField(field);
@@ -119,7 +91,7 @@ public class FacetField extends FacetRequest {
if (method == FacetMethod.ENUM && sf.indexed()) {
throw new UnsupportedOperationException();
} else if (method == FacetMethod.STREAM && sf.indexed()) {
- return new FacetFieldProcessorStream(fcontext, this, sf);
+ return new FacetFieldProcessorByEnumTermsStream(fcontext, this, sf);
}
org.apache.lucene.document.FieldType.LegacyNumericType ntype = ft.getNumericType();
@@ -127,10 +99,10 @@ public class FacetField extends FacetRequest {
if (!multiToken) {
if (ntype != null) {
// single valued numeric (docvalues or fieldcache)
- return new FacetFieldProcessorNumeric(fcontext, this, sf);
+ return new FacetFieldProcessorByHashNumeric(fcontext, this, sf);
} else {
// single valued string...
- return new FacetFieldProcessorDV(fcontext, this, sf);
+ return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
}
}
@@ -138,11 +110,11 @@ public class FacetField extends FacetRequest {
if (sf.hasDocValues() || method == FacetMethod.DV) {
// single and multi-valued string docValues
- return new FacetFieldProcessorDV(fcontext, this, sf);
+ return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
}
// Top-level multi-valued field cache (UIF)
- return new FacetFieldProcessorUIF(fcontext, this, sf);
+ return new FacetFieldProcessorByArrayUIF(fcontext, this, sf);
}
@Override
@@ -152,918 +124,12 @@ public class FacetField extends FacetRequest {
@Override
public Map<String, Object> getFacetDescription() {
- Map<String, Object> descr = new HashMap<String, Object>();
+ Map<String, Object> descr = new HashMap<>();
descr.put("field", field);
- descr.put("limit", new Long(limit));
+ descr.put("limit", limit);
return descr;
}
}
-abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
- SchemaField sf;
- SlotAcc indexOrderAcc;
- int effectiveMincount;
-
- Map<String,AggValueSource> deferredAggs; // null if none
-
- // TODO: push any of this down to base class?
-
- //
- // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x".
- // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those buckets.
- //
- SlotAcc collectAcc; // Accumulator to collect across entire domain (in addition to the countAcc). May be null.
- SlotAcc sortAcc; // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of countAcc, collectAcc, or indexOrderAcc
- SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
-
- SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
-
-
- FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq);
- this.sf = sf;
- this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
- }
-
- @Override
- public Object getResponse() {
- return response;
- }
-
- // This is used to create accs for second phase (or to create accs for all aggs)
- @Override
- protected void createAccs(int docCount, int slotCount) throws IOException {
- if (accMap == null) {
- accMap = new LinkedHashMap<>();
- }
-
- // allow a custom count acc to be used
- if (countAcc == null) {
- countAcc = new CountSlotArrAcc(fcontext, slotCount);
- countAcc.key = "count";
- }
-
- if (accs != null) {
- // reuse these accs, but reset them first
- for (SlotAcc acc : accs) {
- acc.reset();
- }
- return;
- } else {
- accs = new SlotAcc[ freq.getFacetStats().size() ];
- }
-
- int accIdx = 0;
- for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
- SlotAcc acc = null;
- if (slotCount == 1) {
- acc = accMap.get(entry.getKey());
- if (acc != null) {
- acc.reset();
- }
- }
- if (acc == null) {
- acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
- acc.key = entry.getKey();
- accMap.put(acc.key, acc);
- }
- accs[accIdx++] = acc;
- }
- }
-
- void createCollectAcc(int numDocs, int numSlots) throws IOException {
- accMap = new LinkedHashMap<>();
-
- // we always count...
- // allow a subclass to set a custom counter.
- if (countAcc == null) {
- countAcc = new CountSlotArrAcc(fcontext, numSlots);
- }
-
- if ("count".equals(freq.sortVariable)) {
- sortAcc = countAcc;
- deferredAggs = freq.getFacetStats();
- } else if ("index".equals(freq.sortVariable)) {
- // allow subclass to set indexOrderAcc first
- if (indexOrderAcc == null) {
- // This sorting accumulator just goes by the slot number, so does not need to be collected
- // and hence does not need to find it's way into the accMap or accs array.
- indexOrderAcc = new SortSlotAcc(fcontext);
- }
- sortAcc = indexOrderAcc;
- deferredAggs = freq.getFacetStats();
- } else {
- AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
- if (sortAgg != null) {
- collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
- collectAcc.key = freq.sortVariable; // TODO: improve this
- }
- sortAcc = collectAcc;
- deferredAggs = new HashMap<>(freq.getFacetStats());
- deferredAggs.remove(freq.sortVariable);
- }
-
- if (deferredAggs.size() == 0) {
- deferredAggs = null;
- }
-
- boolean needOtherAccs = freq.allBuckets; // TODO: use for missing too...
-
- if (!needOtherAccs) {
- // we may need them later, but we don't want to create them now
- // otherwise we won't know if we need to call setNextReader on them.
- return;
- }
-
- // create the deferred aggs up front for use by allBuckets
- createOtherAccs(numDocs, 1);
- }
-
-
- void createOtherAccs(int numDocs, int numSlots) throws IOException {
- if (otherAccs != null) {
- // reuse existing accumulators
- for (SlotAcc acc : otherAccs) {
- acc.reset(); // todo - make reset take numDocs and numSlots?
- }
- return;
- }
-
- int numDeferred = deferredAggs == null ? 0 : deferredAggs.size();
- if (numDeferred <= 0) return;
-
- otherAccs = new SlotAcc[ numDeferred ];
-
- int otherAccIdx = 0;
- for (Map.Entry<String,AggValueSource> entry : deferredAggs.entrySet()) {
- AggValueSource agg = entry.getValue();
- SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
- acc.key = entry.getKey();
- accMap.put(acc.key, acc);
- otherAccs[otherAccIdx++] = acc;
- }
-
- if (numDeferred == freq.getFacetStats().size()) {
- // accs and otherAccs are the same...
- accs = otherAccs;
- }
- }
-
-
- int collectFirstPhase(DocSet docs, int slot) throws IOException {
- int num = -1;
- if (collectAcc != null) {
- num = collectAcc.collect(docs, slot);
- }
- if (allBucketsAcc != null) {
- num = allBucketsAcc.collect(docs, slot);
- }
- return num >= 0 ? num : docs.size();
- }
-
- void collectFirstPhase(int segDoc, int slot) throws IOException {
- if (collectAcc != null) {
- collectAcc.collect(segDoc, slot);
- }
- if (allBucketsAcc != null) {
- allBucketsAcc.collect(segDoc, slot);
- }
- }
-
-
- void fillBucket(SimpleOrderedMap<Object> target, int count, int slotNum, DocSet subDomain, Query filter) throws IOException {
- target.add("count", count);
- if (count <= 0 && !freq.processEmpty) return;
-
- if (collectAcc != null && slotNum >= 0) {
- collectAcc.setValues(target, slotNum);
- }
-
- createOtherAccs(-1, 1);
-
- if (otherAccs == null && freq.subFacets.isEmpty()) return;
-
- if (subDomain == null) {
- subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
- }
-
- // if no subFacets, we only need a DocSet
- // otherwise we need more?
- // TODO: save something generic like "slotNum" in the context and use that to implement things like filter exclusion if necessary?
- // Hmmm, but we need to look up some stuff anyway (for the label?)
- // have a method like "DocSet applyConstraint(facet context, DocSet parent)"
- // that's needed for domain changing things like joins anyway???
-
- if (otherAccs != null) {
- // do acc at a time (traversing domain each time) or do all accs for each doc?
- for (SlotAcc acc : otherAccs) {
- acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
- acc.collect(subDomain, 0);
- acc.setValues(target, 0);
- }
- }
-
- processSubs(target, filter, subDomain);
- }
-
-
- @Override
- protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
- if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
- bucket.add("count", docCount);
- return;
- }
- createAccs(docCount, 1);
- int collected = collect(docs, 0);
-
- // countAcc.incrementCount(0, collected); // should we set the counton the acc instead of just passing it?
-
- assert collected == docCount;
- addStats(bucket, collected, 0);
- }
-
- // overrides but with different signature!
- void addStats(SimpleOrderedMap<Object> target, int count, int slotNum) throws IOException {
- target.add("count", count);
- if (count > 0 || freq.processEmpty) {
- for (SlotAcc acc : accs) {
- acc.setValues(target, slotNum);
- }
- }
- }
-
- @Override
- void setNextReader(LeafReaderContext ctx) throws IOException {
- // base class calls this (for missing bucket...) ... go over accs[] in that case
- super.setNextReader(ctx);
- }
-
- void setNextReaderFirstPhase(LeafReaderContext ctx) throws IOException {
- if (collectAcc != null) {
- collectAcc.setNextReader(ctx);
- }
- if (otherAccs != null) {
- for (SlotAcc acc : otherAccs) {
- acc.setNextReader(ctx);
- }
- }
- }
-
- static class Slot {
- int slot;
- public int tiebreakCompare(int slotA, int slotB) {
- return slotB - slotA;
- }
- }
-}
-
-class SpecialSlotAcc extends SlotAcc {
- SlotAcc collectAcc;
- SlotAcc[] otherAccs;
- int collectAccSlot;
- int otherAccsSlot;
- long count;
-
- public SpecialSlotAcc(FacetContext fcontext, SlotAcc collectAcc, int collectAccSlot, SlotAcc[] otherAccs, int otherAccsSlot) {
- super(fcontext);
- this.collectAcc = collectAcc;
- this.collectAccSlot = collectAccSlot;
- this.otherAccs = otherAccs;
- this.otherAccsSlot = otherAccsSlot;
- }
-
- public int getCollectAccSlot() { return collectAccSlot; }
- public int getOtherAccSlot() { return otherAccsSlot; }
-
- public long getSpecialCount() {
- return count;
- }
-
- @Override
- public void collect(int doc, int slot) throws IOException {
- assert slot != collectAccSlot || slot < 0;
- count++;
- if (collectAcc != null) {
- collectAcc.collect(doc, collectAccSlot);
- }
- if (otherAccs != null) {
- for (SlotAcc otherAcc : otherAccs) {
- otherAcc.collect(doc, otherAccsSlot);
- }
- }
- }
-
- @Override
- public void setNextReader(LeafReaderContext readerContext) throws IOException {
- // collectAcc and otherAccs will normally have setNextReader called directly on them.
- // This, however, will be used when collect(DocSet,slot) variant is used on this Acc.
- if (collectAcc != null) {
- collectAcc.setNextReader(readerContext);
- }
- if (otherAccs != null) {
- for (SlotAcc otherAcc : otherAccs) {
- otherAcc.setNextReader(readerContext);
- }
- }
- }
-
- @Override
- public int compare(int slotA, int slotB) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Object getValue(int slotNum) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
- if (collectAcc != null) {
- collectAcc.setValues(bucket, collectAccSlot);
- }
- if (otherAccs != null) {
- for (SlotAcc otherAcc : otherAccs) {
- otherAcc.setValues(bucket, otherAccsSlot);
- }
- }
- }
-
- @Override
- public void reset() {
- // reset should be called on underlying accs
- // TODO: but in case something does need to be done here, should we require this method to be called but do nothing for now?
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void resize(Resizer resizer) {
- // someone else will call resize on collectAcc directly
- if (collectAccSlot >= 0) {
- collectAccSlot = resizer.getNewSlot(collectAccSlot);
- }
- }
-}
-
-
-
-
-// base class for FC style of facet counting (single and multi-valued strings)
-abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
- BytesRefBuilder prefixRef;
- int startTermIndex;
- int endTermIndex;
- int nTerms;
- int nDocs;
- int maxSlots;
-
- int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
-
- public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- }
-
- @Override
- public void process() throws IOException {
- super.process();
- sf = fcontext.searcher.getSchema().getField(freq.field);
- response = getFieldCacheCounts();
- }
-
-
- /** this BytesRef may be shared across calls and should be deep-cloned if necessary */
- abstract protected BytesRef lookupOrd(int ord) throws IOException;
- abstract protected void findStartAndEndOrds() throws IOException;
- abstract protected void collectDocs() throws IOException;
-
-
- public SimpleOrderedMap<Object> getFieldCacheCounts() throws IOException {
- String prefix = freq.prefix;
- if (prefix == null || prefix.length() == 0) {
- prefixRef = null;
- } else {
- prefixRef = new BytesRefBuilder();
- prefixRef.copyChars(prefix);
- }
-
- findStartAndEndOrds();
-
- maxSlots = nTerms;
-
- if (freq.allBuckets) {
- allBucketsSlot = maxSlots++;
- }
-
- createCollectAcc(nDocs, maxSlots);
-
- if (freq.allBuckets) {
- allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
- }
-
- collectDocs();
-
- return findTopSlots();
- }
-
-
- protected SimpleOrderedMap<Object> findTopSlots() throws IOException {
- SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
-
- int numBuckets = 0;
- List<Object> bucketVals = null;
- if (freq.numBuckets && fcontext.isShard()) {
- bucketVals = new ArrayList(100);
- }
-
- int off = fcontext.isShard() ? 0 : (int) freq.offset;
- // add a modest amount of over-request if this is a shard request
- int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
-
- int maxsize = (int)(freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
- maxsize = Math.min(maxsize, nTerms);
-
- final int sortMul = freq.sortDirection.getMultiplier();
- final SlotAcc sortAcc = this.sortAcc;
-
- PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
- @Override
- protected boolean lessThan(Slot a, Slot b) {
- int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
- return cmp == 0 ? b.slot < a.slot : cmp < 0;
- }
- };
-
- Slot bottom = null;
- for (int i = 0; i < nTerms; i++) {
- // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
- if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
- continue;
- }
-
- numBuckets++;
- if (bucketVals != null && bucketVals.size()<100) {
- int ord = startTermIndex + i;
- BytesRef br = lookupOrd(ord);
- Object val = sf.getType().toObject(sf, br);
- bucketVals.add(val);
- }
-
-
- if (bottom != null) {
- if (sortAcc.compare(bottom.slot, i) * sortMul < 0) {
- bottom.slot = i;
- bottom = queue.updateTop();
- }
- } else if (lim > 0) {
- // queue not full
- Slot s = new Slot();
- s.slot = i;
- queue.add(s);
- if (queue.size() >= maxsize) {
- bottom = queue.top();
- }
- }
- }
-
- if (freq.numBuckets) {
- if (!fcontext.isShard()) {
- res.add("numBuckets", numBuckets);
- } else {
- SimpleOrderedMap map = new SimpleOrderedMap(2);
- map.add("numBuckets", numBuckets);
- map.add("vals", bucketVals);
- res.add("numBuckets", map);
- }
- }
-
- FacetDebugInfo fdebug = fcontext.getDebugInfo();
- if (fdebug != null) fdebug.putInfoItem("numBuckets", new Long(numBuckets));
-
- // if we are deep paging, we don't have to order the highest "offset" counts.
- int collectCount = Math.max(0, queue.size() - off);
- assert collectCount <= lim;
- int[] sortedSlots = new int[collectCount];
- for (int i = collectCount - 1; i >= 0; i--) {
- sortedSlots[i] = queue.pop().slot;
- }
-
- if (freq.allBuckets) {
- SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
- allBuckets.add("count", allBucketsAcc.getSpecialCount());
- if (allBucketsAcc != null) {
- allBucketsAcc.setValues(allBuckets, allBucketsSlot);
- }
- res.add("allBuckets", allBuckets);
- }
-
- ArrayList bucketList = new ArrayList(collectCount);
- res.add("buckets", bucketList);
-
-
- // TODO: do this with a callback instead?
- boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
-
- for (int slotNum : sortedSlots) {
- SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
-
- // get the ord of the slot...
- int ord = startTermIndex + slotNum;
-
- BytesRef br = lookupOrd(ord);
- Object val = sf.getType().toObject(sf, br);
-
- bucket.add("val", val);
-
- TermQuery filter = needFilter ? new TermQuery(new Term(sf.getName(), br)) : null;
- fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
-
- bucketList.add(bucket);
- }
-
- if (freq.missing) {
- SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
- fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
- res.add("missing", missingBucket);
- }
-
- return res;
- }
-
-
-}
-
-
-
-
-
-// UnInvertedField implementation of field faceting
-class FacetFieldProcessorUIF extends FacetFieldProcessorFCBase {
- UnInvertedField uif;
- TermsEnum te;
-
- FacetFieldProcessorUIF(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- }
-
- @Override
- protected void findStartAndEndOrds() throws IOException {
- uif = UnInvertedField.getUnInvertedField(freq.field, fcontext.searcher);
- te = uif.getOrdTermsEnum( fcontext.searcher.getLeafReader() ); // "te" can be null
-
- startTermIndex = 0;
- endTermIndex = uif.numTerms(); // one past the end
-
- if (prefixRef != null && te != null) {
- if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
- startTermIndex = uif.numTerms();
- } else {
- startTermIndex = (int) te.ord();
- }
- prefixRef.append(UnicodeUtil.BIG_TERM);
- if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
- endTermIndex = uif.numTerms();
- } else {
- endTermIndex = (int) te.ord();
- }
- }
-
- nTerms = endTermIndex - startTermIndex;
- }
-
- @Override
- protected BytesRef lookupOrd(int ord) throws IOException {
- return uif.getTermValue(te, ord);
- }
-
- @Override
- protected void collectDocs() throws IOException {
- uif.collectDocs(this);
- }
-}
-
-
-
-class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable {
- long bucketsToSkip;
- long bucketsReturned;
-
- boolean closed;
- boolean countOnly;
- boolean hasSubFacets; // true if there are subfacets
- int minDfFilterCache;
- DocSet docs;
- DocSet fastForRandomSet;
- TermsEnum termsEnum = null;
- SolrIndexSearcher.DocsEnumState deState = null;
- PostingsEnum postingsEnum;
- BytesRef startTermBytes;
- BytesRef term;
- LeafReaderContext[] leaves;
-
-
-
- FacetFieldProcessorStream(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- }
-
- @Override
- public void close() throws IOException {
- if (!closed) {
- closed = true;
- // fcontext.base.decref(); // OFF-HEAP
- }
- }
-
-
- @Override
- public void process() throws IOException {
- super.process();
-
- // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
- // But if the connection is broken, we want to clean up.
- // fcontext.base.incref(); // OFF-HEAP
- fcontext.qcontext.addCloseHook(this);
-
- setup();
- response = new SimpleOrderedMap<>();
- response.add("buckets", new Iterator() {
- boolean retrieveNext = true;
- Object val;
-
- @Override
- public boolean hasNext() {
- if (retrieveNext) {
- val = nextBucket();
- }
- retrieveNext = false;
- return val != null;
- }
-
- @Override
- public Object next() {
- if (retrieveNext) {
- val = nextBucket();
- }
- retrieveNext = true;
- if (val == null) {
- // Last value, so clean up. In the case that we are doing streaming facets within streaming facets,
- // the number of close hooks could grow very large, so we want to remove ourselves.
- boolean removed = fcontext.qcontext.removeCloseHook(FacetFieldProcessorStream.this);
- assert removed;
- try {
- close();
- } catch (IOException e) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming close", e);
- }
- }
- return val;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- });
- }
-
-
-
- public void setup() throws IOException {
-
- countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg;
- hasSubFacets = freq.subFacets.size() > 0;
- bucketsToSkip = freq.offset;
-
- createAccs(-1, 1);
-
- // Minimum term docFreq in order to use the filterCache for that term.
- if (freq.cacheDf == -1) { // -1 means never cache
- minDfFilterCache = Integer.MAX_VALUE;
- } else if (freq.cacheDf == 0) { // default; compute as fraction of maxDoc
- minDfFilterCache = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes)
- } else {
- minDfFilterCache = freq.cacheDf;
- }
-
- docs = fcontext.base;
- fastForRandomSet = null;
-
- if (freq.prefix != null) {
- String indexedPrefix = sf.getType().toInternal(freq.prefix);
- startTermBytes = new BytesRef(indexedPrefix);
- } else if (sf.getType().getNumericType() != null) {
- String triePrefix = TrieField.getMainValuePrefix(sf.getType());
- if (triePrefix != null) {
- startTermBytes = new BytesRef(triePrefix);
- }
- }
-
- Fields fields = fcontext.searcher.getLeafReader().fields();
- Terms terms = fields == null ? null : fields.terms(sf.getName());
-
-
- termsEnum = null;
- deState = null;
- term = null;
-
-
- if (terms != null) {
-
- termsEnum = terms.iterator();
-
- // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
- // facet.offset when sorting by index order.
-
- if (startTermBytes != null) {
- if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
- termsEnum = null;
- } else {
- term = termsEnum.term();
- }
- } else {
- // position termsEnum on first term
- term = termsEnum.next();
- }
- }
-
- List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
- leaves = leafList.toArray( new LeafReaderContext[ leafList.size() ]);
- }
-
-
- public SimpleOrderedMap<Object> nextBucket() {
- try {
- return _nextBucket();
- } catch (Exception e) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming", e);
- }
- }
-
- public SimpleOrderedMap<Object> _nextBucket() throws IOException {
- DocSet termSet = null;
-
- try {
- while (term != null) {
-
- if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) {
- break;
- }
-
- int df = termsEnum.docFreq();
- if (df < effectiveMincount) {
- term = termsEnum.next();
- continue;
- }
-
- if (termSet != null) {
- // termSet.decref(); // OFF-HEAP
- termSet = null;
- }
-
- int c = 0;
-
- if (hasSubFacets || df >= minDfFilterCache) {
- // use the filter cache
-
- if (deState == null) {
- deState = new SolrIndexSearcher.DocsEnumState();
- deState.fieldName = sf.getName();
- deState.liveDocs = fcontext.searcher.getLeafReader().getLiveDocs();
- deState.termsEnum = termsEnum;
- deState.postingsEnum = postingsEnum;
- deState.minSetSizeCached = minDfFilterCache;
- }
-
- if (hasSubFacets || !countOnly) {
- DocSet termsAll = fcontext.searcher.getDocSet(deState);
- termSet = docs.intersection(termsAll);
- // termsAll.decref(); // OFF-HEAP
- c = termSet.size();
- } else {
- c = fcontext.searcher.numDocs(docs, deState);
- }
- postingsEnum = deState.postingsEnum;
-
- resetStats();
-
- if (!countOnly) {
- collect(termSet, 0);
- }
-
- } else {
- // We don't need the docset here (meaning no sub-facets).
- // if countOnly, then we are calculating some other stats...
- resetStats();
-
- // lazy convert to fastForRandomSet
- if (fastForRandomSet == null) {
- fastForRandomSet = docs;
- if (docs instanceof SortedIntDocSet) { // OFF-HEAP todo: also check for native version
- SortedIntDocSet sset = (SortedIntDocSet) docs;
- fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
- }
- }
- // iterate over TermDocs to calculate the intersection
- postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
-
- if (postingsEnum instanceof MultiPostingsEnum) {
- MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
- int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
- for (int subindex = 0; subindex < numSubs; subindex++) {
- MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
- if (sub.postingsEnum == null) continue;
- int base = sub.slice.start;
- int docid;
-
- if (countOnly) {
- while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid + base)) c++;
- }
- } else {
- setNextReader(leaves[sub.slice.readerIndex]);
- while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid + base)) {
- c++;
- collect(docid, 0);
- }
- }
- }
-
- }
- } else {
- int docid;
- if (countOnly) {
- while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid)) c++;
- }
- } else {
- setNextReader(leaves[0]);
- while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid)) {
- c++;
- collect(docid, 0);
- }
- }
- }
- }
-
- }
-
-
-
- if (c < effectiveMincount) {
- term = termsEnum.next();
- continue;
- }
-
- // handle offset and limit
- if (bucketsToSkip > 0) {
- bucketsToSkip--;
- term = termsEnum.next();
- continue;
- }
-
- if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
- return null;
- }
-
- // set count in case other stats depend on it
- countAcc.incrementCount(0, c);
-
- // OK, we have a good bucket to return... first get bucket value before moving to next term
- Object bucketVal = sf.getType().toObject(sf, term);
- TermQuery bucketQuery = hasSubFacets ? new TermQuery(new Term(freq.field, term)) : null;
- term = termsEnum.next();
-
- SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
- bucket.add("val", bucketVal);
- addStats(bucket, 0);
- if (hasSubFacets) {
- processSubs(bucket, bucketQuery, termSet);
- }
-
- // TODO... termSet needs to stick around for streaming sub-facets?
-
- return bucket;
-
- }
-
- } finally {
- if (termSet != null) {
- // termSet.decref(); // OFF-HEAP
- termSet = null;
- }
- }
-
-
- // end of the iteration
- return null;
- }
-
-
-
-}
-
-
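The FacetField.createFacetProcessor hunk earlier in this message is where the renamed processors become visible to callers. As a rough standalone sketch (not the commit's code), the dispatch reduces to the following, with the SchemaField/FieldType inspection replaced by hypothetical booleans and the ENUM/indexed checks omitted:

public class ProcessorDispatchSketch {
  enum FacetMethod { DV, UIF, ENUM, STREAM, SMART }

  // Returns the name of the processor the real method would instantiate.
  static String pick(FacetMethod method, boolean multiToken, boolean numeric, boolean hasDocValues) {
    if (method == FacetMethod.STREAM) {
      return "FacetFieldProcessorByEnumTermsStream";       // streams terms in index order
    }
    if (!multiToken) {
      return numeric ? "FacetFieldProcessorByHashNumeric"  // single-valued numeric
                     : "FacetFieldProcessorByArrayDV";     // single-valued string
    }
    if (hasDocValues || method == FacetMethod.DV) {
      return "FacetFieldProcessorByArrayDV";               // single/multi-valued string docValues
    }
    return "FacetFieldProcessorByArrayUIF";                // top-level multi-valued field cache (UIF)
  }

  public static void main(String[] args) {
    // a single-valued numeric field picks the hash-based numeric processor
    System.out.println(pick(FacetMethod.SMART, false, true, true));
  }
}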
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
new file mode 100644
index 0000000..a737321
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java
@@ -0,0 +1,369 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocSet;
+
+/**
+ * Facet processing based on field values (not by range nor by query).
+ * @see FacetField
+ */
+abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
+ SchemaField sf;
+ SlotAcc indexOrderAcc;
+ int effectiveMincount;
+
+ Map<String,AggValueSource> deferredAggs; // null if none
+
+ // TODO: push any of this down to base class?
+
+ //
+ // For sort="x desc", collectAcc would point to "x", and sortAcc would also point to "x".
+ // collectAcc would be used to accumulate all buckets, and sortAcc would be used to sort those buckets.
+ //
+ SlotAcc collectAcc; // Accumulator to collect across entire domain (in addition to the countAcc). May be null.
+ SlotAcc sortAcc; // Accumulator to use for sorting *only* (i.e. not used for collection). May be an alias of countAcc, collectAcc, or indexOrderAcc
+ SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
+
+ SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
+
+ FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq);
+ this.sf = sf;
+ this.effectiveMincount = (int)(fcontext.isShard() ? Math.min(1 , freq.mincount) : freq.mincount);
+ }
+
+ // This is used to create accs for second phase (or to create accs for all aggs)
+ @Override
+ protected void createAccs(int docCount, int slotCount) throws IOException {
+ if (accMap == null) {
+ accMap = new LinkedHashMap<>();
+ }
+
+ // allow a custom count acc to be used
+ if (countAcc == null) {
+ countAcc = new CountSlotArrAcc(fcontext, slotCount);
+ countAcc.key = "count";
+ }
+
+ if (accs != null) {
+ // reuse these accs, but reset them first
+ for (SlotAcc acc : accs) {
+ acc.reset();
+ }
+ return;
+ } else {
+ accs = new SlotAcc[ freq.getFacetStats().size() ];
+ }
+
+ int accIdx = 0;
+ for (Map.Entry<String,AggValueSource> entry : freq.getFacetStats().entrySet()) {
+ SlotAcc acc = null;
+ if (slotCount == 1) {
+ acc = accMap.get(entry.getKey());
+ if (acc != null) {
+ acc.reset();
+ }
+ }
+ if (acc == null) {
+ acc = entry.getValue().createSlotAcc(fcontext, docCount, slotCount);
+ acc.key = entry.getKey();
+ accMap.put(acc.key, acc);
+ }
+ accs[accIdx++] = acc;
+ }
+ }
+
+ void createCollectAcc(int numDocs, int numSlots) throws IOException {
+ accMap = new LinkedHashMap<>();
+
+ // we always count...
+ // allow a subclass to set a custom counter.
+ if (countAcc == null) {
+ countAcc = new CountSlotArrAcc(fcontext, numSlots);
+ }
+
+ if ("count".equals(freq.sortVariable)) {
+ sortAcc = countAcc;
+ deferredAggs = freq.getFacetStats();
+ } else if ("index".equals(freq.sortVariable)) {
+ // allow subclass to set indexOrderAcc first
+ if (indexOrderAcc == null) {
+ // This sorting accumulator just goes by the slot number, so does not need to be collected
+ // and hence does not need to find its way into the accMap or accs array.
+ indexOrderAcc = new SortSlotAcc(fcontext);
+ }
+ sortAcc = indexOrderAcc;
+ deferredAggs = freq.getFacetStats();
+ } else {
+ AggValueSource sortAgg = freq.getFacetStats().get(freq.sortVariable);
+ if (sortAgg != null) {
+ collectAcc = sortAgg.createSlotAcc(fcontext, numDocs, numSlots);
+ collectAcc.key = freq.sortVariable; // TODO: improve this
+ }
+ sortAcc = collectAcc;
+ deferredAggs = new HashMap<>(freq.getFacetStats());
+ deferredAggs.remove(freq.sortVariable);
+ }
+
+ if (deferredAggs.size() == 0) {
+ deferredAggs = null;
+ }
+
+ boolean needOtherAccs = freq.allBuckets; // TODO: use for missing too...
+
+ if (!needOtherAccs) {
+ // we may need them later, but we don't want to create them now
+ // otherwise we won't know if we need to call setNextReader on them.
+ return;
+ }
+
+ // create the deferred aggs up front for use by allBuckets
+ createOtherAccs(numDocs, 1);
+ }
+
+ private void createOtherAccs(int numDocs, int numSlots) throws IOException {
+ if (otherAccs != null) {
+ // reuse existing accumulators
+ for (SlotAcc acc : otherAccs) {
+ acc.reset(); // todo - make reset take numDocs and numSlots?
+ }
+ return;
+ }
+
+ int numDeferred = deferredAggs == null ? 0 : deferredAggs.size();
+ if (numDeferred <= 0) return;
+
+ otherAccs = new SlotAcc[ numDeferred ];
+
+ int otherAccIdx = 0;
+ for (Map.Entry<String,AggValueSource> entry : deferredAggs.entrySet()) {
+ AggValueSource agg = entry.getValue();
+ SlotAcc acc = agg.createSlotAcc(fcontext, numDocs, numSlots);
+ acc.key = entry.getKey();
+ accMap.put(acc.key, acc);
+ otherAccs[otherAccIdx++] = acc;
+ }
+
+ if (numDeferred == freq.getFacetStats().size()) {
+ // accs and otherAccs are the same...
+ accs = otherAccs;
+ }
+ }
+
+ int collectFirstPhase(DocSet docs, int slot) throws IOException {
+ int num = -1;
+ if (collectAcc != null) {
+ num = collectAcc.collect(docs, slot);
+ }
+ if (allBucketsAcc != null) {
+ num = allBucketsAcc.collect(docs, slot);
+ }
+ return num >= 0 ? num : docs.size();
+ }
+
+ void collectFirstPhase(int segDoc, int slot) throws IOException {
+ if (collectAcc != null) {
+ collectAcc.collect(segDoc, slot);
+ }
+ if (allBucketsAcc != null) {
+ allBucketsAcc.collect(segDoc, slot);
+ }
+ }
+
+ void fillBucket(SimpleOrderedMap<Object> target, int count, int slotNum, DocSet subDomain, Query filter) throws IOException {
+ target.add("count", count);
+ if (count <= 0 && !freq.processEmpty) return;
+
+ if (collectAcc != null && slotNum >= 0) {
+ collectAcc.setValues(target, slotNum);
+ }
+
+ createOtherAccs(-1, 1);
+
+ if (otherAccs == null && freq.subFacets.isEmpty()) return;
+
+ if (subDomain == null) {
+ subDomain = fcontext.searcher.getDocSet(filter, fcontext.base);
+ }
+
+ // if no subFacets, we only need a DocSet
+ // otherwise we need more?
+ // TODO: save something generic like "slotNum" in the context and use that to implement things like filter exclusion if necessary?
+ // Hmmm, but we need to look up some stuff anyway (for the label?)
+ // have a method like "DocSet applyConstraint(facet context, DocSet parent)"
+ // that's needed for domain changing things like joins anyway???
+
+ if (otherAccs != null) {
+ // do acc at a time (traversing domain each time) or do all accs for each doc?
+ for (SlotAcc acc : otherAccs) {
+ acc.reset(); // TODO: only needed if we previously used for allBuckets or missing
+ acc.collect(subDomain, 0);
+ acc.setValues(target, 0);
+ }
+ }
+
+ processSubs(target, filter, subDomain);
+ }
+
+ @Override
+ protected void processStats(SimpleOrderedMap<Object> bucket, DocSet docs, int docCount) throws IOException {
+ if (docCount == 0 && !freq.processEmpty || freq.getFacetStats().size() == 0) {
+ bucket.add("count", docCount);
+ return;
+ }
+ createAccs(docCount, 1);
+ int collected = collect(docs, 0);
+
+ // countAcc.incrementCount(0, collected); // should we set the count on the acc instead of just passing it?
+
+ assert collected == docCount;
+ addStats(bucket, collected, 0);
+ }
+
+ // overrides but with different signature!
+ private void addStats(SimpleOrderedMap<Object> target, int count, int slotNum) throws IOException {
+ target.add("count", count);
+ if (count > 0 || freq.processEmpty) {
+ for (SlotAcc acc : accs) {
+ acc.setValues(target, slotNum);
+ }
+ }
+ }
+
+ @Override
+ void setNextReader(LeafReaderContext ctx) throws IOException {
+ // base class calls this (for missing bucket...) ... go over accs[] in that case
+ super.setNextReader(ctx);
+ }
+
+ void setNextReaderFirstPhase(LeafReaderContext ctx) throws IOException {
+ if (collectAcc != null) {
+ collectAcc.setNextReader(ctx);
+ }
+ if (otherAccs != null) {
+ for (SlotAcc acc : otherAccs) {
+ acc.setNextReader(ctx);
+ }
+ }
+ }
+
+ static class Slot {
+ int slot;
+ public int tiebreakCompare(int slotA, int slotB) {
+ return slotB - slotA;
+ }
+ }
+
+ static class SpecialSlotAcc extends SlotAcc {
+ SlotAcc collectAcc;
+ SlotAcc[] otherAccs;
+ int collectAccSlot;
+ int otherAccsSlot;
+ long count;
+
+ SpecialSlotAcc(FacetContext fcontext, SlotAcc collectAcc, int collectAccSlot, SlotAcc[] otherAccs, int otherAccsSlot) {
+ super(fcontext);
+ this.collectAcc = collectAcc;
+ this.collectAccSlot = collectAccSlot;
+ this.otherAccs = otherAccs;
+ this.otherAccsSlot = otherAccsSlot;
+ }
+
+ public int getCollectAccSlot() { return collectAccSlot; }
+ public int getOtherAccSlot() { return otherAccsSlot; }
+
+ long getSpecialCount() {
+ return count;
+ }
+
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ assert slot != collectAccSlot || slot < 0;
+ count++;
+ if (collectAcc != null) {
+ collectAcc.collect(doc, collectAccSlot);
+ }
+ if (otherAccs != null) {
+ for (SlotAcc otherAcc : otherAccs) {
+ otherAcc.collect(doc, otherAccsSlot);
+ }
+ }
+ }
+
+ @Override
+ public void setNextReader(LeafReaderContext readerContext) throws IOException {
+ // collectAcc and otherAccs will normally have setNextReader called directly on them.
+ // This, however, will be used when the collect(DocSet,slot) variant is used on this Acc.
+ if (collectAcc != null) {
+ collectAcc.setNextReader(readerContext);
+ }
+ if (otherAccs != null) {
+ for (SlotAcc otherAcc : otherAccs) {
+ otherAcc.setNextReader(readerContext);
+ }
+ }
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Object getValue(int slotNum) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void setValues(SimpleOrderedMap<Object> bucket, int slotNum) throws IOException {
+ if (collectAcc != null) {
+ collectAcc.setValues(bucket, collectAccSlot);
+ }
+ if (otherAccs != null) {
+ for (SlotAcc otherAcc : otherAccs) {
+ otherAcc.setValues(bucket, otherAccsSlot);
+ }
+ }
+ }
+
+ @Override
+ public void reset() {
+ // reset should be called on underlying accs
+ // TODO: but in case something does need to be done here, should we require this method to be called but do nothing for now?
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ // someone else will call resize on collectAcc directly
+ if (collectAccSlot >= 0) {
+ collectAccSlot = resizer.getNewSlot(collectAccSlot);
+ }
+ }
+ }
+}
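To connect the accumulators above to what they serve: collectAcc, sortAcc, and deferredAggs fall out of the sort variable of a JSON Facet request. A hedged example request (field and stat names are hypothetical):

json.facet={
  cats : {
    type  : terms,
    field : cat_s,
    sort  : "x desc",        // sortAcc aliases collectAcc, which accumulates "x" over the whole domain
    facet : {
      x : "avg(num_d)",      // needed for sorting, so collected in the first phase
      y : "sum(num_d)"       // lands in deferredAggs: computed only for the returned top buckets
    }
  }
}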
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java
new file mode 100644
index 0000000..10aa4d9
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.SchemaField;
+
+/**
+ * Base class for DV/UIF accumulating counts into an array by ordinal.
+ * It can handle terms (strings) but not numbers directly; numbers encoded as terms do work. It is multi-valued capable.
+ */
+abstract class FacetFieldProcessorByArray extends FacetFieldProcessor {
+ BytesRefBuilder prefixRef;
+ int startTermIndex;
+ int endTermIndex;
+ int nTerms;
+ int nDocs;
+ int maxSlots;
+
+ int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
+
+ FacetFieldProcessorByArray(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ }
+
+ abstract protected void findStartAndEndOrds() throws IOException;
+
+ abstract protected void collectDocs() throws IOException;
+
+ /** this BytesRef may be shared across calls and should be deep-cloned if necessary */
+ abstract protected BytesRef lookupOrd(int ord) throws IOException;
+
+ @Override
+ public void process() throws IOException {
+ super.process();
+ sf = fcontext.searcher.getSchema().getField(freq.field);
+ response = getFieldCacheCounts();
+ }
+
+ private SimpleOrderedMap<Object> getFieldCacheCounts() throws IOException {
+ String prefix = freq.prefix;
+ if (prefix == null || prefix.length() == 0) {
+ prefixRef = null;
+ } else {
+ prefixRef = new BytesRefBuilder();
+ prefixRef.copyChars(prefix);
+ }
+
+ findStartAndEndOrds();
+
+ maxSlots = nTerms;
+
+ if (freq.allBuckets) {
+ allBucketsSlot = maxSlots++;
+ }
+
+ createCollectAcc(nDocs, maxSlots);
+
+ if (freq.allBuckets) {
+ allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
+ }
+
+ collectDocs();
+
+ return findTopSlots();
+ }
+
+ private SimpleOrderedMap<Object> findTopSlots() throws IOException {
+ SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
+
+ int numBuckets = 0;
+ List<Object> bucketVals = null;
+ if (freq.numBuckets && fcontext.isShard()) {
+ bucketVals = new ArrayList<>(100);
+ }
+
+ int off = fcontext.isShard() ? 0 : (int) freq.offset;
+ // add a modest amount of over-request if this is a shard request
+ int lim = freq.limit >= 0 ? (fcontext.isShard() ? (int)(freq.limit*1.1+4) : (int)freq.limit) : Integer.MAX_VALUE;
+
+ int maxsize = (int)(freq.limit >= 0 ? freq.offset + lim : Integer.MAX_VALUE - 1);
+ maxsize = Math.min(maxsize, nTerms);
+
+ final int sortMul = freq.sortDirection.getMultiplier();
+ final SlotAcc sortAcc = this.sortAcc;
+
+ PriorityQueue<Slot> queue = new PriorityQueue<Slot>(maxsize) {
+ @Override
+ protected boolean lessThan(Slot a, Slot b) {
+ int cmp = sortAcc.compare(a.slot, b.slot) * sortMul;
+ return cmp == 0 ? b.slot < a.slot : cmp < 0;
+ }
+ };
+
+ Slot bottom = null;
+ for (int i = 0; i < nTerms; i++) {
+ // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
+ if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
+ continue;
+ }
+
+ numBuckets++;
+ if (bucketVals != null && bucketVals.size()<100) {
+ int ord = startTermIndex + i;
+ BytesRef br = lookupOrd(ord);
+ Object val = sf.getType().toObject(sf, br);
+ bucketVals.add(val);
+ }
+
+ if (bottom != null) {
+ if (sortAcc.compare(bottom.slot, i) * sortMul < 0) {
+ bottom.slot = i;
+ bottom = queue.updateTop();
+ }
+ } else if (lim > 0) {
+ // queue not full
+ Slot s = new Slot();
+ s.slot = i;
+ queue.add(s);
+ if (queue.size() >= maxsize) {
+ bottom = queue.top();
+ }
+ }
+ }
+
+ if (freq.numBuckets) {
+ if (!fcontext.isShard()) {
+ res.add("numBuckets", numBuckets);
+ } else {
+ SimpleOrderedMap<Object> map = new SimpleOrderedMap<>(2);
+ map.add("numBuckets", numBuckets);
+ map.add("vals", bucketVals);
+ res.add("numBuckets", map);
+ }
+ }
+
+ FacetDebugInfo fdebug = fcontext.getDebugInfo();
+ if (fdebug != null) fdebug.putInfoItem("numBuckets", (long) numBuckets);
+
+ // if we are deep paging, we don't have to order the highest "offset" counts.
+ int collectCount = Math.max(0, queue.size() - off);
+ assert collectCount <= lim;
+ int[] sortedSlots = new int[collectCount];
+ for (int i = collectCount - 1; i >= 0; i--) {
+ sortedSlots[i] = queue.pop().slot;
+ }
+
+ if (freq.allBuckets) {
+ SimpleOrderedMap<Object> allBuckets = new SimpleOrderedMap<>();
+ allBuckets.add("count", allBucketsAcc.getSpecialCount());
+ if (allBucketsAcc != null) {
+ allBucketsAcc.setValues(allBuckets, allBucketsSlot);
+ }
+ res.add("allBuckets", allBuckets);
+ }
+
+ ArrayList<SimpleOrderedMap<Object>> bucketList = new ArrayList<>(collectCount);
+ res.add("buckets", bucketList);
+
+ // TODO: do this with a callback instead?
+ boolean needFilter = deferredAggs != null || freq.getSubFacets().size() > 0;
+
+ for (int slotNum : sortedSlots) {
+ SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+
+ // get the ord of the slot...
+ int ord = startTermIndex + slotNum;
+
+ BytesRef br = lookupOrd(ord);
+ Object val = sf.getType().toObject(sf, br);
+
+ bucket.add("val", val);
+
+ TermQuery filter = needFilter ? new TermQuery(new Term(sf.getName(), br)) : null;
+ fillBucket(bucket, countAcc.getCount(slotNum), slotNum, null, filter);
+
+ bucketList.add(bucket);
+ }
+
+ if (freq.missing) {
+ SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
+ fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
+ res.add("missing", missingBucket);
+ }
+
+ return res;
+ }
+
+}
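findTopSlots above keeps only the best offset+limit slots in a bounded heap rather than sorting all nTerms slots. A rough standalone sketch of that top-K pattern, using java.util.PriorityQueue as a stand-in for Lucene's PriorityQueue (the real code additionally reuses the heap's top element via updateTop and tie-breaks equal slots by index; both are omitted here):

import java.util.Comparator;
import java.util.PriorityQueue;

public class TopSlotsSketch {
  /** Returns the k strongest slots, strongest first.
      strength[] stands in for SlotAcc.compare(...) over bucket counts/stats. */
  static int[] topK(long[] strength, int k) {
    // min-heap: the weakest kept slot sits at the head, ready to be evicted
    PriorityQueue<Integer> heap = new PriorityQueue<>(k, Comparator.comparingLong(s -> strength[s]));
    for (int slot = 0; slot < strength.length; slot++) {
      if (heap.size() < k) {
        heap.add(slot);
      } else if (strength[heap.peek()] < strength[slot]) {
        heap.poll();             // evict the current weakest bucket
        heap.add(slot);
      }
    }
    int[] sorted = new int[heap.size()];
    for (int i = sorted.length - 1; i >= 0; i--) {
      sorted[i] = heap.poll();   // heap yields weakest first, so fill from the back
    }
    return sorted;
  }

  public static void main(String[] args) {
    long[] counts = {5, 42, 7, 99, 1};
    int[] top = topK(counts, 2);
    System.out.println(top[0] + " " + top[1]); // 3 1 (the slots with counts 99 and 42)
  }
}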
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
new file mode 100644
index 0000000..1ef4284
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongValues;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.Filter;
+
+/**
+ * Grabs values from {@link DocValues}.
+ */
+class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
+ static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage
+
+ boolean multiValuedField;
+ SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
+ MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
+
+ FacetFieldProcessorByArrayDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
+ }
+
+ @Override
+ protected void findStartAndEndOrds() throws IOException {
+ if (multiValuedField) {
+ si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
+ if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
+ ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
+ }
+ } else {
+ SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
+ si = DocValues.singleton(single); // multi-valued view
+ if (single instanceof MultiDocValues.MultiSortedDocValues) {
+ ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
+ }
+ }
+
+ if (si.getValueCount() >= Integer.MAX_VALUE) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
+ }
+
+ if (prefixRef != null) {
+ startTermIndex = (int)si.lookupTerm(prefixRef.get());
+ if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
+ prefixRef.append(UnicodeUtil.BIG_TERM);
+ endTermIndex = (int)si.lookupTerm(prefixRef.get());
+ assert endTermIndex < 0;
+ endTermIndex = -endTermIndex - 1;
+ } else {
+ startTermIndex = 0;
+ endTermIndex = (int)si.getValueCount();
+ }
+
+ nTerms = endTermIndex - startTermIndex;
+ }
+
+ @Override
+ protected void collectDocs() throws IOException {
+ int domainSize = fcontext.base.size();
+
+ if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
+ return;
+ }
+
+ // TODO: refactor some of this logic into a base class
+ boolean countOnly = collectAcc==null && allBucketsAcc==null;
+ boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();
+
+ // Are we expecting many hits per bucket?
+ // FUTURE: pro-rate for nTerms?
+ // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields.
+ // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
+ // In one test (5M doc index, faceting on a single-valued field with almost 1M unique values), the crossover point
+ // where global counting became slower than per-segment counting was a domain of 658k docs. At that point, the top 10 buckets had 6-7 matches each.
+ // this was for heap docvalues produced by UninvertingReader
+ // Since these values were randomly distributed, let's round our domain multiplier up to account for less random real-world data.
+ long domainMultiplier = multiValuedField ? 4L : 2L;
+ boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests
+
+ // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
+ // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings.
+ // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
+ // the docid is not used)
+ boolean canDoPerSeg = countOnly && fullRange;
+ boolean accumSeg = manyHitsPerBucket && canDoPerSeg;
+
+ if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic
+
+ final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
+ Filter filter = fcontext.base.getTopFilter();
+
+ for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
+ LeafReaderContext subCtx = leaves.get(subIdx);
+
+ setNextReaderFirstPhase(subCtx);
+
+ DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
+ DocIdSetIterator disi = dis.iterator();
+
+ SortedDocValues singleDv = null;
+ SortedSetDocValues multiDv = null;
+ if (multiValuedField) {
+ // TODO: get sub from multi?
+ multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
+ if (multiDv == null) {
+ multiDv = DocValues.emptySortedSet();
+ }
+ // some codecs may optimize SortedSet storage for single-valued fields
+ // singleDv will be null if multiDv is not a wrapped single-valued docvalues instance.
+ if (unwrap_singleValued_multiDv) {
+ singleDv = DocValues.unwrapSingleton(multiDv);
+ }
+ } else {
+ singleDv = subCtx.reader().getSortedDocValues(sf.getName());
+ if (singleDv == null) {
+ singleDv = DocValues.emptySorted();
+ }
+ }
+
+ LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
+
+ if (singleDv != null) {
+ if (accumSeg) {
+ collectPerSeg(singleDv, disi, toGlobal);
+ } else {
+ if (canDoPerSeg && toGlobal != null) {
+ collectCounts(singleDv, disi, toGlobal);
+ } else {
+ collectDocs(singleDv, disi, toGlobal);
+ }
+ }
+ } else {
+ if (accumSeg) {
+ collectPerSeg(multiDv, disi, toGlobal);
+ } else {
+ if (canDoPerSeg && toGlobal != null) {
+ collectCounts(multiDv, disi, toGlobal);
+ } else {
+ collectDocs(multiDv, disi, toGlobal);
+ }
+ }
+ }
+ }
+
+ reuse = null; // better GC
+ }
+
+ @Override
+ protected BytesRef lookupOrd(int ord) throws IOException {
+ return si.lookupOrd(ord);
+ }
+
+ private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int segMax = singleDv.getValueCount() + 1;
+ final int[] counts = getCountArr( segMax );
+
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ counts[ singleDv.getOrd(doc) + 1 ]++;
+ }
+
+ for (int i=1; i<segMax; i++) {
+ int segCount = counts[i];
+ if (segCount > 0) {
+ int slot = toGlobal == null ? (i - 1) : (int) toGlobal.get(i - 1);
+ countAcc.incrementCount(slot, segCount);
+ }
+ }
+ }
+
+ private void collectPerSeg(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int segMax = (int)multiDv.getValueCount();
+ final int[] counts = getCountArr( segMax );
+
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ multiDv.setDocument(doc);
+ for(;;) {
+ int segOrd = (int)multiDv.nextOrd();
+ if (segOrd < 0) break;
+ counts[segOrd]++;
+ }
+ }
+
+ for (int i=0; i<segMax; i++) {
+ int segCount = counts[i];
+ if (segCount > 0) {
+ int slot = toGlobal == null ? (i) : (int) toGlobal.get(i);
+ countAcc.incrementCount(slot, segCount);
+ }
+ }
+ }
+
+ private int[] reuse;
+ private int[] getCountArr(int maxNeeded) {
+ if (reuse == null) {
+ // make the count array large enough for any segment
+ // FUTURE: (optionally) directly use the array of the CountAcc for an optimized index..
+ reuse = new int[(int) si.getValueCount() + 1];
+ } else {
+ Arrays.fill(reuse, 0, maxNeeded, 0);
+ }
+ return reuse;
+ }
+
+ private void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ int segOrd = singleDv.getOrd(doc);
+ if (segOrd < 0) continue;
+ collect(doc, segOrd, toGlobal);
+ }
+ }
+
+ private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ int segOrd = singleDv.getOrd(doc);
+ if (segOrd < 0) continue;
+ int ord = (int)toGlobal.get(segOrd);
+ countAcc.incrementCount(ord, 1);
+ }
+ }
+
+ private void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ multiDv.setDocument(doc);
+ for(;;) {
+ int segOrd = (int)multiDv.nextOrd();
+ if (segOrd < 0) break;
+ collect(doc, segOrd, toGlobal);
+ }
+ }
+ }
+
+ private void collectCounts(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
+ int doc;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ multiDv.setDocument(doc);
+ for(;;) {
+ int segOrd = (int)multiDv.nextOrd();
+ if (segOrd < 0) break;
+ int ord = (int)toGlobal.get(segOrd);
+ countAcc.incrementCount(ord, 1);
+ }
+ }
+ }
+
+ private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
+ int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
+
+ int arrIdx = ord - startTermIndex;
+ if (arrIdx >= 0 && arrIdx < nTerms) {
+ countAcc.incrementCount(arrIdx, 1);
+ if (collectAcc != null) {
+ collectAcc.collect(doc, arrIdx);
+ }
+ if (allBucketsAcc != null) {
+ allBucketsAcc.collect(doc, arrIdx);
+ }
+ }
+ }
+
+}
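
For illustration, a JSON Facet request of roughly this shape would typically be
routed to FacetFieldProcessorByHashNumeric (a sketch; the collection and field
names are hypothetical, and method selection ultimately depends on the field's
schema properties, e.g. "price" being a single-valued numeric docValues field):

  curl http://localhost:8983/solr/techproducts/query -d '
  {
    "query": "*:*",
    "facet": {
      "prices": { "type": "terms", "field": "price" }
    }
  }'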
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayUIF.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayUIF.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayUIF.java
new file mode 100644
index 0000000..dfee257
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayUIF.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
+import org.apache.solr.schema.SchemaField;
+
+/** {@link UnInvertedField} implementation of field faceting.
+ * The UnInvertedField is a top-level (whole-index) term cache. */
+class FacetFieldProcessorByArrayUIF extends FacetFieldProcessorByArray {
+ UnInvertedField uif;
+ TermsEnum te;
+
+ FacetFieldProcessorByArrayUIF(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ }
+
+ @Override
+ protected void findStartAndEndOrds() throws IOException {
+ uif = UnInvertedField.getUnInvertedField(freq.field, fcontext.searcher);
+ te = uif.getOrdTermsEnum(fcontext.searcher.getLeafReader()); // "te" can be null
+
+ startTermIndex = 0;
+ endTermIndex = uif.numTerms(); // one past the end
+
+ if (prefixRef != null && te != null) {
+ if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
+ startTermIndex = uif.numTerms();
+ } else {
+ startTermIndex = (int) te.ord();
+ }
+ prefixRef.append(UnicodeUtil.BIG_TERM);
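+ // BIG_TERM sorts after any real term with this prefix, so seeking to
+ // prefix+BIG_TERM finds the first term past the prefix range (the exclusive end).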
+ if (te.seekCeil(prefixRef.get()) == TermsEnum.SeekStatus.END) {
+ endTermIndex = uif.numTerms();
+ } else {
+ endTermIndex = (int) te.ord();
+ }
+ }
+
+ nTerms = endTermIndex - startTermIndex;
+ }
+
+ @Override
+ protected void collectDocs() throws IOException {
+ uif.collectDocs(this);
+ }
+
+ @Override
+ protected BytesRef lookupOrd(int ord) throws IOException {
+ return uif.getTermValue(te, ord);
+ }
+}
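
For illustration, the UnInvertedField path can be requested explicitly through
the JSON Facet API's method parameter (a sketch; the field name is hypothetical):

  {
    "facet": {
      "cats": { "type": "terms", "field": "cat", "method": "uif" }
    }
  }

Since the UnInvertedField is cached per searcher, this method tends to pay off
when the same field is faceted repeatedly across many requests.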
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7072458e/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
new file mode 100644
index 0000000..005f4d1
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByEnumTermsStream.java
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search.facet;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiPostingsEnum;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.TrieField;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.HashDocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SortedIntDocSet;
+
+/**
+ * Enumerates indexed terms in order, in a streaming fashion.
+ * It's able to stream because no data needs to be accumulated as long as the buckets are emitted in index order.
+ */
+class FacetFieldProcessorByEnumTermsStream extends FacetFieldProcessor implements Closeable {
+ long bucketsToSkip;
+ long bucketsReturned;
+
+ boolean closed;
+ boolean countOnly;
+ boolean hasSubFacets; // true if there are subfacets
+ int minDfFilterCache;
+ DocSet docs;
+ DocSet fastForRandomSet;
+ TermsEnum termsEnum = null;
+ SolrIndexSearcher.DocsEnumState deState = null;
+ PostingsEnum postingsEnum;
+ BytesRef startTermBytes;
+ BytesRef term;
+ LeafReaderContext[] leaves;
+
+ FacetFieldProcessorByEnumTermsStream(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (!closed) {
+ closed = true;
+ // fcontext.base.decref(); // OFF-HEAP
+ }
+ }
+
+ @Override
+ public void process() throws IOException {
+ super.process();
+
+ // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
+ // But if the connection is broken, we want to clean up.
+ // fcontext.base.incref(); // OFF-HEAP
+ fcontext.qcontext.addCloseHook(this);
+
+ setup();
+ response = new SimpleOrderedMap<>();
+ response.add("buckets", new Iterator() {
+ boolean retrieveNext = true;
+ Object val;
+
+ @Override
+ public boolean hasNext() {
+ if (retrieveNext) {
+ val = nextBucket();
+ }
+ retrieveNext = false;
+ return val != null;
+ }
+
+ @Override
+ public Object next() {
+ if (retrieveNext) {
+ val = nextBucket();
+ }
+ retrieveNext = true;
+ if (val == null) {
+ // Last value, so clean up. In the case that we are doing streaming facets within streaming facets,
+ // the number of close hooks could grow very large, so we want to remove ourselves.
+ boolean removed = fcontext.qcontext.removeCloseHook(FacetFieldProcessorByEnumTermsStream.this);
+ assert removed;
+ try {
+ close();
+ } catch (IOException e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming close", e);
+ }
+ }
+ return val;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ });
+ }
+
+ private void setup() throws IOException {
+
+ countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg;
+ hasSubFacets = freq.subFacets.size() > 0;
+ bucketsToSkip = freq.offset;
+
+ createAccs(-1, 1);
+
+ // Minimum term docFreq in order to use the filterCache for that term.
+ if (freq.cacheDf == -1) { // -1 means never cache
+ minDfFilterCache = Integer.MAX_VALUE;
+ } else if (freq.cacheDf == 0) { // default; compute as fraction of maxDoc
+ minDfFilterCache = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes)
+ } else {
+ minDfFilterCache = freq.cacheDf;
+ }
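+ // e.g. with maxDoc = 1,600,000 the default threshold is 100,000: only terms
+ // matching at least that many docs are intersected via the filterCache.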
+
+ docs = fcontext.base;
+ fastForRandomSet = null;
+
+ if (freq.prefix != null) {
+ String indexedPrefix = sf.getType().toInternal(freq.prefix);
+ startTermBytes = new BytesRef(indexedPrefix);
+ } else if (sf.getType().getNumericType() != null) {
+ String triePrefix = TrieField.getMainValuePrefix(sf.getType());
+ if (triePrefix != null) {
+ startTermBytes = new BytesRef(triePrefix);
+ }
+ }
+
+ Fields fields = fcontext.searcher.getLeafReader().fields();
+ Terms terms = fields == null ? null : fields.terms(sf.getName());
+
+ termsEnum = null;
+ deState = null;
+ term = null;
+
+ if (terms != null) {
+
+ termsEnum = terms.iterator();
+
+ // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
+ // facet.offset when sorting by index order.
+
+ if (startTermBytes != null) {
+ if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
+ termsEnum = null;
+ } else {
+ term = termsEnum.term();
+ }
+ } else {
+ // position termsEnum on first term
+ term = termsEnum.next();
+ }
+ }
+
+ List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
+ leaves = leafList.toArray(new LeafReaderContext[leafList.size()]);
+ }
+
+ private SimpleOrderedMap<Object> nextBucket() {
+ try {
+ return _nextBucket();
+ } catch (Exception e) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error during facet streaming", e);
+ }
+ }
+
+ private SimpleOrderedMap<Object> _nextBucket() throws IOException {
+ DocSet termSet = null;
+
+ try {
+ while (term != null) {
+
+ if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) {
+ break;
+ }
+
+ int df = termsEnum.docFreq();
+ if (df < effectiveMincount) {
+ term = termsEnum.next();
+ continue;
+ }
+
+ if (termSet != null) {
+ // termSet.decref(); // OFF-HEAP
+ termSet = null;
+ }
+
+ int c = 0;
+
+ if (hasSubFacets || df >= minDfFilterCache) {
+ // use the filter cache
+
+ if (deState == null) {
+ deState = new SolrIndexSearcher.DocsEnumState();
+ deState.fieldName = sf.getName();
+ deState.liveDocs = fcontext.searcher.getLeafReader().getLiveDocs();
+ deState.termsEnum = termsEnum;
+ deState.postingsEnum = postingsEnum;
+ deState.minSetSizeCached = minDfFilterCache;
+ }
+
+ if (hasSubFacets || !countOnly) {
+ DocSet termsAll = fcontext.searcher.getDocSet(deState);
+ termSet = docs.intersection(termsAll);
+ // termsAll.decref(); // OFF-HEAP
+ c = termSet.size();
+ } else {
+ c = fcontext.searcher.numDocs(docs, deState);
+ }
+ postingsEnum = deState.postingsEnum;
+
+ resetStats();
+
+ if (!countOnly) {
+ collect(termSet, 0);
+ }
+
+ } else {
+ // We don't need the docset here (no sub-facets, and the term's df is below the filterCache threshold).
+ // If we're not countOnly, the other stats are collected per-doc below.
+ resetStats();
+
+ // lazy convert to fastForRandomSet
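+ // SortedIntDocSet only offers binary-search exists(); HashDocSet gives the
+ // near-constant-time random access the postings loops below rely on.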
+ if (fastForRandomSet == null) {
+ fastForRandomSet = docs;
+ if (docs instanceof SortedIntDocSet) { // OFF-HEAP todo: also check for native version
+ SortedIntDocSet sset = (SortedIntDocSet) docs;
+ fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
+ }
+ }
+ // iterate over the postings for this term to calculate the intersection
+ postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
+
+ if (postingsEnum instanceof MultiPostingsEnum) {
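+ // A composite postings enum wraps one enum per segment; docids inside each
+ // sub are segment-local, so they are rebased by the slice's start offset.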
+ MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
+ int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
+ for (int subindex = 0; subindex < numSubs; subindex++) {
+ MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
+ if (sub.postingsEnum == null) continue;
+ int base = sub.slice.start;
+ int docid;
+
+ if (countOnly) {
+ while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (fastForRandomSet.exists(docid + base)) c++;
+ }
+ } else {
+ setNextReader(leaves[sub.slice.readerIndex]);
+ while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (fastForRandomSet.exists(docid + base)) {
+ c++;
+ collect(docid, 0);
+ }
+ }
+ }
+
+ }
+ } else {
+ int docid;
+ if (countOnly) {
+ while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (fastForRandomSet.exists(docid)) c++;
+ }
+ } else {
+ setNextReader(leaves[0]);
+ while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (fastForRandomSet.exists(docid)) {
+ c++;
+ collect(docid, 0);
+ }
+ }
+ }
+ }
+
+ }
+
+ if (c < effectiveMincount) {
+ term = termsEnum.next();
+ continue;
+ }
+
+ // handle offset and limit
+ if (bucketsToSkip > 0) {
+ bucketsToSkip--;
+ term = termsEnum.next();
+ continue;
+ }
+
+ if (freq.limit >= 0 && ++bucketsReturned > freq.limit) {
+ return null;
+ }
+
+ // set count in case other stats depend on it
+ countAcc.incrementCount(0, c);
+
+ // OK, we have a good bucket to return... first get bucket value before moving to next term
+ Object bucketVal = sf.getType().toObject(sf, term);
+ TermQuery bucketQuery = hasSubFacets ? new TermQuery(new Term(freq.field, term)) : null;
+ term = termsEnum.next();
+
+ SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
+ bucket.add("val", bucketVal);
+ addStats(bucket, 0);
+ if (hasSubFacets) {
+ processSubs(bucket, bucketQuery, termSet);
+ }
+
+ // TODO... termSet needs to stick around for streaming sub-facets?
+
+ return bucket;
+
+ }
+
+ } finally {
+ if (termSet != null) {
+ // termSet.decref(); // OFF-HEAP
+ termSet = null;
+ }
+ }
+
+ // end of the iteration
+ return null;
+ }
+
+}
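
For illustration, a sketch of a request exercising this streaming processor
(the field name is hypothetical); streaming is possible here because buckets
come back in index order, so nothing needs to be accumulated:

  {
    "facet": {
      "all_terms": { "type": "terms", "field": "category",
                     "method": "stream", "sort": "index asc", "limit": -1 }
    }
  }

With limit -1, every bucket passing mincount is streamed; the processor above
holds at most one bucket in memory at a time.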