You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2016/09/07 09:57:24 UTC
[36/50] [abbrv] lucene-solr:apiv2: SOLR-9142: rename FFPByHashNumeric
to FFPByHashDV as it's not just for numerics anymore
SOLR-9142: rename FFPByHashNumeric to FFPByHashDV as it's not just for numerics anymore
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6a4184c6
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6a4184c6
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6a4184c6
Branch: refs/heads/apiv2
Commit: 6a4184c6742e4ef3764bfc2184015af6b95d31bb
Parents: 7b5df8a
Author: David Smiley <ds...@apache.org>
Authored: Wed Aug 31 16:56:21 2016 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Wed Aug 31 16:56:21 2016 -0400
----------------------------------------------------------------------
.../apache/solr/search/facet/FacetField.java | 2 +-
.../facet/FacetFieldProcessorByHashDV.java | 442 ++++++++++++++++++
.../facet/FacetFieldProcessorByHashNumeric.java | 443 -------------------
.../solr/search/facet/TestJsonFacets.java | 6 +-
4 files changed, 446 insertions(+), 447 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6a4184c6/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index 4d56513..3f8cb0b 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -126,7 +126,7 @@ public class FacetField extends FacetRequestSorted {
if (mincount > 0 && prefix == null && (ntype != null || method == FacetMethod.DVHASH)) {
// TODO can we auto-pick for strings when term cardinality is much greater than DocSet cardinality?
// or if we don't know cardinality but DocSet size is very small
- return new FacetFieldProcessorByHashNumeric(fcontext, this, sf);
+ return new FacetFieldProcessorByHashDV(fcontext, this, sf);
} else if (ntype == null) {
// single valued string...
return new FacetFieldProcessorByArrayDV(fcontext, this, sf);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6a4184c6/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashDV.java
new file mode 100644
index 0000000..fb93417
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashDV.java
@@ -0,0 +1,442 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search.facet;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.util.function.IntFunction;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.util.BitUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LongValues;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.DocSetUtil;
+
+/**
+ * Facets numbers into a hash table. The number is either a raw numeric DocValues value, or
+ * a term global ordinal integer.
+ * Limitations:
+ * <ul>
+ * <li>doesn't handle multiValued fields, but support could easily be added</li>
+ * <li>doesn't handle prefix, but support could easily be added</li>
+ * <li>doesn't handle mincount==0 -- you're better off with an array alg</li>
+ * </ul>
+ */
+class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
+ static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
+
+ /** a hash table with long keys (what we're counting) and integer values (counts) */
+ private static class LongCounts {
+
+ static final float LOAD_FACTOR = 0.7f;
+
+ long[] vals;
+ int[] counts; // maintain the counts here since we need them to tell if there was actually a value anyway
+ int[] oldToNewMapping;
+
+ int cardinality;
+ int threshold;
+
+ /** sz must be a power of two */
+ LongCounts(int sz) {
+ vals = new long[sz];
+ counts = new int[sz];
+ threshold = (int) (sz * LOAD_FACTOR);
+ }
+
+ /** Current number of slots in the hash table */
+ int numSlots() {
+ return vals.length;
+ }
+
+ private int hash(long val) {
+ // For floats: exponent bits start at bit 23 for single precision,
+ // and bit 52 for double precision.
+ // Many values will only have significant bits just to the right of that,
+ // and the leftmost bits will all be zero.
+
+ // For now, let's just settle for getting the first 8 significant mantissa bits of a double or float into the lowest bits of our hash.
+ // The upper bits of our hash will be irrelevant.
+ int h = (int) (val + (val >>> 44) + (val >>> 15));
+ return h;
+ }
+
+ /** returns the slot */
+ int add(long val) {
+ if (cardinality >= threshold) {
+ rehash();
+ }
+
+ int h = hash(val);
+ for (int slot = h & (vals.length-1); ;slot = (slot + ((h>>7)|1)) & (vals.length-1)) {
+ int count = counts[slot];
+ if (count == 0) {
+ counts[slot] = 1;
+ vals[slot] = val;
+ cardinality++;
+ return slot;
+ } else if (vals[slot] == val) {
+ // val is already in the set
+ counts[slot] = count + 1;
+ return slot;
+ }
+ }
+ }
+
+ protected void rehash() {
+ long[] oldVals = vals;
+ int[] oldCounts = counts; // after retrieving the count, this array is reused as a mapping to new array
+ int newCapacity = vals.length << 1;
+ vals = new long[newCapacity];
+ counts = new int[newCapacity];
+ threshold = (int) (newCapacity * LOAD_FACTOR);
+
+ for (int i=0; i<oldVals.length; i++) {
+ int count = oldCounts[i];
+ if (count == 0) {
+ oldCounts[i] = -1;
+ continue;
+ }
+
+ long val = oldVals[i];
+
+ int h = hash(val);
+ int slot = h & (vals.length-1);
+ while (counts[slot] != 0) {
+ slot = (slot + ((h>>7)|1)) & (vals.length-1);
+ }
+ counts[slot] = count;
+ vals[slot] = val;
+ oldCounts[i] = slot;
+ }
+
+ oldToNewMapping = oldCounts;
+ }
+
+ int cardinality() {
+ return cardinality;
+ }
+
+ }
+
+ /** A hack instance of Calc for Term ordinals in DocValues. */
+ // TODO consider making FacetRangeProcessor.Calc facet top level; then less of a hack?
+ private class TermOrdCalc extends FacetRangeProcessor.Calc {
+
+ IntFunction<BytesRef> lookupOrdFunction; // set in collectDocs()!
+
+ TermOrdCalc() throws IOException {
+ super(sf);
+ }
+
+ @Override
+ public long bitsToSortableBits(long globalOrd) {
+ return globalOrd;
+ }
+
+ /** To be returned in "buckets"/"val" */
+ @Override
+ public Comparable bitsToValue(long globalOrd) {
+ BytesRef bytesRef = lookupOrdFunction.apply((int) globalOrd);
+ // note FacetFieldProcessorByArray.findTopSlots also calls SchemaFieldType.toObject
+ return sf.getType().toObject(sf, bytesRef).toString();
+ }
+
+ @Override
+ public String formatValue(Comparable val) {
+ return (String) val;
+ }
+
+ @Override
+ protected Comparable parseStr(String rawval) throws ParseException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ protected Comparable parseAndAddGap(Comparable value, String gap) throws ParseException {
+ throw new UnsupportedOperationException();
+ }
+
+ }
+
+ FacetRangeProcessor.Calc calc;
+ LongCounts table;
+ int allBucketsSlot = -1;
+
+ FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
+ super(fcontext, freq, sf);
+ if (freq.mincount == 0) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ getClass()+" doesn't support mincount=0");
+ }
+ if (freq.prefix != null) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ getClass()+" doesn't support prefix"); // yet, but it could
+ }
+ FieldInfo fieldInfo = fcontext.searcher.getLeafReader().getFieldInfos().fieldInfo(sf.getName());
+ if (fieldInfo != null &&
+ fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
+ fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ getClass()+" only support single valued number/string with docValues");
+ }
+ }
+
+ @Override
+ public void process() throws IOException {
+ super.process();
+ response = calcFacets();
+ table = null;//gc
+ }
+
+ private SimpleOrderedMap<Object> calcFacets() throws IOException {
+
+ if (sf.getType().getNumericType() != null) {
+ calc = FacetRangeProcessor.getNumericCalc(sf);
+ } else {
+ calc = new TermOrdCalc(); // kind of a hack
+ }
+
+ // TODO: Use the number of indexed terms, if present, as an estimate!
+ // Even for NumericDocValues, we could check for a terms index for an estimate.
+ // Our estimation should aim high to avoid expensive rehashes.
+
+ int possibleValues = fcontext.base.size();
+ // size smaller tables so that no resize will be necessary
+ int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
+ currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
+ table = new LongCounts(currHashSize) {
+ @Override
+ protected void rehash() {
+ super.rehash();
+ doRehash(this);
+ oldToNewMapping = null; // allow for gc
+ }
+ };
+
+ // note: these methods/phases align with FacetFieldProcessorByArray's
+
+ createCollectAcc();
+
+ collectDocs();
+
+ return super.findTopSlots(table.numSlots(), table.cardinality(),
+ slotNum -> calc.bitsToValue(table.vals[slotNum]), // getBucketValFromSlotNum
+ val -> calc.formatValue(val)); // getFieldQueryVal
+ }
+
+ private void createCollectAcc() throws IOException {
+ int numSlots = table.numSlots();
+
+ if (freq.allBuckets) {
+ allBucketsSlot = numSlots++;
+ }
+
+ indexOrderAcc = new SlotAcc(fcontext) {
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ long s1 = calc.bitsToSortableBits(table.vals[slotA]);
+ long s2 = calc.bitsToSortableBits(table.vals[slotB]);
+ return Long.compare(s1, s2);
+ }
+
+ @Override
+ public Object getValue(int slotNum) throws IOException {
+ return null;
+ }
+
+ @Override
+ public void reset() {
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ }
+ };
+
+ countAcc = new CountSlotAcc(fcontext) {
+ @Override
+ public void incrementCount(int slot, int count) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int getCount(int slot) {
+ return table.counts[slot];
+ }
+
+ @Override
+ public Object getValue(int slotNum) {
+ return getCount(slotNum);
+ }
+
+ @Override
+ public void reset() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void collect(int doc, int slot) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ return Integer.compare( table.counts[slotA], table.counts[slotB] );
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ throw new UnsupportedOperationException();
+ }
+ };
+
+ // we set countAcc & indexOrderAcc first so generic ones won't be created for us.
+ super.createCollectAcc(fcontext.base.size(), numSlots);
+
+ if (freq.allBuckets) {
+ allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
+ }
+ }
+
+ private void collectDocs() throws IOException {
+ if (calc instanceof TermOrdCalc) { // Strings
+
+ // TODO support SortedSetDocValues
+ SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
+ ((TermOrdCalc)calc).lookupOrdFunction = globalDocValues::lookupOrd;
+
+ DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
+ SortedDocValues docValues = globalDocValues; // this segment/leaf. NN
+ LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN
+
+ @Override public boolean needsScores() { return false; }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
+ setNextReaderFirstPhase(ctx);
+ if (globalDocValues instanceof MultiDocValues.MultiSortedDocValues) {
+ MultiDocValues.MultiSortedDocValues multiDocValues = (MultiDocValues.MultiSortedDocValues) globalDocValues;
+ docValues = multiDocValues.values[ctx.ord];
+ toGlobal = multiDocValues.mapping.getGlobalOrds(ctx.ord);
+ }
+ }
+
+ @Override
+ public void collect(int segDoc) throws IOException {
+ long ord = docValues.getOrd(segDoc);
+ if (ord != -1) {
+ long val = toGlobal.get(ord);
+ collectValFirstPhase(segDoc, val);
+ }
+ }
+ });
+
+ } else { // Numeric:
+
+ // TODO support SortedNumericDocValues
+ DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
+ NumericDocValues values = null; //NN
+ Bits docsWithField = null; //NN
+
+ @Override public boolean needsScores() { return false; }
+
+ @Override
+ protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
+ setNextReaderFirstPhase(ctx);
+ values = DocValues.getNumeric(ctx.reader(), sf.getName());
+ docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
+ }
+
+ @Override
+ public void collect(int segDoc) throws IOException {
+ long val = values.get(segDoc);
+ if (val != 0 || docsWithField.get(segDoc)) {
+ collectValFirstPhase(segDoc, val);
+ }
+ }
+ });
+ }
+ }
+
+ private void collectValFirstPhase(int segDoc, long val) throws IOException {
+ int slot = table.add(val); // this can trigger a rehash
+
+ // Our countAcc is virtual, so this is not needed:
+ // countAcc.incrementCount(slot, 1);
+
+ super.collectFirstPhase(segDoc, slot);
+ }
+
+ private void doRehash(LongCounts table) {
+ if (collectAcc == null && allBucketsAcc == null) return;
+
+ // Our "count" acc is backed by the hash table and will already be rehashed
+ // otherAccs don't need to be rehashed
+
+ int newTableSize = table.numSlots();
+ int numSlots = newTableSize;
+ final int oldAllBucketsSlot = allBucketsSlot;
+ if (oldAllBucketsSlot >= 0) {
+ allBucketsSlot = numSlots++;
+ }
+
+ final int finalNumSlots = numSlots;
+ final int[] mapping = table.oldToNewMapping;
+
+ SlotAcc.Resizer resizer = new SlotAcc.Resizer() {
+ @Override
+ public int getNewSize() {
+ return finalNumSlots;
+ }
+
+ @Override
+ public int getNewSlot(int oldSlot) {
+ if (oldSlot < mapping.length) {
+ return mapping[oldSlot];
+ }
+ if (oldSlot == oldAllBucketsSlot) {
+ return allBucketsSlot;
+ }
+ return -1;
+ }
+ };
+
+ // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new slot directly
+ if (collectAcc != null) {
+ collectAcc.resize(resizer);
+ }
+ if (allBucketsAcc != null) {
+ allBucketsAcc.resize(resizer);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6a4184c6/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
deleted file mode 100644
index 6d5aec5..0000000
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByHashNumeric.java
+++ /dev/null
@@ -1,443 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.search.facet;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.util.function.IntFunction;
-
-import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocValuesType;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.MultiDocValues;
-import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.index.SortedDocValues;
-import org.apache.lucene.search.SimpleCollector;
-import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LongValues;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.schema.SchemaField;
-import org.apache.solr.search.DocSetUtil;
-
-/**
- * Facets numbers into a hash table. The number is either a raw numeric DocValues value, or
- * a term global ordinal integer.
- * Limitations:
- * <ul>
- * <li>doesn't handle multiValued, but could easily be added</li>
- * <li>doesn't handle prefix, but could easily be added</li>
- * <li>doesn't handle mincount==0 -- you're better off with an array alg</li>
- * </ul>
- */
-// TODO rename: FacetFieldProcessorByHashDV
-class FacetFieldProcessorByHashNumeric extends FacetFieldProcessor {
- static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
-
- /** a hash table with long keys (what we're counting) and integer values (counts) */
- private static class LongCounts {
-
- static final float LOAD_FACTOR = 0.7f;
-
- long[] vals;
- int[] counts; // maintain the counts here since we need them to tell if there was actually a value anyway
- int[] oldToNewMapping;
-
- int cardinality;
- int threshold;
-
- /** sz must be a power of two */
- LongCounts(int sz) {
- vals = new long[sz];
- counts = new int[sz];
- threshold = (int) (sz * LOAD_FACTOR);
- }
-
- /** Current number of slots in the hash table */
- int numSlots() {
- return vals.length;
- }
-
- private int hash(long val) {
- // For floats: exponent bits start at bit 23 for single precision,
- // and bit 52 for double precision.
- // Many values will only have significant bits just to the right of that,
- // and the leftmost bits will all be zero.
-
- // For now, lets just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
- // The upper bits of our hash will be irrelevant.
- int h = (int) (val + (val >>> 44) + (val >>> 15));
- return h;
- }
-
- /** returns the slot */
- int add(long val) {
- if (cardinality >= threshold) {
- rehash();
- }
-
- int h = hash(val);
- for (int slot = h & (vals.length-1); ;slot = (slot + ((h>>7)|1)) & (vals.length-1)) {
- int count = counts[slot];
- if (count == 0) {
- counts[slot] = 1;
- vals[slot] = val;
- cardinality++;
- return slot;
- } else if (vals[slot] == val) {
- // val is already in the set
- counts[slot] = count + 1;
- return slot;
- }
- }
- }
-
- protected void rehash() {
- long[] oldVals = vals;
- int[] oldCounts = counts; // after retrieving the count, this array is reused as a mapping to new array
- int newCapacity = vals.length << 1;
- vals = new long[newCapacity];
- counts = new int[newCapacity];
- threshold = (int) (newCapacity * LOAD_FACTOR);
-
- for (int i=0; i<oldVals.length; i++) {
- int count = oldCounts[i];
- if (count == 0) {
- oldCounts[i] = -1;
- continue;
- }
-
- long val = oldVals[i];
-
- int h = hash(val);
- int slot = h & (vals.length-1);
- while (counts[slot] != 0) {
- slot = (slot + ((h>>7)|1)) & (vals.length-1);
- }
- counts[slot] = count;
- vals[slot] = val;
- oldCounts[i] = slot;
- }
-
- oldToNewMapping = oldCounts;
- }
-
- int cardinality() {
- return cardinality;
- }
-
- }
-
- /** A hack instance of Calc for Term ordinals in DocValues. */
- // TODO consider making FacetRangeProcessor.Calc facet top level; then less of a hack?
- private class TermOrdCalc extends FacetRangeProcessor.Calc {
-
- IntFunction<BytesRef> lookupOrdFunction; // set in collectDocs()!
-
- TermOrdCalc() throws IOException {
- super(sf);
- }
-
- @Override
- public long bitsToSortableBits(long globalOrd) {
- return globalOrd;
- }
-
- /** To be returned in "buckets"/"val" */
- @Override
- public Comparable bitsToValue(long globalOrd) {
- BytesRef bytesRef = lookupOrdFunction.apply((int) globalOrd);
- // note FacetFieldProcessorByArray.findTopSlots also calls SchemaFieldType.toObject
- return sf.getType().toObject(sf, bytesRef).toString();
- }
-
- @Override
- public String formatValue(Comparable val) {
- return (String) val;
- }
-
- @Override
- protected Comparable parseStr(String rawval) throws ParseException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- protected Comparable parseAndAddGap(Comparable value, String gap) throws ParseException {
- throw new UnsupportedOperationException();
- }
-
- }
-
- FacetRangeProcessor.Calc calc;
- LongCounts table;
- int allBucketsSlot = -1;
-
- FacetFieldProcessorByHashNumeric(FacetContext fcontext, FacetField freq, SchemaField sf) {
- super(fcontext, freq, sf);
- if (freq.mincount == 0) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- getClass()+" doesn't support mincount=0");
- }
- if (freq.prefix != null) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- getClass()+" doesn't support prefix"); // yet, but it could
- }
- FieldInfo fieldInfo = fcontext.searcher.getLeafReader().getFieldInfos().fieldInfo(sf.getName());
- if (fieldInfo != null &&
- fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
- fieldInfo.getDocValuesType() != DocValuesType.SORTED) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- getClass()+" only support single valued number/string with docValues");
- }
- }
-
- @Override
- public void process() throws IOException {
- super.process();
- response = calcFacets();
- table = null;//gc
- }
-
- private SimpleOrderedMap<Object> calcFacets() throws IOException {
-
- if (sf.getType().getNumericType() != null) {
- calc = FacetRangeProcessor.getNumericCalc(sf);
- } else {
- calc = new TermOrdCalc(); // kind of a hack
- }
-
- // TODO: Use the number of indexed terms, if present, as an estimate!
- // Even for NumericDocValues, we could check for a terms index for an estimate.
- // Our estimation should aim high to avoid expensive rehashes.
-
- int possibleValues = fcontext.base.size();
- // size smaller tables so that no resize will be necessary
- int currHashSize = BitUtil.nextHighestPowerOfTwo((int) (possibleValues * (1 / LongCounts.LOAD_FACTOR) + 1));
- currHashSize = Math.min(currHashSize, MAXIMUM_STARTING_TABLE_SIZE);
- table = new LongCounts(currHashSize) {
- @Override
- protected void rehash() {
- super.rehash();
- doRehash(this);
- oldToNewMapping = null; // allow for gc
- }
- };
-
- // note: these methods/phases align with FacetFieldProcessorByArray's
-
- createCollectAcc();
-
- collectDocs();
-
- return super.findTopSlots(table.numSlots(), table.cardinality(),
- slotNum -> calc.bitsToValue(table.vals[slotNum]), // getBucketValFromSlotNum
- val -> calc.formatValue(val)); // getFieldQueryVal
- }
-
- private void createCollectAcc() throws IOException {
- int numSlots = table.numSlots();
-
- if (freq.allBuckets) {
- allBucketsSlot = numSlots++;
- }
-
- indexOrderAcc = new SlotAcc(fcontext) {
- @Override
- public void collect(int doc, int slot) throws IOException {
- }
-
- @Override
- public int compare(int slotA, int slotB) {
- long s1 = calc.bitsToSortableBits(table.vals[slotA]);
- long s2 = calc.bitsToSortableBits(table.vals[slotB]);
- return Long.compare(s1, s2);
- }
-
- @Override
- public Object getValue(int slotNum) throws IOException {
- return null;
- }
-
- @Override
- public void reset() {
- }
-
- @Override
- public void resize(Resizer resizer) {
- }
- };
-
- countAcc = new CountSlotAcc(fcontext) {
- @Override
- public void incrementCount(int slot, int count) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int getCount(int slot) {
- return table.counts[slot];
- }
-
- @Override
- public Object getValue(int slotNum) {
- return getCount(slotNum);
- }
-
- @Override
- public void reset() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public void collect(int doc, int slot) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int compare(int slotA, int slotB) {
- return Integer.compare( table.counts[slotA], table.counts[slotB] );
- }
-
- @Override
- public void resize(Resizer resizer) {
- throw new UnsupportedOperationException();
- }
- };
-
- // we set the countAcc & indexAcc first so generic ones won't be created for us.
- super.createCollectAcc(fcontext.base.size(), numSlots);
-
- if (freq.allBuckets) {
- allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
- }
- }
-
- private void collectDocs() throws IOException {
- if (calc instanceof TermOrdCalc) { // Strings
-
- // TODO support SortedSetDocValues
- SortedDocValues globalDocValues = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
- ((TermOrdCalc)calc).lookupOrdFunction = globalDocValues::lookupOrd;
-
- DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
- SortedDocValues docValues = globalDocValues; // this segment/leaf. NN
- LongValues toGlobal = LongValues.IDENTITY; // this segment to global ordinal. NN
-
- @Override public boolean needsScores() { return false; }
-
- @Override
- protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
- setNextReaderFirstPhase(ctx);
- if (globalDocValues instanceof MultiDocValues.MultiSortedDocValues) {
- MultiDocValues.MultiSortedDocValues multiDocValues = (MultiDocValues.MultiSortedDocValues) globalDocValues;
- docValues = multiDocValues.values[ctx.ord];
- toGlobal = multiDocValues.mapping.getGlobalOrds(ctx.ord);
- }
- }
-
- @Override
- public void collect(int segDoc) throws IOException {
- long ord = docValues.getOrd(segDoc);
- if (ord != -1) {
- long val = toGlobal.get(ord);
- collectValFirstPhase(segDoc, val);
- }
- }
- });
-
- } else { // Numeric:
-
- // TODO support SortedNumericDocValues
- DocSetUtil.collectSortedDocSet(fcontext.base, fcontext.searcher.getIndexReader(), new SimpleCollector() {
- NumericDocValues values = null; //NN
- Bits docsWithField = null; //NN
-
- @Override public boolean needsScores() { return false; }
-
- @Override
- protected void doSetNextReader(LeafReaderContext ctx) throws IOException {
- setNextReaderFirstPhase(ctx);
- values = DocValues.getNumeric(ctx.reader(), sf.getName());
- docsWithField = DocValues.getDocsWithField(ctx.reader(), sf.getName());
- }
-
- @Override
- public void collect(int segDoc) throws IOException {
- long val = values.get(segDoc);
- if (val != 0 || docsWithField.get(segDoc)) {
- collectValFirstPhase(segDoc, val);
- }
- }
- });
- }
- }
-
- private void collectValFirstPhase(int segDoc, long val) throws IOException {
- int slot = table.add(val); // this can trigger a rehash
-
- // Our countAcc is virtual, so this is not needed:
- // countAcc.incrementCount(slot, 1);
-
- super.collectFirstPhase(segDoc, slot);
- }
-
- private void doRehash(LongCounts table) {
- if (collectAcc == null && allBucketsAcc == null) return;
-
- // Our "count" acc is backed by the hash table and will already be rehashed
- // otherAccs don't need to be rehashed
-
- int newTableSize = table.numSlots();
- int numSlots = newTableSize;
- final int oldAllBucketsSlot = allBucketsSlot;
- if (oldAllBucketsSlot >= 0) {
- allBucketsSlot = numSlots++;
- }
-
- final int finalNumSlots = numSlots;
- final int[] mapping = table.oldToNewMapping;
-
- SlotAcc.Resizer resizer = new SlotAcc.Resizer() {
- @Override
- public int getNewSize() {
- return finalNumSlots;
- }
-
- @Override
- public int getNewSlot(int oldSlot) {
- if (oldSlot < mapping.length) {
- return mapping[oldSlot];
- }
- if (oldSlot == oldAllBucketsSlot) {
- return allBucketsSlot;
- }
- return -1;
- }
- };
-
- // NOTE: resizing isn't strictly necessary for missing/allBuckets... we could just set the new slot directly
- if (collectAcc != null) {
- collectAcc.resize(resizer);
- }
- if (allBucketsAcc != null) {
- allBucketsAcc.resize(resizer);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6a4184c6/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 6ab25bb..c83d308 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -50,8 +50,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
public static void beforeTests() throws Exception {
JSONTestUtil.failRepeatedKeys = true;
- origTableSize = FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE;
- FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
+ origTableSize = FacetFieldProcessorByHashDV.MAXIMUM_STARTING_TABLE_SIZE;
+ FacetFieldProcessorByHashDV.MAXIMUM_STARTING_TABLE_SIZE=2; // stress test resizing
origDefaultFacetMethod = FacetField.FacetMethod.DEFAULT_METHOD;
// instead of the following, see the constructor
@@ -69,7 +69,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
@AfterClass
public static void afterTests() throws Exception {
JSONTestUtil.failRepeatedKeys = false;
- FacetFieldProcessorByHashNumeric.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
+ FacetFieldProcessorByHashDV.MAXIMUM_STARTING_TABLE_SIZE=origTableSize;
FacetField.FacetMethod.DEFAULT_METHOD = origDefaultFacetMethod;
if (servers != null) {
servers.stop();