You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2016/12/14 22:14:57 UTC
[04/32] lucene-solr:jira/solr-5944: SOLR-9822: speed up single-valued
string fieldcache counting in dv facet processor
SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ca5e736d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ca5e736d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ca5e736d
Branch: refs/heads/jira/solr-5944
Commit: ca5e736db1df0cdf35f1b039350bfd5a9cdfa102
Parents: 10500c8
Author: yonik <yo...@apache.org>
Authored: Wed Dec 7 11:08:33 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Wed Dec 7 11:09:55 2016 -0500
----------------------------------------------------------------------
solr/CHANGES.txt | 5 +
.../facet/FacetFieldProcessorByArrayDV.java | 49 ++++++-
.../org/apache/solr/search/facet/FieldUtil.java | 147 +++++++++++++++++++
.../org/apache/solr/uninverting/FieldCache.java | 2 +-
.../apache/solr/uninverting/FieldCacheImpl.java | 130 ++++++++--------
5 files changed, 264 insertions(+), 69 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 14dd2fa..8dee837 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -62,6 +62,11 @@ Optimizations
* SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the
creation of a Lucene FieldType every time a field is indexed. (John Call, yonik)
+* SOLR-9822: JSON Facet API: Recover performance lost due to the DocValues transition to
+ an iterator API (LUCENE-7407). This only fixes calculating counts for single-valued
+ string fields from the FieldCache, resulting in up to 56% better throughput for those cases.
+ (yonik)
+
================== 6.4.0 ==================
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
index 88adf67..1481f18 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
@@ -33,6 +33,7 @@ import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.Filter;
+import org.apache.solr.uninverting.FieldCacheImpl;
/**
* Grabs values from {@link DocValues}.
@@ -184,15 +185,33 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
int segMax = singleDv.getValueCount() + 1;
final int[] counts = getCountArr( segMax );
+ /** alternate trial implementations
+ // ord
+ // FieldUtil.visitOrds(singleDv, disi, (doc,ord)->{counts[ord+1]++;} );
+
+ FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
+ while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ counts[ ordValues.getOrd() + 1]++;
+ }
+ **/
+
+
+ // calculate segment-local counts
int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (singleDv.advanceExact(doc)) {
- counts[ singleDv.ordValue() + 1 ]++;
- } else {
- counts[ 0 ]++;
+ if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+ FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ counts[fc.getOrd(doc) + 1]++;
+ }
+ } else {
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (singleDv.advanceExact(doc)) {
+ counts[singleDv.ordValue() + 1]++;
+ }
}
}
+ // convert segment-local counts to global counts
for (int i=1; i<segMax; i++) {
int segCount = counts[i];
if (segCount > 0) {
@@ -250,12 +269,26 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
int doc;
- while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (singleDv.advanceExact(doc)) {
- int segOrd = singleDv.ordValue();
+ if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+
+ FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter)singleDv;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ int segOrd = fc.getOrd(doc);
+ if (segOrd < 0) continue;
int ord = (int)toGlobal.get(segOrd);
countAcc.incrementCount(ord, 1);
}
+
+ } else {
+
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (singleDv.advanceExact(doc)) {
+ int segOrd = singleDv.ordValue();
+ int ord = (int) toGlobal.get(segOrd);
+ countAcc.incrementCount(ord, 1);
+ }
+ }
+
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
index 84255b9..389b6d7 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
@@ -21,10 +21,13 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.uninverting.FieldCacheImpl;
/** @lucene.internal
* Porting helper... may be removed if it offers no value in the future.
@@ -52,4 +55,148 @@ public class FieldUtil {
return si == null ? DocValues.emptySortedSet() : si;
}
+
+ /** The following ord visitors and wrappers are a work in progress and experimental
+ * @lucene.experimental */
+ @FunctionalInterface
+ public interface OrdFunc {
+ void handleOrd(int docid, int ord); // TODO: throw exception?
+ }
+
+ public static boolean isFieldCache(SortedDocValues singleDv) {
+ return singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter;
+ }
+
+ public static void visitOrds(SortedDocValues singleDv, DocIdSetIterator disi, OrdFunc ordFunc) throws IOException {
+ int doc;
+ if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+ FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ ordFunc.handleOrd(doc, fc.getOrd(doc));
+ }
+ } else {
+ while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ if (singleDv.advanceExact(doc)) {
+ ordFunc.handleOrd(doc, singleDv.ordValue());
+ } else {
+ // TODO: optionally pass in missingOrd?
+ }
+ }
+ }
+ }
+
+ public static OrdValues getOrdValues(SortedDocValues singleDv, DocIdSetIterator disi) {
+ if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+ FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+ return new FCOrdValues(fc, disi);
+ }
+ return new DVOrdValues(singleDv, disi);
+ }
+
+
+ public static abstract class OrdValues extends SortedDocValues {
+ int doc;
+ int ord;
+
+ public int getOrd() {
+ return ord;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public abstract int nextDoc() throws IOException;
+
+ @Override
+ public int advance(int target) throws IOException {
+ return 0; // TODO
+ }
+
+ @Override
+ public long cost() {
+ return 0;
+ }
+
+ @Override
+ public int getValueCount() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+
+ public static class FCOrdValues extends OrdValues {
+ FieldCacheImpl.SortedDocValuesImpl.Iter vals;
+ DocIdSetIterator disi;
+
+ public FCOrdValues(FieldCacheImpl.SortedDocValuesImpl.Iter iter, DocIdSetIterator disi) {
+ this.vals = iter;
+ this.disi = disi;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ doc = disi.nextDoc();
+ if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+ ord = vals.getOrd(doc); // todo: loop until a hit?
+ return doc;
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ return false;
+ }
+
+ @Override
+ public int ordValue() {
+ return 0;
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) throws IOException {
+ return null;
+ }
+ }
+
+ public static class DVOrdValues extends OrdValues {
+ SortedDocValues vals;
+ DocIdSetIterator disi;
+ int valDoc;
+
+ public DVOrdValues(SortedDocValues vals, DocIdSetIterator disi) {
+ this.vals = vals;
+ this.disi = disi;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ for (;;) {
+ // todo - use skipping when appropriate
+ doc = disi.nextDoc();
+ if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+ boolean match = vals.advanceExact(doc);
+ if (match) {
+ ord = vals.ordValue();
+ return doc;
+ }
+ }
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ return false;
+ }
+
+ @Override
+ public int ordValue() {
+ return 0;
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) throws IOException {
+ return null;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
index ea8f6ea..32f5615 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
@@ -45,7 +45,7 @@ import org.apache.lucene.util.RamUsageEstimator;
*
* @lucene.internal
*/
-interface FieldCache {
+public interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
index 2224010..0ca687f 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
@@ -58,9 +58,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
* Expert: The default cache implementation, storing all values in memory.
* A WeakHashMap is used for storage.
*
- * @since lucene 1.4
+ * @lucene.internal
*/
-class FieldCacheImpl implements FieldCache {
+public class FieldCacheImpl implements FieldCache {
private Map<Class<?>,Cache> caches;
FieldCacheImpl() {
@@ -786,79 +786,89 @@ class FieldCacheImpl implements FieldCache {
this.termOrdToBytesOffset = termOrdToBytesOffset;
this.numOrd = numOrd;
}
-
+
public SortedDocValues iterator() {
- final BytesRef term = new BytesRef();
- return new SortedDocValues() {
- private int docID = -1;
+ return new Iter();
+ }
- @Override
- public int docID() {
- return docID;
- }
+ public class Iter extends SortedDocValues {
+ private int docID = -1;
+ private final BytesRef term = new BytesRef();
- @Override
- public int nextDoc() {
- while (true) {
- docID++;
- if (docID >= docToTermOrd.size()) {
- docID = NO_MORE_DOCS;
- return docID;
- }
- if (docToTermOrd.get(docID) != 0) {
- return docID;
- }
- }
- }
+ /** @lucene.internal Specific to this implementation and subject to change. For internal optimization only. */
+ public int getOrd(int docID) {
+ // Subtract 1, matching the 1+ord we did when
+ // storing, so that missing values, which are 0 in the
+ // packed ints, are returned as -1 ord:
+ return (int) docToTermOrd.get(docID)-1;
+ }
- @Override
- public int advance(int target) {
- if (target < docToTermOrd.size()) {
- docID = target;
- if (docToTermOrd.get(docID) != 0) {
- return docID;
- } else{
- return nextDoc();
- }
- } else {
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public int nextDoc() {
+ while (true) {
+ docID++;
+ if (docID >= docToTermOrd.size()) {
docID = NO_MORE_DOCS;
return docID;
}
+ if (docToTermOrd.get(docID) != 0) {
+ return docID;
+ }
}
+ }
- @Override
- public boolean advanceExact(int target) throws IOException {
+ @Override
+ public int advance(int target) {
+ if (target < docToTermOrd.size()) {
docID = target;
- return docToTermOrd.get(docID) != 0;
+ if (docToTermOrd.get(docID) != 0) {
+ return docID;
+ } else{
+ return nextDoc();
+ }
+ } else {
+ docID = NO_MORE_DOCS;
+ return docID;
}
+ }
- @Override
- public long cost() {
- return 0;
- }
-
- @Override
- public int ordValue() {
- // Subtract 1, matching the 1+ord we did when
- // storing, so that missing values, which are 0 in the
- // packed ints, are returned as -1 ord:
- return (int) docToTermOrd.get(docID)-1;
- }
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ docID = target;
+ return docToTermOrd.get(docID) != 0;
+ }
- @Override
- public int getValueCount() {
- return numOrd;
- }
+ @Override
+ public long cost() {
+ return 0;
+ }
- @Override
- public BytesRef lookupOrd(int ord) {
- if (ord < 0) {
- throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
- }
- bytes.fill(term, termOrdToBytesOffset.get(ord));
- return term;
+ @Override
+ public int ordValue() {
+ // Subtract 1, matching the 1+ord we did when
+ // storing, so that missing values, which are 0 in the
+ // packed ints, are returned as -1 ord:
+ return (int) docToTermOrd.get(docID)-1;
+ }
+
+ @Override
+ public int getValueCount() {
+ return numOrd;
+ }
+
+ @Override
+ public BytesRef lookupOrd(int ord) {
+ if (ord < 0) {
+ throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
}
- };
+ bytes.fill(term, termOrdToBytesOffset.get(ord));
+ return term;
+ }
}
@Override