You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/12/07 16:10:03 UTC

lucene-solr:master: SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor

Repository: lucene-solr
Updated Branches:
  refs/heads/master 10500c894 -> ca5e736db


SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ca5e736d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ca5e736d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ca5e736d

Branch: refs/heads/master
Commit: ca5e736db1df0cdf35f1b039350bfd5a9cdfa102
Parents: 10500c8
Author: yonik <yo...@apache.org>
Authored: Wed Dec 7 11:08:33 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Wed Dec 7 11:09:55 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   5 +
 .../facet/FacetFieldProcessorByArrayDV.java     |  49 ++++++-
 .../org/apache/solr/search/facet/FieldUtil.java | 147 +++++++++++++++++++
 .../org/apache/solr/uninverting/FieldCache.java |   2 +-
 .../apache/solr/uninverting/FieldCacheImpl.java | 130 ++++++++--------
 5 files changed, 264 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 14dd2fa..8dee837 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -62,6 +62,11 @@ Optimizations
 * SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the 
   creation of a Lucene FieldType every time a field is indexed. (John Call, yonik) 
 
+* SOLR-9822: JSON Facet API: Recover performance lost due to the DocValues transition to
+  an iterator API (LUCENE-7407).  This only fixes calculating counts for single-valued
+  string fields from the FieldCache, resulting in up to 56% better throughput for those cases.
+  (yonik)
+
 
 ==================  6.4.0 ==================
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
index 88adf67..1481f18 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
@@ -33,6 +33,7 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.Filter;
+import org.apache.solr.uninverting.FieldCacheImpl;
 
 /**
  * Grabs values from {@link DocValues}.
@@ -184,15 +185,33 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
     int segMax = singleDv.getValueCount() + 1;
     final int[] counts = getCountArr( segMax );
 
+    /** alternate trial implementations
+     // ord
+     // FieldUtil.visitOrds(singleDv, disi,  (doc,ord)->{counts[ord+1]++;} );
+
+    FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
+    while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+      counts[ ordValues.getOrd() + 1]++;
+    }
+     **/
+
+
+    // calculate segment-local counts
     int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      if (singleDv.advanceExact(doc)) {
-        counts[ singleDv.ordValue() + 1 ]++;
-      } else {
-        counts[ 0 ]++;
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        counts[fc.getOrd(doc) + 1]++;
+      }
+    } else {
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          counts[singleDv.ordValue() + 1]++;
+        }
       }
     }
 
+    // convert segment-local counts to global counts
     for (int i=1; i<segMax; i++) {
       int segCount = counts[i];
       if (segCount > 0) {
@@ -250,12 +269,26 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
 
   private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
     int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      if (singleDv.advanceExact(doc)) {
-        int segOrd = singleDv.ordValue();
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter)singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        int segOrd = fc.getOrd(doc);
+        if (segOrd < 0) continue;
         int ord = (int)toGlobal.get(segOrd);
         countAcc.incrementCount(ord, 1);
       }
+
+    } else {
+
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          int segOrd = singleDv.ordValue();
+          int ord = (int) toGlobal.get(segOrd);
+          countAcc.incrementCount(ord, 1);
+        }
+      }
+
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
index 84255b9..389b6d7 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
@@ -21,10 +21,13 @@ import java.io.IOException;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryContext;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.uninverting.FieldCacheImpl;
 
 /** @lucene.internal
  * Porting helper... may be removed if it offers no value in the future.
@@ -52,4 +55,148 @@ public class FieldUtil {
     return si == null ? DocValues.emptySortedSet() : si;
   }
 
+
+  /** The following ord visitors and wrappers are a work in progress and experimental
+   *  @lucene.experimental */
+  @FunctionalInterface
+  public interface OrdFunc {
+    void handleOrd(int docid, int ord); // TODO: throw exception?
+  }
+
+  public static boolean isFieldCache(SortedDocValues singleDv) {
+    return singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter;
+  }
+
+  public static void visitOrds(SortedDocValues singleDv, DocIdSetIterator disi, OrdFunc ordFunc) throws IOException {
+    int doc;
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        ordFunc.handleOrd(doc, fc.getOrd(doc));
+      }
+    } else {
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          ordFunc.handleOrd(doc, singleDv.ordValue());
+        } else {
+          // TODO: optionally pass in missingOrd?
+        }
+      }
+    }
+  }
+
+  public static OrdValues getOrdValues(SortedDocValues singleDv, DocIdSetIterator disi) {
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      return new FCOrdValues(fc, disi);
+    }
+    return new DVOrdValues(singleDv, disi);
+  }
+
+
+  public static abstract class OrdValues extends SortedDocValues {
+    int doc;
+    int ord;
+
+    public int getOrd() {
+      return ord;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public abstract int nextDoc() throws IOException;
+
+    @Override
+    public int advance(int target) throws IOException {
+      return 0; // TODO
+    }
+
+    @Override
+    public long cost() {
+      return 0;
+    }
+
+    @Override
+    public int getValueCount() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+
+  public static class FCOrdValues extends OrdValues {
+    FieldCacheImpl.SortedDocValuesImpl.Iter vals;
+    DocIdSetIterator disi;
+
+    public FCOrdValues(FieldCacheImpl.SortedDocValuesImpl.Iter iter, DocIdSetIterator disi) {
+      this.vals = iter;
+      this.disi = disi;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      doc = disi.nextDoc();
+      if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+      ord = vals.getOrd(doc); // todo: loop until a hit?
+      return doc;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      return false;
+    }
+
+    @Override
+    public int ordValue() {
+      return 0;
+    }
+
+    @Override
+    public BytesRef lookupOrd(int ord) throws IOException {
+      return null;
+    }
+  }
+
+  public static class DVOrdValues extends OrdValues {
+    SortedDocValues vals;
+    DocIdSetIterator disi;
+    int valDoc;
+
+    public DVOrdValues(SortedDocValues vals, DocIdSetIterator disi) {
+      this.vals = vals;
+      this.disi = disi;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      for (;;) {
+        // todo - use skipping when appropriate
+        doc = disi.nextDoc();
+        if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+        boolean match = vals.advanceExact(doc);
+        if (match) {
+          ord = vals.ordValue();
+          return doc;
+        }
+      }
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      return false;
+    }
+
+    @Override
+    public int ordValue() {
+      return 0;
+    }
+
+    @Override
+    public BytesRef lookupOrd(int ord) throws IOException {
+      return null;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
index ea8f6ea..32f5615 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
@@ -45,7 +45,7 @@ import org.apache.lucene.util.RamUsageEstimator;
  *
  * @lucene.internal
  */
-interface FieldCache {
+public interface FieldCache {
 
   /**
    * Placeholder indicating creation of this cache is currently in-progress.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
index 2224010..0ca687f 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
@@ -58,9 +58,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * Expert: The default cache implementation, storing all values in memory.
  * A WeakHashMap is used for storage.
  *
- * @since   lucene 1.4
+ * @lucene.internal
  */
-class FieldCacheImpl implements FieldCache {
+public class FieldCacheImpl implements FieldCache {
 
   private Map<Class<?>,Cache> caches;
   FieldCacheImpl() {
@@ -786,79 +786,89 @@ class FieldCacheImpl implements FieldCache {
       this.termOrdToBytesOffset = termOrdToBytesOffset;
       this.numOrd = numOrd;
     }
-    
+
     public SortedDocValues iterator() {
-      final BytesRef term = new BytesRef();
-      return new SortedDocValues() {
-        private int docID = -1;
+      return new Iter();
+    }
 
-        @Override
-        public int docID() {
-          return docID;
-        }
+    public class Iter extends SortedDocValues {
+      private int docID = -1;
+      private final BytesRef term = new BytesRef();
 
-        @Override
-        public int nextDoc() {
-          while (true) {
-            docID++;
-            if (docID >= docToTermOrd.size()) {
-              docID = NO_MORE_DOCS;
-              return docID;
-            }
-            if (docToTermOrd.get(docID) != 0) {
-              return docID;
-            }
-          }
-        }
+      /** @lucene.internal Specific to this implementation and subject to change.  For internal optimization only. */
+      public int getOrd(int docID) {
+        // Subtract 1, matching the 1+ord we did when
+        // storing, so that missing values, which are 0 in the
+        // packed ints, are returned as -1 ord:
+        return (int) docToTermOrd.get(docID)-1;
+      }
 
-        @Override
-        public int advance(int target) {
-          if (target < docToTermOrd.size()) {
-            docID = target;
-            if (docToTermOrd.get(docID) != 0) {
-              return docID;
-            } else{
-              return nextDoc();
-            }
-          } else {
+      @Override
+      public int docID() {
+        return docID;
+      }
+
+      @Override
+      public int nextDoc() {
+        while (true) {
+          docID++;
+          if (docID >= docToTermOrd.size()) {
             docID = NO_MORE_DOCS;
             return docID;
           }
+          if (docToTermOrd.get(docID) != 0) {
+            return docID;
+          }
         }
+      }
 
-        @Override
-        public boolean advanceExact(int target) throws IOException {
+      @Override
+      public int advance(int target) {
+        if (target < docToTermOrd.size()) {
           docID = target;
-          return docToTermOrd.get(docID) != 0;
+          if (docToTermOrd.get(docID) != 0) {
+            return docID;
+          } else{
+            return nextDoc();
+          }
+        } else {
+          docID = NO_MORE_DOCS;
+          return docID;
         }
+      }
 
-        @Override
-        public long cost() {
-          return 0;
-        }
-        
-        @Override
-        public int ordValue() {
-          // Subtract 1, matching the 1+ord we did when
-          // storing, so that missing values, which are 0 in the
-          // packed ints, are returned as -1 ord:
-          return (int) docToTermOrd.get(docID)-1;
-        }
+      @Override
+      public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        return docToTermOrd.get(docID) != 0;
+      }
 
-        @Override
-        public int getValueCount() {
-          return numOrd;
-        }
+      @Override
+      public long cost() {
+        return 0;
+      }
 
-        @Override
-        public BytesRef lookupOrd(int ord) {
-          if (ord < 0) {
-            throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
-          }
-          bytes.fill(term, termOrdToBytesOffset.get(ord));
-          return term;
+      @Override
+      public int ordValue() {
+        // Subtract 1, matching the 1+ord we did when
+        // storing, so that missing values, which are 0 in the
+        // packed ints, are returned as -1 ord:
+        return (int) docToTermOrd.get(docID)-1;
+      }
+
+      @Override
+      public int getValueCount() {
+        return numOrd;
+      }
+
+      @Override
+      public BytesRef lookupOrd(int ord) {
+        if (ord < 0) {
+          throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
         }
-      };
+        bytes.fill(term, termOrdToBytesOffset.get(ord));
+        return term;
+      }
     }
 
     @Override