You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/20 19:31:46 UTC
[37/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7506: FastTaxonomyFacetCounts use ConjunctionDISI so cost is in proportion to size of intersected set of documents

LUCENE-7506: FastTaxonomyFacetCounts use ConjunctionDISI so cost is in proportion to size of intersected set of documents


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d03cc92b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d03cc92b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d03cc92b

Branch: refs/heads/jira/solr-8593
Commit: d03cc92b222681b5d701a0383d93c2ca5c1a186d
Parents: 731c5f9
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Oct 19 10:04:39 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Oct 19 10:04:39 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  5 +++
 .../facet/taxonomy/FastTaxonomyFacetCounts.java | 41 +++++++++-----------
 2 files changed, 24 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d03cc92b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 21ded1a..7105330 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -40,6 +40,11 @@ Optimizations
   in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
   clauses. (Spyros Kapnissis via Adrien Grand, Uwe Schindler)
 
+* LUCENE-7506: FastTaxonomyFacetCounts should use CPU in proportion to
+  the size of the intersected set of hits from the query and documents
+  that have a facet value, so sparse faceting works as expected
+  (Adrien Grand via Mike McCandless)
+
 Other
 
 * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d03cc92b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index 7ad5430..ef96073 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -17,12 +17,14 @@
 package org.apache.lucene.facet.taxonomy;
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
 import org.apache.lucene.facet.FacetsCollector;
 import org.apache.lucene.facet.FacetsConfig;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.search.ConjunctionDISI;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 
@@ -55,29 +57,24 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
         continue;
       }
 
-      DocIdSetIterator docs = hits.bits.iterator();
+      DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
+          hits.bits.iterator(), dv));
       
-      int doc;
-      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-        if (dv.docID() < doc) {
-          dv.advance(doc);
-        }
-        if (dv.docID() == doc) {
-          final BytesRef bytesRef = dv.binaryValue();
-          byte[] bytes = bytesRef.bytes;
-          int end = bytesRef.offset + bytesRef.length;
-          int ord = 0;
-          int offset = bytesRef.offset;
-          int prev = 0;
-          while (offset < end) {
-            byte b = bytes[offset++];
-            if (b >= 0) {
-              prev = ord = ((ord << 7) | b) + prev;
-              ++values[ord];
-              ord = 0;
-            } else {
-              ord = (ord << 7) | (b & 0x7F);
-            }
+      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+        final BytesRef bytesRef = dv.binaryValue();
+        byte[] bytes = bytesRef.bytes;
+        int end = bytesRef.offset + bytesRef.length;
+        int ord = 0;
+        int offset = bytesRef.offset;
+        int prev = 0;
+        while (offset < end) {
+          byte b = bytes[offset++];
+          if (b >= 0) {
+            prev = ord = ((ord << 7) | b) + prev;
+            ++values[ord];
+            ord = 0;
+          } else {
+            ord = (ord << 7) | (b & 0x7F);
           }
         }
       }