You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/20 19:31:46 UTC
[37/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7506:
FastTaxonomyFacetCounts uses ConjunctionDISI so its cost is in proportion to the size
of the intersected set of documents
LUCENE-7506: FastTaxonomyFacetCounts uses ConjunctionDISI so its cost is in proportion to the size of the intersected set of documents
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d03cc92b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d03cc92b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d03cc92b
Branch: refs/heads/jira/solr-8593
Commit: d03cc92b222681b5d701a0383d93c2ca5c1a186d
Parents: 731c5f9
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Oct 19 10:04:39 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Oct 19 10:04:39 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +++
.../facet/taxonomy/FastTaxonomyFacetCounts.java | 41 +++++++++-----------
2 files changed, 24 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d03cc92b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 21ded1a..7105330 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -40,6 +40,11 @@ Optimizations
in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
clauses. (Spyros Kapnissis via Adrien Grand, Uwe Schindler)
+* LUCENE-7506: FastTaxonomyFacetCounts should use CPU in proportion to
+ the size of the intersected set of hits from the query and documents
+ that have a facet value, so sparse faceting works as expected
+ (Adrien Grand via Mike McCandless)
+
Other
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d03cc92b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
----------------------------------------------------------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index 7ad5430..ef96073 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -17,12 +17,14 @@
package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
+import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
@@ -55,29 +57,24 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
continue;
}
- DocIdSetIterator docs = hits.bits.iterator();
+ DocIdSetIterator it = ConjunctionDISI.intersectIterators(Arrays.asList(
+ hits.bits.iterator(), dv));
- int doc;
- while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (dv.docID() < doc) {
- dv.advance(doc);
- }
- if (dv.docID() == doc) {
- final BytesRef bytesRef = dv.binaryValue();
- byte[] bytes = bytesRef.bytes;
- int end = bytesRef.offset + bytesRef.length;
- int ord = 0;
- int offset = bytesRef.offset;
- int prev = 0;
- while (offset < end) {
- byte b = bytes[offset++];
- if (b >= 0) {
- prev = ord = ((ord << 7) | b) + prev;
- ++values[ord];
- ord = 0;
- } else {
- ord = (ord << 7) | (b & 0x7F);
- }
+ for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+ final BytesRef bytesRef = dv.binaryValue();
+ byte[] bytes = bytesRef.bytes;
+ int end = bytesRef.offset + bytesRef.length;
+ int ord = 0;
+ int offset = bytesRef.offset;
+ int prev = 0;
+ while (offset < end) {
+ byte b = bytes[offset++];
+ if (b >= 0) {
+ prev = ord = ((ord << 7) | b) + prev;
+ ++values[ord];
+ ord = 0;
+ } else {
+ ord = (ord << 7) | (b & 0x7F);
}
}
}