You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/01/13 17:18:02 UTC
[lucene] branch main updated: LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll (#605)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 2f5e3c3 LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll (#605)
2f5e3c3 is described below
commit 2f5e3c323b4db436fa6a9b3c6ee8195b6bd61431
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Thu Jan 13 09:17:55 2022 -0800
LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll (#605)
Co-authored-by: guofeng.my <gu...@bytedance.com>
---
lucene/CHANGES.txt | 3 ++
.../facet/taxonomy/FastTaxonomyFacetCounts.java | 48 ++++++++++++++--------
.../lucene/facet/taxonomy/IntTaxonomyFacets.java | 17 ++++++--
3 files changed, 48 insertions(+), 20 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0e768b1..b82a4fc 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -156,6 +156,9 @@ Optimizations
* LUCENE-10356: Further optimize facet counting for single-valued TaxonomyFacetCounts. (Greg Miller)
+* LUCENE-10379: Count directly into the dense values array in FastTaxonomyFacetCounts#countAll.
+ (Guo Feng, Greg Miller)
+
Changes in runtime behavior
---------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
index 1e7d831..2b81ee7 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FastTaxonomyFacetCounts.java
@@ -84,13 +84,27 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), valuesIt));
if (singleValued != null) {
- while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
- increment((int) singleValued.longValue());
+ if (values != null) {
+ while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ values[(int) singleValued.longValue()]++;
+ }
+ } else {
+ while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ sparseValues.addTo((int) singleValued.longValue(), 1);
+ }
}
} else {
- while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
- for (int i = 0; i < multiValued.docValueCount(); i++) {
- increment((int) multiValued.nextValue());
+ if (values != null) {
+ while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int i = 0; i < multiValued.docValueCount(); i++) {
+ values[(int) multiValued.nextValue()]++;
+ }
+ }
+ } else {
+ while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int i = 0; i < multiValued.docValueCount(); i++) {
+ sparseValues.addTo((int) multiValued.nextValue(), 1);
+ }
}
}
}
@@ -100,6 +114,7 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
}
private void countAll(IndexReader reader) throws IOException {
+ assert values != null;
for (LeafReaderContext context : reader.leaves()) {
SortedNumericDocValues multiValued =
context.reader().getSortedNumericDocValues(indexFieldName);
@@ -117,19 +132,18 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
if (liveDocs != null && liveDocs.get(doc) == false) {
continue;
}
- increment((int) singleValued.longValue());
- }
- continue;
- }
-
- for (int doc = multiValued.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = multiValued.nextDoc()) {
- if (liveDocs != null && liveDocs.get(doc) == false) {
- continue;
+ values[(int) singleValued.longValue()]++;
}
- for (int i = 0; i < multiValued.docValueCount(); i++) {
- increment((int) multiValued.nextValue());
+ } else {
+ for (int doc = multiValued.nextDoc();
+ doc != DocIdSetIterator.NO_MORE_DOCS;
+ doc = multiValued.nextDoc()) {
+ if (liveDocs != null && liveDocs.get(doc) == false) {
+ continue;
+ }
+ for (int i = 0; i < multiValued.docValueCount(); i++) {
+ values[(int) multiValued.nextValue()]++;
+ }
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 3f1dc17..cdec3f1 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -31,10 +31,21 @@ import org.apache.lucene.facet.TopOrdAndIntQueue;
/** Base class for all taxonomy-based facets that aggregate to a per-ords int[]. */
public abstract class IntTaxonomyFacets extends TaxonomyFacets {
- /** Per-ordinal value. */
- private final int[] values;
+ /**
+ * Dense ordinal values.
+ *
+ * <p>This field and {@link #sparseValues} are protected for expert usage: for example, a subclass
+ * can check which of the two is in use once before a loop instead of calling {@link #increment}
+ * on each iteration.
+ */
+ protected final int[] values;
- private final IntIntHashMap sparseValues;
+ /**
+ * Sparse ordinal values.
+ *
+ * @see #values for why this field is protected.
+ */
+ protected final IntIntHashMap sparseValues;
/** Sole constructor. */
protected IntTaxonomyFacets(