You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/08/07 14:32:55 UTC
[lucene] branch main updated: LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new e937e73 LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)
e937e73 is described below
commit e937e739f38585a01709aa2b9c471a7b40d52582
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sat Aug 7 07:32:50 2021 -0700
LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)
---
lucene/CHANGES.txt | 2 ++
.../lucene/facet/StringValueFacetCounts.java | 2 +-
.../lucene/facet/TestStringValueFacetCounts.java | 34 ++++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2501a93..2605de8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -433,6 +433,8 @@ Bug Fixes
* LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single
field. (Julie Tibshirani)
+* LUCENE-10046: Counting bug fixed in StringValueFacetCounts. (Greg Miller)
+
Other
---------------------
(No changes)
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index bf51e30..6100d5f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -375,7 +375,7 @@ public class StringValueFacetCounts extends Facets {
int term = (int) segValues.nextOrd();
boolean countedDocInTotal = false;
while (term != SortedSetDocValues.NO_MORE_ORDS) {
- increment(term);
+ increment((int) ordMap.get(term));
if (countedDocInTotal == false) {
totalDocCount++;
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
index 7d7d888..886c3da 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
@@ -140,6 +140,40 @@ public class TestStringValueFacetCounts extends FacetTestCase {
IOUtils.close(searcher.getIndexReader(), dir);
}
+ public void testSparseMultiSegmentCase() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+ Map<String, Integer> expectedCounts = new HashMap<>();
+
+ // Create two segments, each with only one doc that has a large number of SSDV field values.
+ // This ensures "sparse" counting will occur in StringValueFacetCounts (i.e., small number
+ // of hits relative to the field cardinality):
+ Document doc = new Document();
+ for (int i = 0; i < 100; i++) {
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("foo_" + i)));
+ expectedCounts.put("foo_" + i, 1);
+ }
+ writer.addDocument(doc);
+ writer.commit();
+
+ doc = new Document();
+ for (int i = 0; i < 100; i++) {
+ doc.add(new SortedSetDocValuesField("field", new BytesRef("bar_" + i)));
+ expectedCounts.put("bar_" + i, 1);
+ }
+ writer.addDocument(doc);
+
+ int expectedTotalDocCount = 2;
+
+ IndexSearcher searcher = newSearcher(writer.getReader());
+ writer.close();
+
+ checkFacetResult(expectedCounts, expectedTotalDocCount, searcher, 10, 2, 1, 0);
+
+ IOUtils.close(searcher.getIndexReader(), dir);
+ }
+
public void testMissingSegment() throws Exception {
Directory dir = newDirectory();