You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/08/07 14:32:55 UTC

[lucene] branch main updated: LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new e937e73  LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)
e937e73 is described below

commit e937e739f38585a01709aa2b9c471a7b40d52582
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sat Aug 7 07:32:50 2021 -0700

    LUCENE-10046: Fix counting bug in StringValueFacetCounts (#236)
---
 lucene/CHANGES.txt                                 |  2 ++
 .../lucene/facet/StringValueFacetCounts.java       |  2 +-
 .../lucene/facet/TestStringValueFacetCounts.java   | 34 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2501a93..2605de8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -433,6 +433,8 @@ Bug Fixes
 * LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single
   field. (Julie Tibshirani)
 
+* LUCENE-10046: Counting bug fixed in StringValueFacetCounts. (Greg Miller)
+
 Other
 ---------------------
 (No changes)
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index bf51e30..6100d5f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -375,7 +375,7 @@ public class StringValueFacetCounts extends Facets {
           int term = (int) segValues.nextOrd();
           boolean countedDocInTotal = false;
           while (term != SortedSetDocValues.NO_MORE_ORDS) {
-            increment(term);
+            increment((int) ordMap.get(term));
             if (countedDocInTotal == false) {
               totalDocCount++;
             }
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
index 7d7d888..886c3da 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
@@ -140,6 +140,40 @@ public class TestStringValueFacetCounts extends FacetTestCase {
     IOUtils.close(searcher.getIndexReader(), dir);
   }
 
+  public void testSparseMultiSegmentCase() throws Exception { // regression test for LUCENE-10046
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+    Map<String, Integer> expectedCounts = new HashMap<>(); // value -> number of docs expected
+
+    // Create two segments, each with only one doc that has a large number of SSDV field values.
+    // This ensures "sparse" counting will occur in StringValueFacetCounts (i.e., small number
+    // of hits relative to the field cardinality):
+    Document doc = new Document();
+    for (int i = 0; i < 100; i++) {
+      doc.add(new SortedSetDocValuesField("field", new BytesRef("foo_" + i)));
+      expectedCounts.put("foo_" + i, 1); // each "foo_" value is added to this one doc only
+    }
+    writer.addDocument(doc);
+    writer.commit(); // commit between the two docs; intended to put them in separate segments
+
+    doc = new Document();
+    for (int i = 0; i < 100; i++) {
+      doc.add(new SortedSetDocValuesField("field", new BytesRef("bar_" + i)));
+      expectedCounts.put("bar_" + i, 1); // each "bar_" value is added to this one doc only
+    }
+    writer.addDocument(doc);
+
+    int expectedTotalDocCount = 2; // both indexed docs carry values for "field"
+
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    // NOTE(review): trailing ints are presumably topN sizes to verify; confirm in checkFacetResult
+    checkFacetResult(expectedCounts, expectedTotalDocCount, searcher, 10, 2, 1, 0);
+
+    IOUtils.close(searcher.getIndexReader(), dir); // close the searcher's reader and the directory
+  }
+
   public void testMissingSegment() throws Exception {
 
     Directory dir = newDirectory();