You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/08/07 14:33:22 UTC

[lucene-solr] branch branch_8x updated: LUCENE-10046: Fix counting bug in StringValueFacetCounts (#2549)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 6fd0e99  LUCENE-10046: Fix counting bug in StringValueFacetCounts (#2549)
6fd0e99 is described below

commit 6fd0e9954a795b400e1a4e513e64b60e0db53f9b
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Sat Aug 7 07:33:03 2021 -0700

    LUCENE-10046: Fix counting bug in StringValueFacetCounts (#2549)
---
 lucene/CHANGES.txt                                 |  2 ++
 .../lucene/facet/StringValueFacetCounts.java       |  2 +-
 .../lucene/facet/TestStringValueFacetCounts.java   | 34 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d14a8d8..13112b0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -94,6 +94,8 @@ Bug Fixes
 * LUCENE-10039: Correct CombinedFieldQuery scoring when there is a single
   field. (Julie Tibshirani)
 
+* LUCENE-10046: Counting bug fixed in StringValueFacetCounts. (Greg Miller)
+
 Other
 ---------------------
 
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 8b874bc..c6592ba 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -375,7 +375,7 @@ public class StringValueFacetCounts extends Facets {
           int term = (int) segValues.nextOrd();
           boolean countedDocInTotal = false;
           while (term != SortedSetDocValues.NO_MORE_ORDS) {
-            increment(term);
+            increment((int) ordMap.get(term));
             if (countedDocInTotal == false) {
               totalDocCount++;
             }
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
index 895bd7a..180a80c 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestStringValueFacetCounts.java
@@ -149,6 +149,40 @@ public class TestStringValueFacetCounts extends FacetTestCase {
     IOUtils.close(searcher.getIndexReader(), dir);
   }
 
+  public void testSparseMultiSegmentCase() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
+
+    Map<String, Integer> expectedCounts = new HashMap<>();
+
+    // Create two segments, each with only one doc that has a large number of SSDV field values.
+    // This ensures "sparse" counting will occur in StringValueFacetCounts (i.e., small number
+    // of hits relative to the field cardinality):
+    Document doc = new Document();
+    for (int i = 0; i < 100; i++) {
+      doc.add(new SortedSetDocValuesField("field", new BytesRef("foo_" + i)));
+      expectedCounts.put("foo_" + i, 1);
+    }
+    writer.addDocument(doc);
+    writer.commit();
+
+    doc = new Document();
+    for (int i = 0; i < 100; i++) {
+      doc.add(new SortedSetDocValuesField("field", new BytesRef("bar_" + i)));
+      expectedCounts.put("bar_" + i, 1);
+    }
+    writer.addDocument(doc);
+
+    int expectedTotalDocCount = 2;
+
+    IndexSearcher searcher = newSearcher(writer.getReader());
+    writer.close();
+
+    checkFacetResult(expectedCounts, expectedTotalDocCount, searcher, 10, 2, 1, 0);
+
+    IOUtils.close(searcher.getIndexReader(), dir);
+  }
+
   public void testMissingSegment() throws Exception {
 
     Directory dir = newDirectory();