You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/26 22:06:04 UTC

[lucene] branch main updated: Fix tie-break bug in various Facets implementations (#11768)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 971ae01164d Fix tie-break bug in various Facets implementations (#11768)
971ae01164d is described below

commit 971ae01164d7d48ef98fac648e7508c5cbb99f5c
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Mon Sep 26 15:05:57 2022 -0700

    Fix tie-break bug in various Facets implementations (#11768)
---
 lucene/CHANGES.txt                                            |  5 ++++-
 .../java/org/apache/lucene/facet/StringValueFacetCounts.java  | 10 +++++++---
 .../facet/sortedset/AbstractSortedSetDocValueFacetCounts.java |  4 +++-
 .../org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java | 11 +++++++----
 .../org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java   |  7 +++++--
 .../src/test/org/apache/lucene/facet/TestDrillSideways.java   |  4 ++--
 .../apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java |  8 ++++----
 7 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da470df6e54..8abaecf3d92 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -104,7 +104,7 @@ Improvements
   is now tagged. (Namgyu Kim)
 
 * GITHUB#11785: Improve Tessellator performance by delaying calls to the method
-  #isIntersectingPolygon (Ignacio Vera)   
+  #isIntersectingPolygon (Ignacio Vera)
 
 * GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator
   construction using bkd binary search. (Jianping Weng)
@@ -115,6 +115,9 @@ Bug Fixes
   trying to apply a dictionary whose size is greater than the maximum supported
   window size for LZ4. (Adrien Grand)
 
+* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal
+  values. (Greg Miller)
+
 Optimizations
 ---------------------
 * GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 55a0bb1ccab..957efec0747 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets {
     TopOrdAndIntQueue q = null;
     TopOrdAndIntQueue.OrdAndValue reuse = null;
     int bottomCount = 0;
+    int bottomOrd = Integer.MAX_VALUE;
     int childCount = 0; // total number of labels with non-zero count
 
     if (sparseCounts != null) {
       for (IntIntCursor cursor : sparseCounts) {
         childCount++; // every count in sparseValues should be non-zero
+        int ord = cursor.key;
         int count = cursor.value;
-        if (count > bottomCount) {
+        if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndIntQueue.OrdAndValue();
           }
-          reuse.ord = cursor.key;
+          reuse.ord = ord;
           reuse.value = count;
           if (q == null) {
             // Lazy init for sparse case:
@@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets {
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomCount = q.top().value;
+            bottomOrd = q.top().ord;
           }
         }
       }
@@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets {
         int count = denseCounts[i];
         if (count != 0) {
           childCount++;
-          if (count > bottomCount) {
+          if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomCount = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
index 66bfc56be23..ad3d412c97e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
@@ -303,6 +303,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
     TopOrdAndIntQueue q = null;
     int bottomCount = 0;
+    int bottomOrd = Integer.MAX_VALUE;
     int pathCount = 0;
     int childCount = 0;
 
@@ -313,7 +314,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       if (count > 0) {
         pathCount += count;
         childCount++;
-        if (count > bottomCount) {
+        if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndIntQueue.OrdAndValue();
           }
@@ -327,6 +328,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomCount = q.top().value;
+            bottomOrd = q.top().value;
           }
         }
       }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
index 515f60229d3..35c71a4c661 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
@@ -179,6 +179,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
 
     TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
     float bottomValue = 0;
+    int bottomOrd = Integer.MAX_VALUE;
 
     int[] children = getChildren();
     int[] siblings = getSiblings();
@@ -189,18 +190,20 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
 
     TopOrdAndFloatQueue.OrdAndValue reuse = null;
     while (ord != TaxonomyReader.INVALID_ORDINAL) {
-      if (values[ord] > 0) {
-        aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
+      float value = values[ord];
+      if (value > 0) {
+        aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
         childCount++;
-        if (values[ord] > bottomValue) {
+        if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndFloatQueue.OrdAndValue();
           }
           reuse.ord = ord;
-          reuse.value = values[ord];
+          reuse.value = value;
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomValue = q.top().value;
+            bottomOrd = q.top().ord;
           }
         }
       }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 1c7ef0df2dc..9a8b94b573b 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -251,6 +251,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
       throws IOException {
     TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
     int bottomValue = 0;
+    int bottomOrd = Integer.MAX_VALUE;
 
     int aggregatedValue = 0;
     int childCount = 0;
@@ -265,7 +266,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
         if (parents[ord] == pathOrd && value > 0) {
           aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
           childCount++;
-          if (value > bottomValue) {
+          if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -274,6 +275,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomValue = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
@@ -287,7 +289,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
         if (value > 0) {
           aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
           childCount++;
-          if (value > bottomValue) {
+          if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -296,6 +298,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomValue = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 2a6f15ad23b..7274bfe7a45 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase {
     List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
     assertEquals(1, topNDimsResult.size());
     assertEquals(
-        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Susan (1)\n",
+        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n",
         topNDimsResult.get(0).toString());
 
     // test getTopDims(10, 10) and expect same results from getAllDims(10)
@@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase {
     List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
     assertEquals(1, topNDimsResult.size());
     assertEquals(
-        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Susan (1)\n",
+        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n",
         topNDimsResult.get(0).toString());
 
     // test getAllDims(0)
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
index cfc06269466..6ab5772c689 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
@@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     List<FacetResult> top1results = facets.getAllDims(1);
 
     assertEquals(3, results.size());
-    assertEquals("dim=a path=[] value=3 childCount=3\n  foo3 (1)\n", top1results.get(0).toString());
-    assertEquals("dim=b path=[] value=3 childCount=3\n  bar2 (1)\n", top1results.get(1).toString());
+    assertEquals("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n", top1results.get(0).toString());
+    assertEquals("dim=b path=[] value=3 childCount=3\n  aar1 (1)\n", top1results.get(1).toString());
     assertEquals("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", top1results.get(2).toString());
 
     // test default implementation of getTopDims
     List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
     assertEquals(2, topNDimsResult.size());
     assertEquals(
-        "dim=a path=[] value=3 childCount=3\n  foo3 (1)\n", topNDimsResult.get(0).toString());
+        "dim=a path=[] value=3 childCount=3\n  foo1 (1)\n", topNDimsResult.get(0).toString());
     assertEquals(
-        "dim=b path=[] value=3 childCount=3\n  bar2 (1)\n", topNDimsResult.get(1).toString());
+        "dim=b path=[] value=3 childCount=3\n  aar1 (1)\n", topNDimsResult.get(1).toString());
 
     // test getTopDims(10, 10) and expect same results from getAllDims(10)
     List<FacetResult> allDimsResults = facets.getTopDims(10, 10);