You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/26 22:23:02 UTC

[lucene] branch branch_9x updated: Fix tie-break bug in various Facets implementations (#11768)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new cd9a406b071 Fix tie-break bug in various Facets implementations (#11768)
cd9a406b071 is described below

commit cd9a406b071f9eb118ef0c4ad2ce46306ab006d3
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Mon Sep 26 15:05:57 2022 -0700

    Fix tie-break bug in various Facets implementations (#11768)
---
 lucene/CHANGES.txt                                            |  7 +++++--
 .../java/org/apache/lucene/facet/StringValueFacetCounts.java  | 10 +++++++---
 .../facet/sortedset/AbstractSortedSetDocValueFacetCounts.java |  4 +++-
 .../org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java | 11 +++++++----
 .../org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java   |  7 +++++--
 .../src/test/org/apache/lucene/facet/TestDrillSideways.java   |  4 ++--
 .../apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java |  8 ++++----
 7 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4bd69eff95c..9068a225d7b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,17 +18,20 @@ API Changes
 Improvements
 ---------------------
 * GITHUB#11785: Improve Tessellator performance by delaying calls to the method
-  #isIntersectingPolygon (Ignacio Vera) 
+  #isIntersectingPolygon (Ignacio Vera)
 
 * GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator
   construction using bkd binary search. (Jianping Weng)
-  
+
 Bug Fixes
 ---------------------
 * GITHUB#11726: Indexing term vectors on large documents could fail due to
   trying to apply a dictionary whose size is greater than the maximum supported
   window size for LZ4. (Adrien Grand)
 
+* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal
+  values. (Greg Miller)
+
 Optimizations
 ---------------------
 * GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 55a0bb1ccab..957efec0747 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets {
     TopOrdAndIntQueue q = null;
     TopOrdAndIntQueue.OrdAndValue reuse = null;
     int bottomCount = 0;
+    int bottomOrd = Integer.MAX_VALUE;
     int childCount = 0; // total number of labels with non-zero count
 
     if (sparseCounts != null) {
       for (IntIntCursor cursor : sparseCounts) {
         childCount++; // every count in sparseValues should be non-zero
+        int ord = cursor.key;
         int count = cursor.value;
-        if (count > bottomCount) {
+        if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndIntQueue.OrdAndValue();
           }
-          reuse.ord = cursor.key;
+          reuse.ord = ord;
           reuse.value = count;
           if (q == null) {
             // Lazy init for sparse case:
@@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets {
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomCount = q.top().value;
+            bottomOrd = q.top().ord;
           }
         }
       }
@@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets {
         int count = denseCounts[i];
         if (count != 0) {
           childCount++;
-          if (count > bottomCount) {
+          if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomCount = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
index 00ae1b42e24..4358450e09b 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
@@ -304,6 +304,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
     TopOrdAndIntQueue q = null;
     int bottomCount = 0;
+    int bottomOrd = Integer.MAX_VALUE;
     int pathCount = 0;
     int childCount = 0;
 
@@ -314,7 +315,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
       if (count > 0) {
         pathCount += count;
         childCount++;
-        if (count > bottomCount) {
+        if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndIntQueue.OrdAndValue();
           }
@@ -328,6 +329,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomCount = q.top().value;
+            bottomOrd = q.top().value;
           }
         }
       }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
index 9e0ae219a4f..d1dea49bb5e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
@@ -199,6 +199,7 @@ public abstract class FloatTaxonomyFacets extends TaxonomyFacets {
 
     TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
     float bottomValue = 0;
+    int bottomOrd = Integer.MAX_VALUE;
 
     int[] children = getChildren();
     int[] siblings = getSiblings();
@@ -209,18 +210,20 @@ public abstract class FloatTaxonomyFacets extends TaxonomyFacets {
 
     TopOrdAndFloatQueue.OrdAndValue reuse = null;
     while (ord != TaxonomyReader.INVALID_ORDINAL) {
-      if (values[ord] > 0) {
-        aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
+      float value = values[ord];
+      if (value > 0) {
+        aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
         childCount++;
-        if (values[ord] > bottomValue) {
+        if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
           if (reuse == null) {
             reuse = new TopOrdAndFloatQueue.OrdAndValue();
           }
           reuse.ord = ord;
-          reuse.value = values[ord];
+          reuse.value = value;
           reuse = q.insertWithOverflow(reuse);
           if (q.size() == topN) {
             bottomValue = q.top().value;
+            bottomOrd = q.top().ord;
           }
         }
       }
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 35bb21e4449..c6428207a3f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -303,6 +303,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
       throws IOException {
     TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
     int bottomValue = 0;
+    int bottomOrd = Integer.MAX_VALUE;
 
     int aggregatedValue = 0;
     int childCount = 0;
@@ -317,7 +318,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
         if (parents[ord] == pathOrd && value > 0) {
           aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
           childCount++;
-          if (value > bottomValue) {
+          if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -326,6 +327,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomValue = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
@@ -339,7 +341,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
         if (value > 0) {
           aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
           childCount++;
-          if (value > bottomValue) {
+          if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
             if (reuse == null) {
               reuse = new TopOrdAndIntQueue.OrdAndValue();
             }
@@ -348,6 +350,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
             reuse = q.insertWithOverflow(reuse);
             if (q.size() == topN) {
               bottomValue = q.top().value;
+              bottomOrd = q.top().ord;
             }
           }
         }
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 2a6f15ad23b..7274bfe7a45 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase {
     List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
     assertEquals(1, topNDimsResult.size());
     assertEquals(
-        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Susan (1)\n",
+        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n",
         topNDimsResult.get(0).toString());
 
     // test getTopDims(10, 10) and expect same results from getAllDims(10)
@@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase {
     List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
     assertEquals(1, topNDimsResult.size());
     assertEquals(
-        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Susan (1)\n",
+        "dim=Author path=[] value=5 childCount=4\n  Lisa (2)\n  Bob (1)\n",
         topNDimsResult.get(0).toString());
 
     // test getAllDims(0)
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
index 665d1017e32..f9b7d9204a0 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
@@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
     List<FacetResult> top1results = facets.getAllDims(1);
 
     assertEquals(3, results.size());
-    assertEquals("dim=a path=[] value=3 childCount=3\n  foo3 (1)\n", top1results.get(0).toString());
-    assertEquals("dim=b path=[] value=3 childCount=3\n  bar2 (1)\n", top1results.get(1).toString());
+    assertEquals("dim=a path=[] value=3 childCount=3\n  foo1 (1)\n", top1results.get(0).toString());
+    assertEquals("dim=b path=[] value=3 childCount=3\n  aar1 (1)\n", top1results.get(1).toString());
     assertEquals("dim=c path=[] value=1 childCount=1\n  baz1 (1)\n", top1results.get(2).toString());
 
     // test default implementation of getTopDims
     List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
     assertEquals(2, topNDimsResult.size());
     assertEquals(
-        "dim=a path=[] value=3 childCount=3\n  foo3 (1)\n", topNDimsResult.get(0).toString());
+        "dim=a path=[] value=3 childCount=3\n  foo1 (1)\n", topNDimsResult.get(0).toString());
     assertEquals(
-        "dim=b path=[] value=3 childCount=3\n  bar2 (1)\n", topNDimsResult.get(1).toString());
+        "dim=b path=[] value=3 childCount=3\n  aar1 (1)\n", topNDimsResult.get(1).toString());
 
     // test getTopDims(10, 10) and expect same results from getAllDims(10)
     List<FacetResult> allDimsResults = facets.getTopDims(10, 10);