You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/26 22:23:02 UTC
[lucene] branch branch_9x updated: Fix tie-break bug in various Facets implementations (#11768)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new cd9a406b071 Fix tie-break bug in various Facets implementations (#11768)
cd9a406b071 is described below
commit cd9a406b071f9eb118ef0c4ad2ce46306ab006d3
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Mon Sep 26 15:05:57 2022 -0700
Fix tie-break bug in various Facets implementations (#11768)
---
lucene/CHANGES.txt | 7 +++++--
.../java/org/apache/lucene/facet/StringValueFacetCounts.java | 10 +++++++---
.../facet/sortedset/AbstractSortedSetDocValueFacetCounts.java | 4 +++-
.../org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java | 11 +++++++----
.../org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java | 7 +++++--
.../src/test/org/apache/lucene/facet/TestDrillSideways.java | 4 ++--
.../apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java | 8 ++++----
7 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4bd69eff95c..9068a225d7b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,17 +18,20 @@ API Changes
Improvements
---------------------
* GITHUB#11785: Improve Tessellator performance by delaying calls to the method
- #isIntersectingPolygon (Ignacio Vera)
+ #isIntersectingPolygon (Ignacio Vera)
* GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator
construction using bkd binary search. (Jianping Weng)
-
+
Bug Fixes
---------------------
* GITHUB#11726: Indexing term vectors on large documents could fail due to
trying to apply a dictionary whose size is greater than the maximum supported
window size for LZ4. (Adrien Grand)
+* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal
+ values. (Greg Miller)
+
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 55a0bb1ccab..957efec0747 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets {
TopOrdAndIntQueue q = null;
TopOrdAndIntQueue.OrdAndValue reuse = null;
int bottomCount = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int childCount = 0; // total number of labels with non-zero count
if (sparseCounts != null) {
for (IntIntCursor cursor : sparseCounts) {
childCount++; // every count in sparseValues should be non-zero
+ int ord = cursor.key;
int count = cursor.value;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
- reuse.ord = cursor.key;
+ reuse.ord = ord;
reuse.value = count;
if (q == null) {
// Lazy init for sparse case:
@@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
@@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets {
int count = denseCounts[i];
if (count != 0) {
childCount++;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
index 00ae1b42e24..4358450e09b 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
@@ -304,6 +304,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
TopOrdAndIntQueue q = null;
int bottomCount = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int pathCount = 0;
int childCount = 0;
@@ -314,7 +315,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
if (count > 0) {
pathCount += count;
childCount++;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -328,6 +329,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().value;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
index 9e0ae219a4f..d1dea49bb5e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
@@ -199,6 +199,7 @@ public abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int[] children = getChildren();
int[] siblings = getSiblings();
@@ -209,18 +210,20 @@ public abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue.OrdAndValue reuse = null;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
- if (values[ord] > 0) {
- aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
+ float value = values[ord];
+ if (value > 0) {
+ aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (values[ord] > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndFloatQueue.OrdAndValue();
}
reuse.ord = ord;
- reuse.value = values[ord];
+ reuse.value = value;
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 35bb21e4449..c6428207a3f 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -303,6 +303,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
throws IOException {
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomValue = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int aggregatedValue = 0;
int childCount = 0;
@@ -317,7 +318,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (parents[ord] == pathOrd && value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (value > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -326,6 +327,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
@@ -339,7 +341,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (value > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -348,6 +350,7 @@ public abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 2a6f15ad23b..7274bfe7a45 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
- "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
+ "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
@@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
- "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
+ "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getAllDims(0)
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
index 665d1017e32..f9b7d9204a0 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
@@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
List<FacetResult> top1results = facets.getAllDims(1);
assertEquals(3, results.size());
- assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString());
- assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString());
+ assertEquals("dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString());
+ assertEquals("dim=b path=[] value=3 childCount=3\n aar1 (1)\n", top1results.get(1).toString());
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
- "dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString());
+ "dim=a path=[] value=3 childCount=3\n foo1 (1)\n", topNDimsResult.get(0).toString());
assertEquals(
- "dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString());
+ "dim=b path=[] value=3 childCount=3\n aar1 (1)\n", topNDimsResult.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);