You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/26 22:06:04 UTC
[lucene] branch main updated: Fix tie-break bug in various Facets implementations (#11768)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 971ae01164d Fix tie-break bug in various Facets implementations (#11768)
971ae01164d is described below
commit 971ae01164d7d48ef98fac648e7508c5cbb99f5c
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Mon Sep 26 15:05:57 2022 -0700
Fix tie-break bug in various Facets implementations (#11768)
---
lucene/CHANGES.txt | 5 ++++-
.../java/org/apache/lucene/facet/StringValueFacetCounts.java | 10 +++++++---
.../facet/sortedset/AbstractSortedSetDocValueFacetCounts.java | 4 +++-
.../org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java | 11 +++++++----
.../org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java | 7 +++++--
.../src/test/org/apache/lucene/facet/TestDrillSideways.java | 4 ++--
.../apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java | 8 ++++----
7 files changed, 32 insertions(+), 17 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da470df6e54..8abaecf3d92 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -104,7 +104,7 @@ Improvements
is now tagged. (Namgyu Kim)
* GITHUB#11785: Improve Tessellator performance by delaying calls to the method
- #isIntersectingPolygon (Ignacio Vera)
+ #isIntersectingPolygon (Ignacio Vera)
* GITHUB#687: speed up IndexSortSortedNumericDocValuesRangeQuery#BoundedDocIdSetIterator
construction using bkd binary search. (Jianping Weng)
@@ -115,6 +115,9 @@ Bug Fixes
trying to apply a dictionary whose size is greater than the maximum supported
window size for LZ4. (Adrien Grand)
+* GITHUB#11768: Taxonomy and SSDV faceting now correctly breaks ties by preferring smaller ordinal
+ values. (Greg Miller)
+
Optimizations
---------------------
* GITHUB#11738: Optimize MultiTermQueryConstantScoreWrapper when a term is present that matches all
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
index 55a0bb1ccab..957efec0747 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/StringValueFacetCounts.java
@@ -170,17 +170,19 @@ public class StringValueFacetCounts extends Facets {
TopOrdAndIntQueue q = null;
TopOrdAndIntQueue.OrdAndValue reuse = null;
int bottomCount = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int childCount = 0; // total number of labels with non-zero count
if (sparseCounts != null) {
for (IntIntCursor cursor : sparseCounts) {
childCount++; // every count in sparseValues should be non-zero
+ int ord = cursor.key;
int count = cursor.value;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
- reuse.ord = cursor.key;
+ reuse.ord = ord;
reuse.value = count;
if (q == null) {
// Lazy init for sparse case:
@@ -189,6 +191,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
@@ -197,7 +200,7 @@ public class StringValueFacetCounts extends Facets {
int count = denseCounts[i];
if (count != 0) {
childCount++;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && i < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -210,6 +213,7 @@ public class StringValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
index 66bfc56be23..ad3d412c97e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/AbstractSortedSetDocValueFacetCounts.java
@@ -303,6 +303,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
PrimitiveIterator.OfInt childOrds, int topN, DimConfig dimConfig, int pathOrd) {
TopOrdAndIntQueue q = null;
int bottomCount = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int pathCount = 0;
int childCount = 0;
@@ -313,7 +314,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
if (count > 0) {
pathCount += count;
childCount++;
- if (count > bottomCount) {
+ if (count > bottomCount || (count == bottomCount && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -327,6 +328,7 @@ abstract class AbstractSortedSetDocValueFacetCounts extends Facets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
+ bottomOrd = q.top().value;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
index 515f60229d3..35c71a4c661 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/FloatTaxonomyFacets.java
@@ -179,6 +179,7 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int[] children = getChildren();
int[] siblings = getSiblings();
@@ -189,18 +190,20 @@ abstract class FloatTaxonomyFacets extends TaxonomyFacets {
TopOrdAndFloatQueue.OrdAndValue reuse = null;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
- if (values[ord] > 0) {
- aggregatedValue = aggregationFunction.aggregate(aggregatedValue, values[ord]);
+ float value = values[ord];
+ if (value > 0) {
+ aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (values[ord] > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndFloatQueue.OrdAndValue();
}
reuse.ord = ord;
- reuse.value = values[ord];
+ reuse.value = value;
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
index 1c7ef0df2dc..9a8b94b573b 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/IntTaxonomyFacets.java
@@ -251,6 +251,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
throws IOException {
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomValue = 0;
+ int bottomOrd = Integer.MAX_VALUE;
int aggregatedValue = 0;
int childCount = 0;
@@ -265,7 +266,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (parents[ord] == pathOrd && value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (value > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -274,6 +275,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
@@ -287,7 +289,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
if (value > 0) {
aggregatedValue = aggregationFunction.aggregate(aggregatedValue, value);
childCount++;
- if (value > bottomValue) {
+ if (value > bottomValue || (value == bottomValue && ord < bottomOrd)) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
@@ -296,6 +298,7 @@ abstract class IntTaxonomyFacets extends TaxonomyFacets {
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
+ bottomOrd = q.top().ord;
}
}
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
index 2a6f15ad23b..7274bfe7a45 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
@@ -626,7 +626,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = r.facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
- "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
+ "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
@@ -1899,7 +1899,7 @@ public class TestDrillSideways extends FacetTestCase {
List<FacetResult> topNDimsResult = facets.getTopDims(1, 2);
assertEquals(1, topNDimsResult.size());
assertEquals(
- "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Susan (1)\n",
+ "dim=Author path=[] value=5 childCount=4\n Lisa (2)\n Bob (1)\n",
topNDimsResult.get(0).toString());
// test getAllDims(0)
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
index cfc06269466..6ab5772c689 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestTaxonomyFacetCounts.java
@@ -253,17 +253,17 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
List<FacetResult> top1results = facets.getAllDims(1);
assertEquals(3, results.size());
- assertEquals("dim=a path=[] value=3 childCount=3\n foo3 (1)\n", top1results.get(0).toString());
- assertEquals("dim=b path=[] value=3 childCount=3\n bar2 (1)\n", top1results.get(1).toString());
+ assertEquals("dim=a path=[] value=3 childCount=3\n foo1 (1)\n", top1results.get(0).toString());
+ assertEquals("dim=b path=[] value=3 childCount=3\n aar1 (1)\n", top1results.get(1).toString());
assertEquals("dim=c path=[] value=1 childCount=1\n baz1 (1)\n", top1results.get(2).toString());
// test default implementation of getTopDims
List<FacetResult> topNDimsResult = facets.getTopDims(2, 1);
assertEquals(2, topNDimsResult.size());
assertEquals(
- "dim=a path=[] value=3 childCount=3\n foo3 (1)\n", topNDimsResult.get(0).toString());
+ "dim=a path=[] value=3 childCount=3\n foo1 (1)\n", topNDimsResult.get(0).toString());
assertEquals(
- "dim=b path=[] value=3 childCount=3\n bar2 (1)\n", topNDimsResult.get(1).toString());
+ "dim=b path=[] value=3 childCount=3\n aar1 (1)\n", topNDimsResult.get(1).toString());
// test getTopDims(10, 10) and expect same results from getAllDims(10)
List<FacetResult> allDimsResults = facets.getTopDims(10, 10);