You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/13 14:01:23 UTC
[lucene] branch branch_9x updated: GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now returns top children instead of all children (#11764)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 3334697e0d6 GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now returns top children instead of all children (#11764)
3334697e0d6 is described below
commit 3334697e0d6308182f0ad99e11166da3d40c5740
Author: Greg Miller <gs...@gmail.com>
AuthorDate: Tue Sep 13 06:50:52 2022 -0700
GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now returns top children instead of all children (#11764)
---
lucene/CHANGES.txt | 5 +
.../facet/facetset/MatchingFacetSetsCounts.java | 59 ++++++++++-
.../facetset/TestMatchingFacetSetsCounts.java | 110 +++++++++++++++++++++
3 files changed, 173 insertions(+), 1 deletion(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6a3a27535a2..30641e46c78 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
======================== Lucene 9.5.0 =======================
+API Changes
+---------------------
+* GITHUB#11742: MatchingFacetSetsCounts#getTopChildren now properly returns "top" children instead
+ of all children. (Greg Miller)
+
Bug Fixes
---------------------
diff --git a/lucene/facet/src/java/org/apache/lucene/facet/facetset/MatchingFacetSetsCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/facetset/MatchingFacetSetsCounts.java
index e6c8e67e288..c8151ab471e 100644
--- a/lucene/facet/src/java/org/apache/lucene/facet/facetset/MatchingFacetSetsCounts.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/facetset/MatchingFacetSetsCounts.java
@@ -30,6 +30,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
/**
* Returns the counts for each given {@link FacetSet}
@@ -156,7 +157,45 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
validateTopN(topN);
- return getAllChildren(dim, path);
+
+ topN = Math.min(topN, counts.length);
+
+ PriorityQueue<Entry> pq =
+ new PriorityQueue<>(topN, () -> new Entry("", 0)) {
+ @Override
+ protected boolean lessThan(Entry a, Entry b) {
+ return compare(a.count, b.count, a.label, b.label) < 0;
+ }
+ };
+
+ int childCount = 0;
+ Entry reuse = pq.top();
+ for (int i = 0; i < counts.length; i++) {
+ int count = counts[i];
+ if (count > 0) {
+ childCount++;
+ String label = facetSetMatchers[i].label;
+ if (compare(reuse.count, count, reuse.label, label) < 0) {
+ reuse.label = label;
+ reuse.count = count;
+ reuse = pq.updateTop();
+ }
+ }
+ }
+
+ // Pop off any sentinel values in the case that we had fewer child labels with non-zero
+ // counts than the requested top-n:
+ while (childCount < pq.size()) {
+ pq.pop();
+ }
+
+ LabelAndValue[] labelValues = new LabelAndValue[Math.min(topN, childCount)];
+ for (int i = pq.size() - 1; i >= 0; i--) {
+ Entry e = pq.pop();
+ labelValues[i] = new LabelAndValue(e.label, e.count);
+ }
+
+ return new FacetResult(dim, path, totCount, labelValues, childCount);
}
@Override
@@ -176,4 +215,22 @@ public class MatchingFacetSetsCounts extends FacetCountsWithFilterQuery {
return Arrays.stream(facetSetMatchers)
.anyMatch(facetSetMatcher -> facetSetMatcher.dims != dims);
}
+
+ private static int compare(int count1, int count2, String label1, String label2) {
+ int cmp = Integer.compare(count1, count2);
+ if (cmp == 0) {
+ cmp = label2.compareTo(label1);
+ }
+ return cmp;
+ }
+
+ private static final class Entry {
+ String label;
+ int count;
+
+ Entry(String label, int count) {
+ this.label = label;
+ this.count = count;
+ }
+ }
}
diff --git a/lucene/facet/src/test/org/apache/lucene/facet/facetset/TestMatchingFacetSetsCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/facetset/TestMatchingFacetSetsCounts.java
index 51466137f9e..e1bbad55e5c 100644
--- a/lucene/facet/src/test/org/apache/lucene/facet/facetset/TestMatchingFacetSetsCounts.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/facetset/TestMatchingFacetSetsCounts.java
@@ -16,19 +16,30 @@
*/
package org.apache.lucene.facet.facetset;
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
+import java.util.Locale;
import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.Facets;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollectorManager;
+import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.InPlaceMergeSorter;
public class TestMatchingFacetSetsCounts extends FacetTestCase {
+ private static final int FORD_ORD = 100;
+ private static final int TOYOTA_ORD = 101;
+ private static final int CHEVY_ORD = 102;
+ private static final int NISSAN_ORD = 103;
+ private static final int[] MANUFACTURER_ORDS = {FORD_ORD, TOYOTA_ORD, CHEVY_ORD, NISSAN_ORD};
public void testInvalidTopN() throws IOException {
Directory d = newDirectory();
@@ -87,4 +98,103 @@ public class TestMatchingFacetSetsCounts extends FacetTestCase {
r.close();
d.close();
}
+
+ public void testTopChildren() throws Exception {
+ Directory d = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), d);
+
+ // As a test scenario, we're faceting on the number of vehicles produced per day/make
+ // combination over the past 30 days:
+ final int numBins = 30;
+
+ final int[] expectedCounts = new int[numBins * MANUFACTURER_ORDS.length];
+ FacetSetMatcher[] facetSetMatchers = new FacetSetMatcher[numBins * MANUFACTURER_ORDS.length];
+
+ int totalDocs = 0;
+ int totalNonZeroBins = 0;
+ int index = 0;
+ for (int i = 0; i < numBins; i++) {
+ for (int ord : MANUFACTURER_ORDS) {
+ facetSetMatchers[index] =
+ new ExactFacetSetMatcher(
+ String.format(Locale.ROOT, "%d:%d", i, ord), new LongFacetSet(i, ord));
+
+ int carsManufactured = RandomNumbers.randomIntBetween(random(), 0, 100);
+ for (int k = 0; k < carsManufactured; k++) {
+ // Create a document for every vehicle produced:
+ Document doc = new Document();
+ doc.add(FacetSetsField.create("field", new LongFacetSet(i, ord)));
+ w.addDocument(doc);
+ }
+
+ if (carsManufactured > 0) {
+ totalNonZeroBins++;
+ }
+ totalDocs += carsManufactured;
+ expectedCounts[index] = carsManufactured;
+ index++;
+ }
+ }
+
+ IndexReader r = w.getReader();
+ w.close();
+
+ IndexSearcher s = newSearcher(r);
+ FacetsCollector fc = s.search(new MatchAllDocsQuery(), new FacetsCollectorManager());
+
+ Facets facets =
+ new MatchingFacetSetsCounts("field", fc, FacetSetDecoder::decodeLongs, facetSetMatchers);
+
+ // Sort by count (high-to-low) and tie-break on label, same as in
+ // MatchingFacetCounts#getTopChildren:
+ final int[] originalIndexes = new int[expectedCounts.length];
+ for (int i = 0; i < originalIndexes.length; i++) {
+ originalIndexes[i] = i;
+ }
+ new InPlaceMergeSorter() {
+ @Override
+ protected int compare(int i, int j) {
+ int cmp = Integer.compare(expectedCounts[j], expectedCounts[i]);
+ if (cmp == 0) {
+ int dayBinI = originalIndexes[i] / MANUFACTURER_ORDS.length;
+ int dayBinJ = originalIndexes[j] / MANUFACTURER_ORDS.length;
+ int ordIndexI = originalIndexes[i] % MANUFACTURER_ORDS.length;
+ int ordIndexJ = originalIndexes[j] % MANUFACTURER_ORDS.length;
+ String labelI =
+ String.format(Locale.ROOT, "%d:%d", dayBinI, MANUFACTURER_ORDS[ordIndexI]);
+ String labelJ =
+ String.format(Locale.ROOT, "%d:%d", dayBinJ, MANUFACTURER_ORDS[ordIndexJ]);
+ cmp = new BytesRef(labelI).compareTo(new BytesRef(labelJ));
+ }
+ return cmp;
+ }
+
+ @Override
+ protected void swap(int i, int j) {
+ int tmp = expectedCounts[i];
+ expectedCounts[i] = expectedCounts[j];
+ expectedCounts[j] = tmp;
+ tmp = originalIndexes[i];
+ originalIndexes[i] = originalIndexes[j];
+ originalIndexes[j] = tmp;
+ }
+ }.sort(0, expectedCounts.length);
+
+ final int topN = 10;
+ final LabelAndValue[] expected = new LabelAndValue[topN];
+ for (int i = 0; i < topN; i++) {
+ int count = expectedCounts[i];
+ int dayBin = originalIndexes[i] / MANUFACTURER_ORDS.length;
+ int ordIndex = originalIndexes[i] % MANUFACTURER_ORDS.length;
+ expected[i] =
+ new LabelAndValue(
+ String.format(Locale.ROOT, "%d:%d", dayBin, MANUFACTURER_ORDS[ordIndex]), count);
+ }
+
+ final FacetResult result = facets.getTopChildren(topN, "field");
+ assertFacetResult(result, "field", new String[0], totalNonZeroBins, totalDocs, expected);
+
+ r.close();
+ d.close();
+ }
}