You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/08 19:19:46 UTC
[lucene] branch main updated: Added a top-n range faceting example (#1035)
This is an automated email from the ASF dual-hosted git repository.
gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 49b596ef023 Added a top-n range faceting example (#1035)
49b596ef023 is described below
commit 49b596ef0233f6d1129d5bc56f4898ff52e44239
Author: Yuting Gan <44...@users.noreply.github.com>
AuthorDate: Thu Sep 8 12:19:42 2022 -0700
Added a top-n range faceting example (#1035)
---
lucene/CHANGES.txt | 2 +
.../lucene/demo/facet/RangeFacetsExample.java | 49 ++++++++++++++++++++++
.../lucene/demo/facet/TestRangeFacetsExample.java | 15 +++++++
3 files changed, 66 insertions(+)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7fc9d55f926..401ae9b43ed 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -52,6 +52,8 @@ Improvements
* LUCENE-10614: Properly support getTopChildren in RangeFacetCounts. (Yuting Gan)
+* LUCENE-10652: Add a top-n range faceting example to RangeFacetsExample. (Yuting Gan)
+
Optimizations
---------------------
(No changes)
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
index 9c47b563aee..786e7b5d9d9 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
@@ -18,10 +18,13 @@ package org.apache.lucene.demo.facet;
import java.io.Closeable;
import java.io.IOException;
+import java.util.Random;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetResult;
@@ -45,6 +48,7 @@ public class RangeFacetsExample implements Closeable {
private final Directory indexDir = new ByteBuffersDirectory();
private IndexSearcher searcher;
+ private LongRange[] logTimestampRanges = new LongRange[168];
private final long nowSec = System.currentTimeMillis() / 1000L;
final LongRange PAST_HOUR = new LongRange("Past hour", nowSec - 3600, true, nowSec, true);
@@ -73,6 +77,31 @@ public class RangeFacetsExample implements Closeable {
indexWriter.addDocument(doc);
}
+ // Add documents with a fake timestamp spread over 7 days (24 * 7 = 168 hours); timestamps are
+ // second offsets from 0, so hour 1 ends at 3600 sec, hour 2 ends at 7200 sec, ...:
+ long startTime = 0;
+ for (int i = 0; i < 168; i++) {
+ long endTime = (i + 1) * 3600;
+ // Choose a relatively large number, e.g., "35", to create variation in count for
+ // the top n children, so that calling getTopChildren(10) can return top 10 children with
+ // different counts
+ for (int j = 0; j < i % 35; j++) {
+ Document doc = new Document();
+ Random r = new Random();
+ // Randomly generate a timestamp within the current range
+ long randomTimestamp = r.nextLong(1, endTime - startTime) + startTime;
+ // Add as doc values field, so we can compute range facets:
+ doc.add(new NumericDocValuesField("error timestamp", randomTimestamp));
+ doc.add(
+ new StringField(
+ "error message", "server encountered error at " + randomTimestamp, Field.Store.NO));
+ indexWriter.addDocument(doc);
+ }
+ logTimestampRanges[i] =
+ new LongRange("Hour " + i + "-" + (i + 1), startTime, false, endTime, true);
+ startTime = endTime;
+ }
+
// Open near-real-time searcher
searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
indexWriter.close();
@@ -97,6 +126,21 @@ public class RangeFacetsExample implements Closeable {
return facets.getAllChildren("timestamp");
}
+ /** Runs a match-all query and returns the top 10 "error timestamp" range facets by count. */
+ public FacetResult searchTopChildren() throws IOException {
+
+ // Aggregates the facet counts
+ FacetsCollector fc = new FacetsCollector();
+
+ // MatchAllDocsQuery is for "browsing" (counts facets
+ // for all non-deleted docs in the index); normally
+ // you'd use a "normal" query:
+ FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
+
+ Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
+ return facets.getTopChildren(10, "error timestamp");
+ }
+
/** User drills down on the specified range. */
public TopDocs drillDown(LongRange range) throws IOException {
@@ -152,6 +196,11 @@ public class RangeFacetsExample implements Closeable {
System.out.println("-----------------------");
System.out.println(example.search());
+ System.out.println("\n");
+ System.out.println("Facet counting example:");
+ System.out.println("-----------------------");
+ System.out.println(example.searchTopChildren());
+
System.out.println("\n");
System.out.println("Facet drill-down example (timestamp/Past six hours):");
System.out.println("---------------------------------------------");
diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
index ccb21ce064d..9efdcf5356b 100644
--- a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
+++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
@@ -31,6 +31,21 @@ public class TestRangeFacetsExample extends LuceneTestCase {
assertEquals(
"dim=timestamp path=[] value=87 childCount=3\n Past hour (4)\n Past six hours (22)\n Past day (87)\n",
result.toString());
+
+ result = example.searchTopChildren();
+ assertEquals(
+ "dim=error timestamp path=[] value=2758 childCount=163\n"
+ + " Hour 104-105 (34)\n"
+ + " Hour 139-140 (34)\n"
+ + " Hour 34-35 (34)\n"
+ + " Hour 69-70 (34)\n"
+ + " Hour 103-104 (33)\n"
+ + " Hour 138-139 (33)\n"
+ + " Hour 33-34 (33)\n"
+ + " Hour 68-69 (33)\n"
+ + " Hour 102-103 (32)\n"
+ + " Hour 137-138 (32)\n",
+ result.toString());
example.close();
}