You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2022/09/08 19:19:46 UTC

[lucene] branch main updated: Added a top-n range faceting example (#1035)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 49b596ef023 Added a top-n range faceting example (#1035)
49b596ef023 is described below

commit 49b596ef0233f6d1129d5bc56f4898ff52e44239
Author: Yuting Gan <44...@users.noreply.github.com>
AuthorDate: Thu Sep 8 12:19:42 2022 -0700

    Added a top-n range faceting example (#1035)
---
 lucene/CHANGES.txt                                 |  2 +
 .../lucene/demo/facet/RangeFacetsExample.java      | 49 ++++++++++++++++++++++
 .../lucene/demo/facet/TestRangeFacetsExample.java  | 15 +++++++
 3 files changed, 66 insertions(+)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7fc9d55f926..401ae9b43ed 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -52,6 +52,8 @@ Improvements
 
 * LUCENE-10614: Properly support getTopChildren in RangeFacetCounts. (Yuting Gan)
 
+* LUCENE-10652: Add a top-n range faceting example to RangeFacetsExample. (Yuting Gan)
+
 Optimizations
 ---------------------
 (No changes)
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
index 9c47b563aee..786e7b5d9d9 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java
@@ -18,10 +18,13 @@ package org.apache.lucene.demo.facet;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.Random;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.facet.DrillDownQuery;
 import org.apache.lucene.facet.DrillSideways;
 import org.apache.lucene.facet.FacetResult;
@@ -45,6 +48,7 @@ public class RangeFacetsExample implements Closeable {
 
   private final Directory indexDir = new ByteBuffersDirectory();
   private IndexSearcher searcher;
+  private LongRange[] logTimestampRanges = new LongRange[168];
   private final long nowSec = System.currentTimeMillis() / 1000L;
 
   final LongRange PAST_HOUR = new LongRange("Past hour", nowSec - 3600, true, nowSec, true);
@@ -73,6 +77,31 @@ public class RangeFacetsExample implements Closeable {
       indexWriter.addDocument(doc);
     }
 
+    // Add documents with fake timestamps spread over 7 days (24 * 7 = 168 one-hour ranges);
+    // the ranges end at 3600 sec (1 hour), 7200 sec (2 hours), ... counted from timestamp 0:
+    long startTime = 0;
+    for (int i = 0; i < 168; i++) {
+      long endTime = (i + 1) * 3600;
+      // Choose a relatively large number, e.g., "35", to create variation in count for
+      // the top n children, so that calling getTopChildren(10) can return top 10 children with
+      // different counts
+      for (int j = 0; j < i % 35; j++) {
+        Document doc = new Document();
+        Random r = new Random();
+        // Randomly generate a timestamp within the current range
+        long randomTimestamp = r.nextLong(1, endTime - startTime) + startTime;
+        // Add as doc values field, so we can compute range facets:
+        doc.add(new NumericDocValuesField("error timestamp", randomTimestamp));
+        doc.add(
+            new StringField(
+                "error message", "server encountered error at " + randomTimestamp, Field.Store.NO));
+        indexWriter.addDocument(doc);
+      }
+      logTimestampRanges[i] =
+          new LongRange("Hour " + i + "-" + (i + 1), startTime, false, endTime, true);
+      startTime = endTime;
+    }
+
     // Open near-real-time searcher
     searcher = new IndexSearcher(DirectoryReader.open(indexWriter));
     indexWriter.close();
@@ -97,6 +126,21 @@ public class RangeFacetsExample implements Closeable {
     return facets.getAllChildren("timestamp");
   }
 
+  /** User runs a query and gets the top 10 "error timestamp" ranges by facet count. */
+  public FacetResult searchTopChildren() throws IOException {
+
+    // Collector that gathers the hits the range facet counts below are computed from
+    FacetsCollector fc = new FacetsCollector();
+
+    // MatchAllDocsQuery is for "browsing" (it counts facets
+    // for all non-deleted docs in the index); in a real
+    // application you'd normally pass the user's query here:
+    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
+
+    Facets facets = new LongRangeFacetCounts("error timestamp", fc, logTimestampRanges);
+    return facets.getTopChildren(10, "error timestamp");
+  }
+
   /** User drills down on the specified range. */
   public TopDocs drillDown(LongRange range) throws IOException {
 
@@ -152,6 +196,11 @@ public class RangeFacetsExample implements Closeable {
     System.out.println("-----------------------");
     System.out.println(example.search());
 
+    System.out.println("\n");
+    System.out.println("Facet counting example:");
+    System.out.println("-----------------------");
+    System.out.println(example.searchTopChildren());
+
     System.out.println("\n");
     System.out.println("Facet drill-down example (timestamp/Past six hours):");
     System.out.println("---------------------------------------------");
diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
index ccb21ce064d..9efdcf5356b 100644
--- a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
+++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestRangeFacetsExample.java
@@ -31,6 +31,21 @@ public class TestRangeFacetsExample extends LuceneTestCase {
     assertEquals(
         "dim=timestamp path=[] value=87 childCount=3\n  Past hour (4)\n  Past six hours (22)\n  Past day (87)\n",
         result.toString());
+
+    result = example.searchTopChildren();
+    assertEquals(
+        "dim=error timestamp path=[] value=2758 childCount=163\n"
+            + "  Hour 104-105 (34)\n"
+            + "  Hour 139-140 (34)\n"
+            + "  Hour 34-35 (34)\n"
+            + "  Hour 69-70 (34)\n"
+            + "  Hour 103-104 (33)\n"
+            + "  Hour 138-139 (33)\n"
+            + "  Hour 33-34 (33)\n"
+            + "  Hour 68-69 (33)\n"
+            + "  Hour 102-103 (32)\n"
+            + "  Hour 137-138 (32)\n",
+        result.toString());
     example.close();
   }