You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2021/07/30 17:36:24 UTC

[lucene-solr] branch branch_8x updated: LUCENE-10027 provide leaf sorter from commit (#2540)

This is an automated email from the ASF dual-hosted git repository.

mayya pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 3467dd4  LUCENE-10027  provide leaf sorter from commit (#2540)
3467dd4 is described below

commit 3467dd45e665426364be742d84d9d870fd147fe6
Author: Mayya Sharipova <ma...@elastic.co>
AuthorDate: Fri Jul 30 13:36:10 2021 -0400

    LUCENE-10027  provide leaf sorter from commit (#2540)
    
    Provide leaf sorter for directory readers opened from IndexCommit
    
    LUCENE-9507 made it possible to provide a leaf sorter for directory readers.
    One API that was missed is the one for providing a leaf sorter
    to directory readers opened from an index commit.
    This patch adds that API.
---
 lucene/CHANGES.txt                                 |  3 +
 .../org/apache/lucene/index/DirectoryReader.java   | 15 ++++
 .../apache/lucene/index/TestIndexWriterReader.java | 95 +++++++++++-----------
 3 files changed, 66 insertions(+), 47 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0bb3cd4..71b6532 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -18,6 +18,9 @@ API Changes
 * LUCENE-10036: Replaced the ScoreCachingWrappingScorer ctor with a static factory method that
   ensures unnecessary wrapping doesn't occur. (Greg Miller)
 
+* LUCENE-10027: Add a new DirectoryReader.open API that takes an IndexCommit and
+   a custom comparator for sorting leaf readers. (Mayya Sharipova)
+
 New Features
 ---------------------
 (No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
index e8298a1..2655afe 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
@@ -130,6 +130,21 @@ public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> {
    return StandardDirectoryReader.open(commit.getDirectory(), commit, null);
   }
 
+  /** Expert: returns an IndexReader reading the index in the given
+   *  {@link IndexCommit}.
+   * @param commit the commit point to open
+   * @param leafSorter a comparator for sorting leaf readers. Providing leafSorter is useful for
+   *     indices on which many queries are expected to run with a particular sort criterion (e.g.
+   *     for time-based indices this is usually a descending sort on timestamp). In this case
+   *     {@code leafSorter} should sort leaves according to that criterion. Providing leafSorter
+   *     speeds up this type of sorted query by allowing early termination while iterating
+   *     through segments and segments' documents.
+   * @throws IOException if there is a low-level IO error
+   */
+  public static DirectoryReader open(final IndexCommit commit, Comparator<LeafReader> leafSorter) throws IOException {
+    return StandardDirectoryReader.open(commit.getDirectory(), commit, leafSorter);
+  }
+
   /**
    * If the index has changed since the provided reader was
    * opened, open and return a new reader; else, return
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java
index 2806a15..5a99630 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterReader.java
@@ -1209,14 +1209,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
     // Test1: test that leafReaders are sorted according to leafSorter provided in IndexWriterConfig
     {
       try (DirectoryReader reader = writer.getReader()) {
-        List<LeafReader> lrs =
-            reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-        List<LeafReader> expectedSortedlrs =
-            reader.leaves().stream()
-                .map(LeafReaderContext::reader)
-                .sorted(leafSorter)
-                .collect(toList());
-        assertEquals(expectedSortedlrs, lrs);
+        assertLeavesSorted(reader, leafSorter);
 
         // add more documents that should be sorted first
         final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@@ -1229,28 +1222,16 @@ public class TestIndexWriterReader extends LuceneTestCase {
 
         // and open again
         try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
-          lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-          expectedSortedlrs =
-              reader2.leaves().stream()
-                  .map(LeafReaderContext::reader)
-                  .sorted(leafSorter)
-                  .collect(toList());
-          assertEquals(expectedSortedlrs, lrs);
+          assertLeavesSorted(reader2, leafSorter);
         }
       }
     }
 
-    // Test2: test that leafReaders are sorted according to leafSorter provided in DirectoryReader
+    // Test2: test that leafReaders are sorted according to the provided leafSorter when opened from
+    // directory
     {
       try (DirectoryReader reader = DirectoryReader.open(dir, leafSorter)) {
-        List<LeafReader> lrs =
-            reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-        List<LeafReader> expectedSortedlrs =
-            reader.leaves().stream()
-                .map(LeafReaderContext::reader)
-                .sorted(leafSorter)
-                .collect(toList());
-        assertEquals(expectedSortedlrs, lrs);
+        assertLeavesSorted(reader, leafSorter);
 
         // add more documents that should be sorted first
         final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@@ -1263,13 +1244,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
 
         // and open again
         try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
-          lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-          expectedSortedlrs =
-              reader2.leaves().stream()
-                  .map(LeafReaderContext::reader)
-                  .sorted(leafSorter)
-                  .collect(toList());
-          assertEquals(expectedSortedlrs, lrs);
+          assertLeavesSorted(reader2, leafSorter);
         }
       }
     }
@@ -1278,15 +1253,33 @@ public class TestIndexWriterReader extends LuceneTestCase {
     // to leafSorter of its wrapped reader
     {
       try (DirectoryReader reader =
-               new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
-        List<LeafReader> lrs =
-            reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-        List<LeafReader> expectedSortedlrs =
-            reader.leaves().stream()
-                .map(LeafReaderContext::reader)
-                .sorted(leafSorter)
-                .collect(toList());
-        assertEquals(expectedSortedlrs, lrs);
+                   new AssertingDirectoryReader(DirectoryReader.open(dir, leafSorter))) {
+        assertLeavesSorted(reader, leafSorter);
+
+        // add more documents that should be sorted first
+        final long FIRST_VALUE = ASC_SORT ? 0 : 100;
+        for (int i = 0; i < 10; ++i) {
+          final Document doc = new Document();
+          doc.add(new LongPoint(FIELD_NAME, FIRST_VALUE));
+          writer.addDocument(doc);
+        }
+        writer.commit();
+
+        // and open again
+        try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
+          assertLeavesSorted(reader2, leafSorter);
+        }
+      }
+    }
+
+    // Test4: test that leafReaders are sorted according to the provided leafSorter when opened from
+    // commit
+    {
+      List<IndexCommit> commits = DirectoryReader.listCommits(dir);
+      IndexCommit latestCommit = commits.get(commits.size() - 1);
+      try (DirectoryReader reader =
+                   DirectoryReader.open(latestCommit, leafSorter)) {
+        assertLeavesSorted(reader, leafSorter);
 
         // add more documents that should be sorted first
         final long FIRST_VALUE = ASC_SORT ? 0 : 100;
@@ -1299,13 +1292,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
 
         // and open again
         try (DirectoryReader reader2 = DirectoryReader.openIfChanged(reader)) {
-          lrs = reader2.leaves().stream().map(LeafReaderContext::reader).collect(toList());
-          expectedSortedlrs =
-              reader2.leaves().stream()
-                  .map(LeafReaderContext::reader)
-                  .sorted(leafSorter)
-                  .collect(toList());
-          assertEquals(expectedSortedlrs, lrs);
+          assertLeavesSorted(reader2, leafSorter);
         }
       }
     }
@@ -1313,4 +1300,18 @@ public class TestIndexWriterReader extends LuceneTestCase {
     writer.close();
     dir.close();
   }
+
+  // assert that the leaf readers of the provided directory reader are sorted according to the
+  // provided leafSorter
+  private static void assertLeavesSorted(
+          DirectoryReader reader, Comparator<LeafReader> leafSorter) {
+    List<LeafReader> lrs =
+            reader.leaves().stream().map(LeafReaderContext::reader).collect(toList());
+    List<LeafReader> expectedSortedlrs =
+            reader.leaves().stream()
+                    .map(LeafReaderContext::reader)
+                    .sorted(leafSorter)
+                    .collect(toList());
+    assertEquals(expectedSortedlrs, lrs);
+  }
 }