Posted to commits@lucene.apache.org by dn...@apache.org on 2023/02/21 00:20:43 UTC

[lucene] branch branch_9x updated: Ensure caching all leaves from the upper tier (#12147)

This is an automated email from the ASF dual-hosted git repository.

dnhatn pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 57e1a4a40ef Ensure caching all leaves from the upper tier (#12147)
57e1a4a40ef is described below

commit 57e1a4a40ef149a88d47ed34b98237acb2b3d56d
Author: Nhat Nguyen <nh...@elastic.co>
AuthorDate: Mon Feb 20 16:20:37 2023 -0800

    Ensure caching all leaves from the upper tier (#12147)
    
    This change adjusts the cache policy to ensure that all segments in the
    max tier are cached. Before, we cached segments that held more than 3%
    of the total documents in the index; now we cache segments that hold
    more than half of the average number of documents per leaf of the
    index.
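
    In effect the predicate keeps the existing 10k-document floor and only
    replaces the ratio check. As a rough sketch of the adjusted rule in
    plain Java (docCounts is a hypothetical stand-in for the per-leaf
    maxDoc() values that the real predicate reads from the reader):

        static boolean cacheable(int leafDocCount, int[] docCounts) {
          // Unchanged: tiny segments are never cached.
          if (leafDocCount < 10_000) {
            return false;
          }
          int totalDocs = 0;
          for (int docs : docCounts) {
            totalDocs += docs;
          }
          // New rule: cache only leaves holding more than half of the
          // average documents per leaf (integer math, as in the patch).
          int averageDocsPerLeaf = totalDocs / docCounts.length;
          return leafDocCount * 2 > averageDocsPerLeaf;
        }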
    
    Closes #12140
---
 .../org/apache/lucene/search/LRUQueryCache.java    | 21 +++---
 .../apache/lucene/search/TestLRUQueryCache.java    | 82 +++++++++++-----------
 .../TestUsageTrackingFilterCachingPolicy.java      |  6 +-
 3 files changed, 52 insertions(+), 57 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index 03d4a29ae73..3479a55e6a9 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -139,25 +139,21 @@ public class LRUQueryCache implements QueryCache, Accountable {
   /**
    * Create a new instance that will cache at most <code>maxSize</code> queries with at most <code>
    * maxRamBytesUsed</code> bytes of memory. Queries will only be cached on leaves that have more
-   * than 10k documents and have more than 3% of the total number of documents in the index. This
-   * should guarantee that all leaves from the upper {@link TieredMergePolicy tier} will be cached
-   * while ensuring that at most <code>33</code> leaves can make it to the cache (very likely less
-   * than 10 in practice), which is useful for this implementation since some operations perform in
-   * linear time with the number of cached leaves. Only clauses whose cost is at most 100x the cost
-   * of the top-level query will be cached in order to not hurt latency too much because of caching.
+   * than 10k documents and have more than half of the average documents per leaf of the index.
+   * This should guarantee that all leaves from the upper {@link TieredMergePolicy tier} will be
+   * cached. Only clauses whose cost is at most 100x the cost of the top-level query will be cached
+   * in order to not hurt latency too much because of caching.
    */
   public LRUQueryCache(int maxSize, long maxRamBytesUsed) {
-    this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000, .03f), 10);
+    this(maxSize, maxRamBytesUsed, new MinSegmentSizePredicate(10000), 10);
   }
 
   // pkg-private for testing
   static class MinSegmentSizePredicate implements Predicate<LeafReaderContext> {
     private final int minSize;
-    private final float minSizeRatio;
 
-    MinSegmentSizePredicate(int minSize, float minSizeRatio) {
+    MinSegmentSizePredicate(int minSize) {
       this.minSize = minSize;
-      this.minSizeRatio = minSizeRatio;
     }
 
     @Override
@@ -167,8 +163,9 @@ public class LRUQueryCache implements QueryCache, Accountable {
         return false;
       }
       final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
-      final float sizeRatio = (float) context.reader().maxDoc() / topLevelContext.reader().maxDoc();
-      return sizeRatio >= minSizeRatio;
+      final int averageTotalDocs =
+          topLevelContext.reader().maxDoc() / topLevelContext.leaves().size();
+      return maxDoc * 2 > averageTotalDocs;
     }
   }
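
For context on the fix (#12140): under the old 3% ratio, an index with many
similarly sized upper-tier segments could end up caching none of them, while
the new per-leaf-average check admits them all. A hypothetical calculation
(the segment sizes are made up for illustration):

    int leaves = 50;
    int docsPerLeaf = 100_000;
    int totalDocs = leaves * docsPerLeaf;        // 5_000_000
    // Old rule: each leaf holds 2% of all docs, below the 3% cutoff,
    // so no upper-tier leaf was cached.
    // New rule: more than half of the per-leaf average suffices.
    int averageDocsPerLeaf = totalDocs / leaves; // 100_000
    boolean cached = docsPerLeaf * 2 > averageDocsPerLeaf; // true for all 50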
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index e90be74e234..da68d13c669 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -20,8 +20,10 @@ import static org.apache.lucene.util.RamUsageEstimator.HASHTABLE_RAM_BYTES_PER_E
 import static org.apache.lucene.util.RamUsageEstimator.LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY;
 import static org.apache.lucene.util.RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED;
 
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import java.io.IOException;
+import java.io.UncheckedIOException;
 import java.lang.reflect.Field;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -39,6 +41,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.IntConsumer;
 import java.util.stream.Collectors;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
@@ -1387,46 +1390,45 @@ public class TestLRUQueryCache extends LuceneTestCase {
 
   public void testMinSegmentSizePredicate() throws IOException {
     Directory dir = newDirectory();
-    IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
-    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
-    w.addDocument(new Document());
-    DirectoryReader reader = w.getReader();
-    IndexSearcher searcher = newSearcher(reader);
-    searcher.setQueryCachingPolicy(ALWAYS_CACHE);
-
-    LRUQueryCache cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(2, 0f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(0, cache.getCacheCount());
-
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(1, 0f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(1, cache.getCacheCount());
-
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(0, .6f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(1, cache.getCacheCount());
-
-    w.addDocument(new Document());
-    reader.close();
-    reader = w.getReader();
-    searcher = newSearcher(reader);
-    searcher.setQueryCachingPolicy(ALWAYS_CACHE);
-    cache =
-        new LRUQueryCache(
-            2, 10000, new LRUQueryCache.MinSegmentSizePredicate(0, .6f), Float.POSITIVE_INFINITY);
-    searcher.setQueryCache(cache);
-    searcher.count(new DummyQuery());
-    assertEquals(0, cache.getCacheCount());
-
+    IndexWriterConfig iwc = new IndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    IntConsumer newSegment =
+        numDocs -> {
+          try {
+            for (int i = 0; i < numDocs; i++) {
+              w.addDocument(new Document());
+            }
+            w.flush();
+          } catch (IOException e) {
+            throw new UncheckedIOException(e);
+          }
+        };
+    newSegment.accept(1);
+    newSegment.accept(4);
+    newSegment.accept(10);
+    newSegment.accept(35);
+    int numLargeSegments = RandomNumbers.randomIntBetween(random(), 2, 40);
+    for (int i = 0; i < numLargeSegments; i++) {
+      newSegment.accept(RandomNumbers.randomIntBetween(random(), 50, 55));
+    }
+    DirectoryReader reader = DirectoryReader.open(w);
+    for (int i = 0; i < 3; i++) {
+      var predicate =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 1, Integer.MAX_VALUE));
+      assertFalse(predicate.test(reader.leaves().get(i)));
+    }
+    for (int i = 3; i < reader.leaves().size(); i++) {
+      var leaf = reader.leaves().get(i);
+      var small =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 60, Integer.MAX_VALUE));
+      assertFalse(small.test(leaf));
+      var big =
+          new LRUQueryCache.MinSegmentSizePredicate(
+              RandomNumbers.randomIntBetween(random(), 10, 30));
+      assertTrue(big.test(leaf));
+    }
     reader.close();
     w.close();
     dir.close();
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
index 059d3e7264a..a764daf9d5b 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
@@ -75,11 +75,7 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
     IndexSearcher searcher = new IndexSearcher(reader);
     UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
     LRUQueryCache cache =
-        new LRUQueryCache(
-            10,
-            Long.MAX_VALUE,
-            new LRUQueryCache.MinSegmentSizePredicate(1, 0f),
-            Float.POSITIVE_INFINITY);
+        new LRUQueryCache(10, Long.MAX_VALUE, ctx -> true, Float.POSITIVE_INFINITY);
     searcher.setQueryCache(cache);
     searcher.setQueryCachingPolicy(policy);