You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2017/02/16 13:59:15 UTC

[3/4] lucene-solr:master: LUCENE-7680: Never cache term filters.

LUCENE-7680: Never cache term filters.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/063954ce
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/063954ce
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/063954ce

Branch: refs/heads/master
Commit: 063954ce79f1e4babd22cc177b619c48136766d6
Parents: c2f061d
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Feb 16 09:40:31 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Feb 16 14:57:07 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +
 .../search/UsageTrackingQueryCachingPolicy.java | 78 +++++++++++++-------
 .../TestUsageTrackingFilterCachingPolicy.java   | 11 +++
 3 files changed, 66 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/063954ce/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 032d897..117b65c 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -145,6 +145,10 @@ Improvements
   of the less descriptive FileNotFound or NoSuchFileException (Mike Drob via 
   Mike McCandless, Erick Erickson)
 
+* LUCENE-7680: UsageTrackingQueryCachingPolicy never caches term filters anymore
+  since they are plenty fast. This also has the side-effect of leaving more
+  space in the history for costly filters. (Adrien Grand)
+
 Optimizations
 
 * LUCENE-7641: Optimized point range queries to compute documents that do not

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/063954ce/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
index 035947f..568931d 100644
--- a/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/search/UsageTrackingQueryCachingPolicy.java
@@ -27,7 +27,7 @@ import org.apache.lucene.util.FrequencyTrackingRingBuffer;
  *
  * @lucene.experimental
  */
-public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
+public class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy {
 
   // the hash code that we use as a sentinel in the ring buffer.
   private static final int SENTINEL = Integer.MIN_VALUE;
@@ -54,16 +54,49 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
         isPointQuery(query);
   }
 
-  static boolean isCheap(Query query) {
-    // same for cheap queries
-    // these queries are so cheap that they usually do not need caching
-    return query instanceof TermQuery;
+  private static boolean shouldNeverCache(Query query) {
+    if (query instanceof TermQuery) {
+      // We do not bother caching term queries since they are already plenty fast.
+      return true;
+    }
+
+    if (query instanceof MatchAllDocsQuery) {
+      // MatchAllDocsQuery has an iterator that is faster than what a bit set could do.
+      return true;
+    }
+
+    // For the below queries, it's cheap to notice they cannot match any docs so
+    // we do not bother caching them.
+    if (query instanceof MatchNoDocsQuery) {
+      return false;
+    }
+
+    if (query instanceof BooleanQuery) {
+      BooleanQuery bq = (BooleanQuery) query;
+      if (bq.clauses().isEmpty()) {
+        return true;
+      }
+    }
+
+    if (query instanceof DisjunctionMaxQuery) {
+      DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
+      if (dmq.getDisjuncts().isEmpty()) {
+        return true;
+      }
+    }
+
+    return false;
   }
 
   private final FrequencyTrackingRingBuffer recentlyUsedFilters;
 
   /**
-   * Create a new instance.
+   * Expert: Create a new instance with a configurable history size. Beware of
+   * passing too large values for the size of the history, either
+   * {@link #minFrequencyToCache} returns low values and this means some filters
+   * that are rarely used will be cached, which would hurt performance. Or
+   * {@link #minFrequencyToCache} returns high values that are function of the
+   * size of the history but then filters will be slow to make it to the cache.
    *
    * @param historySize               the number of recently used filters to track
    */
@@ -71,20 +104,22 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
     this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL);
   }
 
-  /** Create a new instance with an history size of 256. */
+  /** Create a new instance with an history size of 256. This should be a good
+   *  default for most cases. */
   public UsageTrackingQueryCachingPolicy() {
     this(256);
   }
 
   /**
-   * For a given query, return how many times it should appear in the history
-   * before being cached.
+   * For a given filter, return how many times it should appear in the history
+   * before being cached. The default implementation returns 2 for filters that
+   * need to evaluate against the entire index to build a {@link DocIdSetIterator},
+   * like {@link MultiTermQuery}, point-based queries or {@link TermInSetQuery},
+   * and 5 for other filters.
    */
   protected int minFrequencyToCache(Query query) {
     if (isCostly(query)) {
       return 2;
-    } else if (isCheap(query)) {
-      return 20;
     } else {
       // default: cache after the filter has been seen 5 times
       return 5;
@@ -96,6 +131,10 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
     assert query instanceof BoostQuery == false;
     assert query instanceof ConstantScoreQuery == false;
 
+    if (shouldNeverCache(query)) {
+      return;
+    }
+
     // call hashCode outside of sync block
     // in case it's somewhat expensive:
     int hashCode = query.hashCode();
@@ -123,24 +162,9 @@ public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy
 
   @Override
   public boolean shouldCache(Query query) throws IOException {
-    if (query instanceof MatchAllDocsQuery
-        // MatchNoDocsQuery currently rewrites to a BooleanQuery,
-        // but who knows, it might get its own Weight one day
-        || query instanceof MatchNoDocsQuery) {
+    if (shouldNeverCache(query)) {
       return false;
     }
-    if (query instanceof BooleanQuery) {
-      BooleanQuery bq = (BooleanQuery) query;
-      if (bq.clauses().isEmpty()) {
-        return false;
-      }
-    }
-    if (query instanceof DisjunctionMaxQuery) {
-      DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query;
-      if (dmq.getDisjuncts().isEmpty()) {
-        return false;
-      }
-    }
     final int frequency = frequency(query);
     final int minFrequency = minFrequencyToCache(query);
     return frequency >= minFrequency;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/063954ce/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
index 29ed22f..eed3cb7 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java
@@ -16,6 +16,8 @@
  */
 package org.apache.lucene.search;
 
+import java.io.IOException;
+
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.LuceneTestCase;
@@ -37,4 +39,13 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
     assertFalse(policy.shouldCache(q));
   }
 
+  public void testNeverCacheTermFilter() throws IOException {
+    Query q = new TermQuery(new Term("foo", "bar"));
+    UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy();
+    for (int i = 0; i < 1000; ++i) {
+      policy.onUse(q);
+    }
+    assertFalse(policy.shouldCache(q));
+  }
+
 }