You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/12/04 17:49:14 UTC
[30/50] lucene-solr:jira/solr-11458-2: LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for caching as they could break memory accounting of the query cache.

LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for caching as they could break memory accounting of the query cache.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6d34f232
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6d34f232
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6d34f232

Branch: refs/heads/jira/solr-11458-2
Commit: 6d34f23263e919e462d67a72cbe150465a0b87a7
Parents: 70767b1
Author: Adrien Grand <jp...@gmail.com>
Authored: Wed Nov 29 09:29:52 2017 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Wed Nov 29 09:29:52 2017 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++++
 .../org/apache/lucene/search/BooleanWeight.java |  6 +++++
 .../lucene/search/DisjunctionMaxQuery.java      |  6 +++++
 .../org/apache/lucene/search/LRUQueryCache.java | 25 +++++---------------
 .../apache/lucene/search/TermInSetQuery.java    |  4 +++-
 .../apache/lucene/search/TestLRUQueryCache.java | 15 +++---------
 6 files changed, 28 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 084d0dd..04cfde1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -123,6 +123,10 @@ Optimizations
   (Dawid Weiss, Robert Muir, Mike McCandless)
 
 * LUCENE-8062: GlobalOrdinalsQuery is no longer eligible for caching. (Jim Ferenczi)
+
+* LUCENE-8058: Large instances of TermInSetQuery are no longer eligible for
+  caching as they could break memory accounting of the query cache.
+  (Adrien Grand)
   
 Tests
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
index 4cc038d..dbe3d17 100644
--- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java
@@ -301,6 +301,12 @@ final class BooleanWeight extends Weight {
 
   @Override
   public boolean isCacheable(LeafReaderContext ctx) {
+    if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
+      // Disallow caching large boolean queries to not encourage users
+      // to build large boolean queries as a workaround to the fact that
+      // we disallow caching large TermInSetQueries.
+      return false;
+    }
     for (Weight w : weights) {
       if (w.isCacheable(ctx) == false)
         return false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index 17ce68a..13237a2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -139,6 +139,12 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
 
     @Override
     public boolean isCacheable(LeafReaderContext ctx) {
+      if (weights.size() > TermInSetQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD) {
+        // Disallow caching large dismax queries to not encourage users
+        // to build large dismax queries as a workaround to the fact that
+        // we disallow caching large TermInSetQueries.
+        return false;
+      }
       for (Weight w : weights) {
         if (w.isCacheable(ctx) == false)
           return false;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
index b604733..5c828d6 100644
--- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java
@@ -87,8 +87,9 @@ import org.apache.lucene.util.RoaringDocIdSet;
  */
 public class LRUQueryCache implements QueryCache, Accountable {
 
-  // memory usage of a simple term query
-  static final long QUERY_DEFAULT_RAM_BYTES_USED = 192;
+  // approximate memory usage that we assign to all queries
+  // this maps roughly to a BooleanQuery with a couple term clauses
+  static final long QUERY_DEFAULT_RAM_BYTES_USED = 1024;
 
   static final long HASHTABLE_RAM_BYTES_PER_ENTRY =
       2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF // key + value
@@ -298,7 +299,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
     try {
       Query singleton = uniqueQueries.putIfAbsent(query, query);
       if (singleton == null) {
-        onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(query));
+        onQueryCache(query, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
       } else {
         query = singleton;
       }
@@ -381,7 +382,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
 
   private void onEviction(Query singleton) {
     assert lock.isHeldByCurrentThread();
-    onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton));
+    onQueryEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + QUERY_DEFAULT_RAM_BYTES_USED);
     for (LeafCache leafCache : cache.values()) {
       leafCache.remove(singleton);
     }
@@ -421,9 +422,7 @@ public class LRUQueryCache implements QueryCache, Accountable {
       long recomputedRamBytesUsed =
             HASHTABLE_RAM_BYTES_PER_ENTRY * cache.size()
           + LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY * uniqueQueries.size();
-      for (Query query : mostRecentlyUsedQueries) {
-        recomputedRamBytesUsed += ramBytesUsed(query);
-      }
+      recomputedRamBytesUsed += mostRecentlyUsedQueries.size() * QUERY_DEFAULT_RAM_BYTES_USED;
       for (LeafCache leafCache : cache.values()) {
         recomputedRamBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY * leafCache.cache.size();
         for (DocIdSet set : leafCache.cache.values()) {
@@ -482,18 +481,6 @@ public class LRUQueryCache implements QueryCache, Accountable {
   }
 
   /**
-   * Return the number of bytes used by the given query. The default
-   * implementation returns {@link Accountable#ramBytesUsed()} if the query
-   * implements {@link Accountable} and <code>1024</code> otherwise.
-   */
-  protected long ramBytesUsed(Query query) {
-    if (query instanceof Accountable) {
-      return ((Accountable) query).ramBytesUsed();
-    }
-    return QUERY_DEFAULT_RAM_BYTES_USED;
-  }
-
-  /**
    * Default cache implementation: uses {@link RoaringDocIdSet} for sets that
    * have a density &lt; 1% and a {@link BitDocIdSet} over a {@link FixedBitSet}
    * otherwise.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index 85f97ed..19b3922 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -318,7 +318,9 @@ public class TermInSetQuery extends Query implements Accountable {
 
       @Override
       public boolean isCacheable(LeafReaderContext ctx) {
-        return true;
+        // Only cache instances that have a reasonable size. Otherwise it might cause memory issues
+        // with the query cache if most memory ends up being spent on queries rather than doc id sets.
+        return ramBytesUsed() <= LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
       }
 
     };

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6d34f232/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
index 2f364f8..fa9743d 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java
@@ -77,15 +77,6 @@ public class TestLRUQueryCache extends LuceneTestCase {
 
   };
 
-  public void testFilterRamBytesUsed() {
-    final Query simpleQuery = new TermQuery(new Term("some_field", "some_term"));
-    final long actualRamBytesUsed = RamUsageTester.sizeOf(simpleQuery);
-    final long ramBytesUsed = LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
-    // we cannot assert exactly that the constant is correct since actual
-    // memory usage depends on JVM implementations and settings (eg. UseCompressedOops)
-    assertEquals(actualRamBytesUsed, ramBytesUsed, actualRamBytesUsed / 2);
-  }
-
   public void testConcurrency() throws Throwable {
     final LRUQueryCache queryCache = new LRUQueryCache(1 + random().nextInt(20), 1 + random().nextInt(10000), context -> random().nextBoolean());
     Directory dir = newDirectory();
@@ -282,7 +273,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
           return ((DocIdSet) o).ramBytesUsed();
         }
         if (o instanceof Query) {
-          return queryCache.ramBytesUsed((Query) o);
+          return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
         }
         if (o instanceof IndexReader || o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
           // do not take readers or core cache keys into account
@@ -403,7 +394,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
           return ((DocIdSet) o).ramBytesUsed();
         }
         if (o instanceof Query) {
-          return queryCache.ramBytesUsed((Query) o);
+          return LRUQueryCache.QUERY_DEFAULT_RAM_BYTES_USED;
         }
         if (o.getClass().getSimpleName().equals("SegmentCoreReaders")) {
           // do not follow references to core cache keys
@@ -1498,7 +1489,7 @@ public class TestLRUQueryCache extends LuceneTestCase {
     IndexSearcher searcher = newSearcher(reader);
     searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE);
 
-    LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true);
+    LRUQueryCache cache = new LRUQueryCache(1, 10000, context -> true);
     searcher.setQueryCache(cache);
 
     DVCacheQuery query = new DVCacheQuery("field");