Posted to commits@lucene.apache.org by jp...@apache.org on 2015/01/05 09:45:27 UTC

svn commit: r1649480 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/search/ test/org/apache/lucene/search/

Author: jpountz
Date: Mon Jan  5 08:45:27 2015
New Revision: 1649480

URL: http://svn.apache.org/r1649480
Log:
LUCENE-6157: Add the ability to compute finer-grained stats on the LRU filter cache.

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCache.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCachingPolicy.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCache.java?rev=1649480&r1=1649479&r2=1649480&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCache.java Mon Jan  5 08:45:27 2015
@@ -20,6 +20,7 @@ package org.apache.lucene.search;
 /**
  * A cache for filters.
  *
+ * @see LRUFilterCache
  * @lucene.experimental
  */
 public interface FilterCache {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCachingPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCachingPolicy.java?rev=1649480&r1=1649479&r2=1649480&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCachingPolicy.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/FilterCachingPolicy.java Mon Jan  5 08:45:27 2015
@@ -29,6 +29,8 @@ import org.apache.lucene.index.TieredMer
  *
  * Implementations of this class must be thread-safe.
  *
+ * @see UsageTrackingFilterCachingPolicy
+ * @see LRUFilterCache
  * @lucene.experimental
  */
 // TODO: add APIs for integration with IndexWriter.IndexReaderWarmer

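For orientation, a caching policy decides which filters are worth caching at all. Below is a minimal sketch of a custom policy; the method names and signatures (onUse and shouldCache) are assumptions based on how FilterCachingPolicy is used at this revision, and the class name and threshold are purely illustrative.

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilterCachingPolicy;

// Hypothetical policy: only cache filters on segments with at least 10000
// documents, on the theory that small segments are merged away too quickly
// for cached DocIdSets to pay off. Method signatures are assumed from this
// revision of FilterCachingPolicy.
public class MinSegmentSizePolicy implements FilterCachingPolicy {

  @Override
  public void onUse(Filter filter) {
    // this sketch does not track usage frequency
  }

  @Override
  public boolean shouldCache(Filter filter, LeafReaderContext context,
      DocIdSet set) throws IOException {
    return context.reader().maxDoc() >= 10000;
  }
}
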
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java?rev=1649480&r1=1649479&r2=1649480&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java Mon Jan  5 08:45:27 2015
@@ -49,6 +49,38 @@ import org.apache.lucene.util.RoaringDoc
  * {@link FilterCachingPolicy caching policies} that only cache on "large"
 * segments, and it is advised not to share this cache across too many indices.
  *
+ * Typical usage looks like this:
+ * <pre class="prettyprint">
+ *   final int maxNumberOfCachedFilters = 256;
+ *   final long maxRamBytesUsed = 50 * 1024L * 1024L; // 50MB
+ *   // these cache and policy instances can be shared across several filters and readers
+ *   // it is fine, e.g., to store them in static variables
+ *   final FilterCache filterCache = new LRUFilterCache(maxNumberOfCachedFilters, maxRamBytesUsed);
+ *   final FilterCachingPolicy defaultCachingPolicy = new UsageTrackingFilterCachingPolicy();
+ *   
+ *   // ...
+ *   
+ *   // Then at search time
+ *   Filter myFilter = ...;
+ *   Filter myCacheFilter = filterCache.doCache(myFilter, defaultCachingPolicy);
+ *   // myCacheFilter is now a wrapper around the original filter that will interact with the cache
+ *   IndexSearcher searcher = ...;
+ *   TopDocs topDocs = searcher.search(new ConstantScoreQuery(myCacheFilter), 10);
+ * </pre>
+ *
+ * This cache exposes some global statistics ({@link #getHitCount() hit count},
+ * {@link #getMissCount() miss count}, {@link #getCacheSize() number of cache
+ * entries}, {@link #getCacheCount() total number of DocIdSets that have ever
+ * been cached}, {@link #getEvictionCount() number of evicted entries}). If
+ * you need finer-grained statistics, such as per-index or per-filter-class
+ * statistics, you can override various callbacks:
+ * {@link #onHit}, {@link #onMiss},
+ * {@link #onFilterCache}, {@link #onFilterEviction},
+ * {@link #onDocIdSetCache}, {@link #onDocIdSetEviction} and {@link #onClear}.
+ * However, avoid heavy computations in these callbacks, since they are
+ * called synchronously and under a lock.
+ *
+ * @see FilterCachingPolicy
  * @lucene.experimental
  */
 public class LRUFilterCache implements FilterCache, Accountable {
@@ -96,6 +128,80 @@ public class LRUFilterCache implements F
     ramBytesUsed = 0;
   }
 
+  /**
+   * Expert: callback when there is a cache hit on a given filter.
+   * Overriding this method is typically useful to compute finer-grained
+   * statistics about the filter cache.
+   * @see #onMiss
+   * @lucene.experimental
+   */
+  protected void onHit(Object readerCoreKey, Filter filter) {
+    hitCount += 1;
+  }
+
+  /**
+   * Expert: callback when there is a cache miss on a given filter.
+   * @see #onHit
+   * @lucene.experimental
+   */
+  protected void onMiss(Object readerCoreKey, Filter filter) {
+    assert filter != null;
+    missCount += 1;
+  }
+
+  /**
+   * Expert: callback when a filter is added to this cache.
+   * Overriding this method is typically useful to compute finer-grained
+   * statistics about the filter cache.
+   * @see #onFilterEviction
+   * @lucene.experimental
+   */
+  protected void onFilterCache(Filter filter, long ramBytesUsed) {
+    this.ramBytesUsed += ramBytesUsed;
+  }
+
+  /**
+   * Expert: callback when a filter is evicted from this cache.
+   * @see #onFilterCache
+   * @lucene.experimental
+   */
+  protected void onFilterEviction(Filter filter, long ramBytesUsed) {
+    this.ramBytesUsed -= ramBytesUsed;
+  }
+
+  /**
+   * Expert: callback when a {@link DocIdSet} is added to this cache.
+   * Overriding this method is typically useful to compute finer-grained
+   * statistics about the filter cache.
+   * @see #onDocIdSetEviction
+   * @lucene.experimental
+   */
+  protected void onDocIdSetCache(Object readerCoreKey, long ramBytesUsed) {
+    cacheSize += 1;
+    cacheCount += 1;
+    this.ramBytesUsed += ramBytesUsed;
+  }
+  
+  /**
+   * Expert: callback when one or more {@link DocIdSet}s are removed from this
+   * cache.
+   * @see #onDocIdSetCache
+   * @lucene.experimental
+   */
+  protected void onDocIdSetEviction(Object readerCoreKey, int numEntries, long sumRamBytesUsed) {
+    this.ramBytesUsed -= sumRamBytesUsed;
+    cacheSize -= numEntries;
+  }
+
+  /**
+   * Expert: callback when the cache is completely cleared.
+   * @lucene.experimental
+   */
+  protected void onClear() {
+    ramBytesUsed = 0;
+    cacheSize = 0;
+  }
+
   /** Whether evictions are required. */
   boolean requiresEviction() {
     final int size = mostRecentlyUsedFilters.size();
@@ -107,22 +213,23 @@ public class LRUFilterCache implements F
   }
 
   synchronized DocIdSet get(Filter filter, LeafReaderContext context) {
-    final LeafCache leafCache = cache.get(context.reader().getCoreCacheKey());
+    final Object readerKey = context.reader().getCoreCacheKey();
+    final LeafCache leafCache = cache.get(readerKey);
     if (leafCache == null) {
-      missCount += 1;
+      onMiss(readerKey, filter);
       return null;
     }
     // this get call moves the filter to the most-recently-used position
     final Filter singleton = uniqueFilters.get(filter);
     if (singleton == null) {
-      missCount += 1;
+      onMiss(readerKey, filter);
       return null;
     }
     final DocIdSet cached = leafCache.get(singleton);
     if (cached == null) {
-      missCount += 1;
+      onMiss(readerKey, singleton);
     } else {
-      hitCount += 1;
+      onHit(readerKey, singleton);
     }
     return cached;
   }
@@ -132,13 +239,14 @@ public class LRUFilterCache implements F
     assert set.isCacheable();
     Filter singleton = uniqueFilters.putIfAbsent(filter, filter);
     if (singleton == null) {
-      ramBytesUsed += LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(filter);
+      onFilterCache(filter, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(filter));
     } else {
       filter = singleton;
     }
-    LeafCache leafCache = cache.get(context.reader().getCoreCacheKey());
+    final Object key = context.reader().getCoreCacheKey();
+    LeafCache leafCache = cache.get(key);
     if (leafCache == null) {
-      leafCache = new LeafCache();
+      leafCache = new LeafCache(key);
-      final LeafCache previous = cache.put(context.reader().getCoreCacheKey(), leafCache);
+      final LeafCache previous = cache.put(key, leafCache);
       ramBytesUsed += HASHTABLE_RAM_BYTES_PER_ENTRY;
       assert previous == null;
@@ -172,8 +280,8 @@ public class LRUFilterCache implements F
   public synchronized void clearCoreCacheKey(Object coreKey) {
     final LeafCache leafCache = cache.remove(coreKey);
     if (leafCache != null) {
-      ramBytesUsed -= leafCache.ramBytesUsed + HASHTABLE_RAM_BYTES_PER_ENTRY;
-      cacheSize -= leafCache.cache.size();
+      ramBytesUsed -= HASHTABLE_RAM_BYTES_PER_ENTRY;
+      onDocIdSetEviction(coreKey, leafCache.cache.size(), leafCache.ramBytesUsed);
     }
   }
 
@@ -188,7 +296,7 @@ public class LRUFilterCache implements F
   }
 
   private void onEviction(Filter singleton) {
-    ramBytesUsed -= LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton);
+    onFilterEviction(singleton, LINKED_HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsed(singleton));
     for (LeafCache leafCache : cache.values()) {
       leafCache.remove(singleton);
     }
@@ -200,8 +308,7 @@ public class LRUFilterCache implements F
   public synchronized void clear() {
     cache.clear();
     mostRecentlyUsedFilters.clear();
-    ramBytesUsed = 0;
-    cacheSize = 0;
+    onClear();
   }
 
   // pkg-private for testing
@@ -388,17 +495,24 @@ public class LRUFilterCache implements F
   // this class is not thread-safe, everything but ramBytesUsed needs to be called under a lock
   private class LeafCache implements Accountable {
 
+    private final Object key;
     private final Map<Filter, DocIdSet> cache;
     private volatile long ramBytesUsed;
 
-    LeafCache() {
+    LeafCache(Object key) {
+      this.key = key;
       cache = new IdentityHashMap<>();
       ramBytesUsed = 0;
     }
 
-    private void incrementRamBytesUsed(long inc) {
-      ramBytesUsed += inc;
-      LRUFilterCache.this.ramBytesUsed += inc;
+    private void onDocIdSetCache(long ramBytesUsed) {
+      this.ramBytesUsed += ramBytesUsed;
+      LRUFilterCache.this.onDocIdSetCache(key, ramBytesUsed);
+    }
+
+    private void onDocIdSetEviction(long ramBytesUsed) {
+      this.ramBytesUsed -= ramBytesUsed;
+      LRUFilterCache.this.onDocIdSetEviction(key, 1, ramBytesUsed);
     }
 
     DocIdSet get(Filter filter) {
@@ -408,17 +522,14 @@ public class LRUFilterCache implements F
     void putIfAbsent(Filter filter, DocIdSet set) {
       if (cache.putIfAbsent(filter, set) == null) {
         // the set was actually put
-        cacheCount += 1;
-        cacheSize += 1;
-        incrementRamBytesUsed(HASHTABLE_RAM_BYTES_PER_ENTRY + set.ramBytesUsed());
+        onDocIdSetCache(HASHTABLE_RAM_BYTES_PER_ENTRY + set.ramBytesUsed());
       }
     }
 
     void remove(Filter filter) {
       DocIdSet removed = cache.remove(filter);
       if (removed != null) {
-        cacheSize -= 1;
-        incrementRamBytesUsed(-(HASHTABLE_RAM_BYTES_PER_ENTRY + removed.ramBytesUsed()));
+        onDocIdSetEviction(HASHTABLE_RAM_BYTES_PER_ENTRY + removed.ramBytesUsed());
       }
     }
 

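The callbacks above make it easy to layer custom accounting on top of the cache. Here is a minimal sketch of a subclass that tracks hits and misses per filter class; the class name is illustrative, the constructor arguments and callback signatures are the ones introduced in this patch, and each override calls super first so the cache's own counters stay correct:

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.search.Filter;
import org.apache.lucene.search.LRUFilterCache;

// Hypothetical subclass computing per-filter-class hit/miss statistics.
public class PerClassStatsFilterCache extends LRUFilterCache {

  private final ConcurrentHashMap<Class<?>, AtomicLong> hits = new ConcurrentHashMap<>();
  private final ConcurrentHashMap<Class<?>, AtomicLong> misses = new ConcurrentHashMap<>();

  public PerClassStatsFilterCache(int maxSize, long maxRamBytesUsed) {
    super(maxSize, maxRamBytesUsed);
  }

  private static void increment(ConcurrentHashMap<Class<?>, AtomicLong> counters, Class<?> key) {
    AtomicLong counter = counters.get(key);
    if (counter == null) {
      final AtomicLong previous = counters.putIfAbsent(key, counter = new AtomicLong());
      if (previous != null) {
        counter = previous;
      }
    }
    counter.incrementAndGet();
  }

  @Override
  protected void onHit(Object readerCoreKey, Filter filter) {
    super.onHit(readerCoreKey, filter); // keep the global hit count up to date
    // keep this cheap: callbacks run synchronously and under the cache lock
    increment(hits, filter.getClass());
  }

  @Override
  protected void onMiss(Object readerCoreKey, Filter filter) {
    super.onMiss(readerCoreKey, filter);
    increment(misses, filter.getClass());
  }
}
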
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java?rev=1649480&r1=1649479&r2=1649480&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java Mon Jan  5 08:45:27 2015
@@ -27,6 +27,7 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.lucene.document.Document;
@@ -544,4 +545,169 @@ public class TestLRUFilterCache extends
     dir.close();
   }
 
+  public void testFineGrainedStats() throws IOException {
+    Directory dir1 = newDirectory();
+    final RandomIndexWriter w1 = new RandomIndexWriter(random(), dir1);
+    Directory dir2 = newDirectory();
+    final RandomIndexWriter w2 = new RandomIndexWriter(random(), dir2);
+
+    final List<String> colors = Arrays.asList("blue", "red", "green", "yellow");
+
+    Document doc = new Document();
+    StringField f = new StringField("color", "", Store.NO);
+    doc.add(f);
+    for (RandomIndexWriter w : Arrays.asList(w1, w2)) {
+      for (int i = 0; i < 10; ++i) {
+        f.setStringValue(RandomPicks.randomFrom(random(), colors));
+        w.addDocument(doc);
+        if (random().nextBoolean()) {
+          w.getReader().close();
+        }
+      }
+    }
+
+    final DirectoryReader reader1 = w1.getReader();
+    final int segmentCount1 = reader1.leaves().size();
+    final IndexSearcher searcher1 = new IndexSearcher(reader1);
+
+    final DirectoryReader reader2 = w2.getReader();
+    final int segmentCount2 = reader2.leaves().size();
+    final IndexSearcher searcher2 = new IndexSearcher(reader2);
+
+    final Map<Object, Integer> indexId = new HashMap<>();
+    for (LeafReaderContext ctx : reader1.leaves()) {
+      indexId.put(ctx.reader().getCoreCacheKey(), 1);
+    }
+    for (LeafReaderContext ctx : reader2.leaves()) {
+      indexId.put(ctx.reader().getCoreCacheKey(), 2);
+    }
+
+    final AtomicLong hitCount1 = new AtomicLong();
+    final AtomicLong hitCount2 = new AtomicLong();
+    final AtomicLong missCount1 = new AtomicLong();
+    final AtomicLong missCount2 = new AtomicLong();
+
+    final AtomicLong ramBytesUsage = new AtomicLong();
+    final AtomicLong cacheSize = new AtomicLong();
+
+    final LRUFilterCache filterCache = new LRUFilterCache(2, 10000000) {
+      @Override
+      protected void onHit(Object readerCoreKey, Filter filter) {
+        super.onHit(readerCoreKey, filter);
+        switch(indexId.get(readerCoreKey).intValue()) {
+          case 1:
+            hitCount1.incrementAndGet();
+            break;
+          case 2:
+            hitCount2.incrementAndGet();
+            break;
+          default:
+            throw new AssertionError();
+        }
+      }
+
+      @Override
+      protected void onMiss(Object readerCoreKey, Filter filter) {
+        super.onMiss(readerCoreKey, filter);
+        switch(indexId.get(readerCoreKey).intValue()) {
+          case 1:
+            missCount1.incrementAndGet();
+            break;
+          case 2:
+            missCount2.incrementAndGet();
+            break;
+          default:
+            throw new AssertionError();
+        }
+      }
+
+      @Override
+      protected void onFilterCache(Filter filter, long ramBytesUsed) {
+        super.onFilterCache(filter, ramBytesUsed);
+        ramBytesUsage.addAndGet(ramBytesUsed);
+      }
+
+      @Override
+      protected void onFilterEviction(Filter filter, long ramBytesUsed) {
+        super.onFilterEviction(filter, ramBytesUsed);
+        ramBytesUsage.addAndGet(-ramBytesUsed);
+      }
+
+      @Override
+      protected void onDocIdSetCache(Object readerCoreKey, long ramBytesUsed) {
+        super.onDocIdSetCache(readerCoreKey, ramBytesUsed);
+        ramBytesUsage.addAndGet(ramBytesUsed);
+        cacheSize.incrementAndGet();
+      }
+
+      @Override
+      protected void onDocIdSetEviction(Object readerCoreKey, int numEntries, long sumRamBytesUsed) {
+        super.onDocIdSetEviction(readerCoreKey, numEntries, sumRamBytesUsed);
+        ramBytesUsage.addAndGet(-sumRamBytesUsed);
+        cacheSize.addAndGet(-numEntries);
+      }
+
+      @Override
+      protected void onClear() {
+        super.onClear();
+        ramBytesUsage.set(0);
+        cacheSize.set(0);
+      }
+    };
+
+    final Filter filter = new QueryWrapperFilter(new TermQuery(new Term("color", "red")));
+    final Filter filter2 = new QueryWrapperFilter(new TermQuery(new Term("color", "blue")));
+    final Filter filter3 = new QueryWrapperFilter(new TermQuery(new Term("color", "green")));
+
+    // search on searcher1
+    Filter cached = filterCache.doCache(filter, FilterCachingPolicy.ALWAYS_CACHE);
+    for (int i = 0; i < 10; ++i) {
+      searcher1.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(9 * segmentCount1, hitCount1.longValue());
+    assertEquals(0, hitCount2.longValue());
+    assertEquals(segmentCount1, missCount1.longValue());
+    assertEquals(0, missCount2.longValue());
+
+    // then on searcher2
+    cached = filterCache.doCache(filter2, FilterCachingPolicy.ALWAYS_CACHE);
+    for (int i = 0; i < 20; ++i) {
+      searcher2.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(9 * segmentCount1, hitCount1.longValue());
+    assertEquals(19 * segmentCount2, hitCount2.longValue());
+    assertEquals(segmentCount1, missCount1.longValue());
+    assertEquals(segmentCount2, missCount2.longValue());
+
+    // now on searcher1 again to trigger evictions
+    cached = filterCache.doCache(filter3, FilterCachingPolicy.ALWAYS_CACHE);
+    for (int i = 0; i < 30; ++i) {
+      searcher1.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(segmentCount1, filterCache.getEvictionCount());
+    assertEquals(38 * segmentCount1, hitCount1.longValue());
+    assertEquals(19 * segmentCount2, hitCount2.longValue());
+    assertEquals(2 * segmentCount1, missCount1.longValue());
+    assertEquals(segmentCount2, missCount2.longValue());
+
+    // check that the recomputed stats are the same as those reported by the cache
+    assertEquals(filterCache.ramBytesUsed(), (segmentCount1 + segmentCount2) * LRUFilterCache.HASHTABLE_RAM_BYTES_PER_ENTRY + ramBytesUsage.longValue());
+    assertEquals(filterCache.getCacheSize(), cacheSize.longValue());
+
+    reader1.close();
+    reader2.close();
+    w1.close();
+    w2.close();
+
+    assertEquals(filterCache.ramBytesUsed(), ramBytesUsage.longValue());
+    assertEquals(0, cacheSize.longValue());
+
+    filterCache.clear();
+    assertEquals(0, ramBytesUsage.longValue());
+    assertEquals(0, cacheSize.longValue());
+
+    dir1.close();
+    dir2.close();
+  }
+
 }
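
As a usage note, the pre-existing global counters are already enough to derive a hit ratio without subclassing. A small illustrative helper, assuming the getters mentioned in the class Javadoc (getHitCount, getMissCount) return longs:

import org.apache.lucene.search.LRUFilterCache;

// Hypothetical helper deriving a hit ratio from the cache's global counters.
public final class FilterCacheStats {

  private FilterCacheStats() {}

  public static double hitRatio(LRUFilterCache cache) {
    final long hits = cache.getHitCount();
    final long total = hits + cache.getMissCount();
    // guard against division by zero before the cache has been used
    return total == 0 ? 0d : (double) hits / total;
  }
}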