You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/12/16 15:52:05 UTC
svn commit: r1645958 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/search/LRUFilterCache.java test/org/apache/lucene/search/TestLRUFilterCache.java

Author: jpountz
Date: Tue Dec 16 14:52:04 2014
New Revision: 1645958

URL: http://svn.apache.org/r1645958
Log:
LUCENE-6107: Add stats to LRUFilterCache.

Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java?rev=1645958&r1=1645957&r2=1645958&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java Tue Dec 16 14:52:04 2014
@@ -73,7 +73,14 @@ public class LRUFilterCache implements F
   // mostRecentlyUsedFilters. This is why write operations are performed under a lock
   private final Set<Filter> mostRecentlyUsedFilters;
   private final Map<Object, LeafCache> cache;
-  private volatile long ramBytesUsed; // all updates of this number must be performed under a lock
+
+  // these variables are volatile so that we do not need to sync reads,
+  // but all updates (increments and decrements) must be performed under the lock
+  private volatile long ramBytesUsed;
+  private volatile long hitCount;
+  private volatile long missCount;
+  private volatile long cacheCount;
+  private volatile long cacheSize;
 
   /**
    * Create a new instance that will cache at most <code>maxSize</code> filters
@@ -101,14 +108,22 @@ public class LRUFilterCache implements F
   synchronized DocIdSet get(Filter filter, LeafReaderContext context) {
     final LeafCache leafCache = cache.get(context.reader().getCoreCacheKey());
     if (leafCache == null) {
+      missCount += 1;
       return null;
     }
     // this get call moves the filter to the most-recently-used position
     final Filter singleton = uniqueFilters.get(filter);
     if (singleton == null) {
+      missCount += 1;
       return null;
     }
-    return leafCache.get(singleton);
+    final DocIdSet cached = leafCache.get(singleton);
+    if (cached == null) {
+      missCount += 1;
+    } else {
+      hitCount += 1;
+    }
+    return cached;
   }
 
   synchronized void putIfAbsent(Filter filter, LeafReaderContext context, DocIdSet set) {
@@ -157,6 +172,7 @@ public class LRUFilterCache implements F
     final LeafCache leafCache = cache.remove(coreKey);
     if (leafCache != null) {
       ramBytesUsed -= leafCache.ramBytesUsed + HASHTABLE_RAM_BYTES_PER_ENTRY;
+      cacheSize -= leafCache.cache.size();
     }
   }
 
@@ -184,6 +200,7 @@ public class LRUFilterCache implements F
     cache.clear();
     mostRecentlyUsedFilters.clear();
     ramBytesUsed = 0;
+    cacheSize = 0;
   }
 
   // pkg-private for testing
@@ -215,6 +232,14 @@ public class LRUFilterCache implements F
     if (recomputedRamBytesUsed != ramBytesUsed) {
       throw new AssertionError("ramBytesUsed mismatch : " + ramBytesUsed + " != " + recomputedRamBytesUsed);
     }
+
+    long recomputedCacheSize = 0;
+    for (LeafCache leafCache : cache.values()) {
+      recomputedCacheSize += leafCache.cache.size();
+    }
+    if (recomputedCacheSize != getCacheSize()) {
+      throw new AssertionError("cacheSize mismatch : " + getCacheSize() + " != " + recomputedCacheSize);
+    }
   }
 
   // pkg-private for testing
@@ -286,6 +311,79 @@ public class LRUFilterCache implements F
     return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
   }
 
+  /**
+   * Return the total number of times that a {@link Filter} has been looked up
+   * in this {@link FilterCache}. Note that this number is incremented once per
+   * segment, so running a cached filter only once will increment this counter
+   * by the number of segments that are wrapped by the searcher.
+   * By definition, {@link #getTotalCount()} is the sum of
+   * {@link #getHitCount()} and {@link #getMissCount()}.
+   * @see #getHitCount()
+   * @see #getMissCount()
+   */
+  public final long getTotalCount() {
+    return getHitCount() + getMissCount();
+  }
+
+  /**
+   * Over the {@link #getTotalCount() total} number of times that a filter has
+   * been looked up, return how many times a cached {@link DocIdSet} has been
+   * found and returned.
+   * @see #getTotalCount()
+   * @see #getMissCount()
+   */
+  public final long getHitCount() {
+    return hitCount;
+  }
+
+  /**
+   * Over the {@link #getTotalCount() total} number of times that a filter has
+   * been looked up, return how many times this filter was not contained in the
+   * cache.
+   * @see #getTotalCount()
+   * @see #getHitCount()
+   */
+  public final long getMissCount() {
+    return missCount;
+  }
+
+  /**
+   * Return the total number of {@link DocIdSet}s which are currently stored
+   * in the cache.
+   * @see #getCacheCount()
+   * @see #getEvictionCount()
+   */
+  public final long getCacheSize() {
+    return cacheSize;
+  }
+
+  /**
+   * Return the total number of cache entries that have been generated and put
+   * in the cache. It is highly desirable to have a {@link #getHitCount() hit
+   * count} that is much higher than the {@link #getCacheCount() cache count}
+   * as the opposite would indicate that the filter cache makes efforts in order
+   * to cache filters but then they do not get reused.
+   * @see #getCacheSize()
+   * @see #getEvictionCount()
+   */
+  public final long getCacheCount() {
+    return cacheCount;
+  }
+
+  /**
+   * Return the number of cache entries that have been removed from the cache
+   * either in order to stay under the maximum configured size/ram usage, or
+   * because a segment has been closed. High numbers of evictions might mean
+   * that filters are not reused or that the {@link FilterCachingPolicy
+   * caching policy} caches too aggressively on NRT segments which get merged
+   * early.
+   * @see #getCacheCount()
+   * @see #getCacheSize()
+   */
+  public final long getEvictionCount() {
+    return getCacheCount() - getCacheSize();
+  }
+
   // this class is not thread-safe, everything but ramBytesUsed needs to be called under a lock
   private class LeafCache implements Accountable {
 
@@ -309,6 +407,8 @@ public class LRUFilterCache implements F
     void putIfAbsent(Filter filter, DocIdSet set) {
       if (cache.putIfAbsent(filter, set) == null) {
         // the set was actually put
+        cacheCount += 1;
+        cacheSize += 1;
         incrementRamBytesUsed(HASHTABLE_RAM_BYTES_PER_ENTRY + set.ramBytesUsed());
       }
     }
@@ -316,6 +416,7 @@ public class LRUFilterCache implements F
     void remove(Filter filter) {
       DocIdSet removed = cache.remove(filter);
       if (removed != null) {
+        cacheSize -= 1;
         incrementRamBytesUsed(-(HASHTABLE_RAM_BYTES_PER_ENTRY + removed.ramBytesUsed()));
       }
     }

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java?rev=1645958&r1=1645957&r2=1645958&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java Tue Dec 16 14:52:04 2014
@@ -458,4 +458,90 @@ public class TestLRUFilterCache extends
     dir.close();
   }
 
+  public void testStats() throws IOException {
+    final LRUFilterCache filterCache = new LRUFilterCache(1, 10000000);
+
+    Directory dir = newDirectory();
+    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+    final List<String> colors = Arrays.asList("blue", "red", "green", "yellow");
+
+    Document doc = new Document();
+    StringField f = new StringField("color", "", Store.NO);
+    doc.add(f);
+    for (int i = 0; i < 10; ++i) {
+      f.setStringValue(RandomPicks.randomFrom(random(), colors));
+      w.addDocument(doc);
+      if (random().nextBoolean()) {
+        w.getReader().close();
+      }
+    }
+
+    final DirectoryReader reader = w.getReader();
+    final int segmentCount = reader.leaves().size();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+    final Filter filter = new QueryWrapperFilter(new TermQuery(new Term("color", "red")));
+    final Filter filter2 = new QueryWrapperFilter(new TermQuery(new Term("color", "blue")));
+
+    // first pass, lookups without caching that all miss
+    Filter cached = filterCache.doCache(filter, NEVER_CACHE);
+    for (int i = 0; i < 10; ++i) {
+      searcher.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(10 * segmentCount, filterCache.getTotalCount());
+    assertEquals(0, filterCache.getHitCount());
+    assertEquals(10 * segmentCount, filterCache.getMissCount());
+    assertEquals(0, filterCache.getCacheCount());
+    assertEquals(0, filterCache.getEvictionCount());
+    assertEquals(0, filterCache.getCacheSize());
+
+    // second pass, lookups + caching, only the first one is a miss
+    cached = filterCache.doCache(filter, FilterCachingPolicy.ALWAYS_CACHE);
+    for (int i = 0; i < 10; ++i) {
+      searcher.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(20 * segmentCount, filterCache.getTotalCount());
+    assertEquals(9 * segmentCount, filterCache.getHitCount());
+    assertEquals(11 * segmentCount, filterCache.getMissCount());
+    assertEquals(1 * segmentCount, filterCache.getCacheCount());
+    assertEquals(0, filterCache.getEvictionCount());
+    assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+    // third pass lookups without caching, we only have hits
+    cached = filterCache.doCache(filter, NEVER_CACHE);
+    for (int i = 0; i < 10; ++i) {
+      searcher.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(30 * segmentCount, filterCache.getTotalCount());
+    assertEquals(19 * segmentCount, filterCache.getHitCount());
+    assertEquals(11 * segmentCount, filterCache.getMissCount());
+    assertEquals(1 * segmentCount, filterCache.getCacheCount());
+    assertEquals(0, filterCache.getEvictionCount());
+    assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+    // fourth pass with a different filter which will trigger evictions since the size is 1
+    cached = filterCache.doCache(filter2, FilterCachingPolicy.ALWAYS_CACHE);
+    for (int i = 0; i < 10; ++i) {
+      searcher.search(new ConstantScoreQuery(cached), 1);
+    }
+    assertEquals(40 * segmentCount, filterCache.getTotalCount());
+    assertEquals(28 * segmentCount, filterCache.getHitCount());
+    assertEquals(12 * segmentCount, filterCache.getMissCount());
+    assertEquals(2 * segmentCount, filterCache.getCacheCount());
+    assertEquals(1 * segmentCount, filterCache.getEvictionCount());
+    assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+    // now close, causing evictions due to the closing of segment cores
+    reader.close();
+    w.close();
+    assertEquals(40 * segmentCount, filterCache.getTotalCount());
+    assertEquals(28 * segmentCount, filterCache.getHitCount());
+    assertEquals(12 * segmentCount, filterCache.getMissCount());
+    assertEquals(2 * segmentCount, filterCache.getCacheCount());
+    assertEquals(2 * segmentCount, filterCache.getEvictionCount());
+    assertEquals(0, filterCache.getCacheSize());
+
+    dir.close();
+  }
+
 }