You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/12/16 15:52:05 UTC
svn commit: r1645958 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/search/LRUFilterCache.java
test/org/apache/lucene/search/TestLRUFilterCache.java
Author: jpountz
Date: Tue Dec 16 14:52:04 2014
New Revision: 1645958
URL: http://svn.apache.org/r1645958
Log:
LUCENE-6107: Add stats to LRUFilterCache.
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java?rev=1645958&r1=1645957&r2=1645958&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/LRUFilterCache.java Tue Dec 16 14:52:04 2014
@@ -73,7 +73,14 @@ public class LRUFilterCache implements F
// mostRecentlyUsedFilters. This is why write operations are performed under a lock
private final Set<Filter> mostRecentlyUsedFilters;
private final Map<Object, LeafCache> cache;
- private volatile long ramBytesUsed; // all updates of this number must be performed under a lock
+
+ // these variables are volatile so that reads do not need to be synchronized,
+ // but all updates (increments, decrements, resets) must be performed under the lock
+ private volatile long ramBytesUsed;
+ private volatile long hitCount;
+ private volatile long missCount;
+ private volatile long cacheCount;
+ private volatile long cacheSize;
/**
* Create a new instance that will cache at most <code>maxSize</code> filters
@@ -101,14 +108,22 @@ public class LRUFilterCache implements F
synchronized DocIdSet get(Filter filter, LeafReaderContext context) {
final LeafCache leafCache = cache.get(context.reader().getCoreCacheKey());
if (leafCache == null) {
+ missCount += 1;
return null;
}
// this get call moves the filter to the most-recently-used position
final Filter singleton = uniqueFilters.get(filter);
if (singleton == null) {
+ missCount += 1;
return null;
}
- return leafCache.get(singleton);
+ final DocIdSet cached = leafCache.get(singleton);
+ if (cached == null) {
+ missCount += 1;
+ } else {
+ hitCount += 1;
+ }
+ return cached;
}
synchronized void putIfAbsent(Filter filter, LeafReaderContext context, DocIdSet set) {
@@ -157,6 +172,7 @@ public class LRUFilterCache implements F
final LeafCache leafCache = cache.remove(coreKey);
if (leafCache != null) {
ramBytesUsed -= leafCache.ramBytesUsed + HASHTABLE_RAM_BYTES_PER_ENTRY;
+ cacheSize -= leafCache.cache.size();
}
}
@@ -184,6 +200,7 @@ public class LRUFilterCache implements F
cache.clear();
mostRecentlyUsedFilters.clear();
ramBytesUsed = 0;
+ cacheSize = 0;
}
// pkg-private for testing
@@ -215,6 +232,14 @@ public class LRUFilterCache implements F
if (recomputedRamBytesUsed != ramBytesUsed) {
throw new AssertionError("ramBytesUsed mismatch : " + ramBytesUsed + " != " + recomputedRamBytesUsed);
}
+
+ long recomputedCacheSize = 0;
+ for (LeafCache leafCache : cache.values()) {
+ recomputedCacheSize += leafCache.cache.size();
+ }
+ if (recomputedCacheSize != getCacheSize()) {
+ throw new AssertionError("cacheSize mismatch : " + getCacheSize() + " != " + recomputedCacheSize);
+ }
}
// pkg-private for testing
@@ -286,6 +311,79 @@ public class LRUFilterCache implements F
return new RoaringDocIdSet.Builder(reader.maxDoc()).add(iterator).build();
}
+ /**
+ * Return the total number of times that a {@link Filter} has been looked up
+ * in this {@link FilterCache}. Note that this number is incremented once per
+ * segment so running a cached filter only once will increment this counter
+ * by the number of segments that are wrapped by the searcher.
+ * Note that by definition, {@link #getTotalCount()} is the sum of
+ * {@link #getHitCount()} and {@link #getMissCount()}.
+ * @see #getHitCount()
+ * @see #getMissCount()
+ */
+ public final long getTotalCount() {
+ return getHitCount() + getMissCount();
+ }
+
+ /**
+ * Over the {@link #getTotalCount() total} number of times that a filter has
+ * been looked up, return how many times a cached {@link DocIdSet} has been
+ * found and returned.
+ * @see #getTotalCount()
+ * @see #getMissCount()
+ */
+ public final long getHitCount() {
+ return hitCount;
+ }
+
+ /**
+ * Over the {@link #getTotalCount() total} number of times that a filter has
+ * been looked up, return how many times this filter was not contained in the
+ * cache.
+ * @see #getTotalCount()
+ * @see #getHitCount()
+ */
+ public final long getMissCount() {
+ return missCount;
+ }
+
+ /**
+ * Return the total number of {@link DocIdSet}s which are currently stored
+ * in the cache.
+ * @see #getCacheCount()
+ * @see #getEvictionCount()
+ */
+ public final long getCacheSize() {
+ return cacheSize;
+ }
+
+ /**
+ * Return the total number of cache entries that have been generated and put
+ * in the cache. It is highly desirable to have a {@link #getHitCount() hit
+ * count} that is much higher than the {@link #getCacheCount() cache count},
+ * as the opposite would indicate that the filter cache spends effort caching
+ * filters that are then not reused.
+ * @see #getCacheSize()
+ * @see #getEvictionCount()
+ */
+ public final long getCacheCount() {
+ return cacheCount;
+ }
+
+ /**
+ * Return the number of cache entries that have been removed from the cache
+ * either in order to stay under the maximum configured size/ram usage, or
+ * because a segment has been closed. High numbers of evictions might mean
+ * that filters are not reused or that the {@link FilterCachingPolicy
+ * caching policy} caches too aggressively on NRT segments which get merged
+ * early.
+ * @see #getCacheCount()
+ * @see #getCacheSize()
+ */
+ public final long getEvictionCount() {
+ return getCacheCount() - getCacheSize();
+ }
+
// this class is not thread-safe, everything but ramBytesUsed needs to be called under a lock
private class LeafCache implements Accountable {
@@ -309,6 +407,8 @@ public class LRUFilterCache implements F
void putIfAbsent(Filter filter, DocIdSet set) {
if (cache.putIfAbsent(filter, set) == null) {
// the set was actually put
+ cacheCount += 1;
+ cacheSize += 1;
incrementRamBytesUsed(HASHTABLE_RAM_BYTES_PER_ENTRY + set.ramBytesUsed());
}
}
@@ -316,6 +416,7 @@ public class LRUFilterCache implements F
void remove(Filter filter) {
DocIdSet removed = cache.remove(filter);
if (removed != null) {
+ cacheSize -= 1;
incrementRamBytesUsed(-(HASHTABLE_RAM_BYTES_PER_ENTRY + removed.ramBytesUsed()));
}
}
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java?rev=1645958&r1=1645957&r2=1645958&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestLRUFilterCache.java Tue Dec 16 14:52:04 2014
@@ -458,4 +458,90 @@ public class TestLRUFilterCache extends
dir.close();
}
+ public void testStats() throws IOException {
+ final LRUFilterCache filterCache = new LRUFilterCache(1, 10000000);
+
+ Directory dir = newDirectory();
+ final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ final List<String> colors = Arrays.asList("blue", "red", "green", "yellow");
+
+ Document doc = new Document();
+ StringField f = new StringField("color", "", Store.NO);
+ doc.add(f);
+ for (int i = 0; i < 10; ++i) {
+ f.setStringValue(RandomPicks.randomFrom(random(), colors));
+ w.addDocument(doc);
+ if (random().nextBoolean()) {
+ w.getReader().close();
+ }
+ }
+
+ final DirectoryReader reader = w.getReader();
+ final int segmentCount = reader.leaves().size();
+ final IndexSearcher searcher = new IndexSearcher(reader);
+ final Filter filter = new QueryWrapperFilter(new TermQuery(new Term("color", "red")));
+ final Filter filter2 = new QueryWrapperFilter(new TermQuery(new Term("color", "blue")));
+
+ // first pass, lookups without caching that all miss
+ Filter cached = filterCache.doCache(filter, NEVER_CACHE);
+ for (int i = 0; i < 10; ++i) {
+ searcher.search(new ConstantScoreQuery(cached), 1);
+ }
+ assertEquals(10 * segmentCount, filterCache.getTotalCount());
+ assertEquals(0, filterCache.getHitCount());
+ assertEquals(10 * segmentCount, filterCache.getMissCount());
+ assertEquals(0, filterCache.getCacheCount());
+ assertEquals(0, filterCache.getEvictionCount());
+ assertEquals(0, filterCache.getCacheSize());
+
+ // second pass, lookups + caching, only the first one is a miss
+ cached = filterCache.doCache(filter, FilterCachingPolicy.ALWAYS_CACHE);
+ for (int i = 0; i < 10; ++i) {
+ searcher.search(new ConstantScoreQuery(cached), 1);
+ }
+ assertEquals(20 * segmentCount, filterCache.getTotalCount());
+ assertEquals(9 * segmentCount, filterCache.getHitCount());
+ assertEquals(11 * segmentCount, filterCache.getMissCount());
+ assertEquals(1 * segmentCount, filterCache.getCacheCount());
+ assertEquals(0, filterCache.getEvictionCount());
+ assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+ // third pass, lookups without caching, we only have hits
+ cached = filterCache.doCache(filter, NEVER_CACHE);
+ for (int i = 0; i < 10; ++i) {
+ searcher.search(new ConstantScoreQuery(cached), 1);
+ }
+ assertEquals(30 * segmentCount, filterCache.getTotalCount());
+ assertEquals(19 * segmentCount, filterCache.getHitCount());
+ assertEquals(11 * segmentCount, filterCache.getMissCount());
+ assertEquals(1 * segmentCount, filterCache.getCacheCount());
+ assertEquals(0, filterCache.getEvictionCount());
+ assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+ // fourth pass with a different filter which will trigger evictions since the size is 1
+ cached = filterCache.doCache(filter2, FilterCachingPolicy.ALWAYS_CACHE);
+ for (int i = 0; i < 10; ++i) {
+ searcher.search(new ConstantScoreQuery(cached), 1);
+ }
+ assertEquals(40 * segmentCount, filterCache.getTotalCount());
+ assertEquals(28 * segmentCount, filterCache.getHitCount());
+ assertEquals(12 * segmentCount, filterCache.getMissCount());
+ assertEquals(2 * segmentCount, filterCache.getCacheCount());
+ assertEquals(1 * segmentCount, filterCache.getEvictionCount());
+ assertEquals(1 * segmentCount, filterCache.getCacheSize());
+
+ // now close, causing evictions due to the closing of segment cores
+ reader.close();
+ w.close();
+ assertEquals(40 * segmentCount, filterCache.getTotalCount());
+ assertEquals(28 * segmentCount, filterCache.getHitCount());
+ assertEquals(12 * segmentCount, filterCache.getMissCount());
+ assertEquals(2 * segmentCount, filterCache.getCacheCount());
+ assertEquals(2 * segmentCount, filterCache.getEvictionCount());
+ assertEquals(0, filterCache.getCacheSize());
+
+ dir.close();
+ }
+
}