You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by el...@apache.org on 2015/05/24 15:20:04 UTC
svn commit: r1681449 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/util/ConcurrentLFUCache.java
core/src/test/org/apache/solr/search/TestLFUCache.java
Author: elyograg
Date: Sun May 24 13:20:04 2015
New Revision: 1681449
URL: http://svn.apache.org/r1681449
Log:
SOLR-7585: Fix NoSuchElementException in LFUCache
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun May 24 13:20:04 2015
@@ -351,6 +351,9 @@ Bug Fixes
* SOLR-7335: Fix doc boosts to no longer be multiplied in each field value in multivalued fields that
are not used in copyFields (Shingo Sasaki via hossman)
+* SOLR-7585: Fix NoSuchElementException in LFUCache resulting from heavy writes
+ making concurrent put() calls. (Maciej Zasada via Shawn Heisey)
+
Optimizations
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java Sun May 24 13:20:04 2015
@@ -51,6 +51,7 @@ public class ConcurrentLFUCache<K, V> im
private final boolean newThreadForCleanup;
private volatile boolean islive = true;
private final Stats stats = new Stats();
+ @SuppressWarnings("unused")
private final int acceptableWaterMark;
private long lowHitCount = 0; // not volatile, only accessed in the cleaning method
private final EvictionListener<K, V> evictionListener;
@@ -154,57 +155,64 @@ public class ConcurrentLFUCache<K, V> im
}
/**
- * Removes items from the cache to bring the size down
- * to an acceptable value ('acceptableWaterMark').
- * <p/>
- * It is done in two stages. In the first stage, least recently used items are evicted.
- * If, after the first stage, the cache size is still greater than 'acceptableSize'
- * config parameter, the second stage takes over.
- * <p/>
- * The second stage is more intensive and tries to bring down the cache size
- * to the 'lowerWaterMark' config parameter.
+ * Removes items from the cache to bring the size down to the lowerWaterMark.
*/
private void markAndSweep() {
if (!markAndSweepLock.tryLock()) return;
try {
long lowHitCount = this.lowHitCount;
isCleaning = true;
- this.lowHitCount = lowHitCount; // volatile write to make isCleaning visible
-
+ this.lowHitCount = lowHitCount; // volatile write to make isCleaning visible
+
int sz = stats.size.get();
-
+ if (sz <= upperWaterMark) {
+ /* SOLR-7585: Even though we acquired a lock, multiple threads might detect a need for calling this method.
+ * Locking keeps these from executing at the same time, so they run sequentially. The second and subsequent
+ * sequential runs of this method don't need to be done, since there are no elements to remove.
+ */
+ return;
+ }
+
int wantToRemove = sz - lowerWaterMark;
-
- TreeSet<CacheEntry> tree = new TreeSet<>();
-
+
+ TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
+
for (CacheEntry<K, V> ce : map.values()) {
- // set hitsCopy to avoid later Atomic reads
+ // set hitsCopy to avoid later Atomic reads. Primitive types are faster than the atomic get().
ce.hitsCopy = ce.hits.get();
ce.lastAccessedCopy = ce.lastAccessed;
if (timeDecay) {
ce.hits.set(ce.hitsCopy >>> 1);
}
-
+
if (tree.size() < wantToRemove) {
tree.add(ce);
} else {
- // If the hits are not equal, we can remove before adding
- // which is slightly faster
- if (ce.hitsCopy < tree.first().hitsCopy) {
- tree.remove(tree.first());
- tree.add(ce);
- } else if (ce.hitsCopy == tree.first().hitsCopy) {
- tree.add(ce);
- tree.remove(tree.first());
+ /*
+ * SOLR-7585: Before doing this part, make sure the TreeSet actually has an element, since the first() method
+ * fails with NoSuchElementException if the set is empty. If that test passes, check hits. This test may
+ * never actually fail due to the upperWaterMark check above, but we'll do it anyway.
+ */
+ if (tree.size() > 0) {
+ /* If hits are not equal, we can remove before adding which is slightly faster. I can no longer remember
+ * why removing first is faster, but I vaguely remember being sure about it!
+ */
+ if (ce.hitsCopy < tree.first().hitsCopy) {
+ tree.remove(tree.first());
+ tree.add(ce);
+ } else if (ce.hitsCopy == tree.first().hitsCopy) {
+ tree.add(ce);
+ tree.remove(tree.first());
+ }
}
}
}
-
+
for (CacheEntry<K, V> e : tree) {
evictEntry(e.key);
}
} finally {
- isCleaning = false; // set before markAndSweep.unlock() for visibility
+ isCleaning = false; // set before markAndSweep.unlock() for visibility
markAndSweepLock.unlock();
}
}
@@ -230,12 +238,12 @@ public class ConcurrentLFUCache<K, V> im
Map<K, V> result = new LinkedHashMap<>();
if (n <= 0)
return result;
- TreeSet<CacheEntry> tree = new TreeSet<>();
+ TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
// we need to grab the lock since we are changing the copy variables
markAndSweepLock.lock();
try {
for (Map.Entry<Object, CacheEntry<K, V>> entry : map.entrySet()) {
- CacheEntry ce = entry.getValue();
+ CacheEntry<K, V> ce = entry.getValue();
ce.hitsCopy = ce.hits.get();
ce.lastAccessedCopy = ce.lastAccessed;
if (tree.size() < n) {
@@ -274,7 +282,7 @@ public class ConcurrentLFUCache<K, V> im
Map<K, V> result = new LinkedHashMap<>();
if (n <= 0)
return result;
- TreeSet<CacheEntry> tree = new TreeSet<>();
+ TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
// we need to grab the lock since we are changing the copy variables
markAndSweepLock.lock();
try {
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java Sun May 24 13:20:04 2015
@@ -18,8 +18,10 @@ package org.apache.solr.search;
*/
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.util.ConcurrentLFUCache;
+import org.apache.solr.util.DefaultSolrThreadFactory;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -28,6 +30,9 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
/**
@@ -360,6 +365,41 @@ public class TestLFUCache extends SolrTe
}
}
+ @Test
+ public void testConcurrentAccess() throws InterruptedException {
+ /* Set up a thread pool with twice as many threads as there are CPUs. */
+ final ConcurrentLFUCache<Integer,Long> cache = new ConcurrentLFUCache<>(10, 9);
+ ExecutorService executorService = ExecutorUtil.newMDCAwareFixedThreadPool(10,
+ new DefaultSolrThreadFactory("testConcurrentAccess"));
+ final AtomicReference<Throwable> error = new AtomicReference<>();
+
+ /*
+ * Use the thread pool to execute at least two million puts into the cache.
+ * Without the fix on SOLR-7585, NoSuchElementException is thrown.
+ * Simultaneous calls to markAndSweep are protected from each other by a
+ * lock, so they run sequentially, and due to a problem in the previous
+ * design, the cache eviction doesn't work right.
+ */
+ for (int i = 0; i < atLeast(2_000_000); ++i) {
+ executorService.submit(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ cache.put(random().nextInt(100), random().nextLong());
+ } catch (Throwable t) {
+ error.compareAndSet(null, t);
+ }
+ }
+ });
+ }
+
+ executorService.shutdown();
+ executorService.awaitTermination(1, TimeUnit.MINUTES);
+
+ // then:
+ assertNull("Exception during concurrent access: " + error.get(), error.get());
+ }
+
// From the original LRU cache tests, they're commented out there too because they take a while.
// void doPerfTest(int iter, int cacheSize, int maxKey) {
// long start = System.currentTimeMillis();