You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by el...@apache.org on 2015/05/24 15:20:04 UTC
svn commit: r1681449 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/util/ConcurrentLFUCache.java core/src/test/org/apache/solr/search/TestLFUCache.java

Author: elyograg
Date: Sun May 24 13:20:04 2015
New Revision: 1681449

URL: http://svn.apache.org/r1681449
Log:
SOLR-7585: Fix NoSuchElementException in LFUCache

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun May 24 13:20:04 2015
@@ -351,6 +351,9 @@ Bug Fixes
 * SOLR-7335: Fix doc boosts to no longer be multiplied in each field value in multivalued fields that
   are not used in copyFields (Shingo Sasaki via hossman)
 
+* SOLR-7585: Fix NoSuchElementException in LFUCache resulting from heavy writes
+  making concurrent put() calls. (Maciej Zasada via Shawn Heisey)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/ConcurrentLFUCache.java Sun May 24 13:20:04 2015
@@ -51,6 +51,7 @@ public class ConcurrentLFUCache<K, V> im
   private final boolean newThreadForCleanup;
   private volatile boolean islive = true;
   private final Stats stats = new Stats();
+  @SuppressWarnings("unused")
   private final int acceptableWaterMark;
   private long lowHitCount = 0;  // not volatile, only accessed in the cleaning method
   private final EvictionListener<K, V> evictionListener;
@@ -154,57 +155,64 @@ public class ConcurrentLFUCache<K, V> im
   }
 
   /**
-   * Removes items from the cache to bring the size down
-   * to an acceptable value ('acceptableWaterMark').
-   * <p/>
-   * It is done in two stages. In the first stage, least recently used items are evicted.
-   * If, after the first stage, the cache size is still greater than 'acceptableSize'
-   * config parameter, the second stage takes over.
-   * <p/>
-   * The second stage is more intensive and tries to bring down the cache size
-   * to the 'lowerWaterMark' config parameter.
+   * Removes items from the cache to bring the size down to the lowerWaterMark.
    */
   private void markAndSweep() {
     if (!markAndSweepLock.tryLock()) return;
     try {
       long lowHitCount = this.lowHitCount;
       isCleaning = true;
-      this.lowHitCount = lowHitCount;     // volatile write to make isCleaning visible
-
+      this.lowHitCount = lowHitCount; // volatile write to make isCleaning visible
+      
       int sz = stats.size.get();
-
+      if (sz <= upperWaterMark) {
+        /* SOLR-7585: Even though we acquired a lock, multiple threads might detect a need for calling this method.
+         * Locking keeps these from executing at the same time, so they run sequentially.  The second and subsequent
+         * sequential runs of this method don't need to be done, since there are no elements to remove.
+        */
+        return;
+      }
+      
       int wantToRemove = sz - lowerWaterMark;
-
-      TreeSet<CacheEntry> tree = new TreeSet<>();
-
+      
+      TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
+      
       for (CacheEntry<K, V> ce : map.values()) {
-        // set hitsCopy to avoid later Atomic reads
+        // set hitsCopy to avoid later Atomic reads.  Primitive types are faster than the atomic get().
         ce.hitsCopy = ce.hits.get();
         ce.lastAccessedCopy = ce.lastAccessed;
         if (timeDecay) {
           ce.hits.set(ce.hitsCopy >>> 1);
         }
-
+        
         if (tree.size() < wantToRemove) {
           tree.add(ce);
         } else {
-          // If the hits are not equal, we can remove before adding
-          // which is slightly faster
-          if (ce.hitsCopy < tree.first().hitsCopy) {
-            tree.remove(tree.first());
-            tree.add(ce);
-          } else if (ce.hitsCopy == tree.first().hitsCopy) {
-            tree.add(ce);
-            tree.remove(tree.first());
+          /*
+           * SOLR-7585: Before doing this part, make sure the TreeSet actually has an element, since the first() method
+           * fails with NoSuchElementException if the set is empty.  If that test passes, check hits. This test may
+           * never actually fail due to the upperWaterMark check above, but we'll do it anyway.
+           */
+          if (tree.size() > 0) {
+            /* If hits are not equal, we can remove before adding which is slightly faster. I can no longer remember
+             * why removing first is faster, but I vaguely remember being sure about it!
+             */
+            if (ce.hitsCopy < tree.first().hitsCopy) {
+              tree.remove(tree.first());
+              tree.add(ce);
+            } else if (ce.hitsCopy == tree.first().hitsCopy) {
+              tree.add(ce);
+              tree.remove(tree.first());
+            }
           }
         }
       }
-
+      
       for (CacheEntry<K, V> e : tree) {
         evictEntry(e.key);
       }
     } finally {
-      isCleaning = false;  // set before markAndSweep.unlock() for visibility
+      isCleaning = false; // set before markAndSweep.unlock() for visibility
       markAndSweepLock.unlock();
     }
   }
@@ -230,12 +238,12 @@ public class ConcurrentLFUCache<K, V> im
     Map<K, V> result = new LinkedHashMap<>();
     if (n <= 0)
       return result;
-    TreeSet<CacheEntry> tree = new TreeSet<>();
+    TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
     // we need to grab the lock since we are changing the copy variables
     markAndSweepLock.lock();
     try {
       for (Map.Entry<Object, CacheEntry<K, V>> entry : map.entrySet()) {
-        CacheEntry ce = entry.getValue();
+        CacheEntry<K, V> ce = entry.getValue();
         ce.hitsCopy = ce.hits.get();
         ce.lastAccessedCopy = ce.lastAccessed;
         if (tree.size() < n) {
@@ -274,7 +282,7 @@ public class ConcurrentLFUCache<K, V> im
     Map<K, V> result = new LinkedHashMap<>();
     if (n <= 0)
       return result;
-    TreeSet<CacheEntry> tree = new TreeSet<>();
+    TreeSet<CacheEntry<K, V>> tree = new TreeSet<>();
     // we need to grab the lock since we are changing the copy variables
     markAndSweepLock.lock();
     try {

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java?rev=1681449&r1=1681448&r2=1681449&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestLFUCache.java Sun May 24 13:20:04 2015
@@ -18,8 +18,10 @@ package org.apache.solr.search;
  */
 
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.ConcurrentLFUCache;
+import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.RefCounted;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -28,6 +30,9 @@ import java.io.IOException;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
 
 
 /**
@@ -360,6 +365,41 @@ public class TestLFUCache extends SolrTe
     }
   }
 
+  @Test
+  public void testConcurrentAccess() throws InterruptedException {
+    /* Set up a thread pool with twice as many threads as there are CPUs. */
+    final ConcurrentLFUCache<Integer,Long> cache = new ConcurrentLFUCache<>(10, 9);
+    ExecutorService executorService = ExecutorUtil.newMDCAwareFixedThreadPool(10,
+        new DefaultSolrThreadFactory("testConcurrentAccess"));
+    final AtomicReference<Throwable> error = new AtomicReference<>();
+    
+    /*
+     * Use the thread pool to execute at least two million puts into the cache.
+     * Without the fix on SOLR-7585, NoSuchElementException is thrown.
+     * Simultaneous calls to markAndSweep are protected from each other by a
+     * lock, so they run sequentially, and due to a problem in the previous
+     * design, the cache eviction doesn't work right.
+     */
+    for (int i = 0; i < atLeast(2_000_000); ++i) {
+      executorService.submit(new Runnable() {
+        @Override
+        public void run() {
+          try {
+            cache.put(random().nextInt(100), random().nextLong());
+          } catch (Throwable t) {
+            error.compareAndSet(null, t);
+          }
+        }
+      });
+    }
+    
+    executorService.shutdown();
+    executorService.awaitTermination(1, TimeUnit.MINUTES);
+    
+    // then:
+    assertNull("Exception during concurrent access: " + error.get(), error.get());
+  }
+
 // From the original LRU cache tests, they're commented out there too because they take a while.
 //  void doPerfTest(int iter, int cacheSize, int maxKey) {
 //    long start = System.currentTimeMillis();