You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@quickstep.apache.org by zu...@apache.org on 2016/05/05 06:16:50 UTC
[14/30] incubator-quickstep git commit: Fixed deadlocks when loading a block or blob while evicting another. (#183)

Fixed deadlocks when loading a block or blob while evicting another. (#183)

* Revert "Storage manager concurrency bug"

* Avoid deadlocks when loading a block or blob.


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/44235508
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/44235508
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/44235508

Branch: refs/heads/master
Commit: 44235508ef89ff3bbd29a8eeec9dca586534b721
Parents: 43b9260
Author: Zuyu ZHANG <zu...@users.noreply.github.com>
Authored: Tue Apr 26 04:36:30 2016 -0700
Committer: Zuyu Zhang <zz...@pivotal.io>
Committed: Wed May 4 23:15:34 2016 -0700

----------------------------------------------------------------------
 .../tests/execution_generator/CMakeLists.txt    |   2 +-
 storage/CMakeLists.txt                          |   1 -
 storage/FileManagerHdfs.cpp                     |   2 +-
 storage/FileManagerPosix.cpp                    |   4 +-
 storage/FileManagerWindows.cpp                  |   2 +-
 storage/StorageManager.cpp                      | 102 ++++++++++---------
 storage/StorageManager.hpp                      |  33 +-----
 storage/tests/StorageManager_unittest.cpp       |  42 ++++++++
 utility/CMakeLists.txt                          |   1 +
 utility/ShardedLockManager.hpp                  |  56 ++++++++--
 10 files changed, 154 insertions(+), 91 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/query_optimizer/tests/execution_generator/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/query_optimizer/tests/execution_generator/CMakeLists.txt b/query_optimizer/tests/execution_generator/CMakeLists.txt
index 149721c..56bae16 100644
--- a/query_optimizer/tests/execution_generator/CMakeLists.txt
+++ b/query_optimizer/tests/execution_generator/CMakeLists.txt
@@ -81,4 +81,4 @@ file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Join)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Select)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/StringPatternMatching)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/TableGenerator)
-file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Update)
+file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Update)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/storage/CMakeLists.txt b/storage/CMakeLists.txt
index 11c2819..ed23802 100644
--- a/storage/CMakeLists.txt
+++ b/storage/CMakeLists.txt
@@ -599,7 +599,6 @@ if (QUICKSTEP_HAVE_FILE_MANAGER_POSIX)
   target_link_libraries(quickstep_storage_FileManagerLocal
                         quickstep_storage_FileManagerPosix)
   target_link_libraries(quickstep_storage_FileManagerPosix
-                        glog
                         quickstep_storage_FileManager
                         quickstep_storage_StorageBlockInfo
                         quickstep_storage_StorageConstants

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/FileManagerHdfs.cpp
----------------------------------------------------------------------
diff --git a/storage/FileManagerHdfs.cpp b/storage/FileManagerHdfs.cpp
index 7f30c59..5f9706e 100644
--- a/storage/FileManagerHdfs.cpp
+++ b/storage/FileManagerHdfs.cpp
@@ -140,7 +140,7 @@ size_t FileManagerHdfs::numSlots(const block_id block) const {
   hdfsFreeFileInfo(file_info, 1);
 
   if ((file_size % kSlotSizeBytes) != 0) {
-    LOG(FATAL) << "The file " << filename << " was corrupted.";
+    throw CorruptPersistentStorage();
   }
 
   return file_size / kSlotSizeBytes;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/FileManagerPosix.cpp
----------------------------------------------------------------------
diff --git a/storage/FileManagerPosix.cpp b/storage/FileManagerPosix.cpp
index 27257e5..3bfb69d 100644
--- a/storage/FileManagerPosix.cpp
+++ b/storage/FileManagerPosix.cpp
@@ -38,8 +38,6 @@
 #include "utility/Macros.hpp"
 #include "utility/StringUtil.hpp"
 
-#include "glog/logging.h"
-
 using std::size_t;
 using std::sscanf;
 using std::strerror;
@@ -88,7 +86,7 @@ size_t FileManagerPosix::numSlots(const block_id block) const {
   }
 
   if ((file_stat.st_size % kSlotSizeBytes) != 0) {
-    LOG(FATAL) << "The file " << filename << " was corrupted.";
+    throw CorruptPersistentStorage();
   }
 
   return static_cast<size_t>(file_stat.st_size) / kSlotSizeBytes;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/FileManagerWindows.cpp
----------------------------------------------------------------------
diff --git a/storage/FileManagerWindows.cpp b/storage/FileManagerWindows.cpp
index cfa8819..9e3d4c8 100644
--- a/storage/FileManagerWindows.cpp
+++ b/storage/FileManagerWindows.cpp
@@ -106,7 +106,7 @@ size_t FileManagerWindows::numSlots(const block_id block) const {
   uint64_t file_size = (static_cast<uint64_t>(file_stat.nFileSizeHigh) << 32) | file_stat.nFileSizeLow;
 
   if ((file_size % kSlotSizeBytes) != 0) {
-    LOG(FATAL) << "The file " << filename << " was corrupted.";
+    throw CorruptPersistentStorage();
   }
 
   return static_cast<size_t>(file_size / kSlotSizeBytes);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/StorageManager.cpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.cpp b/storage/StorageManager.cpp
index 1cdbcb6..a3f265d 100644
--- a/storage/StorageManager.cpp
+++ b/storage/StorageManager.cpp
@@ -214,7 +214,7 @@ block_id StorageManager::createBlock(const CatalogRelationSchema &relation,
 
     // Because block IDs are generated by atomically incrementing block_index_,
     // there should never be collisions.
-    DCHECK(blocks_.find(new_block_id) == blocks_.end());
+    DEBUG_ASSERT(blocks_.find(new_block_id) == blocks_.end());
     blocks_[new_block_id] = new_block_handle;
   }
 
@@ -241,7 +241,7 @@ block_id StorageManager::createBlob(const std::size_t num_slots,
 
     // Because block IDs are generated by atomically incrementing block_index_,
     // there should never be collisions.
-    DCHECK(blocks_.find(new_block_id) == blocks_.end());
+    DEBUG_ASSERT(blocks_.find(new_block_id) == blocks_.end());
     blocks_[new_block_id] = new_block_handle;
   }
 
@@ -287,12 +287,6 @@ StorageBlob* StorageManager::loadBlob(const block_id blob,
 }
 
 bool StorageManager::saveBlockOrBlob(const block_id block, const bool force) {
-  SpinSharedMutexSharedLock<false> read_lock(*lock_manager_.get(block));
-  return saveBlockOrBlobInternal(block, force);
-}
-
-
-bool StorageManager::saveBlockOrBlobInternal(const block_id block, const bool force) {
   // TODO(chasseur): This lock is held for the entire duration of this call
   // (including I/O), but really we only need to prevent the eviction of the
   // particular entry in 'blocks_' for the specified 'block'. If and when we
@@ -358,10 +352,10 @@ void StorageManager::deleteBlockOrBlobFile(const block_id block) {
 block_id StorageManager::allocateNewBlockOrBlob(const std::size_t num_slots,
                                                 BlockHandle *handle,
                                                 const int numa_node) {
-  DCHECK_GT(num_slots, 0u);
+  DEBUG_ASSERT(num_slots > 0);
   DEBUG_ASSERT(handle != nullptr);
 
-  handle->block_memory = allocateSlots(num_slots, numa_node, kInvalidBlockId);
+  handle->block_memory = allocateSlots(num_slots, numa_node);
   handle->block_memory_size = num_slots;
 
   return ++block_index_;
@@ -373,8 +367,8 @@ StorageManager::BlockHandle StorageManager::loadBlockOrBlob(
   // mutex in the lock manager. The caller has ensured that the block is not
   // already loaded before this function gets called.
   size_t num_slots = file_manager_->numSlots(block);
-  DCHECK_NE(num_slots, 0u);
-  void* block_buffer = allocateSlots(num_slots, numa_node, block);
+  DEBUG_ASSERT(num_slots != 0);
+  void *block_buffer = allocateSlots(num_slots, numa_node);
 
   const bool status = file_manager_->readBlockOrBlob(block, block_buffer, kSlotSizeBytes * num_slots);
   CHECK(status) << "Failed to read block from persistent storage: " << block;
@@ -389,13 +383,12 @@ StorageManager::BlockHandle StorageManager::loadBlockOrBlob(
 void StorageManager::insertBlockHandleAfterLoad(const block_id block,
                                                 const BlockHandle &handle) {
   SpinSharedMutexExclusiveLock<false> lock(blocks_shared_mutex_);
-  DCHECK(blocks_.find(block) == blocks_.end());
+  DEBUG_ASSERT(blocks_.find(block) == blocks_.end());
   blocks_[block] = handle;
 }
 
 void* StorageManager::allocateSlots(const std::size_t num_slots,
-                                    const int numa_node,
-                                    const block_id locked_block_id) {
+                                    const int numa_node) {
 #if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
   static constexpr int kLargePageMmapFlags
       = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
@@ -404,6 +397,7 @@ void* StorageManager::allocateSlots(const std::size_t num_slots,
       = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
 #endif
 
+  makeRoomForBlock(num_slots);
   void *slots = nullptr;
 
 #if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
@@ -450,7 +444,7 @@ void* StorageManager::allocateSlots(const std::size_t num_slots,
 
 #if defined(QUICKSTEP_HAVE_LIBNUMA)
   if (numa_node != -1) {
-    DCHECK(numa_node < numa_num_configured_nodes());
+    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
     struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
     // numa_node can be 0 through n-1, where n is the num of NUMA nodes.
     numa_bitmask_setbit(numa_node_bitmask, numa_node);
@@ -487,23 +481,18 @@ MutableBlockReference StorageManager::getBlockInternal(
     const block_id block,
     const CatalogRelationSchema &relation,
     const int numa_node) {
-  std::size_t num_slots = 0u;
   MutableBlockReference ret;
   {
-    // First, see if the block is in the buffer pool. If it is, we can return
-    // a reference to it immediately.
     SpinSharedMutexSharedLock<false> eviction_lock(*lock_manager_.get(block));
     SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
     std::unordered_map<block_id, BlockHandle>::iterator it = blocks_.find(block);
     if (it != blocks_.end()) {
-      DCHECK(!it->second.block->isBlob());
+      DEBUG_ASSERT(!it->second.block->isBlob());
       ret = MutableBlockReference(static_cast<StorageBlock*>(it->second.block), eviction_policy_.get());
-    } else {
-      // The block was not loaded. Taking advantage of the shared lock on the
-      // buffer pool, retrieve the size of the block's file.
-      num_slots = file_manager_->numSlots(block);
     }
   }
+  // To be safe, release the block's shard after 'eviction_lock' destructs.
+  lock_manager_.release(block);
 
   // Note that there is no way for the block to be evicted between the call to
   // loadBlock and the call to EvictionPolicy::blockReferenced from
@@ -517,33 +506,16 @@ MutableBlockReference StorageManager::getBlockInternal(
       SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
       std::unordered_map<block_id, BlockHandle>::iterator it = blocks_.find(block);
       if (it != blocks_.end()) {
-        DCHECK(!it->second.block->isBlob());
+        DEBUG_ASSERT(!it->second.block->isBlob());
         ret = MutableBlockReference(static_cast<StorageBlock*>(it->second.block), eviction_policy_.get());
         return ret;
       }
     }
-
-    // Call a best-effort method to evict blocks until the size of our buffer
-    // pool falls below the current buffer pool size plus the size of the
-    // block we are going to retrieve.
-    makeRoomForBlock(num_slots);
-
     // No other thread loaded the block before us.
-    // But going forward be careful as there is a potential self-deadlock
-    // situation here -- we are holding an Exclusive lock (io_lock)
-    //   and getting ready to go into the call chain
-    //   "MutableBlockReference"/"loadBlock" -> "loadBlockOrBlob"
-    //       -> "allocateSlots" -> "makeRoomForBlock"
-    //   In "makeRoomForBlock," we will acquire an exclusive lock via the call
-    //   "eviction_lock(*lock_manager_.get(block_index))"
-    //   This situation could lead to a self-deadlock as block_index could
-    //   hash to the same position in the "ShardedLockManager" as "block."
-    //   To deal with this case, we pass the block information for "block"
-    //   though the call chain, and check for a collision in the the
-    //   "ShardedLockManager" in the function "makeRoomForBlock."
-    //   If a collision is detected we avoid a self-deadlock.
     ret = MutableBlockReference(loadBlock(block, relation, numa_node), eviction_policy_.get());
   }
+  // To be safe, release the block's shard after 'io_lock' destructs.
+  lock_manager_.release(block);
 
   return ret;
 }
@@ -556,10 +528,12 @@ MutableBlobReference StorageManager::getBlobInternal(const block_id blob,
     SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
     std::unordered_map<block_id, BlockHandle>::iterator it = blocks_.find(blob);
     if (it != blocks_.end()) {
-      DCHECK(it->second.block->isBlob());
+      DEBUG_ASSERT(it->second.block->isBlob());
       ret = MutableBlobReference(static_cast<StorageBlob*>(it->second.block), eviction_policy_.get());
     }
   }
+  // To be safe, release the blob's shard after 'eviction_lock' destructs.
+  lock_manager_.release(blob);
 
   if (!ret.valid()) {
     SpinSharedMutexExclusiveLock<false> io_lock(*lock_manager_.get(blob));
@@ -572,7 +546,7 @@ MutableBlobReference StorageManager::getBlobInternal(const block_id blob,
       SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
       std::unordered_map<block_id, BlockHandle>::iterator it = blocks_.find(blob);
       if (it != blocks_.end()) {
-        DCHECK(it->second.block->isBlob());
+        DEBUG_ASSERT(it->second.block->isBlob());
         ret = MutableBlobReference(static_cast<StorageBlob*>(it->second.block), eviction_policy_.get());
         return ret;
       }
@@ -580,6 +554,8 @@ MutableBlobReference StorageManager::getBlobInternal(const block_id blob,
     // No other thread loaded the blob before us.
     ret = MutableBlobReference(loadBlob(blob, numa_node), eviction_policy_.get());
   }
+  // To be safe, release the blob's shard after 'io_lock' destructs.
+  lock_manager_.release(blob);
 
   return ret;
 }
@@ -590,23 +566,53 @@ void StorageManager::makeRoomForBlock(const size_t slots) {
     EvictionPolicy::Status status = eviction_policy_->chooseBlockToEvict(&block_index);
 
     if (status == EvictionPolicy::Status::kOk) {
-      SpinSharedMutexExclusiveLock<false> eviction_lock(*lock_manager_.get(block_index));
+      bool has_collision = false;
+      SpinSharedMutexExclusiveLock<false> eviction_lock(*lock_manager_.get(block_index, &has_collision));
+      if (has_collision) {
+        // We have a collision in the shared lock manager, where some callers
+        // of this function (i.e., getBlockInternal or getBlobInternal) has
+        // acquired an exclusive lock, and we are trying to evict a block that
+        // hashes to the same location. This will cause a deadlock.
+
+        // For now simply treat this situation as the case where there is not
+        // enough memory and we temporarily go over the memory limit.
+        break;
+      }
+
       StorageBlockBase* block;
       {
         SpinSharedMutexSharedLock<false> read_lock(blocks_shared_mutex_);
         if (blocks_.find(block_index) == blocks_.end()) {
           // another thread must have jumped in and evicted it before us
+
+          // NOTE(zuyu): It is ok to release the shard for a block or blob,
+          // before 'eviction_lock' destructs, because we will never encounter a
+          // self-deadlock in a single thread, and in multiple-thread case some
+          // thread will block but not deadlock if there is a shard collision.
+          lock_manager_.release(block_index);
           continue;
         }
         block = blocks_[block_index].block;
       }
       if (eviction_policy_->getRefCount(block->getID()) > 0) {
         // Someone sneaked in and referenced the block before we could evict it.
+
+        // NOTE(zuyu): It is ok to release the shard for a block or blob, before
+        // before 'eviction_lock' destructs, because we will never encounter a
+        // self-deadlock in a single thread, and in multiple-thread case some
+        // thread will block but not deadlock if there is a shard collision.
+        lock_manager_.release(block_index);
         continue;
       }
-      if (saveBlockOrBlobInternal(block->getID(), false)) {
+      if (saveBlockOrBlob(block->getID())) {
         evictBlockOrBlob(block->getID());
       }  // else : Someone sneaked in and evicted the block before we could.
+
+      // NOTE(zuyu): It is ok to release the shard for a block or blob, before
+      // before 'eviction_lock' destructs, because we will never encounter a
+      // self-deadlock in a single thread, and in multiple-thread case some
+      // thread will block but not deadlock if there is a shard collision.
+      lock_manager_.release(block_index);
     } else {
       // If status was not ok, then we must not have been able to evict enough
       // blocks; therefore, we return anyway, temporarily going over the memory

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/StorageManager.hpp
----------------------------------------------------------------------
diff --git a/storage/StorageManager.hpp b/storage/StorageManager.hpp
index dd67177..dab33f6 100644
--- a/storage/StorageManager.hpp
+++ b/storage/StorageManager.hpp
@@ -84,7 +84,7 @@ class StorageManager {
    *        storage.
    * @param max_memory_usage The maximum amount of memory that the storage
    *                         manager should use for cached blocks in slots. If
-   *                         a block is requested that is not currently in
+   *                         an block is requested that is not currently in
    *                         memory and there are already max_memory_usage slots
    *                         in use in memory, then the storage manager will
    *                         attempt to evict enough blocks to make room for the
@@ -224,7 +224,6 @@ class StorageManager {
 
   /**
    * @brief Save a block or blob in memory to the persistent storage.
-   * @details Obtains a read lock on the shard containing the saved block.
    *
    * @param block The id of the block or blob to save.
    * @param force Force the block to the persistent storage, even if it is not
@@ -358,20 +357,8 @@ class StorageManager {
   // Allocate a buffer to hold the specified number of slots. The memory
   // returned will be zeroed-out, and mapped using large pages if the system
   // supports it.
-  // Note if the last parameter "locked_block_id" is set to something other than
-  // "kInvalidBlockId," then it means that the caller has acquired
-  // a lock in the sharded lock manager for that block. Thus, if a block needs
-  // to be evicted by the EvictionPolicy in the "makeRoomForBlock" call, and
-  // if the block to be evicted happens to hash to the same entry in the
-  // sharded lock manager, then the Eviction policy needs to pick a different
-  // block for eviction.
-  // The key point is that if "locked_block_id" is not "kInvalidBlockId," then
-  // the caller of allocateSlots, e.g. loadBlock, will have acquired a lock
-  // in the sharded lock manager for the block "locked_block_id."
   void* allocateSlots(const std::size_t num_slots,
-                      const int numa_node,
-                      // const block_id locked_block_id = kInvalidBlockId);
-                      const block_id locked_block_id);
+                      const int numa_node);
 
   // Deallocate a buffer allocated by allocateSlots(). This must be used
   // instead of free(), because the underlying implementation of
@@ -380,20 +367,6 @@ class StorageManager {
                        const std::size_t num_slots);
 
   /**
-   * @brief Save a block or blob in memory to the persistent storage.
-   * 
-   * @param block The id of the block or blob to save.
-   * @param force Force the block to the persistent storage, even if it is not
-   *        dirty (by default, only actually write dirty blocks to the
-   *        persistent storage).
-   * 
-   * @return False if the block is not found in the memory. True if the block is
-   *         successfully saved to the persistent storage OR the block is clean
-   *         and force is false.
-   */
-  bool saveBlockOrBlobInternal(const block_id block, const bool force);
-
-  /**
    * @brief Evict a block or blob from memory.
    * @note The block is NOT automatically saved, so call saveBlock() first if
    *       necessary.
@@ -434,7 +407,7 @@ class StorageManager {
    *
    * @param slots Number of slots to make room for.
    */
-  void makeRoomForBlock(const size_t slots);
+  void makeRoomForBlock(const std::size_t slots);
 
   /**
    * @brief Load a block from the persistent storage into memory.

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/storage/tests/StorageManager_unittest.cpp
----------------------------------------------------------------------
diff --git a/storage/tests/StorageManager_unittest.cpp b/storage/tests/StorageManager_unittest.cpp
index 60537a9..4c252a1 100644
--- a/storage/tests/StorageManager_unittest.cpp
+++ b/storage/tests/StorageManager_unittest.cpp
@@ -205,4 +205,46 @@ TEST(StorageManagerTest, DifferentNUMANodeBlobTestWithEviction) {
 }
 #endif  // QUICKSTEP_HAVE_LIBNUMA
 
+// Trigger an eviction from the same shard in StorageManager's
+// ShardedLockManager while attempting to load a blob. Previously, a bug
+// existed that caused a self-deadlock in such situations. This test reproduces
+// the issue and validates the fix.
+TEST(StorageManagerTest, EvictFromSameShardTest) {
+  // Set up a StorageManager with a soft memory limit of only one slot.
+  StorageManager storage_manager("eviction_test_storage", 1);
+
+  // Create a blob.
+  const block_id blob_a_id = storage_manager.createBlob(1);
+
+  // Blob "a" is now memory-resident in StorageManager, but has a reference
+  // count of zero.
+  EXPECT_TRUE(storage_manager.blockOrBlobIsLoaded(blob_a_id));
+  EXPECT_EQ(kSlotSizeBytes, storage_manager.getMemorySize());
+
+  // Manually alter 'block_index_' inside 'storage_manager' so that the next
+  // block_id generated will be in the same shard as 'blob_id_a'.
+  storage_manager.block_index_.fetch_add(StorageManager::kLockManagerNumShards - 1);
+
+  // Create another blob and verify that it is in the same shard.
+  const block_id blob_b_id = storage_manager.createBlob(1);
+  EXPECT_EQ(storage_manager.lock_manager_.get(blob_a_id),
+            storage_manager.lock_manager_.get(blob_b_id));
+
+  // Creating a second blob should have triggered an eviction that kicked
+  // blob A out.
+  EXPECT_FALSE(storage_manager.blockOrBlobIsLoaded(blob_a_id));
+  EXPECT_TRUE(storage_manager.blockOrBlobIsLoaded(blob_b_id));
+  EXPECT_EQ(kSlotSizeBytes, storage_manager.getMemorySize());
+
+  // Try and get a reference to blob A. Blob A must be reloaded from disk.
+  // This will trigger an eviction of blob B. This is the point where the
+  // self-deadlock bug could be observed.
+  BlobReference blob_a_ref = storage_manager.getBlob(blob_a_id);
+
+  // Reaching this point means we have not self-deadlocked. Now clean up.
+  blob_a_ref.release();
+  storage_manager.deleteBlockOrBlobFile(blob_a_id);
+  storage_manager.deleteBlockOrBlobFile(blob_b_id);
+}
+
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/utility/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/utility/CMakeLists.txt b/utility/CMakeLists.txt
index 30f01ef..4ff9254 100644
--- a/utility/CMakeLists.txt
+++ b/utility/CMakeLists.txt
@@ -243,6 +243,7 @@ target_link_libraries(quickstep_utility_SortConfiguration_proto
                       quickstep_expressions_Expressions_proto
                       ${PROTOBUF_LIBRARY})
 target_link_libraries(quickstep_utility_ShardedLockManager
+                      quickstep_storage_StorageConstants
                       quickstep_threading_SharedMutex
                       quickstep_utility_Macros)
 target_link_libraries(quickstep_utility_StringUtil

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/44235508/utility/ShardedLockManager.hpp
----------------------------------------------------------------------
diff --git a/utility/ShardedLockManager.hpp b/utility/ShardedLockManager.hpp
index 0248882..1d59acb 100644
--- a/utility/ShardedLockManager.hpp
+++ b/utility/ShardedLockManager.hpp
@@ -1,6 +1,6 @@
 /**
  *   Copyright 2011-2015 Quickstep Technologies LLC.
- *   Copyright 2015 Pivotal Software, Inc.
+ *   Copyright 2015-2016 Pivotal Software, Inc.
  *
  *   Licensed under the Apache License, Version 2.0 (the "License");
  *   you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@
 #include <cstddef>
 #include <functional>
 
+#include "storage/StorageConstants.hpp"
 #include "threading/SharedMutex.hpp"
 #include "utility/Macros.hpp"
 
@@ -51,16 +52,59 @@ class ShardedLockManager {
 
   /**
    * @brief Get the SharedMutex corresponding to the provided key.
-   * @param  key The key to map to a SharedMutex.
-   * @return     The corresponding SharedMutex.
+   * @param key The key to map to a SharedMutex.
+   * @param has_collision Whether accessing the given key would result in a
+   *        hash collision. Used in StorageManager::makeRoomForBlock only.
+   * @return The corresponding SharedMutex if there is no collision; otherwise,
+   *         the collision SharedMutex.
    */
-  SharedMutexT *get(const T key) {
-    return &shards[hash_(key) % N];
+  SharedMutexT* get(const T key, bool *has_collision = nullptr) {
+    const std::size_t shard = hash_(key) % N;
+
+    if (has_collision != nullptr) {
+      // In StorageManager::makeRoomForBlock, check whether the evicting block
+      // or blob has a shard collision with existing referenced shards.
+      SpinSharedMutexSharedLock<false> read_lock(shards_mutex_);
+      if (shards_.find(shard) != shards_.end()) {
+        *has_collision = true;
+        return &collision_mutex_;
+      }
+    }
+
+    {
+      SpinSharedMutexExclusiveLock<false> write_lock(shards_mutex_);
+
+      // Check one more time for the evicting block or blob if there is a shard
+      // collision.
+      if (has_collision != nullptr && shards_.find(shard) != shards_.end()) {
+        *has_collision = true;
+        return &collision_mutex_;
+      }
+
+      shards_.insert(shard);
+    }
+    return &sharded_mutexes_[shard];
+  }
+
+  /**
+   * @brief Release the shard corresponding to the provided key.
+   * @param key The key to compute the shard.
+   */
+  void release(const T key) {
+    SpinSharedMutexExclusiveLock<false> write_lock(shards_mutex_);
+    shards_.erase(hash_(key) % N);
   }
 
  private:
   std::hash<T> hash_;
-  std::array<SharedMutexT, N> shards;
+  std::array<SharedMutexT, N> sharded_mutexes_;
+
+  // The placeholder mutex used whenever there is a hash collision.
+  SharedMutexT collision_mutex_;
+
+  // Bookkeep all shards referenced by StorageManager in multiple threads.
+  std::unordered_set<std::size_t> shards_;
+  alignas(kCacheLineBytes) mutable SpinSharedMutex<false> shards_mutex_;
 
   DISALLOW_COPY_AND_ASSIGN(ShardedLockManager);
 };