You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2017/03/13 23:32:49 UTC

kudu git commit: KUDU-1931: fix SIGSEGV in file cache RandomAccessFile::memory_footprint()

Repository: kudu
Updated Branches:
  refs/heads/master 7034bffc1 -> c658c8696


KUDU-1931: fix SIGSEGV in file cache RandomAccessFile::memory_footprint()

TIL that the pointer returned by std::make_shared().get() or
*std::make_shared() isn't the base of the underlying heap memory allocation,
because the shared_ptr control block _precedes_ the object pointer.

This means that kudu_malloc_usable_size(this) is totally unsafe if 'this'
was allocated via std::make_shared(). With glibc's malloc, it caused a
crash. Thankfully tcmalloc is more forgiving: it finds its allocations'
bookkeeping information by converting addresses into their underlying pages,
so a crash would only occur if there was a page boundary between the
shared_ptr control block and the object's pointer address (I don't even know
if this is possible).

I found this bug by accident when I ran an in-progress unit test with
--block_manager=file. This speaks to our lack of FBM coverage. For now, I've
added some coverage but we really should be running at least one end-to-end
test on all block managers; I filed KUDU-1932 to track that.

Change-Id: I9e57c06dd35252d03729c10ff64fe43d79288513
Reviewed-on: http://gerrit.cloudera.org:8080/6359
Reviewed-by: David Ribeiro Alves <dr...@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c658c869
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c658c869
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c658c869

Branch: refs/heads/master
Commit: c658c869689b9589e653e8a24568b9f95d6ff65d
Parents: 7034bff
Author: Adar Dembo <ad...@cloudera.com>
Authored: Sun Mar 12 13:21:01 2017 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Mon Mar 13 23:32:05 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/block_manager-test.cc |  4 ++++
 src/kudu/util/file_cache-test.cc  | 15 +++++++++++++++
 src/kudu/util/file_cache.cc       | 20 +++++++++++++++++++-
 3 files changed, 38 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/fs/block_manager-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/block_manager-test.cc b/src/kudu/fs/block_manager-test.cc
index a58f27f..600ce05 100644
--- a/src/kudu/fs/block_manager-test.cc
+++ b/src/kudu/fs/block_manager-test.cc
@@ -435,6 +435,10 @@ TYPED_TEST(BlockManagerTest, EndToEndTest) {
   ASSERT_OK(read_block->Read(0, test_data.length(), &data, scratch.get()));
   ASSERT_EQ(test_data, data);
 
+  // We don't actually do anything with the result of this call; we just want
+  // to make sure it doesn't trigger a crash (see KUDU-1931).
+  LOG(INFO) << "Block memory footprint: " << read_block->memory_footprint();
+
   // Delete the block.
   ASSERT_OK(this->bm_->DeleteBlock(written_block->id()));
   ASSERT_TRUE(this->bm_->OpenBlock(written_block->id(), nullptr)

http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/util/file_cache-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/file_cache-test.cc b/src/kudu/util/file_cache-test.cc
index 2347ba9..d61a0fd 100644
--- a/src/kudu/util/file_cache-test.cc
+++ b/src/kudu/util/file_cache-test.cc
@@ -288,4 +288,19 @@ TYPED_TEST(FileCacheTest, TestNoRecursiveDeadlock) {
   }
 }
 
+class RandomAccessFileCacheTest : public FileCacheTest<RandomAccessFile> {
+};
+
+TEST_F(RandomAccessFileCacheTest, TestMemoryFootprintDoesNotCrash) {
+  const string kFile = this->GetTestPath("foo");
+  ASSERT_OK(this->WriteTestFile(kFile, "test data"));
+
+  shared_ptr<RandomAccessFile> f;
+  ASSERT_OK(this->cache_->OpenExistingFile(kFile, &f));
+
+  // This used to crash due to a kudu_malloc_usable_size() call on a memory
+  // address that wasn't the start of an actual heap allocation.
+  LOG(INFO) << f->memory_footprint();
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/c658c869/src/kudu/util/file_cache.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/file_cache.cc b/src/kudu/util/file_cache.cc
index 7e3519a..6d4e5d6 100644
--- a/src/kudu/util/file_cache.cc
+++ b/src/kudu/util/file_cache.cc
@@ -337,7 +337,25 @@ class Descriptor<RandomAccessFile> : public RandomAccessFile {
   }
 
   size_t memory_footprint() const override {
-    return kudu_malloc_usable_size(this) +
+    // Normally we would use kudu_malloc_usable_size(this). However, that's
+    // not safe because 'this' was allocated via std::make_shared(), which
+    // means it isn't necessarily the base of the memory allocation; it may be
+    // preceded by the shared_ptr control block.
+    //
+    // It doesn't appear possible to get the base of the allocation via any
+    // shared_ptr APIs, so we'll use sizeof(*this) + 16 instead. The 16 bytes
+    // represent the shared_ptr control block. Overall the object size is still
+    // undercounted as it doesn't account for any internal heap fragmentation,
+    // but at least it's safe.
+    //
+    // Some anecdotal memory measurements taken inside gdb:
+    // - glibc 2.23 malloc_usable_size() on make_shared<FileType>: 88 bytes.
+    // - tcmalloc malloc_usable_size() on make_shared<FileType>: 96 bytes.
+    // - sizeof(std::_Sp_counted_base<>) with libstdc++ 5.4: 16 bytes.
+    // - sizeof(std::__1::__shared_ptr_emplace<>) with libc++ 3.9: 16 bytes.
+    // - sizeof(*this): 72 bytes.
+    return sizeof(*this) +
+        16 + // shared_ptr control block
         once_.memory_footprint_excluding_this() +
         base_.filename().capacity();
   }