You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2017/03/27 18:17:56 UTC

[2/2] kudu git commit: log_block_manager: use sparse_hash_map for block map

log_block_manager: use sparse_hash_map for block map

Based on my testing, this reduces the memory usage of 1M blocks from
~24M to ~9MB. Lookup performance should be similar. Write performance
may be slightly slower but this is relatively infrequent and not on a
hot path.

I also did an allocation trace of a simple program putting 1M <int,int>
pairs into both types of maps. With unordered_map, this resulted in 1M
16-byte allocations and several larger allocations as the hashtable
bucket array grew (largest being 8MB). With the sparse_hash_map, the
allocation sizes were evenly distributed with ~55K allocations at a
bunch of sizes between 16 and 384 bytes, with no large allocations. This
should be more efficient in terms of avoiding memory fragmentation or
longer resizing pauses as blocks are added.

In order for this patch to work on gcc 4.9, I had to patch sparsehash.
gcc 4 doesn't support std::is_trivially_copy_constructible. This adds a
workaround to the sparsehash source to use boost's implementation of the
same.

Change-Id: I2e75deba7f7fb8d4f800695f195304df603e4ce9
Reviewed-on: http://gerrit.cloudera.org:8080/6471
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Todd Lipcon <to...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/0254216c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/0254216c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/0254216c

Branch: refs/heads/master
Commit: 0254216c18af036a62a11fcfae8a77b781d74aa2
Parents: 0e188a3
Author: Todd Lipcon <to...@apache.org>
Authored: Fri Mar 24 16:09:55 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Mon Mar 27 18:07:11 2017 +0000

----------------------------------------------------------------------
 src/kudu/fs/log_block_manager.cc                |  1 +
 src/kudu/fs/log_block_manager.h                 |  7 ++-
 thirdparty/download-thirdparty.sh               |  6 +++
 ...-Add-compatibily-for-gcc-4.x-in-traits.patch | 53 ++++++++++++++++++++
 4 files changed, 65 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/0254216c/src/kudu/fs/log_block_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.cc b/src/kudu/fs/log_block_manager.cc
index 6792226..503273f 100644
--- a/src/kudu/fs/log_block_manager.cc
+++ b/src/kudu/fs/log_block_manager.cc
@@ -1248,6 +1248,7 @@ LogBlockManager::LogBlockManager(Env* env, const BlockManagerOptions& opts)
     read_only_(opts.read_only),
     buggy_el6_kernel_(IsBuggyEl6Kernel(env->GetKernelRelease())),
     next_block_id_(1) {
+  blocks_by_block_id_.set_deleted_key(BlockId());
 
   int64_t file_cache_capacity = GetFileCacheCapacityForBlockManager(env_);
   if (file_cache_capacity != kint64max) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/0254216c/src/kudu/fs/log_block_manager.h
----------------------------------------------------------------------
diff --git a/src/kudu/fs/log_block_manager.h b/src/kudu/fs/log_block_manager.h
index 32249af..0d99d72 100644
--- a/src/kudu/fs/log_block_manager.h
+++ b/src/kudu/fs/log_block_manager.h
@@ -29,6 +29,7 @@
 
 #include <boost/optional/optional.hpp>
 #include <gtest/gtest_prod.h>
+#include <sparsehash/sparse_hash_map>
 
 #include "kudu/fs/block_id.h"
 #include "kudu/fs/block_manager.h"
@@ -197,10 +198,12 @@ class LogBlockManager : public BlockManager {
       BlockIdHash,
       BlockIdEqual> UntrackedBlockMap;
 
+  // Type for the actual block map used to store all live blocks.
+  // We use sparse_hash_map<> here to reduce memory overhead.
   typedef MemTrackerAllocator<
       std::pair<const BlockId, scoped_refptr<internal::LogBlock> > > BlockAllocator;
-  typedef std::unordered_map<
-      const BlockId,
+  typedef google::sparse_hash_map<
+      BlockId,
       scoped_refptr<internal::LogBlock>,
       BlockIdHash,
       BlockIdEqual,

http://git-wip-us.apache.org/repos/asf/kudu/blob/0254216c/thirdparty/download-thirdparty.sh
----------------------------------------------------------------------
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index 708ac52..aa53960 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -305,8 +305,14 @@ if [ ! -d "$BREAKPAD_SOURCE" ]; then
   fetch_and_expand breakpad-${BREAKPAD_VERSION}.tar.gz
 fi
 
+SPARSEHASH_PATCHLAVEL=2
+delete_if_wrong_patchlevel $SPARSEHASH_SOURCE $SPARSEHASH_PATCHLEVEL
 if [ ! -d "$SPARSEHASH_SOURCE" ]; then
   fetch_and_expand sparsehash-c11-${SPARSEHASH_VERSION}.tar.gz
+  pushd $SPARSEHASH_SOURCE
+  patch -p1 < $TP_DIR/patches/sparsehash-0001-Add-compatibily-for-gcc-4.x-in-traits.patch
+  touch patchlevel-$SPARSEHASH_PATCHLAVEL
+  popd
 fi
 
 echo "---------------"

http://git-wip-us.apache.org/repos/asf/kudu/blob/0254216c/thirdparty/patches/sparsehash-0001-Add-compatibily-for-gcc-4.x-in-traits.patch
----------------------------------------------------------------------
diff --git a/thirdparty/patches/sparsehash-0001-Add-compatibily-for-gcc-4.x-in-traits.patch b/thirdparty/patches/sparsehash-0001-Add-compatibily-for-gcc-4.x-in-traits.patch
new file mode 100644
index 0000000..43215c1
--- /dev/null
+++ b/thirdparty/patches/sparsehash-0001-Add-compatibily-for-gcc-4.x-in-traits.patch
@@ -0,0 +1,53 @@
+From 6c2e37ffdd85823549634ab53931bc412b297bde Mon Sep 17 00:00:00 2001
+From: Todd Lipcon <to...@cloudera.com>
+Date: Fri, 24 Mar 2017 15:14:31 -0700
+Subject: [PATCH] Add compatibily for gcc 4.x in traits
+
+gcc 4.x doesn't implement std::is_trivially_copy_constructible<>. In
+order to maintain compatibility with those versions, we switch to using
+the equivalent trait in boost.
+
+Tested by building a small test using gcc 4.8, gcc 5.4, and clang 3.9.
+
+This was discussed upstream at https://github.com/sparsehash/sparsehash-c11/pull/19
+but determined that it wasn't a good idea for the upstream project.
+---
+ sparsehash/traits | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/sparsehash/traits b/sparsehash/traits
+index 65135a6..e1e5648 100644
+--- a/sparsehash/traits
++++ b/sparsehash/traits
+@@ -30,6 +30,12 @@
+ #pragma once
+ #include <type_traits>
+ #include <utility>  // For pair
++#include <boost/move/detail/type_traits.hpp>
++
++// Work around lack of std::is_trivially_copy_constructible in gcc 4.x by
++// using the boost version.
++#define SPARSEHASH_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) \
++  ::boost::move_detail::is_trivially_copy_constructible<T>::value
+ 
+ namespace google {
+ 
+@@ -43,7 +49,7 @@ namespace google {
+ template <class T>
+ struct is_relocatable
+     : std::integral_constant<bool,
+-                             (std::is_trivially_copy_constructible<T>::value &&
++                             (SPARSEHASH_IS_TRIVIALLY_COPY_CONSTRUCTIBLE(T) &&
+                               std::is_trivially_destructible<T>::value)> {};
+ template <class T, class U>
+ struct is_relocatable<std::pair<T, U>>
+@@ -52,4 +58,4 @@ struct is_relocatable<std::pair<T, U>>
+ 
+ template <class T>
+ struct is_relocatable<const T> : is_relocatable<T> {};
+-}
+\ No newline at end of file
++}
+-- 
+2.7.4
+