You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2017/03/23 04:18:59 UTC

[3/5] kudu git commit: key_encoder: avoid unordered_map for looking up encoders

key_encoder: avoid unordered_map for looking up encoders

I saw EncoderResolver::GetKeyEncoder() taking up ~3% of CPU in some
workloads, mostly caused by a 'div' instruction in unordered_map. Since
we know that the type enums are low integers, we can just use a vector
for lookup as well.

This also switches from shared_ptr to unique_ptr for holding the
resolvers: the shared_ptr is a holdover from pre-C++11 where putting
single-ownership smart pointers into a container was impossible.

Change-Id: I5a9bf3a28e451562c3347fe9650db3874d62a368
Reviewed-on: http://gerrit.cloudera.org:8080/6431
Reviewed-by: David Ribeiro Alves <dr...@apache.org>
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <aw...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/3679eadb
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/3679eadb
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/3679eadb

Branch: refs/heads/master
Commit: 3679eadb2c71b5ffa543862bf83fa69e49aecc7c
Parents: 4f51c66
Author: Todd Lipcon <to...@apache.org>
Authored: Sun Mar 19 15:45:48 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Mar 22 07:23:00 2017 +0000

----------------------------------------------------------------------
 src/kudu/cfile/cfile-test.cc   |  4 ++--
 src/kudu/common/key_encoder.cc | 23 +++++++++++++++--------
 2 files changed, 17 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/3679eadb/src/kudu/cfile/cfile-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/cfile/cfile-test.cc b/src/kudu/cfile/cfile-test.cc
index 6935e27..794ee9e 100644
--- a/src/kudu/cfile/cfile-test.cc
+++ b/src/kudu/cfile/cfile-test.cc
@@ -21,9 +21,9 @@
 #include <list>
 
 #include "kudu/cfile/cfile-test-base.h"
+#include "kudu/cfile/cfile.pb.h"
 #include "kudu/cfile/cfile_reader.h"
 #include "kudu/cfile/cfile_writer.h"
-#include "kudu/cfile/cfile.pb.h"
 #include "kudu/cfile/index_block.h"
 #include "kudu/cfile/index_btree.h"
 #include "kudu/common/columnblock.h"
@@ -31,8 +31,8 @@
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/stringprintf.h"
 #include "kudu/util/metrics.h"
-#include "kudu/util/test_macros.h"
 #include "kudu/util/stopwatch.h"
+#include "kudu/util/test_macros.h"
 
 DECLARE_string(block_cache_type);
 DECLARE_string(cfile_do_on_finish);

http://git-wip-us.apache.org/repos/asf/kudu/blob/3679eadb/src/kudu/common/key_encoder.cc
----------------------------------------------------------------------
diff --git a/src/kudu/common/key_encoder.cc b/src/kudu/common/key_encoder.cc
index 84a38bf..453e8c7 100644
--- a/src/kudu/common/key_encoder.cc
+++ b/src/kudu/common/key_encoder.cc
@@ -16,8 +16,8 @@
 // under the License.
 
 #include <functional>
+#include <memory>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
 #include "kudu/common/common.pb.h"
@@ -26,22 +26,22 @@
 #include "kudu/gutil/singleton.h"
 #include "kudu/util/faststring.h"
 
-using std::shared_ptr;
-using std::unordered_map;
+using std::unique_ptr;
+using std::vector;
 
 namespace kudu {
 
-
 // A resolver for Encoders
 template <typename Buffer>
 class EncoderResolver {
  public:
   const KeyEncoder<Buffer>& GetKeyEncoder(DataType t) {
-    return *FindOrDie(encoders_, t);
+    DCHECK(HasKeyEncoderForType(t));
+    return *encoders_[t];
   }
 
   const bool HasKeyEncoderForType(DataType t) {
-    return ContainsKey(encoders_, t);
+    return t < encoders_.size() && encoders_[t];
   }
 
  private:
@@ -59,11 +59,18 @@ class EncoderResolver {
 
   template<DataType Type> void AddMapping() {
     KeyEncoderTraits<Type, Buffer> traits;
-    InsertOrDie(&encoders_, Type, shared_ptr<KeyEncoder<Buffer> >(new KeyEncoder<Buffer>(traits)));
+    if (encoders_.size() <= Type) {
+      encoders_.resize(static_cast<size_t>(Type) + 1);
+    }
+    CHECK(!encoders_[Type]) << "already have mapping for " << DataType_Name(Type);
+    encoders_[Type].reset(new KeyEncoder<Buffer>(traits));
   }
 
   friend class Singleton<EncoderResolver<Buffer> >;
-  unordered_map<DataType, shared_ptr<KeyEncoder<Buffer> >, std::hash<size_t> > encoders_;
+  // We use a vector instead of a map here since this shows up in some hot paths
+  // and we know that the valid data types all have low enough IDs that the
+  // vector will be small.
+  vector<unique_ptr<KeyEncoder<Buffer>>> encoders_;
 };
 
 template <typename Buffer>