You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/09/07 21:17:17 UTC

[1/3] kudu git commit: compaction_policy: avoid O(n^2) calls to EstimateOnDiskSize

Repository: kudu
Updated Branches:
  refs/heads/master 6a12ba3f7 -> b1f1388e2


compaction_policy: avoid O(n^2) calls to EstimateOnDiskSize

In a cluster workload with a 130GB+ tablet, I found that the maintenance
manager scheduler thread was spending tens of seconds inside
RowSetInfo::CollectOrdered(), mostly inside calls to
EstimateOnDiskSize(). While no individual call is exceedingly slow, each
one involves a lot of virtual function calls and potential CPU cache
misses, so the cost appears to add up.

I deployed this patch on the cluster and found that the
MaintenanceManager 'FindBestOps' call went from ~16 seconds to ~350ms.

Change-Id: Ic2949218d7f5fd822571a7b14d1d0b4430aeee1d
Reviewed-on: http://gerrit.cloudera.org:8080/4191
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/859cf31d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/859cf31d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/859cf31d

Branch: refs/heads/master
Commit: 859cf31d13228d5e07d5ed35bdeb6e237dc1701f
Parents: 6a12ba3
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Aug 31 15:30:25 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Sep 7 06:28:44 2016 +0000

----------------------------------------------------------------------
 src/kudu/tablet/rowset_info.cc |  8 ++++----
 src/kudu/tablet/rowset_info.h  | 11 +++++++++--
 2 files changed, 13 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc
index 8ed1028..123266d 100644
--- a/src/kudu/tablet/rowset_info.cc
+++ b/src/kudu/tablet/rowset_info.cc
@@ -137,7 +137,7 @@ double WidthByDataSize(const Slice& prev, const Slice& next,
 
   for (const auto& rs_rsi : active) {
     double fraction = StringFractionInRange(rs_rsi.second, prev, next);
-    weight += rs_rsi.first->EstimateOnDiskSize() * fraction;
+    weight += rs_rsi.second->size_bytes() * fraction;
   }
 
   return weight;
@@ -253,8 +253,8 @@ void RowSetInfo::CollectOrdered(const RowSetTree& tree,
 
 RowSetInfo::RowSetInfo(RowSet* rs, double init_cdf)
   : rowset_(rs),
-    size_mb_(std::max(implicit_cast<int>(rs->EstimateOnDiskSize() / 1024 / 1024),
-                      kMinSizeMb)),
+    size_bytes_(rs->EstimateOnDiskSize()),
+    size_mb_(std::max(implicit_cast<int>(size_bytes_ / 1024 / 1024), kMinSizeMb)),
     cdf_min_key_(init_cdf),
     cdf_max_key_(init_cdf) {
   has_bounds_ = rs->GetBounds(&min_key_, &max_key_).ok();
@@ -266,7 +266,7 @@ void RowSetInfo::FinalizeCDFVector(vector<RowSetInfo>* vec,
   for (RowSetInfo& cdf_rs : *vec) {
     CHECK_GT(cdf_rs.size_mb_, 0) << "Expected file size to be at least 1MB "
                                  << "for RowSet " << cdf_rs.rowset_->ToString()
-                                 << ", was " << cdf_rs.rowset_->EstimateOnDiskSize()
+                                 << ", was " << cdf_rs.size_bytes()
                                  << " bytes.";
     cdf_rs.cdf_min_key_ /= quot;
     cdf_rs.cdf_max_key_ /= quot;

http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index cb315dc..767b7a6 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -41,6 +41,7 @@ class RowSetInfo {
                              std::vector<RowSetInfo>* min_key,
                              std::vector<RowSetInfo>* max_key);
 
+  int size_bytes() const { return size_bytes_; }
   int size_mb() const { return size_mb_; }
 
   // Return the value of the CDF at the minimum key of this candidate.
@@ -80,8 +81,14 @@ class RowSetInfo {
   static void FinalizeCDFVector(std::vector<RowSetInfo>* vec,
                                 double quot);
 
-  RowSet* rowset_;
-  int size_mb_;
+  RowSet* const rowset_;
+
+  // Cached version of rowset_->EstimateOnDiskSize().
+  const int size_bytes_;
+
+  // The size in MB, already clamped so that all rowsets have size at least
+  // 1MB. This is cached to avoid the branch during the selection hot path.
+  const int size_mb_;
 
   // True if the RowSet has known bounds.
   // MemRowSets in particular do not.


[3/3] kudu git commit: docs: fix list of supported encodings

Posted by ad...@apache.org.
docs: fix list of supported encodings

Change-Id: I47d7b29c802b0c8fee178b59d2e26ab00bbfbc72
Reviewed-on: http://gerrit.cloudera.org:8080/4322
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b1f1388e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b1f1388e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b1f1388e

Branch: refs/heads/master
Commit: b1f1388e2525bb236a4154762718aabd86e3b51f
Parents: 94ec3d5
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Sep 6 15:52:30 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Wed Sep 7 21:08:59 2016 +0000

----------------------------------------------------------------------
 docs/schema_design.adoc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/b1f1388e/docs/schema_design.adoc
----------------------------------------------------------------------
diff --git a/docs/schema_design.adoc b/docs/schema_design.adoc
index 1d3078f..54134d9 100644
--- a/docs/schema_design.adoc
+++ b/docs/schema_design.adoc
@@ -256,9 +256,9 @@ of the column. Columns use plain encoding by default.
 [options="header"]
 |===
 | Column Type        | Encoding
-| integer, timestamp | plain, bitshuffle, run length
-| float              | plain, bitshuffle
-| bool               | plain, dictionary, run length
+| integer, timestamp | plain, bitshuffle, run length (except for 64-bit)
+| float, double      | plain, bitshuffle
+| bool               | plain, run length
 | string, binary     | plain, prefix, dictionary
 |===
 


[2/3] kudu git commit: Fix TSAN race in log throttler

Posted by ad...@apache.org.
Fix TSAN race in log throttler

After adding new log throttling code, some tests became flaky in TSAN.
This should fix the warnings by annotating the benign race.

Change-Id: Iabc063435e7f5c4f33ca1f195bf1920044a7913a
Reviewed-on: http://gerrit.cloudera.org:8080/4324
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/94ec3d51
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/94ec3d51
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/94ec3d51

Branch: refs/heads/master
Commit: 94ec3d515261b877617460be07f1639d2ef17f85
Parents: 859cf31
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Sep 7 08:56:42 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Sep 7 18:47:36 2016 +0000

----------------------------------------------------------------------
 src/kudu/util/logging.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/94ec3d51/src/kudu/util/logging.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.h b/src/kudu/util/logging.h
index 78fdf0b..48234be 100644
--- a/src/kudu/util/logging.h
+++ b/src/kudu/util/logging.h
@@ -223,7 +223,7 @@ namespace logging {
 class LogThrottler {
  public:
   LogThrottler() : num_suppressed_(0), last_ts_(0), last_tag_(nullptr) {
-    ANNOTATE_BENIGN_RACE(&last_ts_, "OK to be sloppy with log throttling");
+    ANNOTATE_BENIGN_RACE_SIZED(this, sizeof(*this), "OK to be sloppy with log throttling");
   }
 
   bool ShouldLog(int n_secs, const char* tag, int* num_suppressed) {