You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2016/09/07 21:17:17 UTC
[1/3] kudu git commit: compaction_policy: avoid O(n^2) calls to EstimateOnDiskSize
Repository: kudu
Updated Branches:
refs/heads/master 6a12ba3f7 -> b1f1388e2
compaction_policy: avoid O(n^2) calls to EstimateOnDiskSize
In a cluster workload with a 130GB+ tablet, I found that the maintenance
manager scheduler thread was spending tens of seconds inside
RowSetInfo::CollectOrdered(), mostly inside calls to
EstimateOnDiskSize(). While no individual call is exceedingly slow, each
one involves many virtual function calls and potential CPU cache misses,
so the cost adds up.
I deployed this patch on the cluster and found that the
MaintenanceManager 'FindBestOps' call went from ~16 seconds to ~350ms.
Change-Id: Ic2949218d7f5fd822571a7b14d1d0b4430aeee1d
Reviewed-on: http://gerrit.cloudera.org:8080/4191
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/859cf31d
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/859cf31d
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/859cf31d
Branch: refs/heads/master
Commit: 859cf31d13228d5e07d5ed35bdeb6e237dc1701f
Parents: 6a12ba3
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Aug 31 15:30:25 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Sep 7 06:28:44 2016 +0000
----------------------------------------------------------------------
src/kudu/tablet/rowset_info.cc | 8 ++++----
src/kudu/tablet/rowset_info.h | 11 +++++++++--
2 files changed, 13 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.cc b/src/kudu/tablet/rowset_info.cc
index 8ed1028..123266d 100644
--- a/src/kudu/tablet/rowset_info.cc
+++ b/src/kudu/tablet/rowset_info.cc
@@ -137,7 +137,7 @@ double WidthByDataSize(const Slice& prev, const Slice& next,
for (const auto& rs_rsi : active) {
double fraction = StringFractionInRange(rs_rsi.second, prev, next);
- weight += rs_rsi.first->EstimateOnDiskSize() * fraction;
+ weight += rs_rsi.second->size_bytes() * fraction;
}
return weight;
@@ -253,8 +253,8 @@ void RowSetInfo::CollectOrdered(const RowSetTree& tree,
RowSetInfo::RowSetInfo(RowSet* rs, double init_cdf)
: rowset_(rs),
- size_mb_(std::max(implicit_cast<int>(rs->EstimateOnDiskSize() / 1024 / 1024),
- kMinSizeMb)),
+ size_bytes_(rs->EstimateOnDiskSize()),
+ size_mb_(std::max(implicit_cast<int>(size_bytes_ / 1024 / 1024), kMinSizeMb)),
cdf_min_key_(init_cdf),
cdf_max_key_(init_cdf) {
has_bounds_ = rs->GetBounds(&min_key_, &max_key_).ok();
@@ -266,7 +266,7 @@ void RowSetInfo::FinalizeCDFVector(vector<RowSetInfo>* vec,
for (RowSetInfo& cdf_rs : *vec) {
CHECK_GT(cdf_rs.size_mb_, 0) << "Expected file size to be at least 1MB "
<< "for RowSet " << cdf_rs.rowset_->ToString()
- << ", was " << cdf_rs.rowset_->EstimateOnDiskSize()
+ << ", was " << cdf_rs.size_bytes()
<< " bytes.";
cdf_rs.cdf_min_key_ /= quot;
cdf_rs.cdf_max_key_ /= quot;
http://git-wip-us.apache.org/repos/asf/kudu/blob/859cf31d/src/kudu/tablet/rowset_info.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_info.h b/src/kudu/tablet/rowset_info.h
index cb315dc..767b7a6 100644
--- a/src/kudu/tablet/rowset_info.h
+++ b/src/kudu/tablet/rowset_info.h
@@ -41,6 +41,7 @@ class RowSetInfo {
std::vector<RowSetInfo>* min_key,
std::vector<RowSetInfo>* max_key);
+ int size_bytes() const { return size_bytes_; }
int size_mb() const { return size_mb_; }
// Return the value of the CDF at the minimum key of this candidate.
@@ -80,8 +81,14 @@ class RowSetInfo {
static void FinalizeCDFVector(std::vector<RowSetInfo>* vec,
double quot);
- RowSet* rowset_;
- int size_mb_;
+ RowSet* const rowset_;
+
+ // Cached version of rowset_->EstimateOnDiskSize().
+ const int size_bytes_;
+
+ // The size in MB, already clamped so that all rowsets have size at least
+ // 1MB. This is cached to avoid the branch during the selection hot path.
+ const int size_mb_;
// True if the RowSet has known bounds.
// MemRowSets in particular do not.
[3/3] kudu git commit: docs: fix list of supported encodings
Posted by ad...@apache.org.
docs: fix list of supported encodings
Change-Id: I47d7b29c802b0c8fee178b59d2e26ab00bbfbc72
Reviewed-on: http://gerrit.cloudera.org:8080/4322
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/b1f1388e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/b1f1388e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/b1f1388e
Branch: refs/heads/master
Commit: b1f1388e2525bb236a4154762718aabd86e3b51f
Parents: 94ec3d5
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Sep 6 15:52:30 2016 -0700
Committer: Adar Dembo <ad...@cloudera.com>
Committed: Wed Sep 7 21:08:59 2016 +0000
----------------------------------------------------------------------
docs/schema_design.adoc | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/b1f1388e/docs/schema_design.adoc
----------------------------------------------------------------------
diff --git a/docs/schema_design.adoc b/docs/schema_design.adoc
index 1d3078f..54134d9 100644
--- a/docs/schema_design.adoc
+++ b/docs/schema_design.adoc
@@ -256,9 +256,9 @@ of the column. Columns use plain encoding by default.
[options="header"]
|===
| Column Type | Encoding
-| integer, timestamp | plain, bitshuffle, run length
-| float | plain, bitshuffle
-| bool | plain, dictionary, run length
+| integer, timestamp | plain, bitshuffle, run length (except for 64-bit)
+| float, double | plain, bitshuffle
+| bool | plain, run length
| string, binary | plain, prefix, dictionary
|===
[2/3] kudu git commit: Fix TSAN race in log throttler
Posted by ad...@apache.org.
Fix TSAN race in log throttler
After adding new log throttling code, some tests became flaky in TSAN.
This should fix the warnings by annotating the entire LogThrottler object,
rather than only last_ts_, as a benign race.
Change-Id: Iabc063435e7f5c4f33ca1f195bf1920044a7913a
Reviewed-on: http://gerrit.cloudera.org:8080/4324
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mp...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/94ec3d51
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/94ec3d51
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/94ec3d51
Branch: refs/heads/master
Commit: 94ec3d515261b877617460be07f1639d2ef17f85
Parents: 859cf31
Author: Todd Lipcon <to...@apache.org>
Authored: Wed Sep 7 08:56:42 2016 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Sep 7 18:47:36 2016 +0000
----------------------------------------------------------------------
src/kudu/util/logging.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/94ec3d51/src/kudu/util/logging.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/logging.h b/src/kudu/util/logging.h
index 78fdf0b..48234be 100644
--- a/src/kudu/util/logging.h
+++ b/src/kudu/util/logging.h
@@ -223,7 +223,7 @@ namespace logging {
class LogThrottler {
public:
LogThrottler() : num_suppressed_(0), last_ts_(0), last_tag_(nullptr) {
- ANNOTATE_BENIGN_RACE(&last_ts_, "OK to be sloppy with log throttling");
+ ANNOTATE_BENIGN_RACE_SIZED(this, sizeof(*this), "OK to be sloppy with log throttling");
}
bool ShouldLog(int n_secs, const char* tag, int* num_suppressed) {