You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by wd...@apache.org on 2018/11/02 21:06:19 UTC
[1/2] kudu git commit: [util] Add ParseStringsWithScheme in net_util
Repository: kudu
Updated Branches:
refs/heads/master 4ec2598a3 -> 137775bdd
[util] Add ParseStringsWithScheme in net_util
This commit adds a new util to allow parsing address with scheme, e.g.
'<fs>://<host>:<port>/<path>' into a <host>:<port> pair.
Change-Id: I2447ab80e0b0737c4eb2ba8216769a52b5c07ce0
Reviewed-on: http://gerrit.cloudera.org:8080/11843
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/32a4fe16
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/32a4fe16
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/32a4fe16
Branch: refs/heads/master
Commit: 32a4fe168aab97a0905283506da498304774748f
Parents: 4ec2598
Author: Hao Hao <ha...@cloudera.com>
Authored: Wed Oct 31 17:46:20 2018 -0700
Committer: Hao Hao <ha...@cloudera.com>
Committed: Thu Nov 1 20:03:56 2018 +0000
----------------------------------------------------------------------
src/kudu/util/net/net_util-test.cc | 46 +++++++++++++++++++++++++++++---
src/kudu/util/net/net_util.cc | 47 +++++++++++++++++++++++++++++++--
src/kudu/util/net/net_util.h | 17 +++++++++++-
3 files changed, 104 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/32a4fe16/src/kudu/util/net/net_util-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/net/net_util-test.cc b/src/kudu/util/net/net_util-test.cc
index 202ec6b..8ad0fba 100644
--- a/src/kudu/util/net/net_util-test.cc
+++ b/src/kudu/util/net/net_util-test.cc
@@ -76,13 +76,53 @@ TEST_F(NetUtilTest, TestParseAddresses) {
// Test some invalid addresses.
Status s = DoParseBindAddresses("0.0.0.0:xyz", &ret);
- ASSERT_STR_CONTAINS(s.ToString(), "Invalid port");
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid port");
s = DoParseBindAddresses("0.0.0.0:100000", &ret);
- ASSERT_STR_CONTAINS(s.ToString(), "Invalid port");
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid port");
s = DoParseBindAddresses("0.0.0.0:", &ret);
- ASSERT_STR_CONTAINS(s.ToString(), "Invalid port");
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid port");
+}
+
+TEST_F(NetUtilTest, TestParseAddressesWithScheme) {
+ vector<HostPort> hostports;
+ const uint16_t kDefaultPort = 12345;
+
+ EXPECT_OK(HostPort::ParseStringsWithScheme("", kDefaultPort, &hostports));
+ EXPECT_TRUE(hostports.empty());
+
+ EXPECT_OK(HostPort::ParseStringsWithScheme(",,,", kDefaultPort, &hostports));
+ EXPECT_TRUE(hostports.empty());
+
+ EXPECT_OK(HostPort::ParseStringsWithScheme("http://foo-bar-baz:1234", kDefaultPort, &hostports));
+ EXPECT_EQ(vector<HostPort>({ HostPort("foo-bar-baz", 1234) }), hostports);
+
+ EXPECT_OK(HostPort::ParseStringsWithScheme("http://foo-bar-baz:1234/path",
+ kDefaultPort, &hostports));
+ EXPECT_EQ(vector<HostPort>({ HostPort("foo-bar-baz", 1234) }), hostports);
+
+ EXPECT_OK(HostPort::ParseStringsWithScheme("http://abc:1234,xyz", kDefaultPort, &hostports));
+ EXPECT_EQ(vector<HostPort>({ HostPort("abc", 1234), HostPort("xyz", kDefaultPort) }),
+ hostports);
+
+ // Test some invalid addresses.
+ Status s = HostPort::ParseStringsWithScheme("abc:1234/path", kDefaultPort, &hostports);
+ EXPECT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid port");
+
+ s = HostPort::ParseStringsWithScheme("://scheme:12", kDefaultPort, &hostports);
+ EXPECT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid scheme format");
+
+ s = HostPort::ParseStringsWithScheme("http:///path", kDefaultPort, &hostports);
+ EXPECT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid address format");
+
+ s = HostPort::ParseStringsWithScheme("http://abc:1234,://scheme,xyz",
+ kDefaultPort, &hostports);
+ EXPECT_TRUE(s.IsInvalidArgument());
+ ASSERT_STR_CONTAINS(s.ToString(), "invalid scheme format");
}
TEST_F(NetUtilTest, TestResolveAddresses) {
http://git-wip-us.apache.org/repos/asf/kudu/blob/32a4fe16/src/kudu/util/net/net_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/net/net_util.cc b/src/kudu/util/net/net_util.cc
index 520882f..482747e 100644
--- a/src/kudu/util/net/net_util.cc
+++ b/src/kudu/util/net/net_util.cc
@@ -138,7 +138,7 @@ Status HostPort::ParseString(const string& str, uint16_t default_port) {
port = default_port;
} else if (!SimpleAtoi(p.second, &port) ||
port > 65535) {
- return Status::InvalidArgument("Invalid port", str);
+ return Status::InvalidArgument("invalid port", str);
}
host_.swap(p.first);
@@ -146,6 +146,31 @@ Status HostPort::ParseString(const string& str, uint16_t default_port) {
return Status::OK();
}
+Status HostPort::ParseStringWithScheme(const string& str, uint16_t default_port) {
+ string str_copy(str);
+ const string kSchemeSeparator = "://";
+ const string kPathSeparator = "/";
+
+ auto scheme_idx = str_copy.find(kSchemeSeparator);
+
+ if (scheme_idx == 0) {
+ return Status::InvalidArgument("invalid scheme format", str_copy);
+ }
+
+ if (scheme_idx != string::npos) {
+ str_copy.erase(0, scheme_idx + kSchemeSeparator.size());
+ auto path_idx = str_copy.find(kPathSeparator);
+ if (path_idx == 0) {
+ return Status::InvalidArgument("invalid address format", str_copy);
+ }
+ if (path_idx != string::npos) {
+ str_copy.erase(path_idx, str_copy.size());
+ }
+ }
+
+ return ParseString(str_copy, default_port);
+}
+
Status HostPort::ResolveAddresses(vector<Sockaddr>* addresses) const {
TRACE_EVENT1("net", "HostPort::ResolveAddresses",
"host", host_);
@@ -179,11 +204,29 @@ Status HostPort::ResolveAddresses(vector<Sockaddr>* addresses) const {
Status HostPort::ParseStrings(const string& comma_sep_addrs,
uint16_t default_port,
vector<HostPort>* res) {
+ res->clear();
+
vector<string> addr_strings = strings::Split(comma_sep_addrs, ",", strings::SkipEmpty());
+ res->reserve(addr_strings.size());
for (const string& addr_string : addr_strings) {
HostPort host_port;
RETURN_NOT_OK(host_port.ParseString(addr_string, default_port));
- res->push_back(host_port);
+ res->emplace_back(host_port);
+ }
+ return Status::OK();
+}
+
+Status HostPort::ParseStringsWithScheme(const string& comma_sep_addrs,
+ uint16_t default_port,
+ vector<HostPort>* res) {
+ res->clear();
+
+ vector<string> addr_strings = strings::Split(comma_sep_addrs, ",", strings::SkipEmpty());
+ res->reserve(addr_strings.size());
+ for (const string& addr_string : addr_strings) {
+ HostPort host_port;
+ RETURN_NOT_OK(host_port.ParseStringWithScheme(addr_string, default_port));
+ res->emplace_back(host_port);
}
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/32a4fe16/src/kudu/util/net/net_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/net/net_util.h b/src/kudu/util/net/net_util.h
index c471ae8..329c9f6 100644
--- a/src/kudu/util/net/net_util.h
+++ b/src/kudu/util/net/net_util.h
@@ -39,10 +39,20 @@ class HostPort {
return !host_.empty();
}
- // Parse a "host:port" pair into this object.
+ // Parse a <host>:<port> pair into this object.
// If there is no port specified in the string, then 'default_port' is used.
+ //
+ // Note that <host> cannot be in IPv6 address notation.
Status ParseString(const std::string& str, uint16_t default_port);
+ // Similar to above but allow the address to have scheme and path, e.g.
+ // <host>
+ // <host>:<port>
+ // <fs>://<host>:<port>/<path>
+ //
+ // Note that both scheme and path are ignored.
+ Status ParseStringWithScheme(const std::string& str, uint16_t default_port);
+
// Resolve any addresses corresponding to this host:port pair.
// Note that a host may resolve to more than one IP address.
//
@@ -67,6 +77,11 @@ class HostPort {
static Status ParseStrings(
const std::string& comma_sep_addrs, uint16_t default_port, std::vector<HostPort>* res);
+ // Similar to above but allow the addresses to have scheme and path,
+ // which are ignored.
+ static Status ParseStringsWithScheme(
+ const std::string& comma_sep_addrs, uint16_t default_port, std::vector<HostPort>* res);
+
// Takes a vector of HostPort objects and returns a comma separated
// string containing of "host:port" pairs. This method is the
// "inverse" of ParseStrings().
[2/2] kudu git commit: [compaction] Cleanup of compaction policy code
Posted by wd...@apache.org.
[compaction] Cleanup of compaction policy code
I'm reading over the compaction policy code before starting to work in
earnest on KUDU-1400, and I made a number of small changes that I think
help make the code more readable.
This patch contains no functional changes.
Change-Id: I955ec66a510f04fd869f7d969a643e4d0f7f415f
Reviewed-on: http://gerrit.cloudera.org:8080/11827
Reviewed-by: Alexey Serbin <as...@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Andrew Wong <aw...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/137775bd
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/137775bd
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/137775bd
Branch: refs/heads/master
Commit: 137775bdd81aa60ba0949537d0acc5b044ebc8ea
Parents: 32a4fe1
Author: Will Berkeley <wd...@gmail.org>
Authored: Tue Oct 30 09:53:44 2018 -0700
Committer: Will Berkeley <wd...@gmail.com>
Committed: Fri Nov 2 17:43:09 2018 +0000
----------------------------------------------------------------------
src/kudu/tablet/compaction_policy-test.cc | 23 +-
src/kudu/tablet/compaction_policy.cc | 346 +++++++++++++------------
src/kudu/tablet/compaction_policy.h | 100 +++----
src/kudu/tablet/svg_dump.cc | 6 +-
src/kudu/tablet/svg_dump.h | 4 +-
src/kudu/tablet/tablet.cc | 88 ++++---
6 files changed, 295 insertions(+), 272 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/compaction_policy-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy-test.cc b/src/kudu/tablet/compaction_policy-test.cc
index 3583c39..97714f3 100644
--- a/src/kudu/tablet/compaction_policy-test.cc
+++ b/src/kudu/tablet/compaction_policy-test.cc
@@ -15,13 +15,14 @@
// specific language governing permissions and limitations
// under the License.
+#include "kudu/tablet/compaction_policy.h"
+
#include <algorithm>
#include <memory>
#include <ostream>
#include <string>
-#include <unordered_set>
-#include <vector>
#include <utility>
+#include <vector>
#include <glog/logging.h>
#include <glog/stl_logging.h>
@@ -31,7 +32,6 @@
#include "kudu/gutil/strings/numbers.h"
#include "kudu/gutil/strings/split.h"
#include "kudu/gutil/strings/substitute.h"
-#include "kudu/tablet/compaction_policy.h"
#include "kudu/tablet/mock-rowsets.h"
#include "kudu/tablet/rowset.h"
#include "kudu/tablet/rowset_info.h"
@@ -44,7 +44,6 @@
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
-using std::unordered_set;
using std::string;
using std::vector;
@@ -55,7 +54,7 @@ class TestCompactionPolicy : public KuduTest {
protected:
void RunTestCase(const RowSetVector& vec,
int size_budget_mb,
- unordered_set<RowSet*>* picked,
+ CompactionSelection* picked,
double* quality) {
RowSetTree tree;
ASSERT_OK(tree.Reset(vec));
@@ -82,7 +81,7 @@ TEST_F(TestCompactionPolicy, TestBudgetedSelection) {
};
constexpr auto kBudgetMb = 1000; // Enough to select all rowsets.
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
ASSERT_EQ(rowsets.size(), picked.size());
@@ -109,7 +108,7 @@ TEST_F(TestCompactionPolicy, TestNonOverlappingRowSets) {
StringPrintf("%010d", i * 2 + 1)));
}
constexpr auto kBudgetMb = 128;
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
ASSERT_EQ(quality, 0.0);
@@ -134,7 +133,7 @@ TEST_F(TestCompactionPolicy, TestTinyOverlapRowSets) {
StringPrintf("%09d", i * 10000 + 11000)));
}
constexpr auto kBudgetMb = 128;
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
// With such small overlaps, no compaction will be considered worthwhile.
@@ -161,7 +160,7 @@ TEST_F(TestCompactionPolicy, TestSignificantOverlap) {
StringPrintf("%010d", (i + 2) * 10000)));
}
constexpr auto kBudgetMb = 64;
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
// Each rowset is 1MB so the number of rowsets picked should be the number of
@@ -194,7 +193,7 @@ TEST_F(TestCompactionPolicy, TestSupportAdjust) {
std::make_shared<MockDiskRowSet>("B", "C"),
};
constexpr auto kBudgetMb = 1000; // Enough to select all rowsets.
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
ASSERT_EQ(2, picked.size());
@@ -239,7 +238,7 @@ TEST_F(TestCompactionPolicy, TestYcsbCompaction) {
for (int budget_mb : {128, 256, 512, 1024}) {
BudgetedCompactionPolicy policy(budget_mb);
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
LOG_TIMING(INFO, strings::Substitute("computing compaction with $0MB budget",
budget_mb)) {
@@ -270,7 +269,7 @@ TEST_F(TestCompactionPolicy, KUDU2251) {
};
constexpr auto kBudgetMb = 1L << 30; // Enough to select all rowsets.
- unordered_set<RowSet*> picked;
+ CompactionSelection picked;
double quality = 0.0;
NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
ASSERT_EQ(3, picked.size());
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/compaction_policy.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy.cc b/src/kudu/tablet/compaction_policy.cc
index 3048606..54350eb 100644
--- a/src/kudu/tablet/compaction_policy.cc
+++ b/src/kudu/tablet/compaction_policy.cc
@@ -18,6 +18,7 @@
#include "kudu/tablet/compaction_policy.h"
#include <algorithm>
+#include <limits>
#include <ostream>
#include <queue>
#include <string>
@@ -29,7 +30,7 @@
#include <glog/logging.h>
#include "kudu/gutil/map-util.h"
-#include "kudu/gutil/mathlimits.h"
+#include "kudu/gutil/strings/substitute.h"
#include "kudu/tablet/rowset_info.h"
#include "kudu/tablet/svg_dump.h"
#include "kudu/util/flag_tags.h"
@@ -37,23 +38,26 @@
#include "kudu/util/status.h"
using std::vector;
+using strings::Substitute;
DEFINE_int32(budgeted_compaction_target_rowset_size, 32*1024*1024,
- "The target size for DiskRowSets during flush/compact when the "
- "budgeted compaction policy is used");
-TAG_FLAG(budgeted_compaction_target_rowset_size, experimental);
+ "The target size in bytes for DiskRowSets produced by flushes or "
+ "compactions when the budgeted compaction policy is used.");
TAG_FLAG(budgeted_compaction_target_rowset_size, advanced);
+TAG_FLAG(budgeted_compaction_target_rowset_size, experimental);
DEFINE_double(compaction_approximation_ratio, 1.05f,
"Approximation ratio allowed for optimal compaction calculation. A "
"value of 1.05 indicates that the policy may use an approximate result "
"if it is known to be within 5% of the optimal solution.");
+TAG_FLAG(compaction_approximation_ratio, advanced);
TAG_FLAG(compaction_approximation_ratio, experimental);
DEFINE_double(compaction_minimum_improvement, 0.01f,
"The minimum quality for a compaction to run. If a compaction does not "
"improve the average height of DiskRowSets by at least this amount, the "
"compaction will be considered ineligible.");
+TAG_FLAG(compaction_minimum_improvement, advanced);
namespace kudu {
namespace tablet {
@@ -91,28 +95,24 @@ uint64_t BudgetedCompactionPolicy::target_rowset_size() const {
return FLAGS_budgeted_compaction_target_rowset_size;
}
-// Returns in min-key and max-key sorted order
-void BudgetedCompactionPolicy::SetupKnapsackInput(const RowSetTree &tree,
- vector<RowSetInfo>* min_key,
- vector<RowSetInfo>* max_key) {
- RowSetInfo::ComputeCdfAndCollectOrdered(tree, nullptr, min_key, max_key);
-
- if (min_key->size() < 2) {
- // require at least 2 rowsets to compact
- min_key->clear();
- max_key->clear();
- return;
+void BudgetedCompactionPolicy::SetupKnapsackInput(
+ const RowSetTree& tree,
+ vector<RowSetInfo>* asc_min_key,
+ vector<RowSetInfo>* asc_max_key) const {
+ RowSetInfo::ComputeCdfAndCollectOrdered(tree,
+ /*average_height=*/nullptr,
+ asc_min_key,
+ asc_max_key);
+
+ if (asc_min_key->size() < 2) {
+ // There must be at least two rowsets to compact.
+ asc_min_key->clear();
+ asc_max_key->clear();
}
}
namespace {
-struct CompareByDescendingDensity {
- bool operator()(const RowSetInfo& a, const RowSetInfo& b) const {
- return a.density() > b.density();
- }
-};
-
struct KnapsackTraits {
typedef const RowSetInfo* item_type;
typedef double value_type;
@@ -124,16 +124,6 @@ struct KnapsackTraits {
}
};
-// Dereference-then-compare comparator
-template<class Compare>
-struct DerefCompare {
- template<class T>
- bool operator()(T* a, T* b) const {
- static const Compare comp = Compare();
- return comp(*a, *b);
- }
-};
-
// Incremental calculator for lower and upper bounds on a knapsack solution,
// given a set of items. The bounds are computed by solving the
// simpler "fractional knapsack problem" -- i.e the related problem
@@ -154,58 +144,70 @@ struct DerefCompare {
class BoundCalculator {
public:
explicit BoundCalculator(int max_weight)
- : total_weight_(0),
- total_value_(0),
+ : current_weight_(0.0),
+ current_value_(0.0),
max_weight_(max_weight),
- topdensity_(MathLimits<double>::kNegInf) {
+ top_density_(std::numeric_limits<double>::min()) {
}
void Add(const RowSetInfo& candidate) {
- // No need to add if less dense than the top and have no more room
- if (total_weight_ >= max_weight_ &&
- candidate.density() <= topdensity_)
+ // If we're already over the budget and 'candidate' is not denser than the
+ // least dense item in the knapsack, it won't be part of the upper bound
+ // fractional solution.
+ if (current_weight_ >= max_weight_ && candidate.density() <= top_density_) {
return;
+ }
- fractional_solution_.push_back(&candidate);
- std::push_heap(fractional_solution_.begin(), fractional_solution_.end(),
- DerefCompare<CompareByDescendingDensity>());
+ constexpr auto compareByDescendingDensity =
+ [](const RowSetInfo* a, const RowSetInfo* b) {
+ return a->density() > b->density();
+ };
- total_weight_ += candidate.size_mb();
- total_value_ += candidate.width();
+ // Push the candidate, then pop items from the heap until removing the
+ // element at the top of the heap reduces the current weight to the max
+ // weight or less. In other words, remove items until the heap is one item
+ // too heavy.
+ fractional_solution_.push_back(&candidate);
+ std::push_heap(fractional_solution_.begin(),
+ fractional_solution_.end(),
+ compareByDescendingDensity);
+ current_weight_ += candidate.size_mb();
+ current_value_ += candidate.width();
const RowSetInfo* top = fractional_solution_.front();
- while (total_weight_ - top->size_mb() > max_weight_) {
- total_weight_ -= top->size_mb();
- total_value_ -= top->width();
- std::pop_heap(fractional_solution_.begin(), fractional_solution_.end(),
- DerefCompare<CompareByDescendingDensity>());
+ while (current_weight_ - top->size_mb() > max_weight_) {
+ current_weight_ -= top->size_mb();
+ current_value_ -= top->width();
+ std::pop_heap(fractional_solution_.begin(),
+ fractional_solution_.end(),
+ compareByDescendingDensity);
fractional_solution_.pop_back();
top = fractional_solution_.front();
}
- topdensity_ = top->density();
+ top_density_ = top->density();
}
- // Compute the lower and upper bounds to the 0-1 knapsack problem with the elements
- // added so far.
+ // Compute the lower and upper bounds to the 0-1 knapsack problem for the
+ // current state.
std::pair<double, double> ComputeLowerAndUpperBound() const {
- int excess_weight = total_weight_ - max_weight_;
+ int excess_weight = current_weight_ - max_weight_;
if (excess_weight <= 0) {
// If we've added less than the budget, our "bounds" are just including
// all of the items.
- return { total_value_, total_value_ };
+ return { current_value_, current_value_ };
}
const RowSetInfo& top = *fractional_solution_.front();
- // The lower bound is either of:
- // - the highest density N items such that they all fit, OR
- // - the N+1th item, if it fits
+ // The 0-1 knapsack problem's solution is lower bounded by both
+ // - the value of the highest density N items that fit, and
+ // - the value of the (N+1)th item alone, if it fits
// This is a 2-approximation (i.e. no worse than 1/2 of the best solution).
// See https://courses.engr.illinois.edu/cs598csc/sp2009/lectures/lecture_4.pdf
- double lower_bound = std::max(total_value_ - top.width(), top.width());
+ double lower_bound = std::max(current_value_ - top.width(), top.width());
- // An upper bound for the integer problem is the solution to the fractional problem:
- // in the fractional problem we can add just a portion of the top element. The
- // portion to remove is determined by the amount of excess weight:
+ // An upper bound for the integer problem is the solution to the fractional
+ // problem since in the fractional problem we can add the fraction of the
+ // densest element that uses up the excess weight:
//
// fraction_to_remove = excess_weight / top.size_mb();
// portion_to_remove = fraction_to_remove * top.width()
@@ -213,47 +215,46 @@ class BoundCalculator {
// To avoid the division, we can just use the fact that density = width/size:
double portion_of_top_to_remove = static_cast<double>(excess_weight) * top.density();
DCHECK_GT(portion_of_top_to_remove, 0);
- double upper_bound = total_value_ - portion_of_top_to_remove;
+ double upper_bound = current_value_ - portion_of_top_to_remove;
return {lower_bound, upper_bound};
}
// Return the items which make up the current lower-bound solution.
void GetLowerBoundSolution(vector<const RowSetInfo*>* solution) {
+ DCHECK(solution);
solution->clear();
- int excess_weight = total_weight_ - max_weight_;
+ int excess_weight = current_weight_ - max_weight_;
if (excess_weight <= 0) {
*solution = fractional_solution_;
+ return;
+ }
+
+ // See above: there are two choices for the lower-bound estimate,
+ // and we need to return the one matching the bound we computed.
+ const RowSetInfo* top = fractional_solution_.front();
+ if (current_value_ - top->width() > top->width()) {
+ // The current solution less the top (minimum density) element.
+ solution->assign(fractional_solution_.begin() + 1,
+ fractional_solution_.end());
} else {
- const RowSetInfo* top = fractional_solution_.front();
-
- // See above: there are two choices for the lower-bound estimate,
- // and we need to return the one matching the bound we computed.
- if (total_value_ - top->width() > top->width()) {
- // The current solution less the top (minimum density) element.
- solution->assign(fractional_solution_.begin() + 1,
- fractional_solution_.end());
- } else {
- solution->push_back(top);
- }
+ solution->push_back(top);
}
}
void clear() {
fractional_solution_.clear();
- total_weight_ = 0;
- total_value_ = 0;
+ current_weight_ = 0;
+ current_value_ = 0;
}
private:
-
- // Store pointers to RowSetInfo rather than whole copies in order
- // to allow for fast swapping in the heap.
+ // Uses pointers to rather than copies to allow for fast swapping in the heap.
vector<const RowSetInfo*> fractional_solution_;
- int total_weight_;
- double total_value_;
- int max_weight_;
- double topdensity_;
+ int current_weight_;
+ double current_value_;
+ const int max_weight_;
+ double top_density_;
};
} // anonymous namespace
@@ -262,22 +263,28 @@ void BudgetedCompactionPolicy::RunApproximation(
const vector<RowSetInfo>& asc_min_key,
const vector<RowSetInfo>& asc_max_key,
vector<double>* best_upper_bounds,
- SolutionAndValue* best_solution) {
+ SolutionAndValue* best_solution) const {
+ DCHECK(best_upper_bounds);
+ DCHECK(best_solution);
+
best_upper_bounds->clear();
best_upper_bounds->reserve(asc_min_key.size());
BoundCalculator bound_calc(size_budget_mb_);
- for (const RowSetInfo& cc_a : asc_min_key) {
+
+ // See docs/design-docs/compaction-policy.md for an explanation of the logic.
+ for (const RowSetInfo& rowset_a : asc_min_key) {
bound_calc.clear();
- double ab_min = cc_a.cdf_min_key();
- double ab_max = cc_a.cdf_max_key();
- double best_upper = 0;
- for (const RowSetInfo& cc_b : asc_max_key) {
- if (cc_b.cdf_min_key() < ab_min) {
+ double union_min = rowset_a.cdf_min_key();
+ double union_max = rowset_a.cdf_max_key();
+ double best_upper = 0.0;
+ for (const RowSetInfo& rowset_b : asc_max_key) {
+ if (rowset_b.cdf_min_key() < union_min) {
continue;
}
- ab_max = std::max(cc_b.cdf_max_key(), ab_max);
- double union_width = ab_max - ab_min;
- bound_calc.Add(cc_b);
+ union_max = std::max(union_max, rowset_b.cdf_max_key());
+ double union_width = union_max - union_min;
+
+ bound_calc.Add(rowset_b);
auto bounds = bound_calc.ComputeLowerAndUpperBound();
double lower = bounds.first - union_width * kSupportAdjust;
double upper = bounds.second - union_width * kSupportAdjust;
@@ -300,52 +307,53 @@ void BudgetedCompactionPolicy::RunExact(
const vector<RowSetInfo>& asc_min_key,
const vector<RowSetInfo>& asc_max_key,
const vector<double>& best_upper_bounds,
- SolutionAndValue* best_solution) {
+ SolutionAndValue* best_solution) const {
+ DCHECK_EQ(asc_min_key.size(), best_upper_bounds.size());
+ DCHECK(best_solution);
KnapsackSolver<KnapsackTraits> solver;
- vector<const RowSetInfo*> inrange_candidates;
- inrange_candidates.reserve(asc_min_key.size());
+ vector<const RowSetInfo*> candidates;
+ candidates.reserve(asc_min_key.size());
for (int i = 0; i < asc_min_key.size(); i++) {
- const RowSetInfo& cc_a = asc_min_key[i];
- const double upper_bound = best_upper_bounds[i];
+ const auto& rowset_a = asc_min_key[i];
+ const auto upper_bound = best_upper_bounds[i];
- // 'upper_bound' is an upper bound on the solution value of any compaction that includes
- // 'cc_a' as its left-most RowSet. If that bound is worse than the current best solution,
- // then we can skip finding the precise value of this solution.
+ // 'upper_bound' is an upper bound on the solution value of any compaction
+ // that includes 'rowset_a' as its left-most rowset. If that bound is worse
+ // than the current best solution, then we can skip finding the precise
+ // value of this solution.
//
- // Here we also build in the approximation ratio as slop: the upper bound doesn't need
- // to just be better than the current solution, but needs to be better by at least
- // the approximation ratio before we bother looking for it.
+ // Here we also build in the approximation ratio as slop: the upper bound
+ // doesn't need to just be better than the current solution-- it needs to be
+ // better by at least the approximation ratio before we bother looking for
+ // the corresponding exact solution.
if (upper_bound <= best_solution->value * FLAGS_compaction_approximation_ratio) {
continue;
}
- inrange_candidates.clear();
- double ab_min = cc_a.cdf_min_key();
- double ab_max = cc_a.cdf_max_key();
- for (const RowSetInfo& cc_b : asc_max_key) {
- if (cc_b.cdf_min_key() < ab_min) {
- // Would expand support to the left.
- // TODO: possible optimization here: binary search to skip to the first
- // cc_b with cdf_max_key() > cc_a.cdf_min_key()
+ candidates.clear();
+ double union_min = rowset_a.cdf_min_key();
+ double union_max = rowset_a.cdf_max_key();
+ for (const RowSetInfo& rowset_b : asc_max_key) {
+ if (rowset_b.cdf_min_key() < union_min) {
continue;
}
- inrange_candidates.push_back(&cc_b);
+ candidates.push_back(&rowset_b);
}
- if (inrange_candidates.empty()) continue;
+ if (candidates.empty()) continue;
- solver.Reset(size_budget_mb_, &inrange_candidates);
+ solver.Reset(size_budget_mb_, &candidates);
vector<int> chosen_indexes;
int j = 0;
while (solver.ProcessNext()) {
- const RowSetInfo* item = inrange_candidates[j++];
+ const RowSetInfo* item = candidates[j++];
std::pair<int, double> best_with_this_item = solver.GetSolution();
double best_value = best_with_this_item.second;
- ab_max = std::max(item->cdf_max_key(), ab_max);
- DCHECK_GE(ab_max, ab_min);
- double solution = best_value - (ab_max - ab_min) * kSupportAdjust;
+ union_max = std::max(item->cdf_max_key(), union_max);
+ DCHECK_GE(union_max, union_min);
+ double solution = best_value - (union_max - union_min) * kSupportAdjust;
if (solution > best_solution->value) {
solver.TracePath(best_with_this_item, &chosen_indexes);
best_solution->value = solution;
@@ -356,7 +364,7 @@ void BudgetedCompactionPolicy::RunExact(
if (!chosen_indexes.empty()) {
best_solution->rowsets.clear();
for (int idx : chosen_indexes) {
- best_solution->rowsets.insert(inrange_candidates[idx]->rowset());
+ best_solution->rowsets.insert(candidates[idx]->rowset());
}
}
}
@@ -364,38 +372,45 @@ void BudgetedCompactionPolicy::RunExact(
// See docs/design-docs/compaction-policy.md for an overview of the compaction
// policy implemented in this function.
-Status BudgetedCompactionPolicy::PickRowSets(const RowSetTree &tree,
- std::unordered_set<RowSet*>* picked,
- double* quality,
- std::vector<std::string>* log) {
+Status BudgetedCompactionPolicy::PickRowSets(
+ const RowSetTree& tree,
+ CompactionSelection* picked,
+ double* quality,
+ std::vector<std::string>* log) {
+ DCHECK(picked);
+ DCHECK(quality);
+
vector<RowSetInfo> asc_min_key, asc_max_key;
SetupKnapsackInput(tree, &asc_min_key, &asc_max_key);
if (asc_max_key.empty()) {
if (log) {
LOG_STRING(INFO, log) << "No rowsets to compact";
}
- // nothing to compact.
+ *quality = 0.0;
return Status::OK();
}
// The best set of rowsets chosen so far, and the value attained by that choice.
SolutionAndValue best_solution;
- // The algorithm proceeds in two passes. The first is based on an approximation
- // of the knapsack problem, and computes some upper and lower bounds. The second
- // pass looks again over the input for any cases where the upper bound tells us
- // there is a significant improvement potentially available, and only in those
- // cases runs the actual knapsack solver.
-
- // Both passes are structured as a loop over all pairs {cc_a, cc_b} such that
- // cc_a.min_key() < cc_b.min_key(). Given any such pair, we know that a compaction
- // including both of these pairs would result in an output rowset that spans
- // the range [cc_a.min_key(), max(cc_a.max_key(), cc_b.max_key())]. This width
- // is designated 'union_width' below.
-
- // In particular, the order in which we consider 'cc_b' is such that, for the
- // inner loop (a fixed 'cc_a'), the 'union_width' increases minimally to only
- // include the new 'cc_b' and not also cover any other rowsets.
+ // The algorithm proceeds in two passes. The first is based on an
+ // approximation of the knapsack problem, and computes upper and lower bounds
+ // for the quality score of compaction selections with a specific rowset as
+ // the left-most rowset. The second pass looks again over the input for any
+ // cases where the upper bound tells us there is a significant improvement
+ // potentially available, and only in those cases runs the actual knapsack
+ // solver.
+
+ // Both passes are structured as a loop over all pairs {rowset_a, rowset_b}
+ // such that rowset_a.min_key() < rowset_b.min_key(). Given any such pair, we
+ // know that a compaction including both of these pairs would result in an
+ // output rowset that spans the range
+ // [rowset_a.min_key(), max(rowset_a.max_key(), rowset_b.max_key())]. This
+ // width is designated 'union_width' below.
+
+ // In particular, the order in which we consider 'rowset_b' is such that, for
+ // the inner loop (a fixed 'rowset_a'), the 'union_width' increases minimally
+ // to only include the new 'rowset_b' and not also cover any other rowsets.
//
// For example:
//
@@ -404,33 +419,35 @@ Status BudgetedCompactionPolicy::PickRowSets(const RowSetTree &tree,
// |----C----|
// |--------D-------|
//
- // We process in the order: A, B, C, D.
+ // We process 'rowset_b' in the order: A, B, C, D.
+ //
+ // With this iteration order, each knapsack problem builds on the previous
+ // knapsack problem by adding just a single rowset, meaning that we can reuse
+ // the existing solution state to incrementally update the solution, rather
+ // than having to rebuild from scratch.
//
- // With this iteration order, each knapsack problem builds on the previous knapsack
- // problem by adding just a single rowset, meaning that we can reuse the
- // existing solution state to incrementally update the solution, rather than having
- // to rebuild from scratch.
-
-
// Pass 1 (approximate)
// ------------------------------------------------------------
- // First, run the whole algorithm using a fast approximation of the knapsack solver
- // to come up with lower bounds and upper bounds. The approximation algorithm is a
- // 2-approximation but in practice often yields results that are very close to optimal
- // for the sort of problems we encounter in our use case.
+ // First, run the whole algorithm using a fast approximation of the knapsack
+ // solver to come up with lower bounds and upper bounds. The approximation
+ // algorithm is a 2-approximation but in practice often yields results that
+ // are very close to optimal for the sort of problems we encounter in our use
+ // case.
//
// This pass calculates:
- // 1) 'best_upper_bounds': for each 'cc_a', what's the upper bound for any solution
- // including this rowset. We use this later to avoid solving the knapsack for
- // cases where the upper bound is lower than our current best solution.
- // 2) 'best_solution' and 'best_solution->value': the best approximate solution
- // found.
+ // 1) 'best_upper_bounds': for each 'rowset_a', what's the upper bound for any
+ // solution including this rowset. We use this later to avoid solving the
+ // knapsack for cases where the upper bound is lower than our current best
+ // solution.
+ // 2) 'best_solution' and 'best_solution->value': the best approximate
+ // solution found.
vector<double> best_upper_bounds;
RunApproximation(asc_min_key, asc_max_key, &best_upper_bounds, &best_solution);
// If the best solution found above is less than some tiny threshold, we don't
- // need to bother searching for the exact solution, since it could be at most twice
- // the approximate solution.
+ // bother searching for the exact solution, since it can have value at most
+ // twice the approximate solution. Likewise, if all the upper bounds are below
+ // the threshold, we short-circuit.
if (best_solution.value * 2 <= FLAGS_compaction_minimum_improvement ||
*std::max_element(best_upper_bounds.begin(), best_upper_bounds.end()) <=
FLAGS_compaction_minimum_improvement) {
@@ -441,10 +458,10 @@ Status BudgetedCompactionPolicy::PickRowSets(const RowSetTree &tree,
// Pass 2 (precise)
// ------------------------------------------------------------
- // Now that we've found an approximate solution and upper bounds, do another pass.
- // In cases where the upper bound indicates we could do substantially better than
- // our current best solution, we use the exact knapsack solver to find the improved
- // solution.
+ // Now that we've found an approximate solution and upper bounds, do another
+ // pass. In cases where the upper bound indicates we could do substantially
+ // better than our current best solution, we use the exact knapsack solver to
+ // find the improved solution.
RunExact(asc_min_key, asc_max_key, best_upper_bounds, &best_solution);
// Log the input and output of the selection.
@@ -463,10 +480,11 @@ Status BudgetedCompactionPolicy::PickRowSets(const RowSetTree &tree,
*quality = best_solution.value;
if (best_solution.value <= FLAGS_compaction_minimum_improvement) {
- VLOG(1) << "Best compaction available (" << best_solution.value << " less than "
- << "minimum quality " << FLAGS_compaction_minimum_improvement
- << ": not compacting.";
- *quality = 0;
+ VLOG(1) << Substitute("The best compaction score found ($0) is less than "
+ "the threshold for compaction ($1): not compacting.",
+ best_solution.value,
+ FLAGS_compaction_minimum_improvement);
+ *quality = 0.0;
return Status::OK();
}
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/compaction_policy.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy.h b/src/kudu/tablet/compaction_policy.h
index fb0890a..f05f810 100644
--- a/src/kudu/tablet/compaction_policy.h
+++ b/src/kudu/tablet/compaction_policy.h
@@ -24,7 +24,6 @@
#include <vector>
#include "kudu/gutil/macros.h"
-#include "kudu/gutil/port.h"
#include "kudu/util/status.h"
namespace kudu {
@@ -34,97 +33,98 @@ class RowSet;
class RowSetInfo;
class RowSetTree;
-// A Compaction Policy is responsible for picking which files in a tablet
+// A set of rowsets selected for compaction.
+typedef std::unordered_set<const RowSet*> CompactionSelection;
+
+// A CompactionPolicy is responsible for picking which rowsets in a tablet
// should be compacted together.
class CompactionPolicy {
public:
- CompactionPolicy() {}
- virtual ~CompactionPolicy() {}
+ CompactionPolicy() = default;
+ virtual ~CompactionPolicy() = default;
// Select a set of RowSets to compact out of 'tree'.
//
// Callers are responsible for externally synchronizing selection within a
- // given Tablet. This will only select rowsets whose compact_flush_lock
+ // given tablet. This will only select rowsets whose compact_flush_lock
// is unlocked, but will not itself take the lock. Hence no other threads
// should lock or unlock the rowsets' compact_flush_lock while this method
// is running.
//
- // *quality is set to represent how effective the compaction will be on
- // reducing IO in the tablet. TODO: determine the units/ranges of this thing.
+ // *quality is set to represent how effectively the compaction reduces the
+ // tablet's average IO per write operation.
//
// If 'log' is not NULL, then a verbose log of the compaction selection
// process will be appended to it.
- virtual Status PickRowSets(const RowSetTree &tree,
- std::unordered_set<RowSet*>* picked,
+ virtual Status PickRowSets(const RowSetTree& tree,
+ CompactionSelection* picked,
double* quality,
std::vector<std::string>* log) = 0;
- // Return the size at which flush/compact should "roll" to new files. Some
- // compaction policies may prefer to deal with small constant-size files
+ // Return the size in bytes at which flush/compact should "roll" to new files.
+ // Some compaction policies may prefer to deal with small constant-size files
// whereas others may prefer large ones.
- virtual uint64_t target_rowset_size() const {
- return 1024 * 1024 * 1024; // no rolling
- }
+ virtual uint64_t target_rowset_size() const = 0;
private:
DISALLOW_COPY_AND_ASSIGN(CompactionPolicy);
};
-// Compaction policy which, given a size budget for a compaction, and a workload,
-// tries to pick a set of RowSets which fit into that budget and minimize the
-// future cost of operations on the tablet.
+// A compaction policy that, given a size budget for a compaction, tries to pick
+// a set of RowSets that fit into that budget and minimize the future cost of
+// operations on the tablet.
//
-// See src/kudu/tablet/compaction-policy.txt for details.
+// See docs/design-docs/compaction-policy.md for details.
class BudgetedCompactionPolicy : public CompactionPolicy {
public:
explicit BudgetedCompactionPolicy(int size_budget_mb);
- virtual Status PickRowSets(const RowSetTree &tree,
- std::unordered_set<RowSet*>* picked,
- double* quality,
- std::vector<std::string>* log) OVERRIDE;
+ Status PickRowSets(const RowSetTree &tree,
+ CompactionSelection* picked,
+ double* quality,
+ std::vector<std::string>* log) override;
- virtual uint64_t target_rowset_size() const OVERRIDE;
+ uint64_t target_rowset_size() const override;
private:
struct SolutionAndValue {
- std::unordered_set<RowSet*> rowsets;
- double value = 0;
+ CompactionSelection rowsets;
+ double value = 0.0;
};
- // Sets up the 'asc_min_key' and 'asc_max_key' vectors necessary
- // for both the approximate and exact solutions below.
- void SetupKnapsackInput(const RowSetTree &tree,
+ // Set up the 'asc_min_key' and 'asc_max_key' vectors used by both the
+ // RunApproximation and RunExact. 'asc_min_key' will hold RowSetInfo instances
+ // for rowsets from 'tree' in ascending order by min key; 'asc_max_key' will
+ // hold the RowSetInfo instances in ascending order by max key.
+ void SetupKnapsackInput(const RowSetTree& tree,
std::vector<RowSetInfo>* asc_min_key,
- std::vector<RowSetInfo>* asc_max_key);
-
+ std::vector<RowSetInfo>* asc_max_key) const;
- // Runs the first pass approximate solution for the algorithm.
- // Stores the best solution found in 'best_solution'.
+ // Runs the first pass approximate algorithm for the compaction selection
+ // problem. Stores the best solution found in 'best_solution'.
//
- // Sets best_upper_bounds[i] to the upper bound for any solution containing
+ // Sets best_upper_bounds[i] to an upper bound for a solution containing
// asc_min_key[i] as its left-most rowset.
- void RunApproximation(
- const std::vector<RowSetInfo>& asc_min_key,
- const std::vector<RowSetInfo>& asc_max_key,
- std::vector<double>* best_upper_bounds,
- SolutionAndValue* best_solution);
+ void RunApproximation(const std::vector<RowSetInfo>& asc_min_key,
+ const std::vector<RowSetInfo>& asc_max_key,
+ std::vector<double>* best_upper_bounds,
+ SolutionAndValue* best_solution) const;
// Runs the second pass of the algorithm.
//
- // For each i in asc_min_key, first checks if best_upper_bounds[i] indicates that a
- // solution containing asc_min_key[i] may be a better solution than the current
- // 'best_solution' by at least the configured approximation ratio. If so, runs the full
- // knapsack algorithm to determine the value of that solution and, if it is indeed
- // better, replaces '*best_solution' with the new best solution.
- void RunExact(
- const std::vector<RowSetInfo>& asc_min_key,
- const std::vector<RowSetInfo>& asc_max_key,
- const std::vector<double>& best_upper_bounds,
- SolutionAndValue* best_solution);
-
- size_t size_budget_mb_;
+ // For each i, the algorithm first checks if best_upper_bounds[i] indicates
+ // that a solution containing asc_min_key[i] as its left-most rowset might be
+ // a better solution than the current 'best_solution' by at least the
+ // configured approximation ratio. If so, it runs the full knapsack algorithm
+ // to determine the value of that solution and, if it is indeed better,
+ // replaces '*best_solution' with the new best solution.
+ void RunExact(const std::vector<RowSetInfo>& asc_min_key,
+ const std::vector<RowSetInfo>& asc_max_key,
+ const std::vector<double>& best_upper_bounds,
+ SolutionAndValue* best_solution) const;
+
+ const size_t size_budget_mb_;
};
} // namespace tablet
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/svg_dump.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/svg_dump.cc b/src/kudu/tablet/svg_dump.cc
index 93a3cc2..322bb4b 100644
--- a/src/kudu/tablet/svg_dump.cc
+++ b/src/kudu/tablet/svg_dump.cc
@@ -96,7 +96,7 @@ void OrganizeSVGRows(const vector<RowSetInfo>& candidates,
}
void DumpSVG(const vector<RowSetInfo>& candidates,
- const unordered_set<RowSet*>& picked,
+ const unordered_set<const RowSet*>& picked,
ostream* outptr) {
CHECK(outptr);
CHECK(outptr->good());
@@ -155,7 +155,7 @@ void PrintXMLHeader(ostream* out) {
} // anonymous namespace
void DumpCompactionSVG(const vector<RowSetInfo>& candidates,
- const unordered_set<RowSet*>& picked,
+ const unordered_set<const RowSet*>& picked,
ostream* out,
bool print_xml_header) {
CHECK(out);
@@ -168,7 +168,7 @@ void DumpCompactionSVG(const vector<RowSetInfo>& candidates,
}
void DumpCompactionSVGToFile(const vector<RowSetInfo>& candidates,
- const unordered_set<RowSet*>& picked) {
+ const unordered_set<const RowSet*>& picked) {
const string& pattern = FLAGS_compaction_policy_dump_svgs_pattern;
if (pattern.empty()) {
return;
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/svg_dump.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/svg_dump.h b/src/kudu/tablet/svg_dump.h
index 706f573..9adb85c 100644
--- a/src/kudu/tablet/svg_dump.h
+++ b/src/kudu/tablet/svg_dump.h
@@ -33,14 +33,14 @@ class RowSetInfo;
// null. If 'print_xml_header' is true, prints an XML header including the xml
// tag and DOCTYPE. Otherwise, only the '<svg>...</svg>' section is printed.
void DumpCompactionSVG(const std::vector<RowSetInfo>& candidates,
- const std::unordered_set<RowSet*>& picked,
+ const std::unordered_set<const RowSet*>& picked,
std::ostream* out,
bool print_xml_header);
// Like the above, but dumps the SVG to a file named according to the rules of
// --compaction_policy_dump_svgs_pattern. See the flag definition in svg_dump.cc.
void DumpCompactionSVGToFile(const std::vector<RowSetInfo>& candidates,
- const std::unordered_set<RowSet*>& picked);
+ const std::unordered_set<const RowSet*>& picked);
} // namespace tablet
} // namespace kudu
http://git-wip-us.apache.org/repos/asf/kudu/blob/137775bd/src/kudu/tablet/tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/tablet.cc b/src/kudu/tablet/tablet.cc
index fe4bd63..0d89ad8 100644
--- a/src/kudu/tablet/tablet.cc
+++ b/src/kudu/tablet/tablet.cc
@@ -1349,7 +1349,7 @@ Status Tablet::PickRowSetsToCompact(RowSetsInCompaction *picked,
std::lock_guard<std::mutex> compact_lock(compact_select_lock_);
CHECK_EQ(picked->num_rowsets(), 0);
- unordered_set<RowSet*> picked_set;
+ unordered_set<const RowSet*> picked_set;
if (flags & FORCE_COMPACT_ALL) {
// Compact all rowsets, regardless of policy.
@@ -1360,8 +1360,11 @@ Status Tablet::PickRowSetsToCompact(RowSetsInCompaction *picked,
}
} else {
// Let the policy decide which rowsets to compact.
- double quality = 0;
- RETURN_NOT_OK(compaction_policy_->PickRowSets(*rowsets_copy, &picked_set, &quality, NULL));
+ double quality = 0.0;
+ RETURN_NOT_OK(compaction_policy_->PickRowSets(*rowsets_copy,
+ &picked_set,
+ &quality,
+ /*log=*/nullptr));
VLOG_WITH_PREFIX(2) << "Compaction quality: " << quality;
}
@@ -1751,7 +1754,7 @@ void Tablet::UpdateCompactionStats(MaintenanceOpStats* stats) {
// been in the last 5 minutes, and somehow scale the compaction quality
// based on that, so we favor hot tablets.
double quality = 0;
- unordered_set<RowSet*> picked_set_ignored;
+ unordered_set<const RowSet*> picked_set_ignored;
shared_ptr<RowSetTree> rowsets_copy;
{
@@ -2296,6 +2299,9 @@ size_t Tablet::num_rowsets() const {
}
void Tablet::PrintRSLayout(ostream* o) {
+ DCHECK(o);
+ auto& out = *o;
+
shared_ptr<RowSetTree> rowsets_copy;
{
shared_lock<rw_spinlock> l(component_lock_);
@@ -2305,19 +2311,19 @@ void Tablet::PrintRSLayout(ostream* o) {
// Run the compaction policy in order to get its log and highlight those
// rowsets which would be compacted next.
vector<string> log;
- unordered_set<RowSet*> picked;
+ unordered_set<const RowSet*> picked;
double quality;
Status s = compaction_policy_->PickRowSets(*rowsets_copy, &picked, &quality, &log);
if (!s.ok()) {
- *o << "<b>Error:</b> " << EscapeForHtmlToString(s.ToString());
+ out << "<b>Error:</b> " << EscapeForHtmlToString(s.ToString());
return;
}
if (!picked.empty()) {
- *o << "<p>";
- *o << "Highlighted rowsets indicate those that would be compacted next if a "
- << "compaction were to run on this tablet.";
- *o << "</p>";
+ out << "<p>";
+ out << "Highlighted rowsets indicate those that would be compacted next if a "
+ << "compaction were to run on this tablet.";
+ out << "</p>";
}
double avg_height;
@@ -2339,11 +2345,11 @@ void Tablet::PrintRSLayout(ostream* o) {
// The first condition excludes the memrowset.
return rowset->metadata() && !rowset->IsAvailableForCompaction();
});
- *o << Substitute("<div><p>In addition to the rowsets pictured and listed, "
- "there are $0 rowset(s) currently undergoing compactions."
- "</p></div>",
- num_rowsets_unavailable_for_compaction)
- << endl;
+ out << Substitute("<div><p>In addition to the rowsets pictured and listed, "
+ "there are $0 rowset(s) currently undergoing compactions."
+ "</p></div>",
+ num_rowsets_unavailable_for_compaction)
+ << endl;
// Compute some summary statistics for the tablet's rowsets.
const auto num_rowsets = min.size();
@@ -2353,7 +2359,7 @@ void Tablet::PrintRSLayout(ostream* o) {
for (const auto& rsi : min) {
rowset_sizes.push_back(rsi.size_bytes());
}
- *o << "<table class=\"table tablet-striped table-hover\">" << endl;
+ out << "<table class=\"table tablet-striped table-hover\">" << endl;
// Compute the stats quick'n'dirty by sorting and looking at approximately
// the right spot.
// TODO(wdberkeley): Could use an O(n) quickselect-based algorithm.
@@ -2365,38 +2371,38 @@ void Tablet::PrintRSLayout(ostream* o) {
const auto size_bytes_median = rowset_sizes[num_rowsets / 2];
const auto size_bytes_third_quartile = rowset_sizes[3 * num_rowsets / 4];
const auto size_bytes_max = rowset_sizes[num_rowsets - 1];
- *o << Substitute("<thead><tr>"
- " <th>Statistic</th>"
- " <th>Approximate Value</th>"
- "<tr></thead>"
- "<tbody>"
- " <tr><td>Count</td><td>$0</td></tr>"
- " <tr><td>Min</td><td>$1</td></tr>"
- " <tr><td>First quartile</td><td>$2</td></tr>"
- " <tr><td>Median</td><td>$3</td></tr>"
- " <tr><td>Third quartile</td><td>$4</td></tr>"
- " <tr><td>Max</td><td>$5</td></tr>"
- " <tr><td>Avg. Height</td><td>$6</td></tr>"
- "<tbody>",
- num_rowsets,
- HumanReadableNumBytes::ToString(size_bytes_min),
- HumanReadableNumBytes::ToString(size_bytes_first_quartile),
- HumanReadableNumBytes::ToString(size_bytes_median),
- HumanReadableNumBytes::ToString(size_bytes_third_quartile),
- HumanReadableNumBytes::ToString(size_bytes_max),
- avg_height);
- *o << "</table>" << endl;
+ out << Substitute("<thead><tr>"
+ " <th>Statistic</th>"
+ " <th>Approximate Value</th>"
+ "<tr></thead>"
+ "<tbody>"
+ " <tr><td>Count</td><td>$0</td></tr>"
+ " <tr><td>Min</td><td>$1</td></tr>"
+ " <tr><td>First quartile</td><td>$2</td></tr>"
+ " <tr><td>Median</td><td>$3</td></tr>"
+ " <tr><td>Third quartile</td><td>$4</td></tr>"
+ " <tr><td>Max</td><td>$5</td></tr>"
+ " <tr><td>Avg. Height</td><td>$6</td></tr>"
+ "<tbody>",
+ num_rowsets,
+ HumanReadableNumBytes::ToString(size_bytes_min),
+ HumanReadableNumBytes::ToString(size_bytes_first_quartile),
+ HumanReadableNumBytes::ToString(size_bytes_median),
+ HumanReadableNumBytes::ToString(size_bytes_third_quartile),
+ HumanReadableNumBytes::ToString(size_bytes_max),
+ avg_height);
+ out << "</table>" << endl;
}
// TODO(wdberkeley): Should we even display this? It's one line per rowset
// and doesn't contain any useful information except each rowset's size.
- *o << "<h2>Compaction policy log</h2>" << endl;
+ out << "<h2>Compaction policy log</h2>" << endl;
- *o << "<pre>" << std::endl;
+ out << "<pre>" << std::endl;
for (const string& s : log) {
- *o << EscapeForHtmlToString(s) << endl;
+ out << EscapeForHtmlToString(s) << endl;
}
- *o << "</pre>" << endl;
+ out << "</pre>" << endl;
}
string Tablet::LogPrefix() const {