You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by wd...@apache.org on 2018/10/09 17:07:01 UTC
[3/4] kudu git commit: [compaction] Small improvements to compaction
policy tests
[compaction] Small improvements to compaction policy tests
To learn more about compaction policy, I spent some time reading over
compaction_policy-test.cc, and in doing so I made a bunch of small
improvements. I also added one test covering a facet of the policy
that's pretty important but isn't obviously covered in the design docs
or the existing tests.
There are no functional non-test changes in this patch. I added
scaled-down versions of a couple of slow tests that can run all the
time, and made one slow test only run when slow tests are allowed.
Change-Id: I8f7a0e2c69f301d7fc7ca2fac657569ea240d4e3
Reviewed-on: http://gerrit.cloudera.org:8080/11605
Tested-by: Kudu Jenkins
Reviewed-by: Attila Bukor <ab...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/0c91532e
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/0c91532e
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/0c91532e
Branch: refs/heads/master
Commit: 0c91532e742df3b8f09d94ede5d7cde701b3857c
Parents: 77ead35
Author: Will Berkeley <wd...@gmail.org>
Authored: Fri Oct 5 20:41:19 2018 -0700
Committer: Will Berkeley <wd...@gmail.com>
Committed: Tue Oct 9 16:44:00 2018 +0000
----------------------------------------------------------------------
src/kudu/tablet/compaction_policy-test.cc | 196 +++++++++++++++++--------
1 file changed, 131 insertions(+), 65 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kudu/blob/0c91532e/src/kudu/tablet/compaction_policy-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/compaction_policy-test.cc b/src/kudu/tablet/compaction_policy-test.cc
index ddf917f..4277f9d 100644
--- a/src/kudu/tablet/compaction_policy-test.cc
+++ b/src/kudu/tablet/compaction_policy-test.cc
@@ -60,124 +60,188 @@ class TestCompactionPolicy : public KuduTest {
BudgetedCompactionPolicy policy(size_budget_mb);
- ASSERT_OK(policy.PickRowSets(tree, picked, quality, nullptr));
+ ASSERT_OK(policy.PickRowSets(tree, picked, quality, /*log=*/nullptr));
}
};
// Simple test for budgeted compaction: with three rowsets which
-// mostly overlap, and an high budget, they should all be selected.
+// mostly overlap, and a high budget, they should all be selected.
TEST_F(TestCompactionPolicy, TestBudgetedSelection) {
- RowSetVector vec = {
+ /*
+ * [C ------ c]
+ * [B ----- a]
+ * [A ------- b]
+ */
+ const RowSetVector rowsets = {
std::make_shared<MockDiskRowSet>("C", "c"),
std::make_shared<MockDiskRowSet>("B", "a"),
std::make_shared<MockDiskRowSet>("A", "b")
};
- const int kBudgetMb = 1000; // enough to select all
+ constexpr auto kBudgetMb = 1000; // Enough to select all rowsets.
unordered_set<RowSet*> picked;
- double quality = 0;
- NO_FATALS(RunTestCase(vec, kBudgetMb, &picked, &quality));
- ASSERT_EQ(3, picked.size());
+ double quality = 0.0;
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
+ ASSERT_EQ(rowsets.size(), picked.size());
+ // Since all three rowsets are picked and each covers almost the entire key
+ // range, the sum of widths is about 3 while the union width is 1, so the
+ // quality score should be between 1 and 2.
ASSERT_GE(quality, 1.0);
+ ASSERT_LE(quality, 2.0);
}
// Test for the case when we have many rowsets, but none of them
// overlap at all. This is likely to occur in workloads where the
// primary key is always increasing (such as a timestamp).
-TEST_F(TestCompactionPolicy, TestNonOverlappingRowsets) {
- if (!AllowSlowTests()) {
- LOG(INFO) << "Skipped";
- return;
- }
- RowSetVector vec;
- for (int i = 0; i < 10000; i++) {
- vec.emplace_back(new MockDiskRowSet(
+TEST_F(TestCompactionPolicy, TestNonOverlappingRowSets) {
+ /* NB: Zero-padding of string keys omitted to save space.
+ *
+ * [0 - 1] [2 - 3] ... [198 - 199]
+ */
+ const auto kNumRowSets = AllowSlowTests() ? 10000 : 100;
+ RowSetVector rowsets;
+ for (auto i = 0; i < kNumRowSets; i++) {
+ rowsets.emplace_back(new MockDiskRowSet(
StringPrintf("%010d", i * 2),
StringPrintf("%010d", i * 2 + 1)));
}
+ constexpr auto kBudgetMb = 128;
unordered_set<RowSet*> picked;
- double quality = 0;
- NO_FATALS(RunTestCase(vec, /*size_budget_mb=*/128, &picked, &quality));
- ASSERT_EQ(quality, 0);
+ double quality = 0.0;
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
+ ASSERT_EQ(quality, 0.0);
ASSERT_TRUE(picked.empty());
}
// Similar to the above, but with some small overlap between adjacent
// rowsets.
-TEST_F(TestCompactionPolicy, TestTinyOverlapRowsets) {
- if (!AllowSlowTests()) {
- LOG(INFO) << "Skipped";
- return;
- }
-
- RowSetVector vec;
- for (int i = 0; i < 10000; i++) {
- vec.emplace_back(new MockDiskRowSet(
- StringPrintf("%010d", i * 10000),
- StringPrintf("%010d", i * 10000 + 11000)));
+TEST_F(TestCompactionPolicy, TestTinyOverlapRowSets) {
+ /* NB: Zero-padding of string keys omitted to save space.
+ *
+ * [0 - 11000]
+ * [10000 - 21000]
+ * ...
+ * [990000 - 1001000]
+ */
+ const auto kNumRowSets = AllowSlowTests() ? 10000 : 100;
+ RowSetVector rowsets;
+ for (auto i = 0; i < kNumRowSets; i++) {
+ rowsets.emplace_back(new MockDiskRowSet(
+ StringPrintf("%09d", i * 10000),
+ StringPrintf("%09d", i * 10000 + 11000)));
}
+ constexpr auto kBudgetMb = 128;
unordered_set<RowSet*> picked;
double quality = 0;
- NO_FATALS(RunTestCase(vec, /*size_budget_mb=*/128, &picked, &quality));
- ASSERT_EQ(quality, 0);
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
+ // With such small overlaps, no compaction will be considered worthwhile.
+ ASSERT_EQ(quality, 0.0);
ASSERT_TRUE(picked.empty());
}
// Test case with 100 rowsets, each of which overlaps with its two
// neighbors to the right.
TEST_F(TestCompactionPolicy, TestSignificantOverlap) {
- RowSetVector vec;
- for (int i = 0; i < 100; i++) {
- vec.emplace_back(new MockDiskRowSet(
+ /* NB: Zero-padding of string keys omitted to save space.
+ *
+ * [0 ------ 20000]
+ * [10000 ------ 30000]
+ * [20000 ------ 40000]
+ * ...
+ * [990000 - 1010000]
+ */
+ constexpr auto kNumRowSets = 100;
+ RowSetVector rowsets;
+ for (auto i = 0; i < kNumRowSets; i++) {
+ rowsets.emplace_back(new MockDiskRowSet(
StringPrintf("%010d", i * 10000),
StringPrintf("%010d", (i + 2) * 10000)));
}
+ constexpr auto kBudgetMb = 64;
unordered_set<RowSet*> picked;
- double quality = 0;
- const int kBudgetMb = 64;
- NO_FATALS(RunTestCase(vec, kBudgetMb, &picked, &quality));
- ASSERT_GT(quality, 0.5);
- // Each rowset is 1MB so we should exactly fill the budget.
+ double quality = 0.0;
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
+ // Each rowset is 1MB so the number of rowsets picked should be the number of
+ // MB in the budget.
ASSERT_EQ(picked.size(), kBudgetMb);
+
+ // In picking 64 of 100 equally-sized and -spaced rowsets, the union width is
+ // about 0.64. Each rowset intersects the next in half its width, which makes
+ // the sum of widths about 2 * 0.64 = 1.28, if we ignore the smaller overlap
+ // between the ith and (i + 2)th rowsets. So, the quality score should be
+ // around 0.64.
+ ASSERT_GT(quality, 0.5);
+}
+
+// Test the adjustment we make to the quality measure that penalizes wider
+// solutions that have almost the same quality score. For example, with no
+// adjustment and inputs
+//
+// [ -- A -- ]
+// [ -- B -- ]
+// [ -- C -- ]
+//
+// compacting {A, B, C} results in the same quality score as {A, B}, 0.67, but
+// uses more budget. By penalizing the wider solution {A, B, C}, we ensure we
+// don't waste I/O.
+TEST_F(TestCompactionPolicy, TestSupportAdjust) {
+ const RowSetVector rowsets = {
+ std::make_shared<MockDiskRowSet>("A", "B"),
+ std::make_shared<MockDiskRowSet>("A", "B"),
+ std::make_shared<MockDiskRowSet>("B", "C"),
+ };
+ constexpr auto kBudgetMb = 1000; // Enough to select all rowsets.
+ unordered_set<RowSet*> picked;
+ double quality = 0.0;
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
+ ASSERT_EQ(2, picked.size());
+ // The weights of A and B are both 0.67, so with the adjustment the quality
+ // score should be a little less than 0.67.
+ ASSERT_GE(quality, 0.6);
}
static RowSetVector LoadFile(const string& name) {
- RowSetVector ret;
- string path = JoinPathSegments(GetTestExecutableDirectory(), name);
+ RowSetVector rowsets;
+ const string path = JoinPathSegments(GetTestExecutableDirectory(), name);
faststring data;
CHECK_OK_PREPEND(ReadFileToString(Env::Default(), path, &data),
- strings::Substitute("Unable to load test data file $0", path));
- vector<string> lines = strings::Split(data.ToString(), "\n");
+ strings::Substitute("unable to load test data file $0", path));
+ const vector<string> lines = strings::Split(data.ToString(), "\n");
for (const auto& line : lines) {
if (line.empty() || line[0] == '#') continue;
- vector<string> fields = strings::Split(line, "\t");
+ const vector<string> fields = strings::Split(line, "\t");
CHECK_EQ(3, fields.size()) << "Expected 3 fields on line: " << line;
- int size_mb = ParseLeadingInt32Value(fields[0], -1);
+ const int size_mb = ParseLeadingInt32Value(fields[0], -1);
CHECK_GE(size_mb, 1) << "Expected size at least 1MB on line: " << line;
- ret.emplace_back(new MockDiskRowSet(fields[1] /* min key */,
- fields[2] /* max key */,
- size_mb * 1024 * 1024));
+ rowsets.emplace_back(new MockDiskRowSet(fields[1] /* min key */,
+ fields[2] /* max key */,
+ size_mb * 1024 * 1024));
}
- return ret;
+ return rowsets;
}
-// Realistic test using data scraped from a tablet containing 200+GB of YCSB data.
-// This test can be used as a benchmark for optimizing the compaction policy,
-// and also serves as a basic regression/stress test using real data.
+// Realistic test using data scraped from a tablet containing 200GB+ of YCSB
+// data. This test can be used as a benchmark for optimizing the compaction
+// policy, and also serves as a basic regression/stress test using real data.
TEST_F(TestCompactionPolicy, TestYcsbCompaction) {
- RowSetVector vec = LoadFile("ycsb-test-rowsets.tsv");
+ if (!AllowSlowTests()) {
+ LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
+ return;
+ }
+ const RowSetVector rowsets = LoadFile("ycsb-test-rowsets.tsv");
RowSetTree tree;
- ASSERT_OK(tree.Reset(vec));
+ ASSERT_OK(tree.Reset(rowsets));
vector<double> qualities;
for (int budget_mb : {128, 256, 512, 1024}) {
BudgetedCompactionPolicy policy(budget_mb);
unordered_set<RowSet*> picked;
- double quality = 0;
- LOG_TIMING(INFO, strings::Substitute("Computing compaction with $0MB budget", budget_mb)) {
- ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, nullptr));
+ double quality = 0.0;
+ LOG_TIMING(INFO, strings::Substitute("computing compaction with $0MB budget",
+ budget_mb)) {
+ ASSERT_OK(policy.PickRowSets(tree, &picked, &quality, /*log=*/nullptr));
}
LOG(INFO) << "quality=" << quality;
int total_size = 0;
@@ -188,26 +252,28 @@ TEST_F(TestCompactionPolicy, TestYcsbCompaction) {
qualities.push_back(quality);
}
- // Given increasing budgets, our solutions should also be higher quality.
- ASSERT_TRUE(std::is_sorted(qualities.begin(), qualities.end()))
- << qualities;
+ // Since the budget increased in each iteration, the qualities should be
+ // non-decreasing.
+ ASSERT_TRUE(std::is_sorted(qualities.begin(), qualities.end())) << qualities;
}
// Regression test for KUDU-2251 which ensures that large (> 2GiB) rowsets don't
// cause integer overflow in compaction planning.
TEST_F(TestCompactionPolicy, KUDU2251) {
- RowSetVector vec = {
+ // Same arrangement as in TestBudgetedSelection.
+ const RowSetVector rowsets = {
std::make_shared<MockDiskRowSet>("C", "c", 1L << 31),
std::make_shared<MockDiskRowSet>("B", "a", 1L << 32),
std::make_shared<MockDiskRowSet>("A", "b", 1L << 33)
};
- const int kBudgetMb = 1L << 30; // enough to select all
+ constexpr auto kBudgetMb = 1L << 30; // Enough to select all rowsets.
unordered_set<RowSet*> picked;
- double quality = 0;
- NO_FATALS(RunTestCase(vec, kBudgetMb, &picked, &quality));
+ double quality = 0.0;
+ NO_FATALS(RunTestCase(rowsets, kBudgetMb, &picked, &quality));
ASSERT_EQ(3, picked.size());
- ASSERT_GE(quality, 1.0);
+ ASSERT_GT(quality, 1.0);
+ ASSERT_LT(quality, 2.0);
}
} // namespace tablet