You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by to...@apache.org on 2017/04/05 23:11:32 UTC

[3/3] kudu git commit: rowset_tree: add bulk queries

rowset_tree: add bulk queries

This extends the bulk query API from IntervalTree up into RowSet.

TestRowSetTreePerformance.TestPerformance shows a noticeable improvement for
the cases where the number of query points is large:

Q=   10 R=   10     1-by-1 3 ms
Q=   10 R=   10    batched 0 ms (3.00x)
Q=  100 R=   10     1-by-1 9 ms
Q=  100 R=   10    batched 9 ms (1.11x)
Q=  500 R=   10     1-by-1 54 ms
Q=  500 R=   10    batched 57 ms (0.95x)
Q= 1000 R=   10     1-by-1 92 ms
Q= 1000 R=   10    batched 125 ms (0.74x)
Q= 5000 R=   10     1-by-1 490 ms
Q= 5000 R=   10    batched 814 ms (0.60x)
Q=   10 R=  100     1-by-1 4 ms
Q=   10 R=  100    batched 4 ms (1.25x)
Q=  100 R=  100     1-by-1 19 ms
Q=  100 R=  100    batched 22 ms (0.87x)
Q=  500 R=  100     1-by-1 112 ms
Q=  500 R=  100    batched 78 ms (1.43x)
Q= 1000 R=  100     1-by-1 210 ms
Q= 1000 R=  100    batched 181 ms (1.16x)
Q= 5000 R=  100     1-by-1 1031 ms
Q= 5000 R=  100    batched 941 ms (1.10x)
Q=   10 R=  250     1-by-1 1 ms
Q=   10 R=  250    batched 7 ms (0.13x)
Q=  100 R=  250     1-by-1 23 ms
Q=  100 R=  250    batched 20 ms (1.14x)
Q=  500 R=  250     1-by-1 166 ms
Q=  500 R=  250    batched 112 ms (1.48x)
Q= 1000 R=  250     1-by-1 333 ms
Q= 1000 R=  250    batched 207 ms (1.61x)
Q= 5000 R=  250     1-by-1 1568 ms
Q= 5000 R=  250    batched 1155 ms (1.36x)
Q=   10 R=  500     1-by-1 10 ms
Q=   10 R=  500    batched 8 ms (1.37x)
Q=  100 R=  500     1-by-1 46 ms
Q=  100 R=  500    batched 45 ms (1.02x)
Q=  500 R=  500     1-by-1 238 ms
Q=  500 R=  500    batched 169 ms (1.41x)
Q= 1000 R=  500     1-by-1 451 ms
Q= 1000 R=  500    batched 307 ms (1.47x)
Q= 5000 R=  500     1-by-1 2234 ms
Q= 5000 R=  500    batched 1487 ms (1.50x)

The cases where the number of query points is small relative to the number of
rowsets are worse, as predicted by big-O analysis, but in those cases the
other fixed overhead of operations (eg RPC overhead) are probably so much
larger that it isn't noticeable.

If it does turn out to be noticeable for the small-batch case, we could
easily switch to the one-at-a-time algorithm when Q << R.

More important than the above CPU optimizations, however, is that this
will allow for other higher-level optimizations during the process of
applying row operations.

Change-Id: I6ab24681dfbb3b1e6f08d52eb0647a5f3ca6851f
Reviewed-on: http://gerrit.cloudera.org:8080/6482
Tested-by: Kudu Jenkins
Reviewed-by: David Ribeiro Alves <dr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/27b3de7a
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/27b3de7a
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/27b3de7a

Branch: refs/heads/master
Commit: 27b3de7ab81b03e41422cac56e65f5e86cd0f0ee
Parents: df45190
Author: Todd Lipcon <to...@apache.org>
Authored: Tue Mar 21 17:19:55 2017 -0700
Committer: Todd Lipcon <to...@apache.org>
Committed: Wed Apr 5 23:10:37 2017 +0000

----------------------------------------------------------------------
 src/kudu/tablet/rowset_tree-test.cc | 85 ++++++++++++++++++++++++++------
 src/kudu/tablet/rowset_tree.cc      | 49 ++++++++++++++++++
 src/kudu/tablet/rowset_tree.h       | 10 ++++
 3 files changed, 130 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/27b3de7a/src/kudu/tablet/rowset_tree-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree-test.cc b/src/kudu/tablet/rowset_tree-test.cc
index c3efea4..6f94dcb 100644
--- a/src/kudu/tablet/rowset_tree-test.cc
+++ b/src/kudu/tablet/rowset_tree-test.cc
@@ -104,28 +104,85 @@ TEST_F(TestRowSetTree, TestTree) {
   ASSERT_EQ(vec[2].get(), out[3]);
 }
 
-TEST_F(TestRowSetTree, TestPerformance) {
-  const int kNumRowSets = 200;
-  const int kNumQueries = AllowSlowTests() ? 1000000 : 10000;
+class TestRowSetTreePerformance : public TestRowSetTree,
+                                  public testing::WithParamInterface<std::tuple<int, int>> {
+};
+INSTANTIATE_TEST_CASE_P(
+    Parameters, TestRowSetTreePerformance,
+    testing::Combine(
+        // Number of rowsets.
+        // Up to 500 rowsets (500*32MB = 16GB tablet)
+        testing::Values(10, 100, 250, 500),
+        // Number of query points in a batch.
+        testing::Values(10, 100, 500, 1000, 5000)));
+
+TEST_P(TestRowSetTreePerformance, TestPerformance) {
+  const int kNumRowSets = std::get<0>(GetParam());
+  const int kNumQueries = std::get<1>(GetParam());
+  const int kNumIterations = AllowSlowTests() ? 1000 : 10;
   SeedRandom();
 
-  // Create a bunch of rowsets, each of which spans about 10% of the "row space".
-  // The row space here is 4-digit 0-padded numbers.
-  RowSetVector vec = GenerateRandomRowSets(kNumRowSets);
+  Stopwatch one_at_time_timer;
+  Stopwatch batch_timer;
+  for (int i = 0; i < kNumIterations; i++) {
+    // Create a bunch of rowsets, each of which spans about 10% of the "row space".
+    // The row space here is 4-digit 0-padded numbers.
+    RowSetVector vec = GenerateRandomRowSets(kNumRowSets);
 
-  RowSetTree tree;
-  ASSERT_OK(tree.Reset(vec));
+    RowSetTree tree;
+    ASSERT_OK(tree.Reset(vec));
 
-  LOG_TIMING(INFO, StringPrintf("Querying rowset %d times", kNumQueries)) {
-    vector<RowSet *> out;
-    char buf[32];
+    vector<string> queries;
     for (int i = 0; i < kNumQueries; i++) {
-      out.clear();
       int query = rand() % 10000;
-      snprintf(buf, arraysize(buf), "%04d", query);
-      tree.FindRowSetsWithKeyInRange(Slice(buf, 4), &out);
+      queries.emplace_back(StringPrintf("%04d", query));
+    }
+
+    int individual_matches = 0;
+    one_at_time_timer.resume();
+    {
+      vector<RowSet *> out;
+      for (const auto& q : queries) {
+        out.clear();
+        tree.FindRowSetsWithKeyInRange(Slice(q), &out);
+        individual_matches += out.size();
+      }
     }
+    one_at_time_timer.stop();
+
+    vector<Slice> query_slices;
+    for (const auto& q : queries) {
+      query_slices.emplace_back(q);
+    }
+
+    batch_timer.resume();
+    std::sort(query_slices.begin(), query_slices.end(), Slice::Comparator());
+    int bulk_matches = 0;
+    {
+      vector<RowSet *> out;
+      tree.ForEachRowSetContainingKeys(
+          query_slices, [&](RowSet* rs, int slice_idx) {
+            bulk_matches++;
+          });
+    }
+    batch_timer.stop();
+
+    ASSERT_EQ(bulk_matches, individual_matches);
   }
+
+  double batch_total = batch_timer.elapsed().user;
+  double oat_total = one_at_time_timer.elapsed().user;
+  const string& case_desc = StringPrintf("Q=% 5d R=% 5d", kNumQueries, kNumRowSets);
+  const string& ratio = StringPrintf("%.2fx", batch_total / oat_total);
+  LOG(INFO) << StringPrintf("%s %10s %d ms",
+                            case_desc.c_str(),
+                            "1-by-1",
+                            static_cast<int>(oat_total/1e6));
+  LOG(INFO) << StringPrintf("%s %10s %d ms (%.2fx)",
+                            case_desc.c_str(),
+                            "batched",
+                            static_cast<int>(batch_total/1e6),
+                            oat_total / batch_total);
 }
 
 TEST_F(TestRowSetTree, TestEndpointsConsistency) {

http://git-wip-us.apache.org/repos/asf/kudu/blob/27b3de7a/src/kudu/tablet/rowset_tree.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree.cc b/src/kudu/tablet/rowset_tree.cc
index 417a12f..272468a 100644
--- a/src/kudu/tablet/rowset_tree.cc
+++ b/src/kudu/tablet/rowset_tree.cc
@@ -19,6 +19,7 @@
 
 #include <algorithm>
 #include <cstddef>
+#include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@@ -49,6 +50,14 @@ bool RSEndpointBySliceCompare(const RowSetTree::RSEndpoint& a,
   return false;
 }
 
+// Wrapper used when making batch queries into the interval tree.
+struct QueryStruct {
+  // The slice of the operation performing the query.
+  Slice slice;
+  // The original index of this slice in the incoming batch.
+  int idx;
+};
+
 } // anonymous namespace
 
 // Entry for use in the interval tree.
@@ -74,6 +83,15 @@ struct RowSetIntervalTraits {
   static int compare(const Slice &a, const Slice &b) {
     return a.compare(b);
   }
+
+  static int compare(const Slice &a, const QueryStruct &b) {
+    return a.compare(b.slice);
+  }
+
+  static int compare(const QueryStruct &a, const Slice &b) {
+    return -compare(b, a);
+  }
+
 };
 
 RowSetTree::RowSetTree()
@@ -191,6 +209,37 @@ void RowSetTree::FindRowSetsWithKeyInRange(const Slice &encoded_key,
   }
 }
 
+void RowSetTree::ForEachRowSetContainingKeys(
+    const std::vector<Slice>& encoded_keys,
+    const std::function<void(RowSet*, int)>& cb) const {
+
+  DCHECK(std::is_sorted(encoded_keys.cbegin(), encoded_keys.cend(),
+                        Slice::Comparator()));
+  // All rowsets with unknown bounds need to be checked.
+  for (const shared_ptr<RowSet> &rs : unbounded_rowsets_) {
+    for (int i = 0; i < encoded_keys.size(); i++) {
+      cb(rs.get(), i);
+    }
+  }
+
+  // The interval tree batch query callback would naturally just give us back
+  // the matching Slices, but that won't allow us to easily tell the caller
+  // which specific operation _index_ matched the RowSet. So, we make a vector
+  // of QueryStructs to pair the Slice with its original index.
+  vector<QueryStruct> queries;
+  queries.resize(encoded_keys.size());
+  for (int i = 0; i < encoded_keys.size(); i++) {
+    queries[i] = {encoded_keys[i], i};
+  }
+
+  tree_->ForEachIntervalContainingPoints(
+      queries,
+      [&](const QueryStruct& qs, RowSetWithBounds* rs) {
+        cb(rs->rowset, qs.idx);
+      });
+}
+
+
 RowSetTree::~RowSetTree() {
   STLDeleteElements(&entries_);
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/27b3de7a/src/kudu/tablet/rowset_tree.h
----------------------------------------------------------------------
diff --git a/src/kudu/tablet/rowset_tree.h b/src/kudu/tablet/rowset_tree.h
index b3b4f2a..3994d3f 100644
--- a/src/kudu/tablet/rowset_tree.h
+++ b/src/kudu/tablet/rowset_tree.h
@@ -73,6 +73,16 @@ class RowSetTree {
   void FindRowSetsWithKeyInRange(const Slice &encoded_key,
                                  std::vector<RowSet *> *rowsets) const;
 
+  // Call 'cb(rowset, index)' for each (rowset, index) pair such that
+  // 'encoded_keys[index]' may be within the bounds of 'rowset'.
+  //
+  // See IntervalTree::ForEachIntervalContainingPoints for additional
+  // information on the particular order in which the callback will be called.
+  //
+  // REQUIRES: 'encoded_keys' must be in sorted order.
+  void ForEachRowSetContainingKeys(const std::vector<Slice>& encoded_keys,
+                                   const std::function<void(RowSet*, int)>& cb) const;
+
   void FindRowSetsIntersectingInterval(const Slice &lower_bound,
                                        const Slice &upper_bound,
                                        std::vector<RowSet *> *rowsets) const;