You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by ad...@apache.org on 2019/06/07 16:53:23 UTC

[kudu] branch master updated (0deeafe -> 086089a)

This is an automated email from the ASF dual-hosted git repository.

adar pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 0deeafe  fuzz-itest: assorted cleanup
     new f9f9526  Add seek before mode for CBTree to accelerate CheckRowDeleted in dms.
     new 086089a  [java] small fixes for diff scan setup logic

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../org/apache/kudu/client/AsyncKuduScanner.java   | 12 +++-
 src/kudu/tablet/cbtree-test.cc                     | 72 ++++++++++++++++++++++
 src/kudu/tablet/concurrent_btree.h                 | 50 ++++++++++++---
 src/kudu/tablet/deltamemstore.cc                   | 30 ++++-----
 4 files changed, 135 insertions(+), 29 deletions(-)


[kudu] 01/02: Add seek before mode for CBTree to accelerate CheckRowDeleted in dms.

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit f9f9526d3e02f226c0e6fb160726b7d566eeca23
Author: triplesheep <tr...@gmail.com>
AuthorDate: Tue Jun 4 08:51:24 2019 +0000

    Add seek before mode for CBTree to accelerate CheckRowDeleted in dms.
    
    It's a related work of KUDU-749.
    
    YCSB workloada with recordcount: 1000000, operationcount: 10000000 and SYNC_OPS_OPT = True.
    Here is the result and we can see improvment.
    
    [Before change]:
    UPDATE Operations: 5001219
    AverageLatency(us): 1525.3520189777732
    MinLatency(us): 208
    MaxLatency(us): 77247
    95thPercentileLatency(us): 4303
    99thPercentileLatency(us):7839
    
    [After change]:
    UPDATE Operations: 4997991
    AverageLatency(us): 1174.1029063477706
    MinLatency(us): 198
    MaxLatency(us): 70015
    95thPercentileLatency(us): 2991
    99thPercentileLatency(us): 5535
    
    Change-Id: Icda5585c7226a075ffebdb22c7fc7728edf85feb
    Reviewed-on: http://gerrit.cloudera.org:8080/13516
    Tested-by: Kudu Jenkins
    Reviewed-by: Todd Lipcon <to...@apache.org>
---
 src/kudu/tablet/cbtree-test.cc     | 72 ++++++++++++++++++++++++++++++++++++++
 src/kudu/tablet/concurrent_btree.h | 50 ++++++++++++++++++++++----
 src/kudu/tablet/deltamemstore.cc   | 30 ++++++----------
 3 files changed, 126 insertions(+), 26 deletions(-)

diff --git a/src/kudu/tablet/cbtree-test.cc b/src/kudu/tablet/cbtree-test.cc
index 0445bbe..9231a6c 100644
--- a/src/kudu/tablet/cbtree-test.cc
+++ b/src/kudu/tablet/cbtree-test.cc
@@ -28,6 +28,7 @@
 
 #include "kudu/gutil/gscoped_ptr.h"
 #include "kudu/gutil/stringprintf.h"
+#include "kudu/gutil/strings/substitute.h"
 #include "kudu/tablet/concurrent_btree.h"
 #include "kudu/util/barrier.h"
 #include "kudu/util/debug/sanitizer_scopes.h"
@@ -43,6 +44,7 @@ using std::string;
 using std::thread;
 using std::unordered_set;
 using std::vector;
+using strings::Substitute;
 
 namespace kudu {
 namespace tablet {
@@ -786,6 +788,76 @@ TEST_F(TestCBTree, TestScanPerformance) {
   }
 }
 
+// Test the seek AT_OR_BEFORE mode.
+TEST_F(TestCBTree, TestIteratorSeekAtOrBefore) {
+  int key_num = 1000;
+  CBTree<SmallFanoutTraits> t;
+
+  // Insert entry in CBTree with key1000 key1002 key1004 ... key2000.
+  // Key1001 key1003 key1005 ... key1999 are omitted.
+  for (int i = 500; i <= key_num; ++i) {
+    string key = Substitute("key$0", i * 2);
+    ASSERT_TRUE(t.Insert(Slice(key), Slice("val")));
+  }
+
+  // Seek to existing key should successfully reach key
+  // and set exact = true;
+  for (int i = 500; i <= key_num; ++i) {
+    string key = Substitute("key$0", i * 2);
+    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    bool exact;
+    ASSERT_TRUE(iter->SeekAtOrBefore(Slice(key), &exact));
+    ASSERT_TRUE(exact);
+
+    ASSERT_TRUE(iter->IsValid());
+    Slice k, v;
+    iter->GetCurrentEntry(&k, &v);
+    ASSERT_EQ(key, k.ToString());
+  }
+
+  // Seek to before first key should fail.
+  {
+    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+
+    bool exact;
+    ASSERT_FALSE(iter->SeekAtOrBefore(Slice("key0000"), &exact));
+    ASSERT_FALSE(exact);
+    ASSERT_FALSE(iter->IsValid());
+  }
+
+  // Seek to non-existent key in current CBTree key range should
+  // successfully reach the first entry with key < given key
+  // and set exact = false.
+  for (int i = 500; i <= key_num - 1; ++i) {
+    string key = Substitute("key$0", i * 2 + 1);
+    string expect_key = Substitute("key$0", i * 2);
+    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+    bool exact;
+    ASSERT_TRUE(iter->SeekAtOrBefore(Slice(key), &exact));
+    ASSERT_FALSE(exact);
+
+    ASSERT_TRUE(iter->IsValid());
+    Slice k, v;
+    iter->GetCurrentEntry(&k, &v);
+    ASSERT_EQ(expect_key, k.ToString());
+  }
+
+  // Seek to after last key should successfully reach last key
+  // and set exact = false;
+  {
+    gscoped_ptr<CBTreeIterator<SmallFanoutTraits> > iter(t.NewIterator());
+
+    bool exact;
+    ASSERT_TRUE(iter->SeekAtOrBefore(Slice("key2001"), &exact));
+    ASSERT_FALSE(exact);
+
+    ASSERT_TRUE(iter->IsValid());
+    Slice k, v;
+    iter->GetCurrentEntry(&k, &v);
+    ASSERT_EQ("key2000", k.ToString());
+  }
+}
+
 } // namespace btree
 } // namespace tablet
 } // namespace kudu
diff --git a/src/kudu/tablet/concurrent_btree.h b/src/kudu/tablet/concurrent_btree.h
index f638bb0..5d321ab 100644
--- a/src/kudu/tablet/concurrent_btree.h
+++ b/src/kudu/tablet/concurrent_btree.h
@@ -1616,7 +1616,13 @@ class CBTreeIterator {
 
   bool SeekAtOrAfter(const Slice &key, bool *exact) {
     SeekToLeaf(key);
-    SeekInLeaf(key, exact);
+    SeekInLeaf(key, exact, AT_OR_AFTER);
+    return IsValid();
+  }
+
+  bool SeekAtOrBefore(const Slice &key, bool *exact) {
+    SeekToLeaf(key);
+    SeekInLeaf(key, exact, AT_OR_BEFORE);
     return IsValid();
   }
 
@@ -1699,6 +1705,14 @@ class CBTreeIterator {
  private:
   friend class CBTree<Traits>;
 
+  // Control the mode in SeekInLeaf.
+  // AT_OR_BEFORE will seek the first idx with key <= given key.
+  // AT_OR_AFTER will seek the first idx with key >= given key.
+  enum SeekMode {
+      AT_OR_BEFORE,
+      AT_OR_AFTER
+  };
+
   CBTreeIterator(const CBTree<Traits> *tree,
                  bool tree_frozen) :
     tree_(tree),
@@ -1709,17 +1723,39 @@ class CBTreeIterator {
     leaf_to_scan_(&leaf_copy_)
   {}
 
-  bool SeekInLeaf(const Slice &key, bool *exact) {
+  bool SeekInLeaf(const Slice &key, bool *exact, SeekMode mode) {
     DCHECK(seeked_);
-    idx_in_leaf_ = leaf_to_scan_->Find(key, exact);
-    if (idx_in_leaf_ == leaf_to_scan_->num_entries()) {
-      // not found in leaf, seek to start of next leaf if it exists.
-      return SeekNextLeaf();
+    int idx = leaf_to_scan_->Find(key, exact);
+    if (mode == AT_OR_BEFORE) {
+      // If do not contain key equal to the given key, the result should be idx - 1;
+      // If contain the key(exact == true), the result should be idx.
+      // There are two corner case:
+      // 1. If all records in all leftnode > key will get idx = 0.
+      // 2. If all records in all leftnode < key will get idx idx = num_entries.
+      //
+      // The accuracy of this mode depends on the fact that we didn't optimize the
+      // internal nodes with "smallest separator" key. After optimize this function
+      // needs refactoring.
+      if (*exact) {
+        idx_in_leaf_ = idx;
+        return true;
+      }
+      if (leaf_to_scan_->num_entries() == 0 || idx == 0) {
+        seeked_ = false;
+        return false;
+      }
+      idx_in_leaf_ = idx - 1;
+    } else {
+      DCHECK_EQ(mode, AT_OR_AFTER);
+      idx_in_leaf_ = idx;
+      if (idx_in_leaf_ == leaf_to_scan_->num_entries()) {
+        // not found in leaf, seek to start of next leaf if it exists.
+        return SeekNextLeaf();
+      }
     }
     return true;
   }
 
-
   void SeekToLeaf(const Slice &key) {
     debug::ScopedTSANIgnoreReadsAndWrites ignore_tsan;
     retry_from_root:
diff --git a/src/kudu/tablet/deltamemstore.cc b/src/kudu/tablet/deltamemstore.cc
index c5d2009..d2faea3 100644
--- a/src/kudu/tablet/deltamemstore.cc
+++ b/src/kudu/tablet/deltamemstore.cc
@@ -158,37 +158,29 @@ Status DeltaMemStore::CheckRowDeleted(rowid_t row_idx,
                                       const IOContext* /*io_context*/,
                                       bool *deleted) const {
   *deleted = false;
-
-  DeltaKey key(row_idx, Timestamp(0));
+  DeltaKey key(row_idx, Timestamp(Timestamp::kMax));
   faststring buf;
   key.EncodeTo(&buf);
   Slice key_slice(buf);
 
   bool exact;
 
-  // TODO(unknown): can we avoid the allocation here?
   gscoped_ptr<DMSTreeIter> iter(tree_.NewIterator());
-  if (!iter->SeekAtOrAfter(key_slice, &exact)) {
+  if (!iter->SeekAtOrBefore(key_slice, &exact)) {
     return Status::OK();
   }
 
-  while (iter->IsValid()) {
-    // Iterate forward until reaching an entry with a larger row idx.
-    Slice key_slice, v;
-    iter->GetCurrentEntry(&key_slice, &v);
-    RETURN_NOT_OK(key.DecodeFrom(&key_slice));
-    DCHECK_GE(key.row_idx(), row_idx);
-    if (key.row_idx() != row_idx) break;
-
-    RowChangeList val(v);
-    // Mutation is for the target row, check deletion status.
-    RowChangeListDecoder decoder((RowChangeList(v)));
-    decoder.InitNoSafetyChecks();
-    decoder.TwiddleDeleteStatus(deleted);
+  DCHECK(!exact);
 
-    iter->Next();
+  Slice current_key_slice, v;
+  iter->GetCurrentEntry(&current_key_slice, &v);
+  RETURN_NOT_OK(key.DecodeFrom(&current_key_slice));
+  if (key.row_idx() != row_idx) {
+    return Status::OK();
   }
-
+  RowChangeListDecoder decoder((RowChangeList(v)));
+  decoder.InitNoSafetyChecks();
+  *deleted = decoder.is_delete();
   return Status::OK();
 }
 


[kudu] 02/02: [java] small fixes for diff scan setup logic

Posted by ad...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

adar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 086089ab91c70753eb632289fad2a4e30d51dddb
Author: Adar Dembo <ad...@cloudera.com>
AuthorDate: Thu Jun 6 22:32:10 2019 -0700

    [java] small fixes for diff scan setup logic
    
    Change-Id: Ie6775221115b078d4b2eb640a15d34ee4f315e27
    Reviewed-on: http://gerrit.cloudera.org:8080/13553
    Reviewed-by: Mike Percy <mp...@apache.org>
    Reviewed-by: Grant Henke <gr...@apache.org>
    Tested-by: Adar Dembo <ad...@cloudera.com>
---
 .../main/java/org/apache/kudu/client/AsyncKuduScanner.java   | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
index a145cf9..f327faf 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduScanner.java
@@ -274,6 +274,12 @@ public final class AsyncKuduScanner {
       checkArgument(readMode == ReadMode.READ_AT_SNAPSHOT, "When specifying a " +
           "HybridClock timestamp, the read mode needs to be set to READ_AT_SNAPSHOT");
     }
+    if (startTimestamp != AsyncKuduClient.NO_TIMESTAMP) {
+      checkArgument(htTimestamp >= 0, "Must have both start and end timestamps " +
+                    "for a diff scan");
+      checkArgument(startTimestamp <= htTimestamp, "Start timestamp must be less " +
+                    "than or equal to end timestamp");
+    }
 
     this.isFaultTolerant = isFaultTolerant;
     if (this.isFaultTolerant) {
@@ -442,7 +448,7 @@ public final class AsyncKuduScanner {
     return keepAlivePeriodMs;
   }
 
-  long getStartSnaphshotTimestamp() {
+  long getStartSnapshotTimestamp() {
     return this.startTimestamp;
   }
 
@@ -1037,8 +1043,8 @@ public final class AsyncKuduScanner {
             if (AsyncKuduScanner.this.getSnapshotTimestamp() != AsyncKuduClient.NO_TIMESTAMP) {
               newBuilder.setSnapTimestamp(AsyncKuduScanner.this.getSnapshotTimestamp());
             }
-            if (AsyncKuduScanner.this.getStartSnaphshotTimestamp() != AsyncKuduClient.NO_TIMESTAMP) {
-              newBuilder.setSnapStartTimestamp(AsyncKuduScanner.this.getStartSnaphshotTimestamp());
+            if (AsyncKuduScanner.this.getStartSnapshotTimestamp() != AsyncKuduClient.NO_TIMESTAMP) {
+              newBuilder.setSnapStartTimestamp(AsyncKuduScanner.this.getStartSnapshotTimestamp());
             }
           }