You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2020/12/15 07:36:31 UTC

[kudu] branch master updated: client: retry id-based lookups if there is no leader

This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 59b3836  client: retry id-based lookups if there is no leader
59b3836 is described below

commit 59b3836a9a5b16fc68819a377cf8a5021a43b176
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Sun Dec 13 23:33:54 2020 -0800

    client: retry id-based lookups if there is no leader
    
    In testing out a later patch, I stumbled on the fact that we may end
    up never retrying a tablet lookup in cases where all cached replicas are
    marked as followers. The correct behavior is to attempt another lookup
    in hopes that the master has more up-to-date information about
    leadership than the client.
    
    This is only an issue with the new id-based MetaCache lookups. This
    patch extends the id-based path with logic that was already in the
    key-based lookup path that prevented this.
    
    Along the way, I did some renaming for clarity.
    
    Change-Id: If311f7e94a4fc2efa067476a4cef9ef5b378d981
    Reviewed-on: http://gerrit.cloudera.org:8080/16875
    Tested-by: Andrew Wong <aw...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/client/client-test.cc | 42 +++++++++++++++++++++++++++++++++++++++++-
 src/kudu/client/client.h       |  1 +
 src/kudu/client/meta_cache.cc  | 12 +++++++-----
 src/kudu/client/meta_cache.h   |  6 +++---
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index c1087ce..ac49571 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -75,6 +75,7 @@
 #include "kudu/common/timestamp.h"
 #include "kudu/common/txn_id.h"
 #include "kudu/common/wire_protocol.pb.h"
+#include "kudu/consensus/metadata.pb.h"
 #include "kudu/gutil/atomicops.h"
 #include "kudu/gutil/casts.h"
 #include "kudu/gutil/integral_types.h"
@@ -123,9 +124,9 @@
 #include "kudu/util/path_util.h"
 #include "kudu/util/random.h"
 #include "kudu/util/random_util.h"
+#include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/semaphore.h"
 #include "kudu/util/slice.h"
-#include "kudu/util/scoped_cleanup.h"
 #include "kudu/util/status.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_macros.h"
@@ -2469,6 +2470,45 @@ TEST_F(ClientTest, TestMetaCacheExpiryWithKeysAndIds) {
   ASSERT_FALSE(entry.stale());
 }
 
+// Test that if our cache entry indicates there is no leader, we will perform a
+// lookup and refresh our cache entry.
+TEST_F(ClientTest, TestMetaCacheLookupNoLeaders) {
+  auto& meta_cache = client_->data_->meta_cache_;
+  auto tablet_ids = cluster_->mini_tablet_server(0)->ListTablets();
+  ASSERT_FALSE(tablet_ids.empty());
+  const auto& tablet_id = tablet_ids[0];
+
+  // Populate the cache.
+  scoped_refptr<internal::RemoteTablet> rt;
+  ASSERT_OK(MetaCacheLookupById(tablet_id, &rt));
+  ASSERT_NE(nullptr, rt);
+
+  // Mark the cache entry's replicas as followers.
+  internal::MetaCacheEntry entry;
+  ASSERT_TRUE(meta_cache->LookupEntryByIdFastPath(tablet_id, &entry));
+  ASSERT_FALSE(entry.stale());
+  vector<internal::RemoteReplica> replicas;
+  auto remote_tablet = entry.tablet();
+  remote_tablet->GetRemoteReplicas(&replicas);
+  for (auto& r : replicas) {
+    remote_tablet->MarkTServerAsFollower(r.ts);
+  }
+  const auto has_leader = [&] {
+    remote_tablet->GetRemoteReplicas(&replicas);
+    for (auto& r : replicas) {
+      if (r.role == consensus::RaftPeerPB::LEADER) {
+        return true;
+      }
+    }
+    return false;
+  };
+  ASSERT_FALSE(has_leader());
+  // This should force a lookup RPC, rather than leaving our cache entries
+  // leaderless.
+  ASSERT_OK(MetaCacheLookupById(tablet_id, &rt));
+  ASSERT_TRUE(has_leader());
+}
+
 TEST_F(ClientTest, TestGetTabletServerBlacklist) {
   shared_ptr<KuduTable> table;
   NO_FATALS(CreateTable("blacklist",
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 22d63c5..b1ab428 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -908,6 +908,7 @@ class KUDU_EXPORT KuduClient : public sp::enable_shared_from_this<KuduClient> {
   FRIEND_TEST(ClientTest, TestMetaCacheExpiry);
   FRIEND_TEST(ClientTest, TestMetaCacheExpiryById);
   FRIEND_TEST(ClientTest, TestMetaCacheExpiryWithKeysAndIds);
+  FRIEND_TEST(ClientTest, TestMetaCacheLookupNoLeaders);
   FRIEND_TEST(ClientTest, TestMetaCacheWithKeysAndIds);
   FRIEND_TEST(ClientTest, TestNonCoveringRangePartitions);
   FRIEND_TEST(ClientTest, TestRetrieveAuthzTokenInParallel);
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index adc2989..1ab1a11 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -453,7 +453,6 @@ void MetaCacheServerPicker::PickLeader(const ServerPickedCallback& callback,
     {
       std::lock_guard<simple_spinlock> lock(lock_);
       marked_as_follower = ContainsKey(followers_, leader);
-
     }
     if (leader && marked_as_follower) {
       VLOG(2) << "Tablet " << tablet_->tablet_id() << ": We have a follower for a leader: "
@@ -1231,13 +1230,16 @@ bool MetaCache::LookupEntryByIdFastPath(const string& tablet_id,
 Status MetaCache::DoFastPathLookupById(const string& tablet_id,
                                        scoped_refptr<RemoteTablet>* remote_tablet) {
   MetaCacheEntry entry;
-  if (PREDICT_TRUE(LookupEntryByIdFastPath(tablet_id, &entry))) {
+  if (PREDICT_TRUE(LookupEntryByIdFastPath(tablet_id, &entry) &&
+                   entry.tablet()->HasLeader())) {
     DCHECK(!entry.is_non_covered_range());
     if (remote_tablet) {
       *remote_tablet = entry.tablet();
     }
     return Status::OK();
   }
+  // If we have no cached entry, or the cached entries don't have a leader, we
+  // must do another lookup.
   return Status::Incomplete("");
 }
 
@@ -1299,13 +1301,13 @@ void MetaCache::LookupTabletById(KuduClient* client,
                                  const string& tablet_id,
                                  const MonoTime& deadline,
                                  scoped_refptr<RemoteTablet>* remote_tablet,
-                                 const StatusCallback& callback) {
+                                 const StatusCallback& lookup_complete_cb) {
   Status fastpath_status = DoFastPathLookupById(tablet_id, remote_tablet);
   if (!fastpath_status.IsIncomplete()) {
-    callback(fastpath_status);
+    lookup_complete_cb(fastpath_status);
     return;
   }
-  LookupRpcById* rpc = new LookupRpcById(this, client, callback, tablet_id,
+  LookupRpcById* rpc = new LookupRpcById(this, client, lookup_complete_cb, tablet_id,
                                          remote_tablet, deadline);
   rpc->SendRpcSlowPath();
 }
diff --git a/src/kudu/client/meta_cache.h b/src/kudu/client/meta_cache.h
index 2b21e94..5dcc738 100644
--- a/src/kudu/client/meta_cache.h
+++ b/src/kudu/client/meta_cache.h
@@ -426,8 +426,8 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
                          const StatusCallback& callback);
 
   // Look up the locations of the given tablet, storing the result in
-  // 'remote_tablet' if not null, and calling 'callback' once the lookup is
-  // complete. Only tablets with non-failed LEADERs are considered.
+  // 'remote_tablet' if not null, and calling 'lookup_complete_cb' once the
+  // lookup is complete. Only tablets with non-failed LEADERs are considered.
   //
   // NOTE: the callback may be called from an IO thread or inline with this
   // call if the cached data is already available.
@@ -435,7 +435,7 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
                         const std::string& tablet_id,
                         const MonoTime& deadline,
                         scoped_refptr<RemoteTablet>* remote_tablet,
-                        const StatusCallback& callback);
+                        const StatusCallback& lookup_complete_cb);
 
   // Lookup the given tablet by key, only consulting local information.
   // Returns true and sets *entry if successful.