You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by aw...@apache.org on 2020/12/15 07:36:31 UTC
[kudu] branch master updated: client: retry id-based lookups if
there is no leader
This is an automated email from the ASF dual-hosted git repository.
awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 59b3836 client: retry id-based lookups if there is no leader
59b3836 is described below
commit 59b3836a9a5b16fc68819a377cf8a5021a43b176
Author: Andrew Wong <aw...@cloudera.com>
AuthorDate: Sun Dec 13 23:33:54 2020 -0800
client: retry id-based lookups if there is no leader
In testing out a later patch, I stumbled on the fact that we may end
up never retrying a tablet lookup in cases where all cached replicas are
marked as followers. The correct behavior is to attempt another lookup
in hopes that the master has more up-to-date information about
leadership than the client.
This is only an issue with the new id-based MetaCache lookups. This
patch extends the id-based path with logic that was already in the
key-based lookup path that prevented this.
Along the way, I did some renaming for clarity.
Change-Id: If311f7e94a4fc2efa067476a4cef9ef5b378d981
Reviewed-on: http://gerrit.cloudera.org:8080/16875
Tested-by: Andrew Wong <aw...@cloudera.com>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
src/kudu/client/client-test.cc | 42 +++++++++++++++++++++++++++++++++++++++++-
src/kudu/client/client.h | 1 +
src/kudu/client/meta_cache.cc | 12 +++++++-----
src/kudu/client/meta_cache.h | 6 +++---
4 files changed, 52 insertions(+), 9 deletions(-)
diff --git a/src/kudu/client/client-test.cc b/src/kudu/client/client-test.cc
index c1087ce..ac49571 100644
--- a/src/kudu/client/client-test.cc
+++ b/src/kudu/client/client-test.cc
@@ -75,6 +75,7 @@
#include "kudu/common/timestamp.h"
#include "kudu/common/txn_id.h"
#include "kudu/common/wire_protocol.pb.h"
+#include "kudu/consensus/metadata.pb.h"
#include "kudu/gutil/atomicops.h"
#include "kudu/gutil/casts.h"
#include "kudu/gutil/integral_types.h"
@@ -123,9 +124,9 @@
#include "kudu/util/path_util.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
+#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/semaphore.h"
#include "kudu/util/slice.h"
-#include "kudu/util/scoped_cleanup.h"
#include "kudu/util/status.h"
#include "kudu/util/stopwatch.h"
#include "kudu/util/test_macros.h"
@@ -2469,6 +2470,45 @@ TEST_F(ClientTest, TestMetaCacheExpiryWithKeysAndIds) {
ASSERT_FALSE(entry.stale());
}
+// Test that if our cache entry indicates there is no leader, we will perform a
+// lookup and refresh our cache entry.
+TEST_F(ClientTest, TestMetaCacheLookupNoLeaders) {
+ auto& meta_cache = client_->data_->meta_cache_;
+ auto tablet_ids = cluster_->mini_tablet_server(0)->ListTablets();
+ ASSERT_FALSE(tablet_ids.empty());
+ const auto& tablet_id = tablet_ids[0];
+
+ // Populate the cache.
+ scoped_refptr<internal::RemoteTablet> rt;
+ ASSERT_OK(MetaCacheLookupById(tablet_id, &rt));
+ ASSERT_NE(nullptr, rt);
+
+ // Mark the cache entry's replicas as followers.
+ internal::MetaCacheEntry entry;
+ ASSERT_TRUE(meta_cache->LookupEntryByIdFastPath(tablet_id, &entry));
+ ASSERT_FALSE(entry.stale());
+ vector<internal::RemoteReplica> replicas;
+ auto remote_tablet = entry.tablet();
+ remote_tablet->GetRemoteReplicas(&replicas);
+ for (auto& r : replicas) {
+ remote_tablet->MarkTServerAsFollower(r.ts);
+ }
+ const auto has_leader = [&] {
+ remote_tablet->GetRemoteReplicas(&replicas);
+ for (auto& r : replicas) {
+ if (r.role == consensus::RaftPeerPB::LEADER) {
+ return true;
+ }
+ }
+ return false;
+ };
+ ASSERT_FALSE(has_leader());
+ // This should force a lookup RPC, rather than leaving our cache entries
+ // leaderless.
+ ASSERT_OK(MetaCacheLookupById(tablet_id, &rt));
+ ASSERT_TRUE(has_leader());
+}
+
TEST_F(ClientTest, TestGetTabletServerBlacklist) {
shared_ptr<KuduTable> table;
NO_FATALS(CreateTable("blacklist",
diff --git a/src/kudu/client/client.h b/src/kudu/client/client.h
index 22d63c5..b1ab428 100644
--- a/src/kudu/client/client.h
+++ b/src/kudu/client/client.h
@@ -908,6 +908,7 @@ class KUDU_EXPORT KuduClient : public sp::enable_shared_from_this<KuduClient> {
FRIEND_TEST(ClientTest, TestMetaCacheExpiry);
FRIEND_TEST(ClientTest, TestMetaCacheExpiryById);
FRIEND_TEST(ClientTest, TestMetaCacheExpiryWithKeysAndIds);
+ FRIEND_TEST(ClientTest, TestMetaCacheLookupNoLeaders);
FRIEND_TEST(ClientTest, TestMetaCacheWithKeysAndIds);
FRIEND_TEST(ClientTest, TestNonCoveringRangePartitions);
FRIEND_TEST(ClientTest, TestRetrieveAuthzTokenInParallel);
diff --git a/src/kudu/client/meta_cache.cc b/src/kudu/client/meta_cache.cc
index adc2989..1ab1a11 100644
--- a/src/kudu/client/meta_cache.cc
+++ b/src/kudu/client/meta_cache.cc
@@ -453,7 +453,6 @@ void MetaCacheServerPicker::PickLeader(const ServerPickedCallback& callback,
{
std::lock_guard<simple_spinlock> lock(lock_);
marked_as_follower = ContainsKey(followers_, leader);
-
}
if (leader && marked_as_follower) {
VLOG(2) << "Tablet " << tablet_->tablet_id() << ": We have a follower for a leader: "
@@ -1231,13 +1230,16 @@ bool MetaCache::LookupEntryByIdFastPath(const string& tablet_id,
Status MetaCache::DoFastPathLookupById(const string& tablet_id,
scoped_refptr<RemoteTablet>* remote_tablet) {
MetaCacheEntry entry;
- if (PREDICT_TRUE(LookupEntryByIdFastPath(tablet_id, &entry))) {
+ if (PREDICT_TRUE(LookupEntryByIdFastPath(tablet_id, &entry) &&
+ entry.tablet()->HasLeader())) {
DCHECK(!entry.is_non_covered_range());
if (remote_tablet) {
*remote_tablet = entry.tablet();
}
return Status::OK();
}
+ // If we have no cached entry, or the cached entries don't have a leader, we
+ // must do another lookup.
return Status::Incomplete("");
}
@@ -1299,13 +1301,13 @@ void MetaCache::LookupTabletById(KuduClient* client,
const string& tablet_id,
const MonoTime& deadline,
scoped_refptr<RemoteTablet>* remote_tablet,
- const StatusCallback& callback) {
+ const StatusCallback& lookup_complete_cb) {
Status fastpath_status = DoFastPathLookupById(tablet_id, remote_tablet);
if (!fastpath_status.IsIncomplete()) {
- callback(fastpath_status);
+ lookup_complete_cb(fastpath_status);
return;
}
- LookupRpcById* rpc = new LookupRpcById(this, client, callback, tablet_id,
+ LookupRpcById* rpc = new LookupRpcById(this, client, lookup_complete_cb, tablet_id,
remote_tablet, deadline);
rpc->SendRpcSlowPath();
}
diff --git a/src/kudu/client/meta_cache.h b/src/kudu/client/meta_cache.h
index 2b21e94..5dcc738 100644
--- a/src/kudu/client/meta_cache.h
+++ b/src/kudu/client/meta_cache.h
@@ -426,8 +426,8 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
const StatusCallback& callback);
// Look up the locations of the given tablet, storing the result in
- // 'remote_tablet' if not null, and calling 'callback' once the lookup is
- // complete. Only tablets with non-failed LEADERs are considered.
+ // 'remote_tablet' if not null, and calling 'lookup_complete_cb' once the
+ // lookup is complete. Only tablets with non-failed LEADERs are considered.
//
// NOTE: the callback may be called from an IO thread or inline with this
// call if the cached data is already available.
@@ -435,7 +435,7 @@ class MetaCache : public RefCountedThreadSafe<MetaCache> {
const std::string& tablet_id,
const MonoTime& deadline,
scoped_refptr<RemoteTablet>* remote_tablet,
- const StatusCallback& callback);
+ const StatusCallback& lookup_complete_cb);
// Lookup the given tablet by key, only consulting local information.
// Returns true and sets *entry if successful.