You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/10/08 03:53:35 UTC
[kudu] branch master updated: [master] update on
max_returned_locations semantics
This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 9c7f673 [master] update on max_returned_locations semantics
9c7f673 is described below
commit 9c7f67325972b50d68ca836f83c96554222d4b9c
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Mon Oct 4 10:59:07 2021 -0700
[master] update on max_returned_locations semantics
While looking at the partition-related code, I noticed that
GetTableLocations RPC requires the 'max_returned_locations' field to be
always set, even if it's necessary to fetch information on all tablets
in a table. This patch updates the implementation of the
GetTableLocations RPC to output information on every tablet in a table
if the GetTableLocationsRequestPB::max_returned_locations field isn't
set. This patch also contains a unit test for the new functionality.
The primary incentive for this update was getting rid of the hard-coded
values in test scenarios like we had in table_locations-itest.cc (see
the diff for TableLocationsTest.TestRangeSpecificHashing as a part of
this changelist). That hard-coding the expected limit on the number of
returned locations for the 'max_returned_locations' field might hide a
bug if there are more tablets actually created, and making it possible
to request getting information on all the existing tablets in the API
looked cleaner than introducing some hard-coded values like INT_MAX in
the mentioned test scenario. As for the Kudu client library code, the
'max_returned_locations' field is still set to some heuristic values
based on the type of tablet lookup performed, so there shouldn't be a
huge amount of metadata returned for large fact tables in case of a
real-life use case.
Change-Id: Iebc2b2295576c0b1b637fdeb95d4f9ff7b4d51c7
Reviewed-on: http://gerrit.cloudera.org:8080/17902
Tested-by: Alexey Serbin <as...@cloudera.com>
Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
.../integration-tests/create-table-stress-test.cc | 4 +-
.../integration-tests/table_locations-itest.cc | 6 ++-
src/kudu/master/catalog_manager-test.cc | 48 ++++++++++++++++++++++
src/kudu/master/catalog_manager.cc | 11 +++--
4 files changed, 63 insertions(+), 6 deletions(-)
diff --git a/src/kudu/integration-tests/create-table-stress-test.cc b/src/kudu/integration-tests/create-table-stress-test.cc
index 11f9b18..aa4f6fc 100644
--- a/src/kudu/integration-tests/create-table-stress-test.cc
+++ b/src/kudu/integration-tests/create-table-stress-test.cc
@@ -258,7 +258,9 @@ TEST_F(CreateTableStressTest, TestGetTableLocationsOptions) {
req.mutable_table()->set_table_name(table_name);
req.set_max_returned_locations(0);
Status s = catalog->GetTableLocations(&req, &resp, /*user=*/none);
- ASSERT_STR_CONTAINS(s.ToString(), "must be greater than 0");
+ ASSERT_STR_CONTAINS(
+ s.ToString(),
+ "max_returned_locations must be greater than 0 if specified");
}
// Ask for one, get one, verify
diff --git a/src/kudu/integration-tests/table_locations-itest.cc b/src/kudu/integration-tests/table_locations-itest.cc
index 983e2c8..e96eab2 100644
--- a/src/kudu/integration-tests/table_locations-itest.cc
+++ b/src/kudu/integration-tests/table_locations-itest.cc
@@ -497,11 +497,13 @@ TEST_F(TableLocationsTest, TestRangeSpecificHashing) {
table_name, schema, {}, bounds, range_hash_schemas, table_hash_schema_5));
NO_FATALS(CheckMasterTableCreation(table_name, 19));
+ // The default setting for GetTableLocationsRequestPB::max_returned_locations
+ // is 10 , but here it's necessary to fetch all the existing tablets.
GetTableLocationsRequestPB req;
+ req.mutable_table()->set_table_name(table_name);
+ req.clear_max_returned_locations();
GetTableLocationsResponsePB resp;
RpcController controller;
- req.mutable_table()->set_table_name(table_name);
- req.set_max_returned_locations(19);
ASSERT_OK(proxy_->GetTableLocations(req, &resp, &controller));
SCOPED_TRACE(SecureDebugString(resp));
diff --git a/src/kudu/master/catalog_manager-test.cc b/src/kudu/master/catalog_manager-test.cc
index e4889ca..38db444 100644
--- a/src/kudu/master/catalog_manager-test.cc
+++ b/src/kudu/master/catalog_manager-test.cc
@@ -17,6 +17,7 @@
#include "kudu/master/catalog_manager.h"
+#include <numeric>
#include <ostream>
#include <string>
#include <vector>
@@ -36,6 +37,7 @@
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
+using std::iota;
using std::string;
using std::vector;
using strings::Substitute;
@@ -182,6 +184,52 @@ TEST(TestTSDescriptor, TestReplicaCreationsDecay) {
ASSERT_NEAR(0.891, ts.RecentReplicaCreations(), 0.05);
}
}
+TEST(TableInfoTest, MaxReturnedLocationsNotSpecified) {
+ const string table_id = CURRENT_TEST_NAME();
+ scoped_refptr<TableInfo> table(new TableInfo(table_id));
+ vector<scoped_refptr<TabletInfo>> tablets;
+
+ vector<string> ranges(128);
+ std::iota(ranges.begin(), ranges.end(), '\0');
+ for (auto it = ranges.begin(); it != ranges.end(); ++it) {
+ auto next_it = it;
+ ++next_it;
+ const string& start_key = *it;
+ const string& end_key = (next_it == ranges.end()) ? "" : *next_it;
+
+ string tablet_id = Substitute("tablet-$0-$1", start_key, end_key);
+
+ scoped_refptr<TabletInfo> tablet = new TabletInfo(table, tablet_id);
+ {
+ TabletMetadataLock meta_lock(tablet.get(), LockMode::WRITE);
+ PartitionPB* partition = meta_lock.mutable_data()->pb.mutable_partition();
+ partition->set_partition_key_start(start_key);
+ if (next_it != ranges.end()) {
+ partition->set_partition_key_end(end_key);
+ }
+ meta_lock.mutable_data()->pb.set_state(SysTabletsEntryPB::RUNNING);
+ meta_lock.Commit();
+ }
+
+ TabletMetadataLock meta_lock(tablet.get(), LockMode::READ);
+ table->AddRemoveTablets({ tablet }, {});
+ tablets.push_back(tablet);
+ }
+
+ // Fetch all the available tablets.
+ {
+ GetTableLocationsRequestPB req;
+ req.clear_max_returned_locations(); // the default is 10 in protobuf
+ req.mutable_table()->mutable_table_name()->assign(table_id);
+ // Query using the start key of the first tablet's range.
+ req.mutable_partition_key_start()->assign("\0");
+ vector<scoped_refptr<TabletInfo>> tablets_in_range;
+ ASSERT_OK(table->GetTabletsInRange(&req, &tablets_in_range));
+
+ // The response should contain information on every tablet created.
+ ASSERT_EQ(ranges.size(), tablets_in_range.size());
+ }
+}
} // namespace master
} // namespace kudu
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index b9606fe..12c8873 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -47,6 +47,7 @@
#include <ctime>
#include <functional>
#include <iterator>
+#include <limits>
#include <map>
#include <memory>
#include <mutex>
@@ -5701,8 +5702,10 @@ Status CatalogManager::GetTableLocations(const GetTableLocationsRequestPB* req,
return Status::InvalidArgument("start partition range key must not be "
"greater than the end partition range key");
}
- if (PREDICT_FALSE(req->max_returned_locations() <= 0)) {
- return Status::InvalidArgument("max_returned_locations must be greater than 0");
+ if (PREDICT_FALSE(req->has_max_returned_locations() &&
+ req->max_returned_locations() <= 0)) {
+ return Status::InvalidArgument(
+ "max_returned_locations must be greater than 0 if specified");
}
leader_lock_.AssertAcquiredForReading();
@@ -6447,7 +6450,9 @@ Status TableInfo::GetTabletsInRange(
? tablet_map_.upper_bound(partition_key_end)
: tablet_map_.end();
- const size_t max_returned_locations = req->max_returned_locations();
+ const size_t max_returned_locations =
+ req->has_max_returned_locations() ? req->max_returned_locations()
+ : std::numeric_limits<size_t>::max();
size_t count = 0;
for (; it != it_end && count < max_returned_locations; ++it) {
ret->emplace_back(make_scoped_refptr(it->second));