You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2021/10/08 03:53:35 UTC

[kudu] branch master updated: [master] update on max_returned_locations semantics

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c7f673  [master] update on max_returned_locations semantics
9c7f673 is described below

commit 9c7f67325972b50d68ca836f83c96554222d4b9c
Author: Alexey Serbin <al...@apache.org>
AuthorDate: Mon Oct 4 10:59:07 2021 -0700

    [master] update on max_returned_locations semantics
    
    While looking at the partition-related code, I noticed that
    GetTableLocations RPC requires the 'max_returned_locations' field to be
    always set, even if it's necessary to fetch information on all tablets
    in a table.  This patch updates the implementation of the
    GetTableLocations RPC to output information on every tablet in a table
    if the GetTableLocationsRequestPB::max_returned_locations field isn't
    set.  This patch also contains a unit test for the new functionality.
    
    The primary incentive for this update was getting rid of the hard-coded
    values in test scenarios like we had in table_locations-itest.cc (see
    the diff for TableLocationsTest.TestRangeSpecificHashing as a part of
    this changelist).  That hard-coding the expected limit on the number of
    returned locations for the 'max_returned_locations' field might hide a
    bug if there are more tablets actually created, and making it possible
    to request getting information on all the existing tablets in the API
    looked cleaner than introducing some hard-coded values like INT_MAX in
    the mentioned test scenario.  As for the Kudu client library code, the
    'max_returned_locations' field is still set to some heuristic values
    based on the type of tablet lookup performed, so there shouldn't be a
    huge amount of metadata returned for large fact tables in case of a
    real-life use case.
    
    Change-Id: Iebc2b2295576c0b1b637fdeb95d4f9ff7b4d51c7
    Reviewed-on: http://gerrit.cloudera.org:8080/17902
    Tested-by: Alexey Serbin <as...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
---
 .../integration-tests/create-table-stress-test.cc  |  4 +-
 .../integration-tests/table_locations-itest.cc     |  6 ++-
 src/kudu/master/catalog_manager-test.cc            | 48 ++++++++++++++++++++++
 src/kudu/master/catalog_manager.cc                 | 11 +++--
 4 files changed, 63 insertions(+), 6 deletions(-)

diff --git a/src/kudu/integration-tests/create-table-stress-test.cc b/src/kudu/integration-tests/create-table-stress-test.cc
index 11f9b18..aa4f6fc 100644
--- a/src/kudu/integration-tests/create-table-stress-test.cc
+++ b/src/kudu/integration-tests/create-table-stress-test.cc
@@ -258,7 +258,9 @@ TEST_F(CreateTableStressTest, TestGetTableLocationsOptions) {
     req.mutable_table()->set_table_name(table_name);
     req.set_max_returned_locations(0);
     Status s = catalog->GetTableLocations(&req, &resp, /*user=*/none);
-    ASSERT_STR_CONTAINS(s.ToString(), "must be greater than 0");
+    ASSERT_STR_CONTAINS(
+        s.ToString(),
+        "max_returned_locations must be greater than 0 if specified");
   }
 
   // Ask for one, get one, verify
diff --git a/src/kudu/integration-tests/table_locations-itest.cc b/src/kudu/integration-tests/table_locations-itest.cc
index 983e2c8..e96eab2 100644
--- a/src/kudu/integration-tests/table_locations-itest.cc
+++ b/src/kudu/integration-tests/table_locations-itest.cc
@@ -497,11 +497,13 @@ TEST_F(TableLocationsTest, TestRangeSpecificHashing) {
       table_name, schema, {}, bounds, range_hash_schemas, table_hash_schema_5));
   NO_FATALS(CheckMasterTableCreation(table_name, 19));
 
+  // The default setting for GetTableLocationsRequestPB::max_returned_locations
+  // is 10 , but here it's necessary to fetch all the existing tablets.
   GetTableLocationsRequestPB req;
+  req.mutable_table()->set_table_name(table_name);
+  req.clear_max_returned_locations();
   GetTableLocationsResponsePB resp;
   RpcController controller;
-  req.mutable_table()->set_table_name(table_name);
-  req.set_max_returned_locations(19);
   ASSERT_OK(proxy_->GetTableLocations(req, &resp, &controller));
   SCOPED_TRACE(SecureDebugString(resp));
 
diff --git a/src/kudu/master/catalog_manager-test.cc b/src/kudu/master/catalog_manager-test.cc
index e4889ca..38db444 100644
--- a/src/kudu/master/catalog_manager-test.cc
+++ b/src/kudu/master/catalog_manager-test.cc
@@ -17,6 +17,7 @@
 
 #include "kudu/master/catalog_manager.h"
 
+#include <numeric>
 #include <ostream>
 #include <string>
 #include <vector>
@@ -36,6 +37,7 @@
 #include "kudu/util/test_macros.h"
 #include "kudu/util/test_util.h"
 
+using std::iota;
 using std::string;
 using std::vector;
 using strings::Substitute;
@@ -182,6 +184,52 @@ TEST(TestTSDescriptor, TestReplicaCreationsDecay) {
     ASSERT_NEAR(0.891, ts.RecentReplicaCreations(), 0.05);
   }
 }
+TEST(TableInfoTest, MaxReturnedLocationsNotSpecified) {
+  const string table_id = CURRENT_TEST_NAME();
+  scoped_refptr<TableInfo> table(new TableInfo(table_id));
+  vector<scoped_refptr<TabletInfo>> tablets;
+
+  vector<string> ranges(128);
+  std::iota(ranges.begin(), ranges.end(), '\0');
+  for (auto it = ranges.begin(); it != ranges.end(); ++it) {
+    auto next_it = it;
+    ++next_it;
+    const string& start_key = *it;
+    const string& end_key = (next_it == ranges.end()) ? "" : *next_it;
+
+    string tablet_id = Substitute("tablet-$0-$1", start_key, end_key);
+
+    scoped_refptr<TabletInfo> tablet = new TabletInfo(table, tablet_id);
+    {
+      TabletMetadataLock meta_lock(tablet.get(), LockMode::WRITE);
+      PartitionPB* partition = meta_lock.mutable_data()->pb.mutable_partition();
+      partition->set_partition_key_start(start_key);
+      if (next_it != ranges.end()) {
+        partition->set_partition_key_end(end_key);
+      }
+      meta_lock.mutable_data()->pb.set_state(SysTabletsEntryPB::RUNNING);
+      meta_lock.Commit();
+    }
+
+    TabletMetadataLock meta_lock(tablet.get(), LockMode::READ);
+    table->AddRemoveTablets({ tablet }, {});
+    tablets.push_back(tablet);
+  }
+
+  // Fetch all the available tablets.
+  {
+    GetTableLocationsRequestPB req;
+    req.clear_max_returned_locations(); // the default is 10 in protobuf
+    req.mutable_table()->mutable_table_name()->assign(table_id);
+    // Query using the start key of the first tablet's range.
+    req.mutable_partition_key_start()->assign("\0");
+    vector<scoped_refptr<TabletInfo>> tablets_in_range;
+    ASSERT_OK(table->GetTabletsInRange(&req, &tablets_in_range));
+
+    // The response should contain information on every tablet created.
+    ASSERT_EQ(ranges.size(), tablets_in_range.size());
+  }
+}
 
 } // namespace master
 } // namespace kudu
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index b9606fe..12c8873 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -47,6 +47,7 @@
 #include <ctime>
 #include <functional>
 #include <iterator>
+#include <limits>
 #include <map>
 #include <memory>
 #include <mutex>
@@ -5701,8 +5702,10 @@ Status CatalogManager::GetTableLocations(const GetTableLocationsRequestPB* req,
     return Status::InvalidArgument("start partition range key must not be "
                                    "greater than the end partition range key");
   }
-  if (PREDICT_FALSE(req->max_returned_locations() <= 0)) {
-    return Status::InvalidArgument("max_returned_locations must be greater than 0");
+  if (PREDICT_FALSE(req->has_max_returned_locations() &&
+                    req->max_returned_locations() <= 0)) {
+    return Status::InvalidArgument(
+        "max_returned_locations must be greater than 0 if specified");
   }
 
   leader_lock_.AssertAcquiredForReading();
@@ -6447,7 +6450,9 @@ Status TableInfo::GetTabletsInRange(
       ? tablet_map_.upper_bound(partition_key_end)
       : tablet_map_.end();
 
-  const size_t max_returned_locations = req->max_returned_locations();
+  const size_t max_returned_locations =
+      req->has_max_returned_locations() ? req->max_returned_locations()
+                                        : std::numeric_limits<size_t>::max();
   size_t count = 0;
   for (; it != it_end && count < max_returned_locations; ++it) {
     ret->emplace_back(make_scoped_refptr(it->second));