You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by al...@apache.org on 2019/11/12 05:56:55 UTC

[kudu] branch branch-1.11.x updated (8c9173e -> 3b84cf7)

This is an automated email from the ASF dual-hosted git repository.

alexey pushed a change to branch branch-1.11.x
in repository https://gitbox.apache.org/repos/asf/kudu.git.


    from 8c9173e  [examples] update version to 1.11.1
     new a873341  KUDU-2987 Intra location rebalance crashes in special case.
     new 3b84cf7  KUDU-2989. Work around SASL bug when FQDN is >=64 characters

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 src/kudu/rpc/server_negotiation.cc     |  15 ++++-
 src/kudu/tools/rebalancer_tool-test.cc | 119 ++++++++++++++++++++++++++++++++-
 src/kudu/tools/rebalancer_tool.cc      |   4 +-
 3 files changed, 134 insertions(+), 4 deletions(-)


[kudu] 02/02: KUDU-2989. Work around SASL bug when FQDN is >=64 characters

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.11.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit 3b84cf781aa5e0f7c2c8c4f60a21d82ad7fffb3e
Author: Todd Lipcon <to...@apache.org>
AuthorDate: Thu Oct 31 22:24:10 2019 -0700

    KUDU-2989. Work around SASL bug when FQDN is >=64 characters
    
    This adds a workaround for an upstream SASL bug which is triggered when
    the FQDN has more than 64 characters. In this case, SASL would truncate
    the FQDN and not be able to find the relevant keytab.
    
    The workaround simply uses our own code to determine the FQDN.
    
    Change-Id: I4898814f2f7ab87151798336414dde7078d28a4a
    Reviewed-on: http://gerrit.cloudera.org:8080/14609
    Reviewed-by: Anurag Mantripragada <an...@cloudera.com>
    Reviewed-by: Adar Dembo <ad...@cloudera.com>
    Tested-by: Kudu Jenkins
    (cherry picked from commit 111b13775193820b3e3551368fe00a8f00387007)
    Reviewed-on: http://gerrit.cloudera.org:8080/14687
    Reviewed-by: Grant Henke <gr...@apache.org>
    Tested-by: Alexey Serbin <as...@cloudera.com>
---
 src/kudu/rpc/server_negotiation.cc | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/kudu/rpc/server_negotiation.cc b/src/kudu/rpc/server_negotiation.cc
index 04e08b1..d56282f 100644
--- a/src/kudu/rpc/server_negotiation.cc
+++ b/src/kudu/rpc/server_negotiation.cc
@@ -388,12 +388,25 @@ Status ServerNegotiation::InitSaslServer() {
   unsigned secflags = 0;
 
   sasl_conn_t* sasl_conn = nullptr;
+
+  const char* server_fqdn = helper_.server_fqdn();
+  // If not explicitly set, use the host's FQDN here.
+  // SASL handles this itself if we pass null, but in a buggy way[1] that fails
+  // if the FQDN is >64 characters.
+  //
+  // [1] https://github.com/cyrusimap/cyrus-sasl/issues/583
+  string default_server_fqdn;
+  if (!server_fqdn) {
+    RETURN_NOT_OK_PREPEND(GetFQDN(&default_server_fqdn), "could not determine own FQDN");
+    server_fqdn = default_server_fqdn.c_str();
+  }
+
   RETURN_NOT_OK_PREPEND(WrapSaslCall(nullptr /* no conn */, [&]() {
       return sasl_server_new(
           // Registered name of the service using SASL. Required.
           sasl_proto_name_.c_str(),
           // The fully qualified domain name of this server.
-          helper_.server_fqdn(),
+          server_fqdn,
           // Permits multiple user realms on server. NULL == use default.
           nullptr,
           // Local and remote IP address strings. We don't use any mechanisms


[kudu] 01/02: KUDU-2987 Intra location rebalance crashes in special case.

Posted by al...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

alexey pushed a commit to branch branch-1.11.x
in repository https://gitbox.apache.org/repos/asf/kudu.git

commit a8733419fd43c488586172f71cab0892581146e8
Author: triplesheep <tr...@gmail.com>
AuthorDate: Fri Nov 1 10:29:20 2019 +0800

    KUDU-2987 Intra location rebalance crashes in special case.
    
    The crash manifested itself in cases where a Kudu cluster
    had a location that didn't host even a single replica of
    a tablet.
    
    Change-Id: Iea39472e55178fca688b390249432a0f7fefaaba
    Reviewed-on: http://gerrit.cloudera.org:8080/14608
    Tested-by: Alexey Serbin <as...@cloudera.com>
    Reviewed-by: Alexey Serbin <as...@cloudera.com>
    (cherry picked from commit 0fea1cb8ede852a87efc422b394ffe8d1e89bc6c)
    Reviewed-on: http://gerrit.cloudera.org:8080/14686
    Tested-by: Kudu Jenkins
    Reviewed-by: Grant Henke <gr...@apache.org>
---
 src/kudu/tools/rebalancer_tool-test.cc | 119 ++++++++++++++++++++++++++++++++-
 src/kudu/tools/rebalancer_tool.cc      |   4 +-
 2 files changed, 120 insertions(+), 3 deletions(-)

diff --git a/src/kudu/tools/rebalancer_tool-test.cc b/src/kudu/tools/rebalancer_tool-test.cc
index 8a14ca0..18063ee 100644
--- a/src/kudu/tools/rebalancer_tool-test.cc
+++ b/src/kudu/tools/rebalancer_tool-test.cc
@@ -473,6 +473,7 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase {
           tserver_unresponsive_ms_, created_tables_names));
     } else {
       ASSERT_OK(CreateTablesExcludingLocations(empty_locations,
+                                               kTableNamePattern,
                                                created_tables_names));
     }
   }
@@ -481,8 +482,13 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase {
   // tablet servers in the specified locations. This is similar to
   // CreateUnbalancedTables() but the set of tablet servers to avoid is defined
   // by the set of the specified locations.
+  //
+  // Note: 'table_name_pattern' is used to describe the table names created by this
+  // function. Each call to this function can create different batch of tables with
+  // this parameter set.
   Status CreateTablesExcludingLocations(
       const unordered_set<string>& excluded_locations,
+      const string& table_name_pattern = kTableNamePattern,
       vector<string>* table_names = nullptr) {
     // Shutdown all tablet servers in the specified locations so no tablet
     // replicas would be hosted by those servers.
@@ -504,7 +510,7 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase {
     // are available.
     SleepFor(MonoDelta::FromMilliseconds(5 * tserver_unresponsive_ms_ / 4));
     RETURN_NOT_OK(CreateTables(cluster_.get(), client_.get(), schema_,
-                               kTableNamePattern, num_tables_, rep_factor_,
+                               table_name_pattern, num_tables_, rep_factor_,
                                table_names));
     // Start tablet servers at the excluded locations.
     if (!excluded_locations.empty()) {
@@ -519,6 +525,44 @@ class RebalancingTest : public tserver::TabletServerIntegrationTestBase {
     return Status::OK();
   }
 
+  // Create tables placing their tablet replicas elsewhere but not at the specified
+  // number of tservers in the specified locations.
+  Status CreateTablesExcludingTserversInLocations(
+      const unordered_map<string, int>& excluded_tserver_by_location,
+      const string& table_name_pattern = kTableNamePattern,
+      vector<string>* table_names = nullptr) {
+    unordered_map<string, int> excluded_tservers = excluded_tserver_by_location;
+    vector<string> excluded_tserver_uuids;
+    // Shutdown some tablet servers in the specified locations so no tablet
+    // replicas would be hosted by those servers.
+    if (!excluded_tservers.empty()) {
+      for (const auto& elem : tablet_servers_) {
+        auto* ts = elem.second;
+        if (ContainsKey(excluded_tservers, ts->location)
+            && excluded_tservers[ts->location] > 0) {
+          excluded_tserver_uuids.push_back(ts->uuid());
+          --excluded_tservers[ts->location];
+          cluster_->tablet_server_by_uuid(ts->uuid())->Shutdown();
+        }
+      }
+    }
+
+    for (const auto& elem : excluded_tservers) {
+      CHECK_EQ(0, elem.second);
+    }
+    // Wait for the catalog manager to understand that not all tablet servers
+    // are available.
+    SleepFor(MonoDelta::FromMilliseconds(5 * tserver_unresponsive_ms_ / 4));
+    RETURN_NOT_OK(CreateTables(cluster_.get(), client_.get(), schema_,
+                               table_name_pattern, num_tables_, rep_factor_,
+                               table_names));
+    // Start tablet servers at the excluded locations.
+    for (const auto& uuid : excluded_tserver_uuids) {
+      RETURN_NOT_OK(cluster_->tablet_server_by_uuid(uuid)->Restart());
+    }
+    return Status::OK();
+  }
+
   // When the rebalancer starts moving replicas, ksck detects corruption
   // (that's why RuntimeError), seeing affected tables as non-healthy
   // with data state of corresponding tablets as TABLET_DATA_COPYING. If using
@@ -1668,5 +1712,78 @@ TEST_P(LocationAwareRebalancingParamTest, Rebalance) {
   }
 }
 
+// Basic intra location rebalance tests.
+class IntraLocationRebalancingBasicTest : public RebalancingTest {
+public:
+  IntraLocationRebalancingBasicTest()
+      : RebalancingTest(/*num_tables=*/ 1,
+                        /*rep_factor=*/ 3,
+                        /*num_tservers=*/ 11) {
+  }
+
+  bool is_343_scheme() const override {
+    return true;
+  }
+};
+
+TEST_F(IntraLocationRebalancingBasicTest, LocationsWithEmptyTabletServers) {
+  if (!AllowSlowTests()) {
+    LOG(WARNING) << "test is skipped; set KUDU_ALLOW_SLOW_TESTS=1 to run";
+    return;
+  }
+  const string first_table_name_pattern = "rebalance_test_first_table_$0";
+  const string second_table_name_pattern = "rebalance_test_second_table_$0";
+  const LocationInfo location_info = { { "/A", 3 }, { "/B", 3 }, { "/C", 3 },
+                                       { "/D", 2 } };
+  const vector<string>& extra_master_flags = {
+    "--master_client_location_assignment_enabled=false",
+  };
+
+  copy(extra_master_flags.begin(), extra_master_flags.end(),
+       back_inserter(master_flags_));
+
+  FLAGS_num_tablet_servers = num_tservers_;
+  FLAGS_num_replicas = rep_factor_;
+  NO_FATALS(BuildAndStart(tserver_flags_, master_flags_, location_info,
+                          /*create_table=*/ false));
+
+  // Create special intra location imbalance in location "/D". Location "/D"
+  // should only contain tablet replicas of one table.
+  //
+  // Note: there are 3 tablets in each table and the replica factor is 3,
+  // so it's 9 tablet replicas per table.
+  //
+  // 1. Create table0 with tablet replicas in locations /A, /B and /C. Because
+  // the per table location load skew is not greater than 1, it won't trigger
+  // cross location rebalance, so no replica of table0 will be moved.
+  ASSERT_OK(CreateTablesExcludingLocations({"/D"}, first_table_name_pattern));
+  // 2. Create table1 in { "/A", 3 }, { "/B", 3 }, { "/C", 3 }, {"/D", 1 }.
+  // There is an imbalance in location "/D" of table1.
+  ASSERT_OK(CreateTablesExcludingTserversInLocations({ {"/D", 1 } },
+      second_table_name_pattern));
+
+  const vector<string> tool_args = {
+    "cluster",
+    "rebalance",
+    cluster_->master()->bound_rpc_addr().ToString(),
+  };
+
+  // Before the fix added in this changelist the rebalancer would simply
+  // crash while running. See KUDU-2987 for detail informations.
+  {
+    string out;
+    string err;
+    const Status s = RunKuduTool(tool_args, &out, &err);
+    ASSERT_TRUE(s.ok()) << ToolRunInfo(s, out, err);
+    ASSERT_STR_CONTAINS(out, "rebalancing is complete: cluster is balanced")
+        << "stderr: " << err;
+
+    // Intra location rebalancing in location "/D" should move one replica
+    // to another tserver, so there is at least one move.
+    ASSERT_STR_NOT_CONTAINS(out, "(moved 0 replicas)");
+    ASSERT_STR_CONTAINS(out, "Placement policy violations:\n  none\n");
+  }
+}
+
 } // namespace tools
 } // namespace kudu
diff --git a/src/kudu/tools/rebalancer_tool.cc b/src/kudu/tools/rebalancer_tool.cc
index 60231e1..cfaaa25 100644
--- a/src/kudu/tools/rebalancer_tool.cc
+++ b/src/kudu/tools/rebalancer_tool.cc
@@ -286,9 +286,9 @@ Status RebalancerTool::KsckResultsToClusterRawInfo(
       }
       if (!replicas_at_location.empty()) {
         table_ids_at_location.insert(summary.table_id);
+        tablet_summaries.push_back(summary);
+        tablet_summaries.back().replicas = std::move(replicas_at_location);
       }
-      tablet_summaries.push_back(summary);
-      tablet_summaries.back().replicas = std::move(replicas_at_location);
     }
 
     for (const auto& summary : ksck_info.cluster_status.table_summaries) {