You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kudu.apache.org by gr...@apache.org on 2018/09/05 01:30:10 UTC

[1/3] kudu git commit: [location_awareness] Assign locations to registering tablet servers

Repository: kudu
Updated Branches:
  refs/heads/master a58867c59 -> 224f4792d


[location_awareness] Assign locations to registering tablet servers

This patch introduces a location field maintained by the master for
each tablet server. The master determines the value of this field
whenever a tablet server registers. It does this by using an
external command, specified with the flag --location_mapping_cmd,
to produce a location from the hostname or IP address of the tablet
server.

To help with ad hoc testing, I also added the location field to the
/tablet-servers web page, and fixed a small oversight where the
table of tablet servers wasn't sorted, so its order changed depending
on the order tablet servers first registered in.

I also altered the DATA_FILES CMake function so that data files copied
to the build directory or to slaves by dist-test are executable as well
as readable. This was necessary for the new TSDescriptor test which
tests location assignment.

Change-Id: I5eb98823ab7b3b8141b8630196c29c1ebf8e6878
Reviewed-on: http://gerrit.cloudera.org:8080/11115
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <as...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/dcc39d53
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/dcc39d53
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/dcc39d53

Branch: refs/heads/master
Commit: dcc39d53d8c36a3f4896ce4c208b855abee8da83
Parents: a58867c
Author: Will Berkeley <wd...@gmail.org>
Authored: Thu Aug 2 13:14:34 2018 -0700
Committer: Will Berkeley <wd...@gmail.com>
Committed: Tue Sep 4 21:50:47 2018 +0000

----------------------------------------------------------------------
 CMakeLists.txt                             |   6 +-
 src/kudu/master/CMakeLists.txt             |   1 +
 src/kudu/master/master-test.cc             |  37 +++++++
 src/kudu/master/master_path_handlers.cc    |  18 +++-
 src/kudu/master/testdata/first_argument.sh |  20 ++++
 src/kudu/master/ts_descriptor-test.cc      | 135 ++++++++++++++++++++++++
 src/kudu/master/ts_descriptor.cc           | 116 +++++++++++++++++++-
 src/kudu/master/ts_descriptor.h            |  19 +++-
 www/tablet-servers.mustache                |   2 +
 9 files changed, 342 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d7821e8..6c9d35e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -843,10 +843,10 @@ function(ADD_KUDU_TEST REL_TEST_NAME)
       file(REMOVE ${DST_DIR}/${DATA_FILE})
     endif()
     file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/${DATA_FILE}
-      # Copy with read-only permissions since tests should not
-      # modify the data files in place.
+      # Copy with read and execute permissions, since tests should not modify
+      # the data files in place, but data files may be scripts used by tests.
       DIRECTORY_PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
-      FILE_PERMISSIONS OWNER_READ GROUP_READ
+      FILE_PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
       DESTINATION ${DST_DIR})
   endforeach()
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/kudu/master/CMakeLists.txt b/src/kudu/master/CMakeLists.txt
index 41a7dfe..bb6ccce 100644
--- a/src/kudu/master/CMakeLists.txt
+++ b/src/kudu/master/CMakeLists.txt
@@ -76,6 +76,7 @@ ADD_KUDU_TEST(hms_notification_log_listener-test)
 ADD_KUDU_TEST(master-test RESOURCE_LOCK "master-web-port")
 ADD_KUDU_TEST(mini_master-test RESOURCE_LOCK "master-web-port")
 ADD_KUDU_TEST(sys_catalog-test RESOURCE_LOCK "master-web-port")
+ADD_KUDU_TEST(ts_descriptor-test DATA_FILES testdata/first_argument.sh)
 
 # Actual master executable
 add_executable(kudu-master master_main.cc)

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/master-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-test.cc b/src/kudu/master/master-test.cc
index ef7ea2d..5916448 100644
--- a/src/kudu/master/master-test.cc
+++ b/src/kudu/master/master-test.cc
@@ -106,6 +106,7 @@ DECLARE_bool(raft_prepare_replacement_before_eviction);
 DECLARE_double(sys_catalog_fail_during_write);
 DECLARE_int32(diagnostics_log_stack_traces_interval_ms);
 DECLARE_int32(master_inject_latency_on_tablet_lookups_ms);
+DECLARE_string(location_mapping_cmd);
 
 namespace kudu {
 namespace master {
@@ -442,6 +443,42 @@ TEST_F(MasterTest, TestRegisterAndHeartbeat) {
         "they must be run with the same scheme", kTsUUID);
     ASSERT_STR_MATCHES(s.ToString(), msg);
   }
+
+  // Ensure that the TS doesn't register if location mapping fails.
+  {
+    // Set a command that always fails.
+    FLAGS_location_mapping_cmd = "false";
+
+    // Set a new UUID so registration is for the first time.
+    auto new_common = common;
+    new_common.mutable_ts_instance()->set_permanent_uuid("lmc-fail-ts");
+
+    TSHeartbeatRequestPB hb_req;
+    TSHeartbeatResponsePB hb_resp;
+    RpcController rpc;
+    hb_req.mutable_common()->CopyFrom(new_common);
+    hb_req.mutable_registration()->CopyFrom(fake_reg);
+    hb_req.mutable_replica_management_info()->CopyFrom(rmi);
+
+    // Registration should fail.
+    Status s = proxy_->TSHeartbeat(hb_req, &hb_resp, &rpc);
+    ASSERT_TRUE(s.IsRemoteError());
+    ASSERT_STR_CONTAINS(s.ToString(), "failed to run location mapping command");
+
+    // Make sure the tablet server isn't returned to clients.
+    ListTabletServersRequestPB list_ts_req;
+    ListTabletServersResponsePB list_ts_resp;
+    rpc.Reset();
+    ASSERT_OK(proxy_->ListTabletServers(list_ts_req, &list_ts_resp, &rpc));
+
+    LOG(INFO) << SecureDebugString(list_ts_resp);
+    ASSERT_FALSE(list_ts_resp.has_error());
+    ASSERT_EQ(1, list_ts_resp.servers_size());
+    ASSERT_EQ("my-ts-uuid", list_ts_resp.servers(0).instance_id().permanent_uuid());
+
+    // Reset the flag.
+    FLAGS_location_mapping_cmd = "";
+  }
 }
 
 Status MasterTest::CreateTable(const string& table_name,

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/master_path_handlers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master_path_handlers.cc b/src/kudu/master/master_path_handlers.cc
index 75433af..8566966 100644
--- a/src/kudu/master/master_path_handlers.cc
+++ b/src/kudu/master/master_path_handlers.cc
@@ -32,6 +32,7 @@
 #include <vector>
 
 #include <boost/bind.hpp> // IWYU pragma: keep
+#include <boost/optional/optional.hpp>
 #include <glog/logging.h>
 
 #include "kudu/common/common.pb.h"
@@ -90,9 +91,17 @@ MasterPathHandlers::~MasterPathHandlers() {
 void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*/,
                                              Webserver::WebResponse* resp) {
   EasyJson* output = resp->output;
-  vector<std::shared_ptr<TSDescriptor>> descs;
+  vector<shared_ptr<TSDescriptor>> descs;
   master_->ts_manager()->GetAllDescriptors(&descs);
 
+  // Sort by UUID so the order remains consistent betweeen restarts.
+  std::sort(descs.begin(), descs.end(),
+            [](const shared_ptr<TSDescriptor>& left,
+               const shared_ptr<TSDescriptor>& right) {
+              DCHECK(left && right);
+              return left->permanent_uuid() < right->permanent_uuid();
+            });
+
   (*output)["num_ts"] = std::to_string(descs.size());
 
   // In mustache, when conditionally rendering a section of the template based
@@ -106,7 +115,7 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*
   output->Set("live_tservers", EasyJson::kArray);
   output->Set("dead_tservers", EasyJson::kArray);
   map<string, array<int, 2>> version_counts;
-  for (const std::shared_ptr<TSDescriptor>& desc : descs) {
+  for (const auto& desc : descs) {
     string ts_key = desc->PresumedDead() ? "dead_tservers" : "live_tservers";
     EasyJson ts_json = (*output)[ts_key].PushBack(EasyJson::kObject);
 
@@ -121,6 +130,7 @@ void MasterPathHandlers::HandleTabletServers(const Webserver::WebRequest& /*req*
     }
     ts_json["time_since_hb"] = StringPrintf("%.1fs", desc->TimeSinceHeartbeat().ToSeconds());
     ts_json["registration"] = pb_util::SecureShortDebugString(reg);
+    ts_json["location"] = desc->location().get_value_or("<none>");
     version_counts[reg.software_version()][desc->PresumedDead() ? 1 : 0]++;
     has_no_live_ts &= desc->PresumedDead();
     has_no_dead_ts &= !desc->PresumedDead();
@@ -599,9 +609,9 @@ void MasterPathHandlers::HandleDumpEntities(const Webserver::WebRequest& /*req*/
 
   jw.String("tablet_servers");
   jw.StartArray();
-  vector<std::shared_ptr<TSDescriptor> > descs;
+  vector<shared_ptr<TSDescriptor> > descs;
   master_->ts_manager()->GetAllDescriptors(&descs);
-  for (const std::shared_ptr<TSDescriptor>& desc : descs) {
+  for (const auto& desc : descs) {
     jw.StartObject();
 
     jw.String("uuid");

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/testdata/first_argument.sh
----------------------------------------------------------------------
diff --git a/src/kudu/master/testdata/first_argument.sh b/src/kudu/master/testdata/first_argument.sh
new file mode 100755
index 0000000..832908c
--- /dev/null
+++ b/src/kudu/master/testdata/first_argument.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# A script that prints the first argument and ignores all the others.
+echo "$1"

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/ts_descriptor-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor-test.cc b/src/kudu/master/ts_descriptor-test.cc
new file mode 100644
index 0000000..a7ec824
--- /dev/null
+++ b/src/kudu/master/ts_descriptor-test.cc
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "kudu/master/ts_descriptor.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <boost/optional/optional.hpp>
+#include <gflags/gflags_declare.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "kudu/common/common.pb.h"
+#include "kudu/common/wire_protocol.pb.h"
+#include "kudu/gutil/strings/substitute.h"
+#include "kudu/util/path_util.h"
+#include "kudu/util/status.h"
+#include "kudu/util/test_macros.h"
+#include "kudu/util/test_util.h"
+
+DECLARE_string(location_mapping_cmd);
+
+using std::shared_ptr;
+using std::string;
+using std::vector;
+using strings::Substitute;
+
+namespace kudu {
+namespace master {
+
+// Configures 'instance' and 'registration' with basic info that can be
+// used to register a tablet server.
+void SetupBasicRegistrationInfo(const string& uuid,
+                                NodeInstancePB* instance,
+                                ServerRegistrationPB* registration) {
+  CHECK_NOTNULL(instance);
+  CHECK_NOTNULL(registration);
+
+  instance->set_permanent_uuid(uuid);
+  instance->set_instance_seqno(0);
+
+  registration->clear_rpc_addresses();
+  for (const auto port : { 12345, 67890 }) {
+    auto* rpc_hostport = registration->add_rpc_addresses();
+    rpc_hostport->set_host("localhost");
+    rpc_hostport->set_port(port);
+  }
+  registration->clear_http_addresses();
+  auto* http_hostport = registration->add_http_addresses();
+  http_hostport->set_host("localhost");
+  http_hostport->set_port(54321);
+  registration->set_software_version("1.0.0");
+  registration->set_https_enabled(false);
+}
+
+TEST(TSDescriptorTest, TestRegistration) {
+  const string uuid = "test";
+  NodeInstancePB instance;
+  ServerRegistrationPB registration;
+  SetupBasicRegistrationInfo(uuid, &instance, &registration);
+  shared_ptr<TSDescriptor> desc;
+  ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc));
+
+  // Spot check some fields and the ToString value.
+  ASSERT_EQ(uuid, desc->permanent_uuid());
+  ASSERT_EQ(0, desc->latest_seqno());
+  // There is no location as --location_mapping_cmd is unset by default.
+  ASSERT_EQ(boost::none, desc->location());
+  ASSERT_EQ("test (localhost:12345)", desc->ToString());
+}
+
+TEST(TSDescriptorTest, TestLocationCmd) {
+  const string kLocationCmdPath = JoinPathSegments(GetTestExecutableDirectory(),
+                                                   "testdata/first_argument.sh");
+  // A happy case, using all allowed special characters.
+  const string location = "/foo-bar0/BAAZ._9-quux";
+  FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, location);
+
+  const string uuid = "test";
+  NodeInstancePB instance;
+  ServerRegistrationPB registration;
+  SetupBasicRegistrationInfo(uuid, &instance, &registration);
+  shared_ptr<TSDescriptor> desc;
+  ASSERT_OK(TSDescriptor::RegisterNew(instance, registration, &desc));
+
+  ASSERT_EQ(location, desc->location());
+
+  // Bad cases where the script returns locations with disallowed characters or
+  // in the wrong format.
+  const vector<string> bad_locations = {
+    "\"\"",      // Empty (doesn't begin with /).
+    "foo",       // Doesn't begin with /.
+    "/foo$",     // Contains the illegal character '$'.
+  };
+  for (const auto& bad_location : bad_locations) {
+    FLAGS_location_mapping_cmd = Substitute("$0 $1", kLocationCmdPath, bad_location);
+    ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError());
+  }
+
+  // Bad cases where the script is invalid.
+  const vector<string> bad_cmds = {
+    // No command provided.
+    " ",
+    // Command not found.
+    "notfound.sh",
+    // Command returns no output.
+    "true",
+    // Command fails.
+    "false",
+    // Command returns too many locations (i.e. contains illegal ' ' character).
+    Substitute("echo $0 $1", "/foo", "/bar"),
+  };
+  for (const auto& bad_cmd : bad_cmds) {
+    FLAGS_location_mapping_cmd = bad_cmd;
+    ASSERT_TRUE(desc->Register(instance, registration).IsRuntimeError());
+  }
+}
+} // namespace master
+} // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/ts_descriptor.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.cc b/src/kudu/master/ts_descriptor.cc
index 27b60b8..a360ccb 100644
--- a/src/kudu/master/ts_descriptor.cc
+++ b/src/kudu/master/ts_descriptor.cc
@@ -18,6 +18,7 @@
 #include "kudu/master/ts_descriptor.h"
 
 #include <cmath>
+#include <cstdio>
 #include <mutex>
 #include <ostream>
 #include <unordered_set>
@@ -25,17 +26,24 @@
 #include <vector>
 
 #include <gflags/gflags.h>
+#include <glog/logging.h>
 
 #include "kudu/common/common.pb.h"
 #include "kudu/common/wire_protocol.h"
 #include "kudu/common/wire_protocol.pb.h"
 #include "kudu/consensus/consensus.proxy.h"
+#include "kudu/gutil/strings/charset.h"
+#include "kudu/gutil/strings/split.h"
+#include "kudu/gutil/strings/strip.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/tserver/tserver_admin.proxy.h"
 #include "kudu/util/flag_tags.h"
+#include "kudu/util/logging.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
 #include "kudu/util/pb_util.h"
+#include "kudu/util/subprocess.h"
+#include "kudu/util/trace.h"
 
 DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000,
              "The period of time that a Master can go without receiving a heartbeat from a "
@@ -43,16 +51,84 @@ DEFINE_int32(tserver_unresponsive_timeout_ms, 60 * 1000,
              "selected when assigning replicas during table creation or re-replication.");
 TAG_FLAG(tserver_unresponsive_timeout_ms, advanced);
 
+DEFINE_string(location_mapping_cmd, "",
+              "A Unix command which takes a single argument, the IP address or "
+              "hostname of a tablet server or client, and returns the location "
+              "string for the tablet server. A location string begins with a / "
+              "and consists of /-separated tokens each of which contains only "
+              "characters from the set [a-zA-Z0-9_-.]. If the cluster is not "
+              "using location awareness features this flag should not be set.");
+TAG_FLAG(location_mapping_cmd, evolving);
+
 using kudu::pb_util::SecureDebugString;
 using kudu::pb_util::SecureShortDebugString;
 using std::make_shared;
 using std::shared_ptr;
 using std::string;
 using std::vector;
+using strings::Substitute;
 
 namespace kudu {
 namespace master {
 
+namespace {
+// Returns if 'location' is a valid location string, i.e. it begins with /
+// and consists of /-separated tokens each of which contains only characters
+// from the set [a-zA-Z0-9_-.].
+bool IsValidLocation(const string& location) {
+  if (location.empty() || location[0] != '/') {
+    return false;
+  }
+  const strings::CharSet charset("abcdefghijklmnopqrstuvwxyz"
+                                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                 "0123456789"
+                                 "_-./");
+  for (const auto c : location) {
+    if (!charset.Test(c)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Resolves 'host', which is the IP address or hostname of a tablet server or
+// client, into a location using the command 'cmd'. The result will be stored
+// in 'location', which must not be null. If there is an error running the
+// command or the output is invalid, an error Status will be returned.
+// TODO(wdberkeley): Eventually we may want to get multiple locations at once
+// by giving the script multiple arguments (like Hadoop).
+Status GetLocationFromLocationMappingCmd(const string& cmd,
+                                         const string& host,
+                                         string* location) {
+  DCHECK_NOTNULL(location);
+  vector<string> argv = strings::Split(cmd, " ", strings::SkipEmpty());
+  if (argv.empty()) {
+    return Status::RuntimeError("invalid empty location mapping command");
+  }
+  argv.push_back(host);
+  string stderr, location_temp;
+  Status s = Subprocess::Call(argv, /*stdin=*/"", &location_temp, &stderr);
+  if (!s.ok()) {
+    return Status::RuntimeError(
+        Substitute("failed to run location mapping command: $0", s.ToString()),
+        stderr);
+  }
+  StripWhiteSpace(&location_temp);
+  // Special case an empty location for a better error.
+  if (location_temp.empty()) {
+    return Status::RuntimeError(
+        "location mapping command returned invalid empty location");
+  }
+  if (!IsValidLocation(location_temp)) {
+    return Status::RuntimeError(
+        "location mapping command returned invalid location",
+        location_temp);
+  }
+  *location = std::move(location_temp);
+  return Status::OK();
+}
+} // anonymous namespace
+
 Status TSDescriptor::RegisterNew(const NodeInstancePB& instance,
                                  const ServerRegistrationPB& registration,
                                  shared_ptr<TSDescriptor>* desc) {
@@ -95,9 +171,8 @@ static bool HostPortPBsEqual(const google::protobuf::RepeatedPtrField<HostPortPB
   return hostports1 == hostports2;
 }
 
-Status TSDescriptor::Register(const NodeInstancePB& instance,
-                              const ServerRegistrationPB& registration) {
-  std::lock_guard<simple_spinlock> l(lock_);
+Status TSDescriptor::RegisterUnlocked(const NodeInstancePB& instance,
+                                      const ServerRegistrationPB& registration) {
   CHECK_EQ(instance.permanent_uuid(), permanent_uuid_);
 
   // TODO(KUDU-418): we don't currently support changing RPC addresses since the
@@ -137,6 +212,41 @@ Status TSDescriptor::Register(const NodeInstancePB& instance,
   registration_.reset(new ServerRegistrationPB(registration));
   ts_admin_proxy_.reset();
   consensus_proxy_.reset();
+  return Status::OK();
+}
+
+Status TSDescriptor::Register(const NodeInstancePB& instance,
+                              const ServerRegistrationPB& registration) {
+  // Do basic registration work under the lock.
+  {
+    std::lock_guard<simple_spinlock> l(lock_);
+    RETURN_NOT_OK(RegisterUnlocked(instance, registration));
+  }
+
+  // Resolve the location outside the lock. This involves calling the location
+  // mapping script.
+  const string& location_mapping_cmd = FLAGS_location_mapping_cmd;
+  if (!location_mapping_cmd.empty()) {
+    const auto& host = registration_->rpc_addresses(0).host();
+    string location;
+    TRACE("Assigning location");
+    Status s = GetLocationFromLocationMappingCmd(location_mapping_cmd,
+                                                 host,
+                                                 &location);
+    TRACE("Assigned location");
+
+    // Assign the location under the lock if location resolution succeeds. If
+    // it fails, log the error.
+    if (s.ok()) {
+      std::lock_guard<simple_spinlock> l(lock_);
+      location_.emplace(std::move(location));
+    } else {
+      KLOG_EVERY_N_SECS(ERROR, 60) << Substitute(
+          "Unable to assign location to tablet server $0: $1",
+          ToString(), s.ToString());
+      return s;
+    }
+  }
 
   return Status::OK();
 }

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/src/kudu/master/ts_descriptor.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/ts_descriptor.h b/src/kudu/master/ts_descriptor.h
index a26f68a..ad85fc0 100644
--- a/src/kudu/master/ts_descriptor.h
+++ b/src/kudu/master/ts_descriptor.h
@@ -22,6 +22,7 @@
 #include <mutex>
 #include <string>
 
+#include <boost/optional/optional.hpp>
 #include <glog/logging.h>
 #include <gtest/gtest_prod.h>
 
@@ -35,8 +36,8 @@
 namespace kudu {
 
 class NodeInstancePB;
-class Sockaddr;
 class ServerRegistrationPB;
+class Sockaddr;
 
 namespace consensus {
 class ConsensusServiceProxy;
@@ -54,7 +55,7 @@ namespace master {
 
 // Master-side view of a single tablet server.
 //
-// Tracks the last heartbeat, status, instance identifier, etc.
+// Tracks the last heartbeat, status, instance identifier, location, etc.
 // This class is thread-safe.
 class TSDescriptor : public enable_make_shared<TSDescriptor> {
  public:
@@ -119,6 +120,14 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
     return num_live_replicas_;
   }
 
+  // Return the location of the tablet server. This returns a safe copy
+  // since the location could change at any time if the tablet server
+  // re-registers.
+  boost::optional<std::string> location() const {
+    std::lock_guard<simple_spinlock> l(lock_);
+    return location_;
+  }
+
   // Return a string form of this TS, suitable for printing.
   // Includes the UUID as well as last known host/port.
   std::string ToString() const;
@@ -129,6 +138,9 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
  private:
   FRIEND_TEST(TestTSDescriptor, TestReplicaCreationsDecay);
 
+  Status RegisterUnlocked(const NodeInstancePB& instance,
+                          const ServerRegistrationPB& registration);
+
   // Uses DNS to resolve registered hosts to a single Sockaddr.
   // Returns the resolved address as well as the hostname associated with it
   // in 'addr' and 'host'.
@@ -152,6 +164,9 @@ class TSDescriptor : public enable_make_shared<TSDescriptor> {
   // The number of live replicas on this host, from the last heartbeat.
   int num_live_replicas_;
 
+  // The tablet server's location, as determined by the master at registration.
+  boost::optional<std::string> location_;
+
   gscoped_ptr<ServerRegistrationPB> registration_;
 
   std::shared_ptr<tserver::TabletServerAdminServiceProxy> ts_admin_proxy_;

http://git-wip-us.apache.org/repos/asf/kudu/blob/dcc39d53/www/tablet-servers.mustache
----------------------------------------------------------------------
diff --git a/www/tablet-servers.mustache b/www/tablet-servers.mustache
index 539c90b..a778f72 100644
--- a/www/tablet-servers.mustache
+++ b/www/tablet-servers.mustache
@@ -45,6 +45,7 @@ under the License.
   <table class='table table-striped'>
     <thead><tr>
       <th>UUID</th>
+      <th>Location</th>
       <th>Time since heartbeat</th>
       <th>Registration</th>
     </tr></thead>
@@ -52,6 +53,7 @@ under the License.
     {{#live_tservers}}
       <tr>
         <td>{{#target}}<a href="{{.}}">{{/target}}{{uuid}}{{#target}}</a>{{/target}}</td>
+        <td>{{location}}</td>
         <td>{{time_since_hb}}</td>
         <td><pre><code>{{registration}}</code></pre></td>
       </tr>


[2/3] kudu git commit: [java] Adjust the RetryRule to log to stdout

Posted by gr...@apache.org.
[java] Adjust the RetryRule to log to stdout

Changes the logging in RetryRule to use slf4j and
write to stdout. This makes serializing the logs and
failures more straightforward.

Additionally the stacktrace for each failure is printed
with the log message. This way, even if a retry passes,
we can see the reason for earlier failures.

Change-Id: I2608256d98c08f0ecfda52ea44289a0deca64fe0
Reviewed-on: http://gerrit.cloudera.org:8080/11382
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <ad...@cloudera.com>
Reviewed-by: Andrew Wong <aw...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/459be18f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/459be18f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/459be18f

Branch: refs/heads/master
Commit: 459be18fab2d722eafd03e03c608d9f86a4eb990
Parents: dcc39d5
Author: Grant Henke <gr...@apache.org>
Authored: Tue Sep 4 10:35:28 2018 -0500
Committer: Grant Henke <gr...@apache.org>
Committed: Tue Sep 4 22:49:03 2018 +0000

----------------------------------------------------------------------
 .../src/test/java/org/apache/kudu/junit/RetryRule.java        | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/459be18f/java/kudu-client/src/test/java/org/apache/kudu/junit/RetryRule.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/junit/RetryRule.java b/java/kudu-client/src/test/java/org/apache/kudu/junit/RetryRule.java
index 89bd84f..716fcab 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/junit/RetryRule.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/junit/RetryRule.java
@@ -19,6 +19,8 @@ package org.apache.kudu.junit;
 import org.junit.rules.TestRule;
 import org.junit.runner.Description;
 import org.junit.runners.model.Statement;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * A JUnit rule to retry failed tests.
@@ -28,6 +30,7 @@ import org.junit.runners.model.Statement;
  */
 public class RetryRule implements TestRule {
 
+  private static final Logger LOG = LoggerFactory.getLogger(RetryRule.class);
   private static final int RETRY_COUNT = Integer.getInteger("rerunFailingTestsCount", 0);
 
   public RetryRule () {}
@@ -66,10 +69,10 @@ public class RetryRule implements TestRule {
           return;
         } catch (Throwable t) {
           lastException = t;
-          System.err.println(description.getDisplayName() + ": run " + (i + 1) + " failed.");
+          LOG.error(description.getDisplayName() + ": failed run " + (i + 1), t);
         }
       }
-      System.err.println(description.getDisplayName() + ": giving up after " + retryCount + " failures.");
+      LOG.error(description.getDisplayName() + ": giving up after " + retryCount + " failures");
       throw lastException;
     }
   }


[3/3] kudu git commit: KUDU-2489: Improve runtime of slow java test.

Posted by gr...@apache.org.
KUDU-2489: Improve runtime of slow java  test.

Shortened the kerberos renew lifetime parameter and the
Thread.sleep parameters.  These changes cut the test runtime by ~30 seconds.

Change-Id: I19fa5185430a6c91fbe050dbc458b7b91e2d5bea
Reviewed-on: http://gerrit.cloudera.org:8080/11365
Tested-by: Kudu Jenkins
Reviewed-by: Grant Henke <gr...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/224f4792
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/224f4792
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/224f4792

Branch: refs/heads/master
Commit: 224f4792d9d443066be6a4a7f6442211a340c59b
Parents: 459be18
Author: Brian McDevitt <br...@phdata.io>
Authored: Thu Aug 30 16:15:34 2018 -0500
Committer: Grant Henke <gr...@apache.org>
Committed: Wed Sep 5 01:29:43 2018 +0000

----------------------------------------------------------------------
 .../src/test/java/org/apache/kudu/client/TestSecurity.java  | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/224f4792/java/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java b/java/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java
index 0ebcc50..a6d0f95 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestSecurity.java
@@ -52,7 +52,7 @@ import com.stumbleupon.async.Deferred;
 public class TestSecurity {
   private static final String TABLE_NAME = "TestSecurity-table";
   private static final int TICKET_LIFETIME_SECS = 10;
-  private static final int RENEWABLE_LIFETIME_SECS = 30;
+  private static final int RENEWABLE_LIFETIME_SECS = 20;
 
   private final CapturingLogAppender cla = new CapturingLogAppender();
   private MiniKuduCluster miniCluster;
@@ -313,7 +313,7 @@ public class TestSecurity {
     try (Closeable c = cla.attach()) {
       for (Stopwatch sw = Stopwatch.createStarted();
            sw.elapsed(TimeUnit.SECONDS) < RENEWABLE_LIFETIME_SECS * 2;) {
-        if (timeSinceKinit.elapsed(TimeUnit.SECONDS) > TICKET_LIFETIME_SECS + 5) {
+        if (timeSinceKinit.elapsed(TimeUnit.SECONDS) > TICKET_LIFETIME_SECS + 2) {
           // We have gotten past the initial lifetime and well into the renewable
           // lifetime. If we haven't failed yet, that means that Kudu
           // successfully renewed the ticket.
@@ -324,7 +324,7 @@ public class TestSecurity {
           miniCluster.kinit("test-admin");
           timeSinceKinit.reset().start();
         }
-        Thread.sleep(5000);
+        Thread.sleep(1000);
         // Ensure that we don't use an authentication token to reconnect.
         client.asyncClient.securityContext.setAuthenticationToken(null);
         checkClientCanReconnect(client);
@@ -418,6 +418,7 @@ public class TestSecurity {
   @Test(timeout=300000)
   public void testExternallyProvidedSubjectRefreshedExternally() throws Exception {
     startCluster(ImmutableSet.of(Option.SHORT_TOKENS_AND_TICKETS));
+
     Subject subject = SecurityUtil.getSubjectFromTicketCacheOrNull();
     Assert.assertNotNull(subject);
     try (Closeable c = cla.attach()) {
@@ -427,7 +428,7 @@ public class TestSecurity {
       // are indeed picking up the new credentials.
       for (Stopwatch sw = Stopwatch.createStarted();
           sw.elapsed(TimeUnit.SECONDS) < RENEWABLE_LIFETIME_SECS + 5;
-          Thread.sleep(3000)) {
+          Thread.sleep(1000)) {
         miniCluster.kinit("test-admin");
 
         // Update the existing subject in-place by copying over the credentials from