You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by tm...@apache.org on 2018/02/16 19:15:22 UTC

[1/3] impala git commit: IMPALA-6416: extend Thread::Create to track instance id

Repository: impala
Updated Branches:
  refs/heads/master 9d3702192 -> cb05241c8


IMPALA-6416: extend Thread::Create to track instance id

This commit builds upon IMPALA-3703. Each thread that
was created through Thread::Create() has a ThreadDebugInfo
object on the stack frame of Thread::SuperviseThread().
This object has stack allocated char buffers that can be
read during a debug session even if we only have minidumps.

However, with the old solution ThreadDebugInfo::instance_id
was set manually for each thread. It is too easy to forget
to set instance_id every time we create a new thread.

This commit has the assumption that if a thread has an
instance id associated, then the threads spawned by it will
always work on the same instance id. In Thread::StartThread
the parent thread passes its ThreadDebugInfo object to
its child who copies the instance id, and also stores the
name and system thread id of its parent.

This means if we set ThreadDebugInfo::instance_id in some
"root thread", then all descendant threads will annotate
themselves with the instance id automatically. Since threads
also record the name (and a system thread id) of their parent,
it might be also possible to reconstruct the thread creation
graph.

With GDB I tested if it copies the instance id at every
place where we previously needed to set it manually.

I added an automated test to thread-debug-info-test.cc

Change-Id: I27de2962cf0b224c17b685d77dcba3bf2e9db187
Reviewed-on: http://gerrit.cloudera.org:8080/9053
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/61f892a6
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/61f892a6
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/61f892a6

Branch: refs/heads/master
Commit: 61f892a60c88650ed05799ca06d2f74cfffc845f
Parents: 9d37021
Author: Zoltan Borok-Nagy <bo...@cloudera.com>
Authored: Thu Jan 18 17:03:32 2018 +0100
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Feb 15 21:58:06 2018 +0000

----------------------------------------------------------------------
 be/src/common/thread-debug-info-test.cc   | 31 ++++++++++++++++++++++++
 be/src/common/thread-debug-info.cc        |  1 -
 be/src/common/thread-debug-info.h         | 33 +++++++++++++++++++++-----
 be/src/exec/blocking-join-node.cc         |  2 --
 be/src/exec/hdfs-scan-node.cc             |  7 +-----
 be/src/runtime/fragment-instance-state.cc |  6 +----
 be/src/util/thread.cc                     | 10 ++++----
 be/src/util/thread.h                      | 10 +++++++-
 8 files changed, 75 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/common/thread-debug-info-test.cc
----------------------------------------------------------------------
diff --git a/be/src/common/thread-debug-info-test.cc b/be/src/common/thread-debug-info-test.cc
index 1700c48..c2ac4bb 100644
--- a/be/src/common/thread-debug-info-test.cc
+++ b/be/src/common/thread-debug-info-test.cc
@@ -19,6 +19,7 @@
 
 #include "common/thread-debug-info.h"
 #include "testutil/gtest-util.h"
+#include "util/thread.h"
 
 #include "common/names.h"
 
@@ -66,6 +67,36 @@ TEST(ThreadDebugInfo, Global) {
   EXPECT_EQ(&thread_debug_info, global_thread_debug_info);
 }
 
+TEST(ThreadDebugInfo, ThreadCreateRelationships) {
+  // Checks if child thread extracts debug info from parent automatically.
+  // Child's thread name is given in Thread::Create
+  // Child's instance_id_ should be the same as parent's instance_id_
+  // Child should store a copy of its parent's thread name.
+  // Child should store its parent's system thread id.
+  string parent_name = "Parent";
+  string child_name = "Child";
+
+  ThreadDebugInfo parent_tdi;
+  parent_tdi.SetThreadName(parent_name);
+  TUniqueId uid;
+  uid.hi = 123;
+  uid.lo = 456;
+  parent_tdi.SetInstanceId(uid);
+
+  std::unique_ptr<Thread> child_thread;
+  auto f = [uid, child_name, parent_name, &parent_tdi]() {
+    // In child's thread the global ThreadDebugInfo object points to the child's own
+    // ThreadDebugInfo object which was automatically created in Thread::SuperviseThread
+    ThreadDebugInfo* child_tdi = GetThreadDebugInfo();
+    EXPECT_EQ(child_name, child_tdi->GetThreadName());
+    EXPECT_EQ(PrintId(uid), child_tdi->GetInstanceId());
+    EXPECT_EQ(parent_name, child_tdi->GetParentThreadName());
+    EXPECT_EQ(parent_tdi.GetSystemThreadId(), child_tdi->GetParentSystemThreadId());
+  };
+  ASSERT_OK(Thread::Create("Test", child_name, f, &child_thread));
+  child_thread->Join();
+}
+
 }
 
 IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/common/thread-debug-info.cc
----------------------------------------------------------------------
diff --git a/be/src/common/thread-debug-info.cc b/be/src/common/thread-debug-info.cc
index 5e642e6..b775dc0 100644
--- a/be/src/common/thread-debug-info.cc
+++ b/be/src/common/thread-debug-info.cc
@@ -33,7 +33,6 @@ void ThreadDebugInfo::CloseThreadDebugInfo() {
 }
 
 ThreadDebugInfo* GetThreadDebugInfo() {
-  DCHECK(thread_debug_info != nullptr);
   return thread_debug_info;
 }
 

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/common/thread-debug-info.h
----------------------------------------------------------------------
diff --git a/be/src/common/thread-debug-info.h b/be/src/common/thread-debug-info.h
index 6219ee8..d5100c0 100644
--- a/be/src/common/thread-debug-info.h
+++ b/be/src/common/thread-debug-info.h
@@ -19,13 +19,14 @@
 #define IMPALA_COMMON_THREAD_DEBUG_INFO_H
 
 #include <string>
+#include <sys/syscall.h>
+#include <unistd.h>
 
 #include "glog/logging.h"
 #include "gutil/macros.h"
+#include "gutil/strings/util.h"
 #include "util/debug-util.h"
 
-#include "common/names.h"
-
 namespace impala {
 
 /// Stores information about the current thread that can be useful in a debug session.
@@ -39,6 +40,8 @@ public:
   /// Only one ThreadDebugInfo object can be alive per thread at a time.
   /// This object is not copyable, nor movable
   ThreadDebugInfo() {
+    system_thread_id_ = syscall(SYS_gettid);
+
     // This call makes the global (thread local) pointer point to this object.
     InitializeThreadDebugInfo(this);
   }
@@ -50,10 +53,13 @@ public:
 
   const char* GetInstanceId() const { return instance_id_; }
   const char* GetThreadName() const { return thread_name_; }
+  int64_t GetSystemThreadId() const { return system_thread_id_; }
+  int64_t GetParentSystemThreadId() const { return parent_.system_thread_id_; }
+  const char* GetParentThreadName() const { return parent_.thread_name_; }
 
   /// Saves the string representation of param 'instance_id' to member 'instance_id_'
   void SetInstanceId(const TUniqueId& instance_id) {
-    string id_str = PrintId(instance_id);
+    std::string id_str = PrintId(instance_id);
     DCHECK_LT(id_str.length(), TUNIQUE_ID_STRING_SIZE);
     id_str.copy(instance_id_, id_str.length());
   }
@@ -62,7 +68,7 @@ public:
   /// If the length of param 'thread_name' is larger than THREAD_NAME_SIZE,
   /// we store the front of 'thread_name' + '...' + the last few bytes
   /// of thread name, e.g.: "Long Threadname with more te...001afec4)"
-  void SetThreadName(const string& thread_name) {
+  void SetThreadName(const std::string& thread_name) {
     const int64_t length = thread_name.length();
 
     if (length < THREAD_NAME_SIZE) {
@@ -81,6 +87,13 @@ public:
     }
   }
 
+  void SetParentInfo(const ThreadDebugInfo* parent) {
+    if (parent == nullptr) return;
+    parent_.system_thread_id_ = parent->system_thread_id_;
+    strings::strlcpy(instance_id_, parent->instance_id_, TUNIQUE_ID_STRING_SIZE);
+    strings::strlcpy(parent_.thread_name_, parent->thread_name_, THREAD_NAME_SIZE);
+  }
+
 private:
   /// Initializes a thread local pointer with thread_debug_info.
   static void InitializeThreadDebugInfo(ThreadDebugInfo* thread_debug_info);
@@ -91,14 +104,22 @@ private:
   static constexpr int64_t THREAD_NAME_SIZE = 256;
   static constexpr int64_t THREAD_NAME_TAIL_LENGTH = 8;
 
-  char instance_id_[TUNIQUE_ID_STRING_SIZE] = {};
+  /// This struct contains information we want to store about the parent.
+  struct ParentInfo {
+    int64_t system_thread_id_ = 0;
+    char thread_name_[THREAD_NAME_SIZE] = {};
+  };
+
+  ParentInfo parent_;
+  int64_t system_thread_id_ = 0;
   char thread_name_[THREAD_NAME_SIZE] = {};
+  char instance_id_[TUNIQUE_ID_STRING_SIZE] = {};
 
   DISALLOW_COPY_AND_ASSIGN(ThreadDebugInfo);
 };
 
 /// Returns a pointer to the ThreadDebugInfo object for this thread.
-/// The ThreadDebugInfo object needs to be created before this function is called.
+/// Returns nullptr if there is no ThreadDebugInfo object for the current thread.
 ThreadDebugInfo* GetThreadDebugInfo();
 
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/exec/blocking-join-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/blocking-join-node.cc b/be/src/exec/blocking-join-node.cc
index 944aba8..7adea7f 100644
--- a/be/src/exec/blocking-join-node.cc
+++ b/be/src/exec/blocking-join-node.cc
@@ -19,7 +19,6 @@
 
 #include <sstream>
 
-#include "common/thread-debug-info.h"
 #include "exec/data-sink.h"
 #include "exprs/scalar-expr.h"
 #include "runtime/fragment-instance-state.h"
@@ -196,7 +195,6 @@ Status BlockingJoinNode::ProcessBuildInputAndOpenProbe(
     unique_ptr<Thread> build_thread;
     Status thread_status = Thread::Create(FragmentInstanceState::FINST_THREAD_GROUP_NAME,
         thread_name, [this, state, build_sink, status=&build_side_status]() {
-          GetThreadDebugInfo()->SetInstanceId(state->fragment_instance_id());
           ProcessBuildInputAsync(state, build_sink, status);
         }, &build_thread, true);
     if (!thread_status.ok()) {

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/exec/hdfs-scan-node.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/hdfs-scan-node.cc b/be/src/exec/hdfs-scan-node.cc
index 235c799..710a8af 100644
--- a/be/src/exec/hdfs-scan-node.cc
+++ b/be/src/exec/hdfs-scan-node.cc
@@ -21,7 +21,6 @@
 #include <sstream>
 
 #include "common/logging.h"
-#include "common/thread-debug-info.h"
 #include "exec/base-sequence-scanner.h"
 #include "exec/hdfs-scanner.h"
 #include "exec/scanner-context.h"
@@ -348,11 +347,7 @@ void HdfsScanNode::ThreadTokenAvailableCb(ThreadResourceMgr::ResourcePool* pool)
         PrintId(runtime_state_->fragment_instance_id()), id(),
         num_scanner_threads_started_counter_->value());
 
-    auto fn = [this]() {
-      RuntimeState* state = this->runtime_state();
-      GetThreadDebugInfo()->SetInstanceId(state->fragment_instance_id());
-      this->ScannerThread();
-    };
+    auto fn = [this]() { this->ScannerThread(); };
     std::unique_ptr<Thread> t;
     status =
       Thread::Create(FragmentInstanceState::FINST_THREAD_GROUP_NAME, name, fn, &t, true);

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/runtime/fragment-instance-state.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/fragment-instance-state.cc b/be/src/runtime/fragment-instance-state.cc
index 91b33a7..c5c57a3 100644
--- a/be/src/runtime/fragment-instance-state.cc
+++ b/be/src/runtime/fragment-instance-state.cc
@@ -26,7 +26,6 @@
 #include <boost/date_time/posix_time/posix_time_types.hpp>
 
 #include "common/names.h"
-#include "common/thread-debug-info.h"
 #include "codegen/llvm-codegen.h"
 #include "exec/plan-root-sink.h"
 #include "exec/exec-node.h"
@@ -225,10 +224,7 @@ Status FragmentInstanceState::Prepare() {
     string thread_name = Substitute("profile-report (finst:$0)", PrintId(instance_id()));
     unique_lock<mutex> l(report_thread_lock_);
     RETURN_IF_ERROR(Thread::Create(FragmentInstanceState::FINST_THREAD_GROUP_NAME,
-        thread_name, [this]() {
-          GetThreadDebugInfo()->SetInstanceId(this->instance_id());
-          this->ReportProfileThread();
-        }, &report_thread_, true));
+        thread_name, [this]() { this->ReportProfileThread(); }, &report_thread_, true));
     // Make sure the thread started up, otherwise ReportProfileThread() might get into
     // a race with StopReportThread().
     while (!report_thread_active_) report_thread_started_cv_.Wait(l);

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/util/thread.cc
----------------------------------------------------------------------
diff --git a/be/src/util/thread.cc b/be/src/util/thread.cc
index 8397f35..e3f07ba 100644
--- a/be/src/util/thread.cc
+++ b/be/src/util/thread.cc
@@ -311,7 +311,7 @@ Status Thread::StartThread(const std::string& category, const std::string& name,
   try {
     t->thread_.reset(
         new boost::thread(&Thread::SuperviseThread, t->name_, t->category_, functor,
-            &thread_started));
+            GetThreadDebugInfo(), &thread_started));
   } catch (boost::thread_resource_error& e) {
     return Status(TErrorCode::THREAD_CREATION_FAILED, name, category, e.what());
   }
@@ -327,7 +327,8 @@ Status Thread::StartThread(const std::string& category, const std::string& name,
 }
 
 void Thread::SuperviseThread(const string& name, const string& category,
-    Thread::ThreadFunctor functor, Promise<int64_t>* thread_started) {
+    Thread::ThreadFunctor functor, const ThreadDebugInfo* parent_thread_info,
+    Promise<int64_t>* thread_started) {
   int64_t system_tid = syscall(SYS_gettid);
   if (system_tid == -1) {
     string error_msg = GetStrErrMsg();
@@ -340,12 +341,13 @@ void Thread::SuperviseThread(const string& name, const string& category,
 
   // Use boost's get_id rather than the system thread ID as the unique key for this thread
   // since the latter is more prone to being recycled.
-
   thread_mgr_ref->AddThread(this_thread::get_id(), name_copy, category_copy, system_tid);
-  thread_started->Set(system_tid);
 
   ThreadDebugInfo thread_debug_info;
   thread_debug_info.SetThreadName(name_copy);
+  thread_debug_info.SetParentInfo(parent_thread_info);
+
+  thread_started->Set(system_tid);
 
   // Any reference to any parameter not copied in by value may no longer be valid after
   // this point, since the caller that is waiting on *tid != 0 may wake, take the lock and

http://git-wip-us.apache.org/repos/asf/impala/blob/61f892a6/be/src/util/thread.h
----------------------------------------------------------------------
diff --git a/be/src/util/thread.h b/be/src/util/thread.h
index 6898517..b114e26 100644
--- a/be/src/util/thread.h
+++ b/be/src/util/thread.h
@@ -32,6 +32,7 @@
 namespace impala {
 
 class MetricGroup;
+class ThreadDebugInfo;
 class Webserver;
 
 /// Thin wrapper around boost::thread that can register itself with the singleton
@@ -179,8 +180,15 @@ class Thread {
   /// As a result, the 'functor' parameter is deliberately copied into this method, since
   /// it is used after the notification completes.h The tid parameter is written to
   /// exactly once before SuperviseThread() notifies the caller.
+  ///
+  /// parent_thread_info points to the parent thread's ThreadDebugInfo object if the
+  /// parent has one, otherwise it's a nullptr. As part of the initialisation
+  /// SuperviseThread() copies the useful information from the parent's ThreadDebugInfo
+  /// info object to its own TDI object. This way the TDI objects can preserve the thread
+  /// creation graph.
   static void SuperviseThread(const std::string& name, const std::string& category,
-      ThreadFunctor functor, Promise<int64_t>* thread_started);
+      Thread::ThreadFunctor functor, const ThreadDebugInfo* parent_thread_info,
+      Promise<int64_t>* thread_started);
 };
 
 /// Utility class to group together a set of threads. A replacement for


[3/3] impala git commit: Bump Kudu Java version to 1.7.0

Posted by tm...@apache.org.
Bump Kudu Java version to 1.7.0

Change-Id: I69a9bc9e36657a936591f723cf4110a1297973ab
Reviewed-on: http://gerrit.cloudera.org:8080/9349
Reviewed-by: Grant Henke <gr...@gmail.com>
Reviewed-by: Philip Zeyliger <ph...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/cb05241c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/cb05241c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/cb05241c

Branch: refs/heads/master
Commit: cb05241c87b69f76005334158733ab580e48b320
Parents: be675e1
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Feb 15 20:47:19 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 16 08:46:11 2018 +0000

----------------------------------------------------------------------
 bin/impala-config.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/cb05241c/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index d13e68b..15b9116 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -162,7 +162,7 @@ export IMPALA_KUDU_VERSION=0eef8e0
 unset IMPALA_KUDU_URL
 
 # Kudu version used to identify Java client jar from maven
-export KUDU_JAVA_VERSION=1.6.0-cdh5.15.0-SNAPSHOT
+export KUDU_JAVA_VERSION=1.7.0-cdh5.15.0-SNAPSHOT
 
 # Versions of Hadoop ecosystem dependencies.
 # ------------------------------------------


[2/3] impala git commit: Bump Kudu version to 0eef8e0

Posted by tm...@apache.org.
Bump Kudu version to 0eef8e0

This pulls in support for DECIMAL.

Change-Id: Id983ceb8806b2a002fadb19c065267d45f4c65db
Reviewed-on: http://gerrit.cloudera.org:8080/9348
Reviewed-by: Lars Volker <lv...@cloudera.com>
Reviewed-by: Grant Henke <gr...@gmail.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/be675e15
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/be675e15
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/be675e15

Branch: refs/heads/master
Commit: be675e15578eda6285c9558c21bc16ce9dfb7ffc
Parents: 61f892a
Author: Thomas Tauber-Marshall <tm...@cloudera.com>
Authored: Thu Feb 15 18:29:33 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Feb 16 06:36:48 2018 +0000

----------------------------------------------------------------------
 be/src/exec/kudu-util.cc | 3 +++
 bin/impala-config.sh     | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/be675e15/be/src/exec/kudu-util.cc
----------------------------------------------------------------------
diff --git a/be/src/exec/kudu-util.cc b/be/src/exec/kudu-util.cc
index 03cb51f..d32df25 100644
--- a/be/src/exec/kudu-util.cc
+++ b/be/src/exec/kudu-util.cc
@@ -193,6 +193,9 @@ ColumnType KuduDataTypeToColumnType(DataType type) {
     case DataType::DOUBLE: return ColumnType(PrimitiveType::TYPE_DOUBLE);
     case DataType::BINARY: return ColumnType(PrimitiveType::TYPE_BINARY);
     case DataType::UNIXTIME_MICROS: return ColumnType(PrimitiveType::TYPE_TIMESTAMP);
+    case DataType::DECIMAL:
+      DCHECK(false) << "DECIMAL is not supported on Kudu.";
+      return ColumnType(PrimitiveType::INVALID_TYPE);
   }
   return ColumnType(PrimitiveType::INVALID_TYPE);
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/be675e15/bin/impala-config.sh
----------------------------------------------------------------------
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index bb1cb15..d13e68b 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -72,7 +72,7 @@ fi
 # moving to a different build of the toolchain, e.g. when a version is bumped or a
 # compile option is changed. The build id can be found in the output of the toolchain
 # build jobs, it is constructed from the build number and toolchain git hash prefix.
-export IMPALA_TOOLCHAIN_BUILD_ID=39-a1bbd2851a
+export IMPALA_TOOLCHAIN_BUILD_ID=53-d95bb7f778
 # Versions of toolchain dependencies.
 # -----------------------------------
 export IMPALA_AVRO_VERSION=1.7.4-p4
@@ -158,7 +158,7 @@ if [[ $OSTYPE == "darwin"* ]]; then
 fi
 
 # Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
-export IMPALA_KUDU_VERSION=1520b39
+export IMPALA_KUDU_VERSION=0eef8e0
 unset IMPALA_KUDU_URL
 
 # Kudu version used to identify Java client jar from maven