You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by wl...@apache.org on 2015/11/13 14:28:45 UTC
incubator-hawq git commit: HAWQ-159. Fix the bug where the heartbeat thread
is not canceled when HAWQ disconnects from libyarn
Repository: incubator-hawq
Updated Branches:
refs/heads/master 17f0a5642 -> 2ee1ed909
HAWQ-159. Fix the bug where the heartbeat thread is not canceled when HAWQ disconnects from libyarn
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2ee1ed90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2ee1ed90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2ee1ed90
Branch: refs/heads/master
Commit: 2ee1ed909c4bf5e4531cde84372b6a80e30b5af1
Parents: 17f0a56
Author: Wen Lin <wl...@pivotal.io>
Authored: Fri Nov 13 21:29:23 2015 +0800
Committer: Wen Lin <wl...@pivotal.io>
Committed: Fri Nov 13 21:29:23 2015 +0800
----------------------------------------------------------------------
depends/libyarn/src/CMakeLists.txt | 2 +-
.../libyarn/src/libyarnclient/LibYarnClient.cpp | 53 ++++++++++++++------
2 files changed, 38 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2ee1ed90/depends/libyarn/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/depends/libyarn/src/CMakeLists.txt b/depends/libyarn/src/CMakeLists.txt
index db5ccf5..c08ba1f 100644
--- a/depends/libyarn/src/CMakeLists.txt
+++ b/depends/libyarn/src/CMakeLists.txt
@@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
SET(libyarn_VERSION_MAJOR 0)
SET(libyarn_VERSION_MINOR 1)
-SET(libyarn_VERSION_PATCH 6)
+SET(libyarn_VERSION_PATCH 7)
SET(libyarn_VERSION_STRING "${libyarn_VERSION_MAJOR}.${libyarn_VERSION_MINOR}.${libyarn_VERSION_PATCH}")
SET(libyarn_VERSION_API 1)
SET(libyarn_ROOT_SOURCES_DIR ${CMAKE_SOURCE_DIR}/src)
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2ee1ed90/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
----------------------------------------------------------------------
diff --git a/depends/libyarn/src/libyarnclient/LibYarnClient.cpp b/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
index d0137e2..ff1dd98 100644
--- a/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
+++ b/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
@@ -42,7 +42,7 @@ LibYarnClient::LibYarnClient(string &user, string &rmHost, string &rmPort,
amUser(user), schedHost(schedHost), schedPort(schedPort), amHost(amHost),
amPort(amPort), am_tracking_url(am_tracking_url),
heartbeatInterval(heartbeatInterval),clientJobId(""),response_id(0),
- keepRun(true){
+ keepRun(false){
pthread_mutex_init( &(heartbeatLock), NULL );
amrmClient = NULL;
@@ -66,6 +66,21 @@ LibYarnClient::LibYarnClient(string &rmHost, string &rmPort, string &schedHost,
#endif
LibYarnClient::~LibYarnClient() {
+#ifndef MOCKTEST
+ if ( keepRun ) {
+ // No need to run heart-beat thread now.
+ keepRun = false;
+ void *thrc = NULL;
+ int rc = pthread_join(heartbeatThread, &thrc);
+ if ( rc != 0 ) {
+ LOG(INFO, "LibYarnClient::~LibYarnClient, fail to join heart-beat thread. "
+ "error code %d", rc);
+ }
+ else {
+ LOG(INFO, "LibYarnClient::~LibYarnClient, join heart-beat thread successfully.");
+ }
+ }
+#endif
if (amrmClient != NULL){
delete (ApplicationMaster*)amrmClient;
}
@@ -196,12 +211,14 @@ int LibYarnClient::createJob(string &jobName, string &queue,string &jobId) {
LOG(INFO, "LibYarnClient::createJob, registerApplicationMaster finished");
#ifndef MOCKTEST
+ keepRun = true;
//5. setup the heartbeat thread to allocate, release, heartbeat
int rc = pthread_create(&heartbeatThread, NULL, heartbeatFunc, this);
if ( rc != 0 ) {
- LOG(INFO, "LibYarnClient::createJob, fail to create heart-beat thread. "
- "error code %d", rc);
- throw std::runtime_error( "Fail to create heart-beat thread.");
+ keepRun = false;
+ LOG(INFO, "LibYarnClient::createJob, fail to create heart-beat thread. "
+ "error code %d", rc);
+ throw std::runtime_error( "Fail to create heart-beat thread.");
}
#endif
@@ -237,13 +254,15 @@ int LibYarnClient::forceKillJob(string &jobId) {
#ifndef MOCKTEST
if ( keepRun ) {
- keepRun=false;
+ keepRun = false;
void *thrc = NULL;
int rc = pthread_join(heartbeatThread, &thrc);
if ( rc != 0 ) {
LOG(INFO, "LibYarnClient::forceKillJob, fail to join heart-beat thread. "
"error code %d", rc);
return FR_FAILED;
+ } else {
+ LOG(INFO, "LibYarnClient::forceKillJob, join heart-beat thread successfully.");
}
}
#endif
@@ -737,17 +756,19 @@ int LibYarnClient::getActiveFailContainerIds(set<int> &activeFailIds){
int LibYarnClient::finishJob(string &jobId, FinalApplicationStatus finalStatus) {
#ifndef MOCKTEST
- if ( keepRun ) {
- // No need to run heart-beat thread now.
- keepRun=false;
- void *thrc = NULL;
- int rc = pthread_join(heartbeatThread, &thrc);
- if ( rc != 0 ) {
- LOG(INFO, "LibYarnClient::finishJob, fail to join heart-beat thread. "
- "error code %d", rc);
- return FR_FAILED;
- }
- }
+ if ( keepRun ) {
+ // No need to run heart-beat thread now.
+ keepRun = false;
+ void *thrc = NULL;
+ int rc = pthread_join(heartbeatThread, &thrc);
+ if ( rc != 0 ) {
+ LOG(INFO, "LibYarnClient::finishJob, fail to join heart-beat thread. "
+ "error code %d", rc);
+ return FR_FAILED;
+ } else {
+ LOG(INFO, "LibYarnClient::finishJob, join heart-beat thread successfully.");
+ }
+ }
#endif
try{