You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by wl...@apache.org on 2015/11/13 14:28:45 UTC

incubator-hawq git commit: HAWQ-159. Fix the bug where the heartbeat thread is not canceled when hawq disconnects from libyarn

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 17f0a5642 -> 2ee1ed909


HAWQ-159. Fix the bug where the heartbeat thread is not canceled when hawq disconnects from libyarn


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2ee1ed90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2ee1ed90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2ee1ed90

Branch: refs/heads/master
Commit: 2ee1ed909c4bf5e4531cde84372b6a80e30b5af1
Parents: 17f0a56
Author: Wen Lin <wl...@pivotal.io>
Authored: Fri Nov 13 21:29:23 2015 +0800
Committer: Wen Lin <wl...@pivotal.io>
Committed: Fri Nov 13 21:29:23 2015 +0800

----------------------------------------------------------------------
 depends/libyarn/src/CMakeLists.txt              |  2 +-
 .../libyarn/src/libyarnclient/LibYarnClient.cpp | 53 ++++++++++++++------
 2 files changed, 38 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2ee1ed90/depends/libyarn/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/depends/libyarn/src/CMakeLists.txt b/depends/libyarn/src/CMakeLists.txt
index db5ccf5..c08ba1f 100644
--- a/depends/libyarn/src/CMakeLists.txt
+++ b/depends/libyarn/src/CMakeLists.txt
@@ -2,7 +2,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
 
 SET(libyarn_VERSION_MAJOR 0)
 SET(libyarn_VERSION_MINOR 1)
-SET(libyarn_VERSION_PATCH 6)
+SET(libyarn_VERSION_PATCH 7)
 SET(libyarn_VERSION_STRING "${libyarn_VERSION_MAJOR}.${libyarn_VERSION_MINOR}.${libyarn_VERSION_PATCH}")
 SET(libyarn_VERSION_API 1)
 SET(libyarn_ROOT_SOURCES_DIR ${CMAKE_SOURCE_DIR}/src)

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2ee1ed90/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
----------------------------------------------------------------------
diff --git a/depends/libyarn/src/libyarnclient/LibYarnClient.cpp b/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
index d0137e2..ff1dd98 100644
--- a/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
+++ b/depends/libyarn/src/libyarnclient/LibYarnClient.cpp
@@ -42,7 +42,7 @@ LibYarnClient::LibYarnClient(string &user, string &rmHost, string &rmPort,
 		amUser(user), schedHost(schedHost), schedPort(schedPort), amHost(amHost),
 		amPort(amPort), am_tracking_url(am_tracking_url),
 		heartbeatInterval(heartbeatInterval),clientJobId(""),response_id(0),
-		keepRun(true){
+		keepRun(false){
         pthread_mutex_init( &(heartbeatLock), NULL );
 
 		amrmClient = NULL;
@@ -66,6 +66,21 @@ LibYarnClient::LibYarnClient(string &rmHost, string &rmPort, string &schedHost,
 #endif
 
 LibYarnClient::~LibYarnClient() {
+#ifndef MOCKTEST
+	if ( keepRun ) {
+		// No need to run heart-beat thread now.
+		keepRun = false;
+		void *thrc = NULL;
+		int rc = pthread_join(heartbeatThread, &thrc);
+		if ( rc != 0 ) {
+			LOG(INFO, "LibYarnClient::~LibYarnClient, fail to join heart-beat thread. "
+						"error code %d", rc);
+		}
+		else {
+			LOG(INFO, "LibYarnClient::~LibYarnClient, join heart-beat thread successfully.");
+		}
+	}
+#endif
 	if (amrmClient != NULL){
 		delete (ApplicationMaster*)amrmClient;
 	}
@@ -196,12 +211,14 @@ int LibYarnClient::createJob(string &jobName, string &queue,string &jobId) {
         LOG(INFO, "LibYarnClient::createJob, registerApplicationMaster finished");
 
 #ifndef MOCKTEST
+        keepRun = true;
         //5. setup the heartbeat thread to allocate, release, heartbeat
         int rc = pthread_create(&heartbeatThread, NULL, heartbeatFunc, this);
         if ( rc != 0 ) {
-        	LOG(INFO, "LibYarnClient::createJob, fail to create heart-beat thread. "
-        			  "error code %d", rc);
-        	throw std::runtime_error( "Fail to create heart-beat thread.");
+            keepRun = false;
+            LOG(INFO, "LibYarnClient::createJob, fail to create heart-beat thread. "
+                      "error code %d", rc);
+            throw std::runtime_error( "Fail to create heart-beat thread.");
         }
 #endif
 
@@ -237,13 +254,15 @@ int LibYarnClient::forceKillJob(string &jobId) {
 
 #ifndef MOCKTEST
     if ( keepRun ) {
-        keepRun=false;
+        keepRun = false;
         void *thrc = NULL;
         int rc = pthread_join(heartbeatThread, &thrc);
         if ( rc != 0 ) {
             LOG(INFO, "LibYarnClient::forceKillJob, fail to join heart-beat thread. "
                       "error code %d", rc);
             return FR_FAILED;
+        } else {
+            LOG(INFO, "LibYarnClient::forceKillJob, join heart-beat thread successfully.");
         }
     }
 #endif
@@ -737,17 +756,19 @@ int LibYarnClient::getActiveFailContainerIds(set<int> &activeFailIds){
 int LibYarnClient::finishJob(string &jobId, FinalApplicationStatus finalStatus) {
 
 #ifndef MOCKTEST
-	if ( keepRun ) {
-    	// No need to run heart-beat thread now.
-    	keepRun=false;
-    	void *thrc = NULL;
-    	int rc = pthread_join(heartbeatThread, &thrc);
-    	if ( rc != 0 ) {
-			LOG(INFO, "LibYarnClient::finishJob, fail to join heart-beat thread. "
-					  "error code %d", rc);
-			return FR_FAILED;
-    	}
-	}
+    if ( keepRun ) {
+        // No need to run heart-beat thread now.
+        keepRun = false;
+        void *thrc = NULL;
+        int rc = pthread_join(heartbeatThread, &thrc);
+        if ( rc != 0 ) {
+            LOG(INFO, "LibYarnClient::finishJob, fail to join heart-beat thread. "
+                        "error code %d", rc);
+            return FR_FAILED;
+        } else {
+            LOG(INFO, "LibYarnClient::finishJob, join heart-beat thread successfully.");
+        }
+    }
 #endif
 
 	try{