You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by yj...@apache.org on 2016/01/20 07:27:30 UTC

incubator-hawq git commit: HAWQ-344. When resource queue capacity is shrunk, deadlock detection may not be triggered

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 8f753f32d -> f7e6fc5d0


HAWQ-344. When resource queue capacity is shrunk, deadlock detection may not be triggered


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/f7e6fc5d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/f7e6fc5d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/f7e6fc5d

Branch: refs/heads/master
Commit: f7e6fc5d06da0b74daadb8fd10ded4f69645721f
Parents: 8f753f3
Author: YI JIN <yj...@pivotal.io>
Authored: Wed Jan 20 17:27:17 2016 +1100
Committer: YI JIN <yj...@pivotal.io>
Committed: Wed Jan 20 17:27:17 2016 +1100

----------------------------------------------------------------------
 .../resourcemanager/include/resqueuemanager.h   |  4 +-
 src/backend/resourcemanager/resqueuedeadlock.c  | 30 ++++++++--
 src/backend/resourcemanager/resqueuemanager.c   | 62 +++++++++++++-------
 3 files changed, 67 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/include/resqueuemanager.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/resqueuemanager.h b/src/backend/resourcemanager/include/resqueuemanager.h
index 86fb69a..0b38520 100644
--- a/src/backend/resourcemanager/include/resqueuemanager.h
+++ b/src/backend/resourcemanager/include/resqueuemanager.h
@@ -528,8 +528,8 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow);
 
 void cancelQueryRequestToBreakDeadLockInShadow(DynResourceQueueTrack shadowtrack,
 											   DQueueNode			 iter,
-											   uint32_t				 expmemorymb,
-											   uint32_t				 availmemorymb);
+											   int32_t				 expmemorymb,
+											   int32_t				 availmemorymb);
 
 /* Dump resource queue status to file system. */
 void dumpResourceQueueStatus(const char *filename);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/resqueuedeadlock.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resqueuedeadlock.c b/src/backend/resourcemanager/resqueuedeadlock.c
index 90404f2..3922b4b 100644
--- a/src/backend/resourcemanager/resqueuedeadlock.c
+++ b/src/backend/resourcemanager/resqueuedeadlock.c
@@ -59,6 +59,13 @@ int addSessionInUseResource(ResqueueDeadLockDetector detector,
 	addResourceBundleData(&(sessiontrack->InUseTotal), memorymb, core);
 	addResourceBundleData(&(detector->InUseTotal), memorymb, core);
 
+	elog(DEBUG3, "Deadlock detector adds in-use %d MB from session "INT64_FORMAT", "
+				 "has %d MB in use %d MB locked.",
+				 memorymb,
+				 sessionid,
+				 detector->InUseTotal.MemoryMB,
+				 detector->LockedTotal.MemoryMB);
+
 	return FUNC_RETURN_OK;
 }
 
@@ -97,6 +104,13 @@ int minusSessionInUseResource(ResqueueDeadLockDetector	detector,
 		removeHASHTABLENode(&(detector->Sessions), &key);
 	}
 
+	elog(DEBUG3, "Deadlock detector reduces in-use %d MB from session "INT64_FORMAT", "
+				 "has %d MB in use %d MB locked.",
+				 memorymb,
+				 sessionid,
+				 detector->InUseTotal.MemoryMB,
+				 detector->LockedTotal.MemoryMB);
+
 	return FUNC_RETURN_OK;
 }
 
@@ -132,9 +146,11 @@ void createAndLockSessionResource(ResqueueDeadLockDetector detector,
 	addResourceBundleDataByBundle(&(detector->LockedTotal),
 								  &(curstrack->InUseTotal));
 
-	elog(RMLOG, "Locked session "INT64_FORMAT" Locked %d MB",
-				sessionid,
-				detector->LockedTotal.MemoryMB);
+	elog(DEBUG3, "Deadlock detector locked session "INT64_FORMAT
+				 ", has %d MB in use %d MB locked",
+				 sessionid,
+				 detector->InUseTotal.MemoryMB,
+				 detector->LockedTotal.MemoryMB);
 }
 
 void unlockSessionResource(ResqueueDeadLockDetector detector,
@@ -156,9 +172,11 @@ void unlockSessionResource(ResqueueDeadLockDetector detector,
 									    &(sessiontrack->InUseTotal));
 		sessiontrack->Locked = false;
 
-		elog(DEBUG3, "Unlocked session "INT64_FORMAT " Locked %d MB",
-					 sessionid,
-					 detector->LockedTotal.MemoryMB);
+		elog(DEBUG3, "Deadlock detector unlocked session "INT64_FORMAT
+						 ", has %d MB in use %d MB locked",
+						 sessionid,
+						 detector->InUseTotal.MemoryMB,
+						 detector->LockedTotal.MemoryMB);
 	}
 
 	Assert(detector->LockedTotal.Core >= 0.0 &&

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f7e6fc5d/src/backend/resourcemanager/resqueuemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resqueuemanager.c b/src/backend/resourcemanager/resqueuemanager.c
index f358b3f..9ccf619 100644
--- a/src/backend/resourcemanager/resqueuemanager.c
+++ b/src/backend/resourcemanager/resqueuemanager.c
@@ -2968,6 +2968,9 @@ void dispatchResourceToQueries(void)
 {
 	bool 		hasresourceallocated = false;
 	bool 		hasrequest 		  	 = false;
+
+	elog(DEBUG3, "Resource manager tries to dispatch resource to queries.");
+
 	/*
 	 *--------------------------------------------------------------------------
 	 * STEP 1. Re-balance resource among different mem/core ratio trackers. After
@@ -2991,6 +2994,14 @@ void dispatchResourceToQueries(void)
 		if ( (mctrack->ClusterMemoryMaxMB == 0 || mctrack->ClusterVCoreMax == 0) ||
 			 (mctrack->TotalAllocated.MemoryMB == 0 && mctrack->TotalAllocated.Core == 0) )
 		{
+			elog(DEBUG3, "Resource manager skipped memory core ratio index %d, "
+						 "memory max limit %d MB, %lf CORE, "
+						 "total allocated %d MB, %lf CORE",
+						 i,
+						 mctrack->ClusterMemoryMaxMB,
+						 mctrack->ClusterVCoreMax,
+						 mctrack->TotalAllocated.MemoryMB,
+						 mctrack->TotalAllocated.Core);
 			continue;
 		}
 
@@ -3014,6 +3025,8 @@ void dispatchResourceToQueries(void)
 			/* Ignore the queues not in use. */
 			if ( !track->isBusy )
 			{
+				elog(DEBUG3, "Resource manager skips idle resource queue %s",
+							 track->QueueInfo->Name);
 				continue;
 			}
 
@@ -3043,6 +3056,10 @@ void dispatchResourceToQueries(void)
 							 expweight,
 							 track->TotalUsed.MemoryMB,
 							 track->TotalUsed.Core);
+
+				/* We still need to handle the resource queue dead lock here. */
+				detectAndDealWithDeadLock(track);
+
 			}
 			else
 			{
@@ -3834,7 +3851,7 @@ void refreshResourceQueuePercentageCapacityInternal(uint32_t clustermemmb,
 		}
 		else
 		{
-			track->ClusterVCoreMax = track->ClusterMemoryMaxMB / track->MemCoreRatio;
+			track->ClusterVCoreMax = 1.0 * track->ClusterMemoryMaxMB / track->MemCoreRatio;
 		}
 
 		/* Decide cluster segment resource quota. */
@@ -3949,6 +3966,9 @@ void dispatchResourceToQueriesInOneQueue(DynResourceQueueTrack track)
 	int			policy			= 0;
 	Assert( track != NULL );
 
+	elog(DEBUG3, "Resource manager dispatch resource in queue %s",
+				 track->QueueInfo->Name);
+
 	if ( track->QueryResRequests.NodeCount > 0 )
 	{
 		ConnectionTrack topwaiter = getDQueueHeadNodeData(&(track->QueryResRequests));
@@ -4168,7 +4188,6 @@ int dispatchResourceToQueries_EVEN(DynResourceQueueTrack track)
 
 	if ( counter == 0 )
 	{
-		/* TODO:: Maybe too conservative. */
 		detectAndDealWithDeadLock(track);
 		return FUNC_RETURN_OK; /* Expect requests are processed in next loop. */
 	}
@@ -5186,6 +5205,10 @@ int rebuildResourceQueueTrackDynamicStatusInShadow(DynResourceQueueTrack  quetra
 	copyResourceDeadLockDetectorWithoutLocking(&(quetrack->DLDetector),
 											   &(shadowtrack->DLDetector));
 
+	elog(DEBUG3, "Deadlock detector in shadow has %d MB in use %d MB locked.",
+				 shadowtrack->DLDetector.InUseTotal.MemoryMB,
+				 quetrack->DLDetector.LockedTotal.MemoryMB);
+
 	/* Go through all queued query resource requests, recalculate the request. */
 	DQUEUE_LOOP_BEGIN(&(quetrack->QueryResRequests), iter, ConnectionTrack, conn)
 
@@ -5263,6 +5286,11 @@ int rebuildResourceQueueTrackDynamicStatusInShadow(DynResourceQueueTrack  quetra
 		}
 	DQUEUE_LOOP_END
 
+	elog(DEBUG3, "Deadlock detector in shadow has %d MB in use %d MB locked "
+				 "after rebuilding.",
+				 shadowtrack->DLDetector.InUseTotal.MemoryMB,
+				 shadowtrack->DLDetector.LockedTotal.MemoryMB);
+
 	elog(LOG, "Finished rebuilding resource queue %s dynamic status in its shadow.",
 			  quetrack->QueueInfo->Name);
 
@@ -5276,12 +5304,12 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack,
 	Assert(quetrack->ShadowQueueTrack != NULL);
 	DynResourceQueueTrack shadowtrack = quetrack->ShadowQueueTrack;
 
-	elog(DEBUG3, "Deadlock detector has %d MB in use, %d MB locked",
+	elog(DEBUG3, "Deadlock detector in shadow has %d MB in use, %d MB locked",
 				 shadowtrack->DLDetector.InUseTotal.MemoryMB,
 				 shadowtrack->DLDetector.LockedTotal.MemoryMB);
 
 	/* Assume more available resource unlocked queued requests. */
-	uint32_t pavailmemorymb = 0;
+	int32_t pavailmemorymb = 0;
 
 	/* Go through all queued query resource requests, recalculate the request. */
 	DQUEUE_LOOP_BEGIN(&(shadowtrack->QueryResRequests), iter, ConnectionTrack, conn)
@@ -5293,10 +5321,10 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack,
 		}
 
 		/* Check if this connection has deadlock issue. */
-		uint32_t expmemorymb   = conn->SegMemoryMB * conn->SegNumMin;
-		uint32_t availmemorymb = shadowtrack->ClusterMemoryMaxMB -
-								 shadowtrack->DLDetector.LockedTotal.MemoryMB +
-								 pavailmemorymb;
+		int32_t expmemorymb   = conn->SegMemoryMB * conn->SegNumMin;
+		int32_t availmemorymb = shadowtrack->ClusterMemoryMaxMB -
+								shadowtrack->DLDetector.LockedTotal.MemoryMB +
+								pavailmemorymb;
 
 		/*----------------------------------------------------------------------
 		 * If the queue already uses more resource than its maximum capability,
@@ -5308,6 +5336,7 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack,
 						shadowtrack->ClusterMemoryMaxMB :
 						availmemorymb;
 
+		/* NOTE: availmemorymb maybe less than 0. */
 		if ( expmemorymb > availmemorymb )
 		{
 			/* We encounter a deadlock issue. */
@@ -5347,8 +5376,8 @@ int detectAndDealWithDeadLockInShadow(DynResourceQueueTrack quetrack,
 
 void cancelQueryRequestToBreakDeadLockInShadow(DynResourceQueueTrack shadowtrack,
 											   DQueueNode			 iter,
-											   uint32_t				 expmemorymb,
-											   uint32_t				 availmemorymb)
+											   int32_t				 expmemorymb,
+											   int32_t				 availmemorymb)
 {
 	static char errorbuf[ERRORMESSAGE_SIZE];
 	DQueueNode tailiter = getDQueueContainerTail(&(shadowtrack->QueryResRequests));
@@ -5441,8 +5470,8 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow)
 
 		/* The deadlock detector should use the new one completely. */
 		resetResourceDeadLockDetector(&(quetrack->DLDetector));
-		copyResourceDeadLockDetectorWithoutLocking(&(quetrack->DLDetector),
-												   &(shadowtrack->DLDetector));
+		copyResourceDeadLockDetectorWithoutLocking(&(shadowtrack->DLDetector),
+												   &(quetrack->DLDetector));
 
 		resetResourceBundleDataByBundle(&(quetrack->TotalUsed),
 										&(shadowtrack->TotalUsed));
@@ -5498,15 +5527,6 @@ void applyResourceQueueTrackChangesFromShadows(List *quehavingshadow)
 				MEMORY_CONTEXT_SWITCH_TO(PCONTEXT)
 				PCONTRACK->ConnToSend = lappend(PCONTRACK->ConnToSend, conn);
 				MEMORY_CONTEXT_SWITCH_BACK
-
-				/* Recycle connection track instance. */
-				quetrack->CurConnCounter--;
-				if ( quetrack->CurConnCounter == 0 )
-				{
-					quetrack->isBusy = false;
-					refreshMemoryCoreRatioLimits();
-					refreshMemoryCoreRatioWaterMark();
-				}
 			}
 			else
 			{