You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hawq.apache.org by jiny2 <gi...@git.apache.org> on 2015/12/08 23:25:53 UTC

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

GitHub user jiny2 opened a pull request:

    https://github.com/apache/incubator-hawq/pull/169

    HAWQ-234. Improve HAWQ resource manager resource allocation algorithm…

    1) reject a resource request from the dispatcher at once if too many segments are unavailable;
    2) explain how many segments are unavailable in the current cluster when a resource request is rejected;
    3) generate error message strings only in the resource manager process for all resource manager RPCs;
    4) change the semantic meanings of the NVSEG_* limits to the latest version;
    5) the minimum vseg number for one resource request is changed to queue vseg num / active_statements, but requests that require only a few vsegs are not changed.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/jiny2/incubator-hawq HAWQ-234

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/incubator-hawq/pull/169.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #169
    
----
commit 83c6aa9c581547a0a7356f1533bc6423a11af9a2
Author: Yi Jin <yj...@pivotal.io>
Date:   2015-12-08T22:24:34Z

    HAWQ-234. Improve HAWQ resource manager resource allocation algorithm and RPC framework

----


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47337343
  
    --- Diff: src/backend/resourcemanager/resqueuemanager.c ---
    @@ -2165,103 +2165,87 @@ void cancelResourceAllocRequest(ConnectionTrack conntrack)
     	/* Unlock session in deadlock */
     	unlockSessionResource(&(queuetrack->DLDetector), conntrack->SessionID);
     
    -	buildTimeoutResponseForQueuedRequest(conntrack, RESQUEMGR_NORESOURCE_TIMEOUT);
    +	buildTimeoutResponseForQueuedRequest(conntrack,
    +										 RESQUEMGR_NORESOURCE_TIMEOUT,
    +										 errorbuf);
     }
     
     /* Acquire resource from queue. */
    -int acquireResourceFromResQueMgr(ConnectionTrack conntrack)
    +int acquireResourceFromResQueMgr(ConnectionTrack  conntrack,
    +								 char 			 *errorbuf,
    +								 int 			  errorbufsize)
     {
    -	int						res			  	= FUNC_RETURN_OK;
    -
    -	DynResourceQueueTrack   queuetrack	  	= conntrack->QueueTrack;
    -
    -	if ( queuetrack->ClusterSegNumberMax == 0 )
    -	{
    -		elog(LOG, "The queue %s has no resource available to run queries.",
    -				  queuetrack->QueueInfo->Name);
    -		return RESQUEMGR_NO_RESOURCE;
    -	}
    +	int						res			= FUNC_RETURN_OK;
    +	DynResourceQueueTrack	queuetrack	= conntrack->QueueTrack;
     
     	/* Call quota logic to make decision of resource for current query. */
    -	res = computeQueryQuota(conntrack);
    +	res = computeQueryQuota(conntrack, errorbuf, errorbufsize);
     
     	if ( res == FUNC_RETURN_OK )
     	{
     		if ( conntrack->StatNVSeg == 0 )
     		{
    -			int32_t Rmax  = conntrack->SegNum;
    -			int32_t RmaxL = conntrack->VSegLimitPerSeg * PRESPOOL->AvailNodeCount;
    -			int32_t Rmin  = conntrack->SegNumMin;
    -			elog(LOG, "Original quota min seg num:%d, max seg num:%d",
    -					  conntrack->SegNumMin,
    -					  conntrack->SegNum);
    -
    -			/* Ensure quota [min,max] is between request [min,max] */
    -			int32_t Gmax= conntrack->MaxSegCountFixed;
    -			int32_t Gmin= conntrack->MinSegCountFixed;
    -
    -			if(Gmin==1)
    -			{
    -				/* case 1 */
    -				conntrack->SegNumMin = min(min(Gmax,Rmin),RmaxL);
    -				conntrack->SegNum = min(Gmax,RmaxL);
    -				if(conntrack->SegNumMin > conntrack->SegNum)
    -				{
    -					return RESQUEMGR_NO_RESOURCE;
    -				}
    -			}
    -			else if(Gmax == Gmin)
    -			{
    -				/* case 2 */
    -				conntrack->SegNumMin = Gmax;
    -				conntrack->SegNum = Gmax;
    -				if(Rmax < Gmax)
    -				{
    -					return RESQUEMGR_NO_RESOURCE;
    -				}
    -			}
    -			else
    -			{
    -				/* case 3 */
    -				conntrack->SegNumMin = min(max(Gmin,Rmin),Gmax);
    -				conntrack->SegNum = min(max(min(RmaxL,Gmax),Gmin),Rmax);
    -				if(conntrack->SegNumMin > conntrack->SegNum)
    -				{
    -					return RESQUEMGR_NO_RESOURCE;
    -				}
    -			}
    -
    -			elog(LOG, "Query resource expects (%d MB, %lf CORE) x %d ( min %d ) resource.",
    -					   conntrack->SegMemoryMB,
    -					   conntrack->SegCore,
    -					   conntrack->SegNum,
    -					   conntrack->SegNumMin);
    -
    +			/*------------------------------------------------------------------
    +			 * The following logic consider the actual resource requirement from
    +			 * dispatcher based on table size, workload, etc. The requirement is
    +			 * described by (MinSegCountFixed, MaxSegCountFixed). The requirement
    +			 * can be satisfied only when there is a non-empty intersect between
    +			 * (MinSegCountFixed, MaxSegCountFixed) and (SegNumMin, SegNum).
    +			 *------------------------------------------------------------------
    +			 */
    +			conntrack->SegNumMin =
    +				conntrack->MaxSegCountFixed < conntrack->SegNumMin ?
    +				conntrack->MinSegCountFixed :
    +				max(conntrack->SegNumMin, conntrack->MinSegCountFixed);
    +
    +			conntrack->SegNum = min(conntrack->SegNum,
    --- End diff --
    
    Can you explain the meaning of MinSegCountFixed, MaxSegCountFixed, SegNumMin and SegNum?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47332978
  
    --- Diff: src/backend/resourcemanager/communication/rmcomm_QD2RM.c ---
    @@ -354,9 +361,9 @@ int createNewResourceContext(int *index)
             	return COMM2RM_CLIENT_FULL_RESOURCECONTEXT;
             }
             QD2RM_ResourceSets = rm_repalloc(QD2RM_CommContext,
    -                QD2RM_ResourceSets,
    -                sizeof(QDResourceContext) *
    -                QD2RM_ResourceSetSize * 2);
    +                						 QD2RM_ResourceSets,
    --- End diff --
    
    Please fix the formatting.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by jiny2 <gi...@git.apache.org>.
Github user jiny2 closed the pull request at:

    https://github.com/apache/incubator-hawq/pull/169


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47336814
  
    --- Diff: src/backend/resourcemanager/resqueuemanager.c ---
    @@ -2165,103 +2165,87 @@ void cancelResourceAllocRequest(ConnectionTrack conntrack)
     	/* Unlock session in deadlock */
     	unlockSessionResource(&(queuetrack->DLDetector), conntrack->SessionID);
     
    -	buildTimeoutResponseForQueuedRequest(conntrack, RESQUEMGR_NORESOURCE_TIMEOUT);
    +	buildTimeoutResponseForQueuedRequest(conntrack,
    +										 RESQUEMGR_NORESOURCE_TIMEOUT,
    +										 errorbuf);
     }
     
     /* Acquire resource from queue. */
    -int acquireResourceFromResQueMgr(ConnectionTrack conntrack)
    +int acquireResourceFromResQueMgr(ConnectionTrack  conntrack,
    +								 char 			 *errorbuf,
    +								 int 			  errorbufsize)
     {
    -	int						res			  	= FUNC_RETURN_OK;
    -
    -	DynResourceQueueTrack   queuetrack	  	= conntrack->QueueTrack;
    -
    -	if ( queuetrack->ClusterSegNumberMax == 0 )
    -	{
    -		elog(LOG, "The queue %s has no resource available to run queries.",
    -				  queuetrack->QueueInfo->Name);
    -		return RESQUEMGR_NO_RESOURCE;
    -	}
    +	int						res			= FUNC_RETURN_OK;
    --- End diff --
    
    format problem


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47336522
  
    --- Diff: src/backend/resourcemanager/resourcepool.c ---
    @@ -1153,7 +1156,7 @@ int setSegResHAWQAvailability( SegResource segres, uint8_t newstatus)
     
     int setSegResGLOBAvailability( SegResource segres, uint8_t newstatus)
     {
    -	return setSegStatGLOBAvailability(segres->Stat, newstatus);
    +	int res = setSegStatGLOBAvailability(segres->Stat, newstatus);
    --- End diff --
    
    A "return res;" statement should be added here.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47333137
  
    --- Diff: src/backend/resourcemanager/communication/rmcomm_QD2RM.c ---
    @@ -464,24 +471,23 @@ int cleanupQD2RMComm(void)
             {
                 if ( QD2RM_ResourceSets[i]->QD_ResourceList != NULL )
                 {
    -            	elog(LOG, "Un-returned resource is probed, will be returned. "
    -                          "(%d MB, %lf CORE) x %d. Conn ID=%d",
    -                          QD2RM_ResourceSets[i]->QD_SegMemoryMB,
    -                          QD2RM_ResourceSets[i]->QD_SegCore,
    -                          QD2RM_ResourceSets[i]->QD_SegCount,
    -                          QD2RM_ResourceSets[i]->QD_Conn_ID);
    -
    +            	elog(WARNING, "Un-returned resource is probed, will be returned. "
    +                              "(%d MB, %lf CORE) x %d. Conn ID=%d",
    +							  QD2RM_ResourceSets[i]->QD_SegMemoryMB,
    +							  QD2RM_ResourceSets[i]->QD_SegCore,
    +							  QD2RM_ResourceSets[i]->QD_SegCount,
    +							  QD2RM_ResourceSets[i]->QD_Conn_ID);
    +            	errorbuf[0] = '\0';
                     res = returnResource(i, errorbuf, sizeof(errorbuf));
                     if ( res != FUNC_RETURN_OK )
                     {
    -                	elog(WARNING, "Failed to return resource when cleaning up "
    -                				  "resource context.");
    +                	elog(WARNING, "%s", errorbuf);
    --- End diff --
    
    Line 480 sets errorbuf[0] = '\0', so will the elog(WARNING) at line 484 print an empty string?


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47333451
  
    --- Diff: src/backend/resourcemanager/communication/rmcomm_QD2RM.c ---
    @@ -807,7 +839,8 @@ int acquireResourceFromRM(int 		  		  index,
             	newqdseg->QD_HdfsHostName = (hnameidxarray[i] < nodecount) ?
             								curcontext->QD_HdfsHostNames[hnameidxarray[i]] :
     										NULL;
    -        	newqdseg->QD_SegInfo = (SegInfo)(recvbuffer->Buffer + hoffsetarray[i]);
    +        	newqdseg->QD_SegInfo = (SegInfo)(SMBUFF_CONTENT(recvbuffer) +
    +        									 hoffsetarray[i]);
    --- End diff --
    
    There are some formatting problems in this file.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

[GitHub] incubator-hawq pull request: HAWQ-234. Improve HAWQ resource manag...

Posted by zhangh43 <gi...@git.apache.org>.
Github user zhangh43 commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/169#discussion_r47333743
  
    --- Diff: src/backend/resourcemanager/communication/rmcomm_QD2RM.c ---
    @@ -1409,12 +1477,14 @@ void *generateResourceRefreshHeartBeat(void *arg)
     			phead->MessageSize = contbuffer.Cursor + 1;
     
     			appendSelfMaintainBuffer(&sendbuffer, (char *)phead, sizeof(*phead));
    -			appendSelfMaintainBuffer(&sendbuffer, contbuffer.Buffer, contbuffer.Cursor+1);
    +			appendSelfMaintainBuffer(&sendbuffer,
    +									 SMBUFF_CONTENT(&contbuffer),
    +									 getSMBContentSize(&contbuffer));
     			appendSelfMaintainBuffer(&sendbuffer, (char *)ptail, sizeof(*ptail));
     
     			if ( sendWithRetry(fd,
    -							   sendbuffer.Buffer,
    -							   sendbuffer.Cursor+1,
    +							   SMBUFF_CONTENT(&sendbuffer),
    --- End diff --
    
    also format problem


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---