You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by yj...@apache.org on 2015/12/18 06:13:29 UTC

incubator-hawq git commit: HAWQ-259. YARN mode resource manager gets YARN cluster report once before processing query resource request

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 5ad0995b4 -> a40a27a4f


HAWQ-259. YARN mode resource manager gets YARN cluster report once before processing query resource request


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/a40a27a4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/a40a27a4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/a40a27a4

Branch: refs/heads/master
Commit: a40a27a4ff2e7ae9e3cc6fbec4bb4ad0c378a274
Parents: 5ad0995
Author: YI JIN <yj...@pivotal.io>
Authored: Fri Dec 18 16:13:10 2015 +1100
Committer: YI JIN <yj...@pivotal.io>
Committed: Fri Dec 18 16:13:10 2015 +1100

----------------------------------------------------------------------
 src/backend/cdb/cdbvars.c                            |  4 ++++
 src/backend/resourcemanager/include/dynrm.h          |  1 +
 src/backend/resourcemanager/include/resourcepool.h   |  2 ++
 src/backend/resourcemanager/requesthandler.c         | 15 ++++++++++++++-
 .../resourcebroker/resourcebroker_API.c              |  2 ++
 .../resourcebroker/resourcebroker_LIBYARN.c          |  2 ++
 src/backend/resourcemanager/resourcemanager.c        | 11 +++++++++--
 src/backend/resourcemanager/resourcepool.c           | 12 ++++++++++++
 src/backend/utils/misc/guc.c                         |  9 +++++++++
 src/include/cdb/cdbvars.h                            |  1 +
 10 files changed, 56 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/cdb/cdbvars.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbvars.c b/src/backend/cdb/cdbvars.c
index fc0ff47..76ee65c 100644
--- a/src/backend/cdb/cdbvars.c
+++ b/src/backend/cdb/cdbvars.c
@@ -336,12 +336,16 @@ int		rm_request_timeoutcheck_interval; 	/* How many seconds to wait before
 int		rm_session_lease_heartbeat_interval;/* How many seconds to wait before
 											   sending another heart-beat to
 											   resource manager. */
+int		rm_nocluster_timeout;				/* How many seconds to wait for
+											   enough available segments to
+											   be registered. */
 
 int		rm_tolerate_nseg_limit;
 int		rm_rejectrequest_nseg_limit;
 int		rm_nvseg_variance_among_seg_limit;
 int		rm_container_batch_limit;
 
+
 char   *rm_resourcepool_test_filename;
 
 bool	rm_enforce_cpu_enable;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/include/dynrm.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/dynrm.h b/src/backend/resourcemanager/include/dynrm.h
index 677addf..3d66a2c 100644
--- a/src/backend/resourcemanager/include/dynrm.h
+++ b/src/backend/resourcemanager/include/dynrm.h
@@ -191,6 +191,7 @@ struct DynRMGlobalData{
     ConnectionTrackManager   ConnTrackManager;	   /* Connection track. 	  */
     ResourcePool     	 	 ResourcePoolInstance; /* Node management.		  */
     volatile bool			 ResManagerMainKeepRun;
+    uint64_t				 ResourceManagerStartTime;
 
     /*------------------------------------------------------------------------*/
     /* INTERCONN:: RM server and RM agents.                                   */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/include/resourcepool.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/resourcepool.h b/src/backend/resourcemanager/include/resourcepool.h
index 5b8c999..a0f808a 100644
--- a/src/backend/resourcemanager/include/resourcepool.h
+++ b/src/backend/resourcemanager/include/resourcepool.h
@@ -498,6 +498,8 @@ struct ResourcePoolData {
 	/* Slaves file content. */
 	int64_t			SlavesFileTimestamp;
 	int				SlavesHostCount;
+
+	int				RBClusterReportCounter;
 };
 
 typedef struct ResourcePoolData *ResourcePool;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/requesthandler.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/requesthandler.c b/src/backend/resourcemanager/requesthandler.c
index f2e8c93..9693656 100644
--- a/src/backend/resourcemanager/requesthandler.c
+++ b/src/backend/resourcemanager/requesthandler.c
@@ -330,6 +330,19 @@ bool handleRMRequestAcquireResource(void **arg)
 	static char		 errorbuf[ERRORMESSAGE_SIZE];
 	int				 res		= FUNC_RETURN_OK;
 	ConnectionTrack *conntrack	= (ConnectionTrack *)arg;
+	uint64_t		 reqtime	= gettime_microsec();
+
+	/* In YARN mode we want at least one available segment before serving a
+	 * request, which requires that the global resource manager has returned
+	 * at least one cluster report.
+	 */
+	if ( reqtime - DRMGlobalInstance->ResourceManagerStartTime <=
+		 rm_nocluster_timeout * 1000000LL &&
+		 PRESPOOL->RBClusterReportCounter == 0 )
+	{
+		elog(DEBUG3, "Resource manager defers the resource request.");
+		return false;
+	}
 
 	RPCRequestHeadAcquireResourceFromRM request =
 		SMBUFF_HEAD(RPCRequestHeadAcquireResourceFromRM,
@@ -447,7 +460,7 @@ bool handleRMRequestAcquireResource(void **arg)
 	{
 		goto sendresponse;
 	}
-	(*conntrack)->ResRequestTime = gettime_microsec();
+	(*conntrack)->ResRequestTime = reqtime;
 	(*conntrack)->LastActTime    = (*conntrack)->ResRequestTime;
 
 	return true;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/resourcebroker/resourcebroker_API.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcebroker/resourcebroker_API.c b/src/backend/resourcemanager/resourcebroker/resourcebroker_API.c
index ef87629..323b13a 100644
--- a/src/backend/resourcemanager/resourcebroker/resourcebroker_API.c
+++ b/src/backend/resourcemanager/resourcebroker/resourcebroker_API.c
@@ -68,6 +68,8 @@ int RB_getClusterReport(const char *queuename, List **machines, double *maxcapac
 	}
 
 	*maxcapacity = 1;
+	PRESPOOL->RBClusterReportCounter++;
+
 	return FUNC_RETURN_OK;
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN.c b/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN.c
index f5932ff..0e1e414 100644
--- a/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN.c
+++ b/src/backend/resourcemanager/resourcebroker/resourcebroker_LIBYARN.c
@@ -536,6 +536,8 @@ int handleRB2RM_ClusterReport(void)
 	int			piperes     = 0;
 	List	   *segstats	= NULL;
 
+	PRESPOOL->RBClusterReportCounter++;
+
 	/* Read whole result head. */
 	RPCResponseRBGetClusterReportHeadData response;
 	piperes = readPipe(fd, (char *)&response, sizeof(response));

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/resourcemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcemanager.c b/src/backend/resourcemanager/resourcemanager.c
index c1e39a7..160a4b7 100644
--- a/src/backend/resourcemanager/resourcemanager.c
+++ b/src/backend/resourcemanager/resourcemanager.c
@@ -528,6 +528,8 @@ int MainHandlerLoop(void)
 {
 	int res = FUNC_RETURN_OK;
 
+	DRMGlobalInstance->ResourceManagerStartTime = gettime_microsec();
+
 	while( DRMGlobalInstance->ResManagerMainKeepRun )
 	{
 		/* STEP 1. Check resource broker status. */
@@ -852,6 +854,10 @@ int initializeDRMInstance(MCTYPE context)
 	if ( res != FUNC_RETURN_OK ) {
 		elog(WARNING, "Fail to get local host name.");
 	}
+
+	/* Set resource manager server startup time to 0, i.e. not started yet. */
+	DRMGlobalInstance->ResourceManagerStartTime = 0;
+
 	return res;
 }
 
@@ -2860,10 +2866,11 @@ void processResourceBrokerTasks(void)
 		 */
         curtime = gettime_microsec();
 
-		if ( (curtime - PRESPOOL->LastUpdateTime  >
+		if ( (PRESPOOL->Segments.NodeCount > 0 ) &&
+			 (curtime - PRESPOOL->LastUpdateTime  >
 			  rm_cluster_report_period * 1000000LL ||
 			  hasSegmentGRMCapacityNotUpdated() ) &&
-			  curtime - PRESPOOL->LastRequestTime >    5LL * 1000000LL)
+			 (curtime - PRESPOOL->LastRequestTime > 5LL * 1000000LL) )
 		{
 			double  maxcap  = 0.0;
 			List   *report	= NULL;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/resourcemanager/resourcepool.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcepool.c b/src/backend/resourcemanager/resourcepool.c
index 28e874d..6a98927 100644
--- a/src/backend/resourcemanager/resourcepool.c
+++ b/src/backend/resourcemanager/resourcepool.c
@@ -362,6 +362,8 @@ void initializeResourcePoolManager(void)
 	{
 		PRESPOOL->pausePhase[i] = false;
 	}
+
+	PRESPOOL->RBClusterReportCounter = 0;
 }
 
 #define CONNECT_TIMEOUT 60
@@ -3423,6 +3425,16 @@ bool hasSegmentGRMCapacityNotUpdated(void)
 	{
 		return false;
 	}
+
+	/*
+	 * If no segment is registered, there is no need to update global
+	 * resource manager info.
+	 */
+	if ( PRESPOOL->Segments.NodeCount == 0 )
+	{
+		return false;
+	}
+
 	bool res = false;
 
 	List 	 *allsegres = NULL;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index c98d33e..2880067 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -6412,6 +6412,15 @@ static struct config_int ConfigureNamesInt[] =
 	},
 
 	{
+		{"hawq_rm_nocluster_timeout", PGC_POSTMASTER, RESOURCES_MGM,
+			gettext_noop("timeout for having enough number of segments registered."),
+			NULL
+		},
+		&rm_nocluster_timeout,
+		60, 0, 65535, NULL, NULL
+	},
+
+	{
 		{"hawq_rm_session_lease_heartbeat_interval", PGC_POSTMASTER, RESOURCES_MGM,
 			gettext_noop("interval for sending heart-beat to resource manager to keep "
 						 "resource context alive."),

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a40a27a4/src/include/cdb/cdbvars.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbvars.h b/src/include/cdb/cdbvars.h
index 158509e..ff10a76 100644
--- a/src/include/cdb/cdbvars.h
+++ b/src/include/cdb/cdbvars.h
@@ -1185,6 +1185,7 @@ extern int 	   rm_resource_allocation_timeout;
 extern int	   rm_resource_timeout;
 extern int	   rm_request_timeoutcheck_interval;
 extern int	   rm_session_lease_heartbeat_interval;
+extern int	   rm_nocluster_timeout;
 extern int	   rm_tolerate_nseg_limit;
 extern int	   rm_rejectrequest_nseg_limit;
 extern int	   rm_nvseg_variance_among_seg_limit;