You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by wl...@apache.org on 2016/01/22 06:04:19 UTC

incubator-hawq git commit: HAWQ-274. Remove shared memory for tmp dir list

Repository: incubator-hawq
Updated Branches:
  refs/heads/master cb4a95a17 -> 08783b226


HAWQ-274. Remove shared memory for tmp dir list


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/08783b22
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/08783b22
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/08783b22

Branch: refs/heads/master
Commit: 08783b226c1f72aca0ae064ae487bd0c1e366903
Parents: cb4a95a
Author: Wen Lin <wl...@pivotal.io>
Authored: Fri Jan 22 13:03:33 2016 +0800
Committer: Wen Lin <wl...@pivotal.io>
Committed: Fri Jan 22 13:03:33 2016 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbtmpdir.c                     | 269 +++----------------
 src/backend/postmaster/identity.c               |   4 +-
 .../communication/rmcomm_RMSEG2RM.c             |   3 +-
 src/backend/resourcemanager/include/dynrm.h     |   5 +-
 .../resourcemanager/requesthandler_RMSEG.c      |   7 +-
 src/backend/resourcemanager/resourcemanager.c   |  50 ++--
 .../resourcemanager/resourcemanager_RMSEG.c     |  83 +++++-
 src/backend/storage/ipc/ipci.c                  |   3 +-
 src/backend/utils/init/postinit.c               |   4 +-
 src/include/cdb/cdbtmpdir.h                     |  19 +-
 10 files changed, 148 insertions(+), 299 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/cdb/cdbtmpdir.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbtmpdir.c b/src/backend/cdb/cdbtmpdir.c
index d89c26e..eff6921 100644
--- a/src/backend/cdb/cdbtmpdir.c
+++ b/src/backend/cdb/cdbtmpdir.c
@@ -25,283 +25,78 @@
 #include "miscadmin.h"
 #include "postmaster/autovacuum.h"
 #include "storage/ipc.h"
-#include "storage/shmem.h"
 #include <sys/stat.h>
 
-TmpDirInfo* TmpDirInfoArray = NULL;
+static List *initTmpDirList(List *list, char *tmpdir_config);
 
-static List *tmpDirList = NULL;
-
-int32_t TmpDirNum = 0;
-
-Size TmpDirInfoArraySize(void);
-
-void TmpDirInfoArray_ShmemInit(void);
-
-char* GetTmpDirPathFromArray(int64_t idx);
-
-bool DestroyTmpDirInfoArray(TmpDirInfo *info);
-
-bool CheckTmpDirAvailable(char *path);
-
-void destroyTmpDirList(List *list)
-{
-    ListCell *lc = NULL;
-
-    foreach(lc, list)
-    {
-        char *tmpdir = (char *)lfirst(lc);
-        pfree(tmpdir);
-    }
-    list_free(list);
-}
-
-static bool CheckDirValid(char* path)
-{
-    struct stat info;
-    if (path == NULL || stat(path, &info) < 0)
-    {
-        return false;
-    }
-    else
-    {
-        if (!S_ISDIR(info.st_mode))
-            return false;
-        else
-            return true;
-    }
-}
-
-static int GetTmpDirNumber(char* szTmpDir)
+List *initTmpDirList(List *list, char *szTmpDir)
 {
-    int i = 0, idx = -1;
-    char *tmpdir = NULL;
+    int idx = -1, i = 0;
+    char *tmpdir;
     int tmpDirNum = 0;
-    tmpDirList = NULL;
 
     for (i = 0; i <= strlen(szTmpDir); i++)
     {
         if (szTmpDir[i] == ',' || i == strlen(szTmpDir))
         {
             /* in case two commas are written together */
-            if (i-idx > 1 && i-idx <= MAX_TMP_DIR_LEN)
+            if (i-idx > 1)
             {
                 tmpdir = (char *)palloc0(i-idx);
                 strncpy(tmpdir, szTmpDir+idx+1, i-idx-1);
-                if(CheckDirValid(tmpdir))
-                {
-                    tmpDirNum++;
-                    elog(LOG, "Get a temporary directory:%s", tmpdir);
-                    tmpDirList = lappend(tmpDirList, tmpdir);
-                }
-                else
-                {
-                    pfree(tmpdir);
-                }
+                tmpDirNum++;
+                elog(LOG, "Get a temporary directory:%s", tmpdir);
+                list = lappend(list, tmpdir);
             }
             idx = i;
         }
     }
 
-    elog(LOG, "Get %d temporary directories", tmpDirNum);
-    return tmpDirNum;
+    return list;
 }
 
-/*
- *  Calculate the size of share memory for temporary directory information
- */
-Size TmpDirInfoArrayShmemSize(void)
-{
-
-    if (AmIMaster())
-    {
-        TmpDirNum = GetTmpDirNumber(rm_master_tmp_dirs);
-    }
-    else if (AmISegment())
-    {
-        TmpDirNum = GetTmpDirNumber(rm_seg_tmp_dirs);
-    }
-    else
-    {
-        elog(LOG, "Don't need create share memory for temporary directory information");
-        TmpDirNum = 0;
-    }
-
-    return MAXALIGN(TmpDirNum*sizeof(TmpDirInfo));
-}
-
-/*
- *  Initialize share memory for temporary directory information
- */
-void TmpDirInfoArrayShmemInit(void)
+void destroyTmpDirList(List *list)
 {
-    bool found = false;
-
-    if (TmpDirNum == 0)
-        return;
-
-    TmpDirInfoArray = (TmpDirInfo *)ShmemInitStruct("Temporary Directory Information Cache",
-                                                    TmpDirNum*sizeof(TmpDirInfo), &found);
-    if(!TmpDirInfoArray)
-    {
-        elog(FATAL,
-             "Could not initialize Temporary Directory Information shared memory");
-    }
+    ListCell *lc = NULL;
 
-    if(!found)
+    foreach(lc, list)
     {
-        ListCell *lc = NULL;
-        int32_t i = 0;
-        MemSet(TmpDirInfoArray, 0, TmpDirNum*sizeof(TmpDirInfo));
-        foreach(lc, tmpDirList) {
-            if (strlen((char*)lfirst(lc)) < MAX_TMP_DIR_LEN)
-            {
-                strncpy(TmpDirInfoArray[i].path, (char*)lfirst(lc), strlen((char*)lfirst(lc)));
-                TmpDirInfoArray[i].available = true;
-                i++;
-            }
-        }
-
-        if (tmpDirList)
-        {
-            destroyTmpDirList(tmpDirList);
-        }
+        char *tmpdir = (char *)lfirst(lc);
+        pfree(tmpdir);
     }
-    elog(LOG, "Initialize share memeory for temporary directory info finish.");
+    list_free(list);
 }
 
-/*
- *  Check if this temporary directory is OK to read or write.
- *  If not, it's probably due to disk error.
- */
-bool CheckTmpDirAvailable(char *path)
+void getLocalTmpDirFromMasterConfig(int session_id)
 {
-    FILE  *tmp = NULL;
-    bool  ret = true;
-    char* fname = NULL;
-    char* testfile = "/checktmpdir.log";
+    List *tmpdirs = NULL;
 
-    /* write some bytes to a file to check if
-     * this temporary directory is OK.
-     */
-    fname = palloc0(strlen(path) + strlen(testfile) + 1);
-    strncpy(fname, path, strlen(path));
-    strncpy(fname + strlen(path), testfile, strlen(testfile));
-    tmp = fopen(fname, "w");
-    if (tmp == NULL)
-    {
-        elog(LOG, "Can't open file:%s when check temporary directory", fname);
-        ret = false;
-        goto _exit;
-    }
+    tmpdirs = initTmpDirList(tmpdirs, rm_master_tmp_dirs); /* split the comma-separated rm_master_tmp_dirs */
+    LocalTempPath = pstrdup((char *)lfirst(list_nth_cell(tmpdirs, session_id % list_length(tmpdirs)))); /* index by the session_id argument, not the gp_session_id global */
 
-    if (fseek(tmp, 0, SEEK_SET) != 0)
-    {
-        elog(LOG, "Can't seek file:%s when check temporary directory", fname);
-        ret = false;
-        goto _exit;
-    }
-
-    if (strlen("test") != fwrite("test", 1, strlen("test"), tmp))
-    {
-        elog(LOG, "Can't write file:%s when check temporary directory", fname);
-        ret = false;
-        goto _exit;
-    }
-
-_exit:
-    pfree(fname);
-    if (tmp != NULL)
-        fclose(tmp);
-    return ret;
+    destroyTmpDirList(tmpdirs);
 }
 
-/*
- * Check the status of each temporary directory kept in
- * shared memory, set to false if it is not available.
- */
-void checkTmpDirStatus(void)
+void getLocalTmpDirFromSegmentConfig(int session_id, int command_id, int qeidx)
 {
-    LWLockAcquire(TmpDirInfoLock, LW_SHARED);
+    List *tmpdirs = NULL;
 
-    for (int i = 0; i < TmpDirNum; i++)
+    if (qeidx == -1)
     {
-        bool oldStatus = TmpDirInfoArray[i].available;
-        bool newStatus = CheckTmpDirAvailable(TmpDirInfoArray[i].path);
-        if (oldStatus != newStatus)
-        {
-            LWLockRelease(TmpDirInfoLock);
-            LWLockAcquire(TmpDirInfoLock, LW_EXCLUSIVE);
-            TmpDirInfoArray[i].available = newStatus;
-            LWLockRelease(TmpDirInfoLock);
-            LWLockAcquire(TmpDirInfoLock, LW_SHARED);
-        }
+        // QE on master
+        getLocalTmpDirFromMasterConfig(session_id);
     }
-
-    LWLockRelease(TmpDirInfoLock);
-    elog(LOG, "checkTmpDirStatus finish!");
-}
-
-/*
- * Get a list of failed temporary directory
- */
-List* getFailedTmpDirList(void)
-{
-    List *failedList = NULL;
-    char *failedDir = NULL;
-
-    LWLockAcquire(TmpDirInfoLock, LW_SHARED);
-    for (int i = 0; i < TmpDirNum; i++)
+    else
     {
-        if (!TmpDirInfoArray[i].available)
-        {
-            failedDir = pstrdup(TmpDirInfoArray[i].path);
-            failedList = lappend(failedList, failedDir);
-        }
+        // QE on segment
+        tmpdirs = initTmpDirList(tmpdirs, rm_seg_tmp_dirs);
+        int64_t session_key = session_id;
+        int64_t key = (session_key << 32) + command_id + qeidx;
+        LocalTempPath = pstrdup((char *)lfirst(list_nth_cell(tmpdirs, key % list_length(tmpdirs))));
+        destroyTmpDirList(tmpdirs);
     }
-    LWLockRelease(TmpDirInfoLock);
-    return failedList;
 }
 
-/*
- *  Get a temporary directory path from array by its index
- */
-char* GetTmpDirPathFromArray(int64_t idx)
-{
-    Insist(idx >=0 && idx <= TmpDirNum-1);
-
-    LWLockAcquire(TmpDirInfoLock, LW_SHARED);
 
-    if (TmpDirInfoArray[idx].available)
-    {
-        LWLockRelease(TmpDirInfoLock);
-        return TmpDirInfoArray[idx].path;
-    }
-    else
-    {
-        LWLockRelease(TmpDirInfoLock);
-        ereport(FATAL,
-                (errcode(ERRCODE_CDB_INTERNAL_ERROR),
-                errmsg("Temporary directory:%s is failed", TmpDirInfoArray[idx].path)));
-    }
-    return NULL;
-}
 
-void getMasterLocalTmpDirFromShmem(int session_id)
-{
-    LocalTempPath = GetTmpDirPathFromArray(session_id % TmpDirNum);
-}
 
-void getSegmentLocalTmpDirFromShmem(int session_id, int command_id, int qeidx)
-{
-    if(qeidx == -1)
-    {
-        getMasterLocalTmpDirFromShmem(session_id);
-    }
-    else
-    {
-        int64_t session_key = session_id;
-        int64_t key = (session_key << 32) + command_id + qeidx;
-        LocalTempPath = GetTmpDirPathFromArray(key % TmpDirNum);
-    }
-}

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/postmaster/identity.c
----------------------------------------------------------------------
diff --git a/src/backend/postmaster/identity.c b/src/backend/postmaster/identity.c
index 898a18a..7851fa8 100644
--- a/src/backend/postmaster/identity.c
+++ b/src/backend/postmaster/identity.c
@@ -414,8 +414,8 @@ SetupProcessIdentity(const char *str)
         }
         else
         {
-            getSegmentLocalTmpDirFromShmem(gp_session_id, gp_command_count, GetQEIndex());
-            elog(DEBUG1, "getSegmentLocalTmpDirFromShmem session_id:%d command_id:%d qeidx:%d tmpdir:%s", gp_session_id, gp_command_count, GetQEIndex(), LocalTempPath);
+            getLocalTmpDirFromSegmentConfig(gp_session_id, gp_command_count, GetQEIndex());
+            elog(DEBUG1, "getLocalTmpDirFromSegmentConfig session_id:%d command_id:%d qeidx:%d tmpdir:%s", gp_session_id, gp_command_count, GetQEIndex(), LocalTempPath);
         }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/resourcemanager/communication/rmcomm_RMSEG2RM.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/communication/rmcomm_RMSEG2RM.c b/src/backend/resourcemanager/communication/rmcomm_RMSEG2RM.c
index aa7ed37..9d4eca3 100644
--- a/src/backend/resourcemanager/communication/rmcomm_RMSEG2RM.c
+++ b/src/backend/resourcemanager/communication/rmcomm_RMSEG2RM.c
@@ -22,7 +22,6 @@
 #include "communication/rmcomm_MessageHandler.h"
 #include "communication/rmcomm_RMSEG_RM_Protocol.h"
 #include "dynrm.h"
-#include "cdb/cdbtmpdir.h"
 #include "utils/memutilities.h"
 #include "utils/simplestring.h"
 #include "utils/linkedlist.h"
@@ -116,7 +115,7 @@ int sendIMAlive(int  *errorcode,
 	initializeSelfMaintainBuffer(&tosend, PCONTEXT);
 
 	RPCRequestHeadIMAliveData requesthead;
-	requesthead.TmpDirCount 	  = TmpDirNum;
+	requesthead.TmpDirCount 	  = getDQueueLength(&DRMGlobalInstance->LocalHostTempDirectories);
 	requesthead.TmpDirBrokenCount = DRMGlobalInstance->LocalHostStat->FailedTmpDirNum;
 	requesthead.Reserved		  = 0;
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/resourcemanager/include/dynrm.h
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/include/dynrm.h b/src/backend/resourcemanager/include/dynrm.h
index 4c5879e..103adbf 100644
--- a/src/backend/resourcemanager/include/dynrm.h
+++ b/src/backend/resourcemanager/include/dynrm.h
@@ -120,7 +120,7 @@ bool handleRMRequestQuotaControl(void **arg);
 
 int refreshLocalHostInstance(void);
 void checkLocalPostmasterStatus(void);
-void checkTmpDirStatus(void);
+
 /*-----------------------------------------------------------------------------
  * Dynamic resource manager overall APIs
  *----------------------------------------------------------------------------*/
@@ -208,10 +208,11 @@ struct DynRMGlobalData{
     /*------------------------------------------------------------------------*/
     SegStat 				 LocalHostStat;
     
-    DQueueData				 LocalHostTempDirectoriesForQD;      
+    DQueueData				 LocalHostTempDirectoriesForQD;
     int                      NextLocalHostTempDirIdxForQD;
     
     DQueueData				 LocalHostTempDirectories;
+    List*                    LocalHostFailedTmpDirList;
     int                      NextLocalHostTempDirIdx;
     HTAB                     *LocalTmpDirTable;
     int                      TmpDirTableCapacity;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/resourcemanager/requesthandler_RMSEG.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/requesthandler_RMSEG.c b/src/backend/resourcemanager/requesthandler_RMSEG.c
index 66f929a..c673040 100644
--- a/src/backend/resourcemanager/requesthandler_RMSEG.c
+++ b/src/backend/resourcemanager/requesthandler_RMSEG.c
@@ -72,15 +72,15 @@ int refreshLocalHostInstance(void)
 	DQUEUE_LOOP_END
 
 	/* Get a list of failed temporary directory */
-	List* failedTmpDir = getFailedTmpDirList();
-	uint16_t failedTmpDirNum = list_length(failedTmpDir);
+	/* LocalHostFailedTmpDirList is itself a List *, so pass it by value. */
+	uint16_t failedTmpDirNum = list_length(DRMGlobalInstance->LocalHostFailedTmpDirList);
 	if (failedTmpDirNum > 0)
 	{
 		SelfMaintainBufferData buf;
 		initializeSelfMaintainBuffer(&buf, PCONTEXT);
 		uint16_t idx = 0;
 		ListCell *lc = NULL;
-		foreach(lc, failedTmpDir)
+		foreach(lc, DRMGlobalInstance->LocalHostFailedTmpDirList)
 		{
 			elog(LOG, "Get a failed temporary directory list for IMAlive message: %s",
 					  (char *)lfirst(lc));
@@ -281,7 +281,6 @@ int refreshLocalHostInstance(void)
 	DQUEUE_LOOP_END
 	removeAllDQueueNodes(&addresses);
 	cleanDQueue(&addresses);
-	destroyTmpDirList(failedTmpDir);
 	freeSimpleStringContent(&failedTmpDirStr);
 
 	return FUNC_RETURN_OK;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/resourcemanager/resourcemanager.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcemanager.c b/src/backend/resourcemanager/resourcemanager.c
index 5aaf64a..fcdc2df 100644
--- a/src/backend/resourcemanager/resourcemanager.c
+++ b/src/backend/resourcemanager/resourcemanager.c
@@ -263,39 +263,40 @@ int ResManagerMain(int argc, char *argv[])
 	pqsignal(SIGTTIN, SIG_IGN);
 	pqsignal(SIGTTOU, SIG_IGN);
 
-	CurrentResourceOwner = ResourceOwnerCreate(NULL, "Resource Manager");
+	if ( DRMGlobalInstance->Role == START_RM_ROLE_MASTER ) {
+		CurrentResourceOwner = ResourceOwnerCreate(NULL, "Resource Manager");
 
-	BaseInit();
-	InitProcess();
-	InitBufferPoolBackend();
-	InitXLOGAccess();
+		BaseInit();
+		InitProcess();
+		InitBufferPoolBackend();
+		InitXLOGAccess();
 
-	SetProcessingMode(NormalProcessing);
+		SetProcessingMode(NormalProcessing);
 
-	MyDatabaseId = TemplateDbOid;
-	MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
-	if (!FindMyDatabase(probeDatabase, &MyDatabaseId, &MyDatabaseTableSpace))
-		ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE),
-			errmsg("database 'postgres' does not exist")));
+		MyDatabaseId = TemplateDbOid;
+		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
+		if (!FindMyDatabase(probeDatabase, &MyDatabaseId, &MyDatabaseTableSpace))
+			ereport(FATAL, (errcode(ERRCODE_UNDEFINED_DATABASE),
+				errmsg("database 'postgres' does not exist")));
 
-	char *fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
+		char *fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);
 
-	SetDatabasePath(fullpath);
+		SetDatabasePath(fullpath);
 
-	InitProcessPhase2();
+		InitProcessPhase2();
 
-	MyBackendId = InvalidBackendId;
+		MyBackendId = InvalidBackendId;
 
-	SharedInvalBackendInit(false);
+		SharedInvalBackendInit(false);
 
-	if (MyBackendId > MaxBackends || MyBackendId <= 0)
-		elog(FATAL, "bad backend id: %d", MyBackendId);
-
-	InitBufferPoolBackend();
-	RelationCacheInitialize();
-	InitCatalogCache();
-	RelationCacheInitializePhase2();
+		if (MyBackendId > MaxBackends || MyBackendId <= 0)
+			elog(FATAL, "bad backend id: %d", MyBackendId);
 
+		InitBufferPoolBackend();
+		RelationCacheInitialize();
+		InitCatalogCache();
+		RelationCacheInitializePhase2();
+	}
 	/* END: INIT for making RM process access catalog by caql etc.            */
 	/**************************************************************************/
 	PG_SETMASK(&UnBlockSig);
@@ -821,6 +822,7 @@ int initializeDRMInstance(MCTYPE context)
 	
     initializeDQueue(&(DRMGlobalInstance->LocalHostTempDirectories),   context);
     DRMGlobalInstance->NextLocalHostTempDirIdx = -1;
+    DRMGlobalInstance->LocalHostFailedTmpDirList = NULL;
 
     HASHCTL ctl;
     ctl.keysize                                 = sizeof(TmpDirKey);
@@ -1067,6 +1069,8 @@ int  loadDynamicResourceManagerConfigure(void)
 		elog(LOG, "HAWQ Segment RM :: Temporary directory %s", value->Str);
 	DQUEUE_LOOP_END
 
+	checkAndBuildFailedTmpDirList();
+
 	/****** Resource enforcement GUCs begins ******/
 
 	/* Get resource enforcement enablement flag */

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/resourcemanager/resourcemanager_RMSEG.c
----------------------------------------------------------------------
diff --git a/src/backend/resourcemanager/resourcemanager_RMSEG.c b/src/backend/resourcemanager/resourcemanager_RMSEG.c
index 6b29a40..ffac70a 100644
--- a/src/backend/resourcemanager/resourcemanager_RMSEG.c
+++ b/src/backend/resourcemanager/resourcemanager_RMSEG.c
@@ -61,7 +61,7 @@ int ResManagerMainSegment2ndPhase(void)
 	 */
 	initCGroupThreads();
 
-	//InitFileAccess();
+	InitFileAccess();
 
 	/*
 	 * Notify postmaster that HAWQ RM is ready. Ignore the possible problem that
@@ -152,7 +152,7 @@ int  initializeSocketServer_RMSEG(void)
 }
 #define SEGMENT_HEARTBEAT_INTERVAL (3LL * 1000000LL)
 #define SEGMENT_HOSTCHECK_INTERVAL (5LL * 1000000LL)
-#define SEGMENT_TMPDIRCHECK_INTERVAL (10 * 60LL * 1000000LL)
+#define SEGMENT_TMPDIRCHECK_INTERVAL (5 * 60LL * 1000000LL)
 int MainHandlerLoop_RMSEG(void)
 {
 	int 		res 	  = FUNC_RETURN_OK;
@@ -183,6 +183,12 @@ int MainHandlerLoop_RMSEG(void)
 		/* PART2. Handle all BE submitted requests. */
 		processSubmittedRequests();
 
+		if ( curtime - DRMGlobalInstance->TmpDirLastCheckTime >
+			SEGMENT_TMPDIRCHECK_INTERVAL ) {
+			checkAndBuildFailedTmpDirList();
+			DRMGlobalInstance->TmpDirLastCheckTime = gettime_microsec();
+		}
+
 		/* PART3. Fresh local host info and send IMAlive message to resource
 		 * 		  manager server.											  */
 		curtime = gettime_microsec();
@@ -193,12 +199,6 @@ int MainHandlerLoop_RMSEG(void)
 			checkLocalPostmasterStatus();
 		}
 
-		if ( curtime - DRMGlobalInstance->TmpDirLastCheckTime >
-			SEGMENT_TMPDIRCHECK_INTERVAL ) {
-			checkTmpDirStatus();
-			DRMGlobalInstance->TmpDirLastCheckTime = gettime_microsec();
-		}
-
 		if ( DRMGlobalInstance->SendIMAlive ) {
 			 if (DRMGlobalInstance->LocalHostStat != NULL &&
 			     curtime - DRMGlobalInstance->HeartBeatLastSentTime >
@@ -232,3 +232,70 @@ int MainHandler_RMSEGDummyLoop(void)
 
 	return FUNC_RETURN_OK;
 }
+
+/*
+ * Check whether a temporary directory is usable by creating the file
+ * "<path>/checktmpdir.log" and writing a few bytes into it.
+ *
+ * path must be a non-NULL, NUL-terminated directory name. Returns true
+ * when open, seek, write and close all succeed; each failure is logged at
+ * LOG level and yields false. The probe file is not removed afterwards.
+ */
+bool CheckTmpDirAvailable(char *path)
+{
+	static const char probe[] = "test";
+	const char *testfile = "/checktmpdir.log";
+	size_t      pathlen = strlen(path);
+	char       *fname = palloc0(pathlen + strlen(testfile) + 1);
+	FILE       *tmp = NULL;
+	bool        ret = false;
+
+	/* palloc0 zero-fills the buffer, so the copies stay NUL-terminated */
+	memcpy(fname, path, pathlen);
+	memcpy(fname + pathlen, testfile, strlen(testfile));
+
+	tmp = fopen(fname, "w");
+	if (tmp == NULL)
+		elog(LOG, "Can't open file:%s when check temporary directory", fname);
+	else if (fseek(tmp, 0, SEEK_SET) != 0)
+		elog(LOG, "Can't seek file:%s when check temporary directory", fname);
+	else if (fwrite(probe, 1, strlen(probe), tmp) != strlen(probe))
+		elog(LOG, "Can't write file:%s when check temporary directory", fname);
+	else
+		ret = true;
+
+	/*
+	 * stdio buffers the few bytes written above, so a failing disk is
+	 * normally reported only when fclose() flushes them; check its result.
+	 */
+	if (tmp != NULL && fclose(tmp) != 0)
+	{
+		if (ret)
+			elog(LOG, "Can't write file:%s when check temporary directory", fname);
+		ret = false;
+	}
+
+	pfree(fname);
+	return ret;
+}
+
+/*
+ * Rebuild DRMGlobalInstance->LocalHostFailedTmpDirList: free the previous
+ * list and its strings, probe every entry of LocalHostTempDirectories with
+ * CheckTmpDirAvailable(), and collect a copy of each path that fails.
+ */
+void checkAndBuildFailedTmpDirList(void)
+{
+	destroyTmpDirList(DRMGlobalInstance->LocalHostFailedTmpDirList);
+	DRMGlobalInstance->LocalHostFailedTmpDirList = NULL;
+
+	DQUEUE_LOOP_BEGIN(&DRMGlobalInstance->LocalHostTempDirectories, iter, SimpStringPtr, value)
+		if (!CheckTmpDirAvailable(value->Str))
+		{
+			DRMGlobalInstance->LocalHostFailedTmpDirList =
+				lappend(DRMGlobalInstance->LocalHostFailedTmpDirList, pstrdup(value->Str));
+		}
+	DQUEUE_LOOP_END
+
+	elog(LOG, "checkAndBuildFailedTmpDirList finish!");
+}

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/storage/ipc/ipci.c
----------------------------------------------------------------------
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 4898c73..ad73af1 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -174,7 +174,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
             size = add_size(size, MetadataCache_ShmemSize());
             elog(LOG, "Metadata Cache Share Memory Size : %lu", MetadataCache_ShmemSize());
         }
-        size = add_size(size, TmpDirInfoArrayShmemSize());
+
 		
 #ifdef FAULT_INJECTOR
 		size = add_size(size, FaultInjector_ShmemSize());
@@ -295,7 +295,6 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
     {
         MetadataCache_ShmemInit();
     }
-    TmpDirInfoArrayShmemInit();
 
 	if (!IsUnderPostmaster)
 	{

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/backend/utils/init/postinit.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 8d48836..e4d0752 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -441,8 +441,8 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username,
         }
         else
         {
-            getMasterLocalTmpDirFromShmem(gp_session_id);
-            elog(LOG, "getMasterLocalTmpDirFromShmem session_id:%d tmpdir:%s", gp_session_id, LocalTempPath);
+            getLocalTmpDirFromMasterConfig(gp_session_id);
+            elog(LOG, "getLocalTmpDirFromMasterConfig session_id:%d tmpdir:%s", gp_session_id, LocalTempPath);
         }
 
     }

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/08783b22/src/include/cdb/cdbtmpdir.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbtmpdir.h b/src/include/cdb/cdbtmpdir.h
index 63feb8d..61cff9d 100644
--- a/src/include/cdb/cdbtmpdir.h
+++ b/src/include/cdb/cdbtmpdir.h
@@ -21,25 +21,10 @@
 #define CDBTMPDIR_H
 #include "c.h"
 
-#define MAX_TMP_DIR_LEN    8192
-
-typedef struct TmpDirInfo
-{
-    bool available;
-    char path[MAX_TMP_DIR_LEN];
-} TmpDirInfo;
-
 extern int32_t TmpDirNum;
-
-Size TmpDirInfoArrayShmemSize(void);
-void TmpDirInfoArrayShmemInit(void);
-char* GetTmpDirPathFromArray(int64_t idx);
-bool DestroyTmpDirInfoArray(TmpDirInfo *info);
-bool CheckTmpDirAvailable(char *path);
 void destroyTmpDirList(List *list);
-void checkTmpDirStatus(void);
+void getLocalTmpDirFromMasterConfig(int session_id);
+void getLocalTmpDirFromSegmentConfig(int session_id, int command_id, int qeidx);
 
-void getMasterLocalTmpDirFromShmem(int session_id);
-void getSegmentLocalTmpDirFromShmem(int session_id, int command_id, int qeidx);
 
 #endif