You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2015/12/08 03:00:13 UTC

incubator-hawq git commit: HAWQ-227. Data locality downgrade by wrong insert host.

Repository: incubator-hawq
Updated Branches:
  refs/heads/master afd0e554c -> 72b349b4b


HAWQ-227. Data locality downgrade by wrong insert host.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/72b349b4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/72b349b4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/72b349b4

Branch: refs/heads/master
Commit: 72b349b4bb6421e97ad2b838344a4d1b5be475fc
Parents: afd0e55
Author: hubertzhang <hz...@pivotal.io>
Authored: Tue Dec 8 09:57:43 2015 +0800
Committer: hubertzhang <hz...@pivotal.io>
Committed: Tue Dec 8 09:57:43 2015 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbdatalocality.c | 68 ++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/72b349b4/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c
index 3e1cee7..3cacd74 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -2659,42 +2659,46 @@ static void allocate_random_relation(Relation_Data* rel_data,
 
 
 	/*find the insert node for each block*/
-	int *hostOccurTimes = (int *) palloc(
-						sizeof(int) * context->dds_context.size);
+	int *hostOccurTimes = (int *) palloc(sizeof(int) * context->dds_context.size);
 	for (int fi = 0; fi < fileCount; fi++) {
-			Relation_File *rel_file = file_vector[fi];
-			/*for hash file whose bucket number doesn't equal to segment number*/
-			if (rel_file->hostIDs == NULL) {
-				rel_file->splits[0].host = 0;
-				continue;
-			}
-			MemSet(hostOccurTimes, 0,	sizeof(int) * context->dds_context.size);
-			for (i = 0; i < rel_file->split_num; i++) {
-				Block_Host_Index *hostID = rel_file->hostIDs + i;
-						for (int l = 0; l < hostID->replica_num; l++) {
-							uint32_t key = hostID->hostIndex[l];
-							hostOccurTimes[key]++;
-						}
+		Relation_File *rel_file = file_vector[fi];
+		/* for hash files whose bucket number doesn't equal the segment number */
+		if (rel_file->hostIDs == NULL) {
+			rel_file->splits[0].host = 0;
+			continue;
+		}
+		MemSet(hostOccurTimes, 0,	sizeof(int) * context->dds_context.size);
+		for (i = 0; i < rel_file->split_num; i++) {
+			Block_Host_Index *hostID = rel_file->hostIDs + i;
+			for (int l = 0; l < hostID->replica_num; l++) {
+				uint32_t key = hostID->hostIndex[l];
+				hostOccurTimes[key]++;
 			}
-			int maxOccurTime = -1;
-			int inserthost = -1;
-			for(int i=0;i< context->dds_context.size;i++){
-				if(hostOccurTimes[i] > maxOccurTime){
-				  maxOccurTime = hostOccurTimes[i];
-				  inserthost = i;
-				}
+		}
+		int maxOccurTime = -1;
+		int inserthost = -1;
+		int hostsWithSameOccurTimesExist = true;
+		for (int i = 0; i < context->dds_context.size; i++) {
+			if (hostOccurTimes[i] > maxOccurTime) {
+				maxOccurTime = hostOccurTimes[i];
+				inserthost = i;
+				hostsWithSameOccurTimesExist = false;
+			} else if (hostOccurTimes[i] == maxOccurTime) {
+				hostsWithSameOccurTimesExist = true;
 			}
+		}
 
-			/* currently we consider the insert hosts are the same for all the blocks in the same file.
-			 * this logic can be changed in future, so we store the state in block level not file level*/
-			if(maxOccurTime < rel_file->split_num){
-				inserthost = -1;
-			}else{
-				for (i = 0; i < rel_file->split_num; i++) {
-					Block_Host_Index *hostID = rel_file->hostIDs + i;
-					hostID->insertHost = inserthost;
-				}
-			}
+		/* Currently we assume the insert host is the same for all blocks in the same file.
+		 * This logic may change in the future, so we store the state at block level, not file level.
+		 * If hostsWithSameOccurTimesExist, we cannot determine which host is the insert host;
+		 * likewise, if maxOccurTime < 2 we cannot determine the insert host either. */
+		if (maxOccurTime < rel_file->split_num || maxOccurTime < 2 || hostsWithSameOccurTimesExist) {
+			inserthost = -1;
+		}
+		for (i = 0; i < rel_file->split_num; i++) {
+			Block_Host_Index *hostID = rel_file->hostIDs + i;
+			hostID->insertHost = inserthost;
+		}
 	}
 	pfree(hostOccurTimes);