Posted to commits@hawq.apache.org by hu...@apache.org on 2015/10/20 05:14:00 UTC

incubator-hawq git commit: HAWQ-68. Wrong answer when converting hash to random partition table

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 413b6647b -> 72e29416e


HAWQ-68. Wrong answer when converting hash to random partition table


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/72e29416
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/72e29416
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/72e29416

Branch: refs/heads/master
Commit: 72e29416e1c49d2000004126eb47eef457427c16
Parents: 413b664
Author: hzhang2 <zh...@163.com>
Authored: Mon Oct 19 22:38:34 2015 +0800
Committer: hzhang2 <zh...@163.com>
Committed: Tue Oct 20 11:13:04 2015 +0800

----------------------------------------------------------------------
 src/backend/cdb/cdbdatalocality.c     | 99 ++++++++++++++++--------------
 src/backend/cdb/cdbtargeteddispatch.c | 16 +++++
 2 files changed, 68 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
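
A note on the shape of the fix before the diff: each partition of a hash
table was previously deciding on its own, from its own data locality,
whether to fall back from hash to random distribution, so sibling
partitions could be planned with different policies inside one query and
return wrong answers. The patch caches the verdict taken for the first
partition and reuses it for every later partition of the same parent. A
minimal sketch of that caching idea, with hypothetical stand-in names
(PartDecision, lookup_parent_decision); the real code uses CurrentRelType,
parentRelsType and allocate_hash_relation:

    typedef struct PartDecision
    {
    	Oid		parent_relid;	/* partition parent (or the relation itself) */
    	bool	keep_hash;		/* verdict taken for the first partition seen */
    } PartDecision;

    static bool
    lookup_parent_decision(List *decisions, Oid parent_relid, bool *keep_hash)
    {
    	ListCell   *lc;

    	foreach(lc, decisions)
    	{
    		PartDecision *d = (PartDecision *) lfirst(lc);

    		if (d->parent_relid == parent_relid)
    		{
    			*keep_hash = d->keep_hash;	/* reuse the first partition's verdict */
    			return true;
    		}
    	}
    	return false;		/* first partition: decide from data locality */
    }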


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/72e29416/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c
index b894629..db7013c 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -355,7 +355,7 @@ static bool IsAggFunction(char* funcName);
 static bool allocate_hash_relation(Relation_Data* rel_data,
 		Assignment_Log_Context *log_context, TargetSegmentIDMap* idMap,
 		Relation_Assignment_Context* assignment_context,
-		split_to_segment_mapping_context *context);
+		split_to_segment_mapping_context *context, bool parentIsHashExist, bool parentIsHash);
 
 static void allocate_random_relation(Relation_Data* rel_data,
 		Assignment_Log_Context *log_context, TargetSegmentIDMap* idMap,
@@ -2477,7 +2477,7 @@ static int64 set_maximum_segment_volumn_parameter(Relation_Data *rel_data,
 static bool allocate_hash_relation(Relation_Data* rel_data,
 		Assignment_Log_Context* log_context, TargetSegmentIDMap* idMap,
 		Relation_Assignment_Context* assignment_context,
-		split_to_segment_mapping_context *context) {
+		split_to_segment_mapping_context *context, bool parentIsHashExist, bool parentIsHash) {
 	/* the allocation unit in a hash relation is the file: we assign all the blocks of one file to one virtual segment */
 	ListCell *lc_file;
 	int fileCount = 0;
@@ -2492,8 +2492,8 @@ static bool allocate_hash_relation(Relation_Data* rel_data,
 			for (int i = 0; i < rel_file->split_num; i++) {
 				int64 split_size = rel_file->splits[i].length;
 				int targethost = (rel_file->segno - 1) % (assignment_context->virtual_segment_num);
-				//calculate keephash datalocality
-				// for keep hash one file corresponds to one split
+				/* calculate keep-hash data locality */
+				/* for keep-hash, one file corresponds to one split */
 				for (int p = 0; p < rel_file->block_num; p++) {
 					bool islocal = false;
 					Block_Host_Index *hostID = rel_file->hostIDs + p;
@@ -2522,8 +2522,17 @@ static bool allocate_hash_relation(Relation_Data* rel_data,
 			relationDatalocality = log_context->localDataSizePerRelation / log_context->totalDataSizePerRelation;
 	}
 	double hash2RandomDatalocalityThreshold= 0.9;
-	// for now orca doesn't support convert hash to random
-	if(relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 && !optimizer){
+	/* for a partitioned hash table, whether to convert it from hash to random
+	 * is determined by the data locality of the first partition */
+	if (parentIsHashExist) {
+		if (!parentIsHash) {
+			log_context->totalDataSizePerRelation = 0;
+			log_context->localDataSizePerRelation = 0;
+			return true;
+		}
+	}
+	/* for now ORCA doesn't support converting hash to random */
+	else if (relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 && !optimizer) {
 		log_context->totalDataSizePerRelation =0;
 		log_context->localDataSizePerRelation =0;
 		return true;
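
Restated as a standalone predicate, the conversion rule in the hunk above
keeps hash distribution only when at least 90% of the relation's data is
local, and never converts under ORCA. should_convert_hash_to_random is a
hypothetical helper; the 0.9 literal is hash2RandomDatalocalityThreshold
in the real code:

    static bool
    should_convert_hash_to_random(double localDataSize, double totalDataSize,
                                  bool usingOrca)
    {
    	/* share of this relation's data that can be read locally */
    	double locality = (totalDataSize > 0) ? localDataSize / totalDataSize : 1.0;

    	/* ORCA cannot yet re-plan a hash table as random, so never convert there */
    	return !usingOrca && locality >= 0 && locality < 0.9;
    }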
@@ -3515,36 +3524,6 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 				compare_relation_size);
 	}
 
-	/*
-	Relation pg_proc_rel;
-		TupleDesc pg_proc_dsc;
-		HeapTuple tuple;
-		SysScanDesc pg_proc_scan;
-
-		pg_proc_rel = heap_open(ProcedureRelationId, AccessShareLock);
-		pg_proc_dsc = RelationGetDescr(pg_proc_rel);
-		ScanKeyData skey;
-
-		ScanKeyInit(&skey, PRONAME, BTEqualStrategyNumber,
-		F_NAMEEQ, CStringGetDatum(funcName));
-
-		pg_proc_scan = systable_beginscan(pg_proc_rel, InvalidOid, FALSE,
-				ActiveSnapshot, 1, &skey);
-		while (HeapTupleIsValid(tuple = systable_getnext(pg_proc_scan))) {
-
-			bool isAgg = DatumGetBool(fastgetattr(tuple, PROISAGG, pg_proc_dsc, NULL));
-			systable_endscan(pg_proc_scan);
-			heap_close(pg_proc_rel, AccessShareLock);
-			if (isAgg) {
-				return true;
-			} else {
-				return false;
-			}
-		}
-		systable_endscan(pg_proc_scan);
-		heap_close(pg_proc_rel, AccessShareLock);
-		*/
-
 	assignment_context.patition_parent_size_map = createHASHTABLE(
 				context->datalocality_memorycontext, 16,
 				HASHTABLE_SLOT_VOLUME_DEFAULT_MAX, HASHTABLE_KEYTYPE_UINT32,
@@ -3563,7 +3542,8 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 	cqContext  *pcqCtx;
 	cqContext	cqc;
 	HeapTuple	inhtup;
-	/*calculate average size per vseg for all all the relation in a query*/
+	/* calculate average size per vseg for all the relations in a query
+	 * and initialize the patition_parent_size_map */
 	for (int relIndex = 0; relIndex < relationCount; relIndex++) {
 			Relation_Data *rel_data = rel_data_vector[relIndex];
 			pcqCtx = caql_beginscan(
@@ -3640,6 +3620,7 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 	int allocate_hash_or_random_time = 0;
 
 	bool vSegOrderChanged = false;
+	List* parentRelsType = NULL;
 	for (int relIndex = 0; relIndex < relationCount; relIndex++) {
 		log_context.localDataSizePerRelation = 0;
 		log_context.totalDataSizePerRelation = 0;
@@ -3695,11 +3676,35 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 		if (isRelationHash) {
 			if (context->keep_hash && assignment_context.virtual_segment_num
 						== targetPolicy->bucketnum) {
-				bool needToChangeHash2Random = allocate_hash_relation(rel_data,
-						&log_context, &idMap, &assignment_context, context);
-				MemoryContextSwitchTo(context->old_memorycontext);
+				ListCell* parlc;
+				bool parentIsHashExist = false;
+				bool parentIsHash = false;
+				/* check whether this relation's partitioned-table parent already has a hash-or-random decision */
+				if (parentRelsType != NULL) {
+					foreach(parlc, parentRelsType)
+					{
+						CurrentRelType* prtype = (CurrentRelType *) lfirst(parlc);
+						if (prtype->relid == rel_data->partition_parent_relid || prtype->relid == rel_data->relid) {
+							parentIsHashExist = true;
+							parentIsHash = prtype->isHash;
+						}
+					}
+				}
+				bool needToChangeHash2Random = allocate_hash_relation(rel_data,
+						&log_context, &idMap, &assignment_context, context,
+						parentIsHashExist, parentIsHash);
+				if (!parentIsHashExist) {
+					/* for a partitioned table, whether to convert from hash to random is decided by the
+					 * first partition. the planner does not need this list, so it need not be in the global memory context */
+					CurrentRelType* parentRelType = (CurrentRelType *) palloc(
+							sizeof(CurrentRelType));
+					parentRelType->relid = rel_data->partition_parent_relid;
+					parentRelType->isHash = !needToChangeHash2Random;
+					parentRelsType = lappend(parentRelsType, parentRelType);
+				}
+				MemoryContext cur_memorycontext;
+				cur_memorycontext = MemoryContextSwitchTo(context->old_memorycontext);
 				CurrentRelType* relType = (CurrentRelType *) palloc(sizeof(CurrentRelType));
-				//CurrentRelType* relType = makeNode(CurrentRelType);
 				relType->relid = rel_data->relid;
 				if (needToChangeHash2Random) {
 					relType->isHash = false;
@@ -3707,7 +3712,7 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 					relType->isHash = true;
 				}
 				result->relsType = lappend(result->relsType, relType);
-				MemoryContextSwitchTo(context->datalocality_memorycontext);
+				MemoryContextSwitchTo(cur_memorycontext);
 				if (needToChangeHash2Random) {
 					result->forbid_optimizer = true;
 					allocate_random_relation(rel_data, &log_context, &idMap, 	&assignment_context, context);
@@ -3715,14 +3720,14 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 			}
 			/*allocate hash relation as a random relation*/
 			else{
-				MemoryContextSwitchTo(context->old_memorycontext);
+				MemoryContext cur_memorycontext;
+				cur_memorycontext = MemoryContextSwitchTo(context->old_memorycontext);
 				CurrentRelType* relType = (CurrentRelType *) palloc(
 						sizeof(CurrentRelType));
 				relType->relid = rel_data->relid;
 				relType->isHash = false;
 				result->relsType = lappend(result->relsType, relType);
-				MemoryContextSwitchTo(context->datalocality_memorycontext);
-
+				MemoryContextSwitchTo(cur_memorycontext);
 				result->forbid_optimizer = true;
 				allocate_random_relation(rel_data, &log_context,&idMap, &assignment_context, context);
 			}
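
The MemoryContextSwitchTo changes in the two hunks above share one
pattern: instead of assuming the caller was in datalocality_memorycontext
and switching back to it unconditionally, the fixed code remembers
whatever context was active and restores exactly that. In miniature, with
build_rel_type as a hypothetical helper around the real CurrentRelType:

    static CurrentRelType *
    build_rel_type(MemoryContext target, Oid relid, bool isHash)
    {
    	/* remember the caller's context instead of hard-coding it */
    	MemoryContext old = MemoryContextSwitchTo(target);

    	CurrentRelType *relType = (CurrentRelType *) palloc(sizeof(CurrentRelType));
    	relType->relid = relid;
    	relType->isHash = isHash;

    	MemoryContextSwitchTo(old);	/* restore exactly what was active before */
    	return relType;
    }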
@@ -4000,7 +4005,7 @@ calculate_planner_segment_num(Query *query, QueryResourceLife resourceLife,
 						context.randomSegNum = context.hashSegNum;
 					}
 					maxTargetSegmentNumber = context.randomSegNum;
-					minTargetSegmentNumber = context.hashSegNum;
+					minTargetSegmentNumber = minimum_segment_num;
 				} else {
 					maxTargetSegmentNumber = context.hashSegNum;
 					minTargetSegmentNumber = context.hashSegNum;
@@ -4014,7 +4019,7 @@ calculate_planner_segment_num(Query *query, QueryResourceLife resourceLife,
 					context.randomSegNum = context.tableFuncSegNum;
 				}
 				maxTargetSegmentNumber = context.randomSegNum;
-				minTargetSegmentNumber = context.tableFuncSegNum;
+				minTargetSegmentNumber = minimum_segment_num;
 			}
 		} else {
 			maxTargetSegmentNumber = context.randomSegNum;

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/72e29416/src/backend/cdb/cdbtargeteddispatch.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbtargeteddispatch.c b/src/backend/cdb/cdbtargeteddispatch.c
index 804b1f5..728e3fc 100644
--- a/src/backend/cdb/cdbtargeteddispatch.c
+++ b/src/backend/cdb/cdbtargeteddispatch.c
@@ -26,6 +26,7 @@
 #include "cdb/cdbplan.h"
 #include "cdb/cdbvars.h"
 #include "cdb/cdbutil.h"
+#include "cdb/cdbdatalocality.h"
 
 #include "executor/executor.h"
 
@@ -594,6 +595,21 @@ AssignContentIdsToPlanData(Query *query, Plan *plan, PlannerInfo *root)
 	data.rtable = root->glob->finalrtable;
 	data.allSlices = NULL;
 
+	/* apply the planner's hash-to-random decisions to the range table for targeted dispatch */
+	List* relsType = root->glob->relsType;
+	ListCell *lc;
+	foreach(lc, relsType)
+	{
+		CurrentRelType *relType = (CurrentRelType *) lfirst(lc);
+		ListCell *lctable;
+		foreach(lctable, data.rtable)
+		{
+			RangeTblEntry *rte = (RangeTblEntry *) lfirst(lctable);
+			if (relType->relid == rte->relid) {
+				rte->forceDistRandom = !relType->isHash;
+			}
+		}
+	}
+
 	/* Do it! */
 	AssignContentIdsToPlanData_Walker((Node*)plan, &data);
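
The new loop matters because targeted dispatch narrows single-row reads
on a hash-distributed table to one segment. If the planner has silently
fallen back to random distribution, that narrowing would consult the
wrong segment and return wrong (typically empty) results. Roughly, with
choose_dispatch_target as a hypothetical stand-in for the walker's
decision:

    static int
    choose_dispatch_target(RangeTblEntry *rte, uint32 hashValue, int segmentCount)
    {
    	if (rte->forceDistRandom)
    		return -1;		/* tuples may be anywhere: dispatch to all segments */

    	return (int) (hashValue % segmentCount);	/* safe only for a true hash table */
    }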