You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by en...@apache.org on 2015/11/02 20:21:39 UTC

incubator-hawq git commit: HAWQ-80. Support dynamic relation distribution type in gpopt

Repository: incubator-hawq
Updated Branches:
  refs/heads/master ef20b220d -> 4375391dd


HAWQ-80. Support dynamic relation distribution type in gpopt


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/4375391d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/4375391d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/4375391d

Branch: refs/heads/master
Commit: 4375391dd3d9a201fc6759c2bc34f2d40c29868e
Parents: ef20b22
Author: Entong Shen <es...@pivotal.io>
Authored: Fri Oct 23 17:48:23 2015 -0700
Committer: Entong Shen <sh...@gmail.com>
Committed: Mon Nov 2 11:20:30 2015 -0800

----------------------------------------------------------------------
 src/backend/cdb/cdbdatalocality.c               |  7 +--
 src/backend/gpopt/gpdbwrappers.cpp              | 26 +++++++++
 .../translate/CTranslatorRelcacheToDXL.cpp      | 58 ++++++++++++++++++--
 src/backend/optimizer/plan/planner.c            |  3 +-
 src/include/cdb/cdbdatalocality.h               |  2 -
 src/include/gpopt/gpdbwrappers.h                |  7 +++
 .../gpopt/translate/CTranslatorRelcacheToDXL.h  |  4 ++
 7 files changed, 94 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c
index d4484fa..c9b6fd2 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -2531,9 +2531,8 @@ static bool allocate_hash_relation(Relation_Data* rel_data,
 			return true;
 		}
 	}
-	/*for now orca doesn't support convert hash to random*/
 	else if((hash_to_random_flag == ENFORCE_HASH_TO_RANDOM ||
-			(relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 && !optimizer))
+			(relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 ))
 			&& hash_to_random_flag != ENFORCE_KEEP_HASH){
 		log_context->totalDataSizePerRelation =0;
 		log_context->localDataSizePerRelation =0;
@@ -3716,7 +3715,6 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 				result->relsType = lappend(result->relsType, relType);
 				MemoryContextSwitchTo(cur_memorycontext);
 				if (needToChangeHash2Random) {
-					result->forbid_optimizer = true;
 					allocate_random_relation(rel_data, &log_context, &idMap, 	&assignment_context, context);
 				}
 			}
@@ -3730,7 +3728,6 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
 				relType->isHash = false;
 				result->relsType = lappend(result->relsType, relType);
 				MemoryContextSwitchTo(cur_memorycontext);
-				result->forbid_optimizer = true;
 				allocate_random_relation(rel_data, &log_context,&idMap, &assignment_context, context);
 			}
 
@@ -3837,7 +3834,7 @@ calculate_planner_segment_num(Query *query, QueryResourceLife resourceLife,
 	result = (SplitAllocResult *) palloc(sizeof(SplitAllocResult));
 	result->relsType = NIL;
 	result->datalocalityInfo = makeStringInfo();
-	result->forbid_optimizer = false;
+
 	/* fake data locality */
 	if (debug_fake_datalocality) {
 		fp = fopen("/tmp/cdbdatalocality.result", "w+");

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/gpopt/gpdbwrappers.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp
index 23d984b..1f6a549 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -199,6 +199,8 @@
 #define ALLOW_isMotionGather
 #define ALLOW_estimate_rel_size
 #define ALLOW_rel_partitioning_is_uniform
+#define ALLOW_GetActiveRelType
+#define ALLOW_GetActiveQueryResource
 
 #define ALLOW_mdver_request_version
 #define ALLOW_mdver_enabled
@@ -2364,6 +2366,30 @@ gpdb::Pdistrpolicy
     return NULL;
 }
 
+
+List *
+gpdb::PlActiveRelTypes(void)  // wrapper: returns the active relation distribution types obtained from GetActiveRelType()
+{
+	GP_WRAP_START;  // NOTE(review): macro is defined outside this hunk; presumably guards the backend call -- confirm
+	{
+		return GetActiveRelType();  // result is handed back to the caller unchanged
+	}
+	GP_WRAP_END;
+	return NULL;  // trailing fallback return after the wrapper macro, as in gpdb::Pdistrpolicy above
+}
+
+QueryResource *
+gpdb::PqrActiveQueryResource(void)  // wrapper: returns the active query resource obtained from GetActiveQueryResource()
+{
+	GP_WRAP_START;  // NOTE(review): macro is defined outside this hunk; presumably guards the backend call -- confirm
+	{
+		return GetActiveQueryResource();  // may be NULL; FTreatAsRandom checks for that case
+	}
+	GP_WRAP_END;
+	return NULL;  // trailing fallback return after the wrapper macro, as in gpdb::Pdistrpolicy above
+}
+
+
 BOOL
 gpdb::FChildPartDistributionMismatch
 	(

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 5e5580d..2800e8b 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -58,6 +58,7 @@
 #include "catalog/pg_exttable.h"
 
 #include "cdb/cdbpartition.h"
+#include "cdb/cdbdatalocality.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_statistic.h"
 
@@ -504,6 +505,52 @@ CTranslatorRelcacheToDXL::CheckUnsupportedRelation
 
 //---------------------------------------------------------------------------
 //	@function:
+//		CTranslatorRelcacheToDXL::FTreatAsRandom
+//
+//	@doc:
+//		Whether we need to treat a hash distributed table as random distributed
+//
+//---------------------------------------------------------------------------
+BOOL
+CTranslatorRelcacheToDXL::FTreatAsRandom  // true => the hash distributed relation should be treated as random distributed
+	(
+	OID oid,  // relation id; compared against CurrentRelType::relid in the loop below
+	GpPolicy *pgppolicy  // distribution policy of the relation; only bucketnum is read here
+	)
+{
+	QueryResource *resource = gpdb::PqrActiveQueryResource();
+	if (NULL == resource)
+	{
+		// No query resource has been allocated, which means we are inside an 'explain';
+		// no hash-to-random conversion will happen, so report "keep hash".
+		return false;
+	}
+
+	List *lRelsType = gpdb::PlActiveRelTypes();  // active relation distribution types; elements are read as CurrentRelType
+	ListCell *lc = NULL;
+	foreach(lc, lRelsType)
+	{
+		CurrentRelType *relType = (CurrentRelType *) lfirst(lc);
+		if (relType->relid == oid)
+		{
+			/* A hash distributed table is kept as hash only if both conditions hold:
+			 * 1. The active relation type is hash
+			 * 2. The bucketnum of this relation matches the number of vSegs allocated
+			 */
+			if (relType->isHash && pgppolicy->bucketnum == list_length(resource->segments))
+			{
+				return false; // keep hash distributed
+			}
+			break;  // entry found but a condition failed: stop scanning and fall through to 'return true'
+		}
+	}
+
+	return true;  // also reached when oid has no entry in the active relation type list
+}
+
+
+//---------------------------------------------------------------------------
+//	@function:
 //		CTranslatorRelcacheToDXL::Pmdrel
 //
 //	@doc:
@@ -538,7 +585,7 @@ CTranslatorRelcacheToDXL::Pmdrel
 	DrgPmdid *pdrgpmdidIndexes = NULL;
 	DrgPmdid *pdrgpmdidTriggers = NULL;
 	DrgPul *pdrgpulPartKeys = NULL;
-	BOOL fChildDistributionMismatch = false;
+	BOOL fConvertHashToRandom = false;
 	DrgPdrgPul *pdrgpdrgpulKeys = NULL;
 	DrgPmdid *pdrgpmdidCheckConstraints = NULL;
 	BOOL fTemporary = false;
@@ -562,14 +609,16 @@ CTranslatorRelcacheToDXL::Pmdrel
 		// get distribution policy
 		GpPolicy *pgppolicy = gpdb::Pdistrpolicy(rel);
 		ereldistribution = Ereldistribution(pgppolicy);
-		fChildDistributionMismatch = gpdb::FChildPartDistributionMismatch(rel);
 
-		// get distribution columns
+		// determine if table should be treated as randomly distributed, otherwise get distribution columns
 		if (IMDRelation::EreldistrHash == ereldistribution)
 		{
+			fConvertHashToRandom = FTreatAsRandom(oid, pgppolicy);
 			pdrpulDistrCols = PdrpulDistrCols(pmp, pgppolicy, pdrgpmdcol, ulMaxCols);
 		}
 
+		fConvertHashToRandom = fConvertHashToRandom || gpdb::FChildPartDistributionMismatch(rel);
+
 		// collect relation indexes
 		pdrgpmdidIndexes = PdrgpmdidRelIndexes(pmp, rel);
 
@@ -635,6 +684,7 @@ CTranslatorRelcacheToDXL::Pmdrel
 							ereldistribution,
 							pdrgpmdcol,
 							pdrpulDistrCols,
+							fConvertHashToRandom,
 							pdrgpdrgpulKeys,
 							pdrgpmdidIndexes,
 							pdrgpmdidTriggers,
@@ -660,7 +710,7 @@ CTranslatorRelcacheToDXL::Pmdrel
 							pdrgpmdcol,
 							pdrpulDistrCols,
 							pdrgpulPartKeys,
-							fChildDistributionMismatch,
+							fConvertHashToRandom,
 							pdrgpdrgpulKeys,
 							pdrgpmdidIndexes,
 							pdrgpmdidTriggers,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/optimizer/plan/planner.c
----------------------------------------------------------------------
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a0d7ec3..e007a69 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -340,8 +340,7 @@ planner(Query *parse, int cursorOptions,
 		* then fall back to the planner.
 		* TODO: caragg 11/08/2013: Enable ORCA when running in utility mode (MPP-21841)
 		*/
-		if (!ppResult->saResult.forbid_optimizer && optimizer
-				&& AmIMaster() && (GP_ROLE_UTILITY != Gp_role))
+    	if (optimizer && AmIMaster() && (GP_ROLE_UTILITY != Gp_role))
 		{
 			if (gp_log_optimization_time)
 			{

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/cdb/cdbdatalocality.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbdatalocality.h b/src/include/cdb/cdbdatalocality.h
index 09c02fb..25d1ada 100644
--- a/src/include/cdb/cdbdatalocality.h
+++ b/src/include/cdb/cdbdatalocality.h
@@ -28,8 +28,6 @@ typedef struct SplitAllocResult
   int planner_segments;
   List *relsType;// relation type after datalocality changing
   StringInfo datalocalityInfo;
-  //orca currently doesn't support hash table to be processed as random table.
-  bool forbid_optimizer;
 } SplitAllocResult;
 
 /*

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/gpopt/gpdbwrappers.h
----------------------------------------------------------------------
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index 490a4d7..552f71d 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -55,6 +55,7 @@ struct GpPolicy;
 struct PartitionSelector;
 struct SelectedParts;
 struct Motion;
+struct QueryResource;
 
 namespace gpdb {
 
@@ -488,6 +489,12 @@ namespace gpdb {
     // and the parts are distributed differently, return Random distribution
     GpPolicy *Pdistrpolicy(Relation rel);
     
+    // return active relation distribution types
+    List *PlActiveRelTypes(void);
+
+    // return active query resource
+    QueryResource *PqrActiveQueryResource(void);
+
     // return true if the table is partitioned and hash-distributed, and one of  
     // the child partitions is randomly distributed
     BOOL FChildPartDistributionMismatch(Relation rel);

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
----------------------------------------------------------------------
diff --git a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
index 5d789b5..89a7ccd 100644
--- a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
@@ -307,6 +307,10 @@ namespace gpdxl
 			static 
 			BOOL FDefaultPartition(List *plDefaultLevels, ULONG ulLevel);
 			
+			// treat a hash distributed table as random distributed
+			static
+			BOOL FTreatAsRandom(OID oid, GpPolicy *pgppolicy);
+
 			// retrieve part constraint for index
 			static
 			CMDPartConstraintGPDB *PmdpartcnstrIndex