You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by nh...@apache.org on 2016/01/20 21:16:14 UTC

[4/4] incubator-hawq git commit: HAWQ-161. Port GPDB planner fixes to HAWQ Query crashed with segmentation fault with optimizer=off.

HAWQ-161. Port GPDB planner fixes to HAWQ
Query crashed with segmentation fault with optimizer=off.

The crash happened in the executor, but the root cause is in the planner. The
planner mistakenly believes that when both the root node of the subplan and the
corresponding main plan node are executed on the QD, there is no need to call
ParallelizeCorrelatedSubPlan. However, in the situation presented in the JIRA,
where both the main plan node (Window) and the subplan node (Agg) are executed
on the QD, we still need to parallelize the subplan; otherwise, the SeqScan node
cannot get the parameter from the main plan, because they are in different slices.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/6e2846fa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/6e2846fa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/6e2846fa

Branch: refs/heads/master
Commit: 6e2846fafe40c4259a5e81e2f5c187ff04bb8c07
Parents: 1b11926
Author: Kenan Yao <ky...@pivotal.io>
Authored: Mon Nov 24 11:31:47 2014 +0800
Committer: Noa Horn <nh...@pivotal.io>
Committed: Wed Jan 20 12:15:52 2016 -0800

----------------------------------------------------------------------
 src/backend/cdb/cdbllize.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/6e2846fa/src/backend/cdb/cdbllize.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c
index 4aa8795..8e02f3c 100644
--- a/src/backend/cdb/cdbllize.c
+++ b/src/backend/cdb/cdbllize.c
@@ -329,6 +329,7 @@ typedef struct ParallelizeCorrelatedPlanWalkerContext
 	Movement movement; /* What is the final movement necessary? Is it gather or broadcast */
 	List *rtable; /* rtable from the global context */
 	bool subPlanDistributed; /* is original subplan distributed */
+	bool subPlanHasMotion;/* is original subplan has motion already  */
 } ParallelizeCorrelatedPlanWalkerContext;
 
 /**
@@ -462,6 +463,20 @@ static Node* ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelat
 		|| IsA(node, ShareInputScan))
 	{
 		Plan *scanPlan = (Plan *) node;
+		/**
+		 * If original subplan has no motion, we double check whether the scan
+		 * node is on catalog table or not. If catalog, no need to apply
+		 * parallelization.
+		 * This is for case like:
+		 * SELECT array(select case when p.oid in (select
+		 * unnest(array[typoutput, typsend]) from pg_type) then 'upg_catalog.'
+		 * else 'pg_catalog.' end) FROM pg_proc p;
+		 **/
+		if(!ctx->subPlanHasMotion)
+		{
+			if(!scanPlan->flow || scanPlan->flow->flotype == FLOW_REPLICATED)
+				return (Node *)node;
+		}
 
 		/**
 		 * Steps:
@@ -595,6 +610,19 @@ static Node* ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelat
 		{
 			return node;
 		}
+
+		/**
+		 * Pull up the parallelization of subplan to here, because we want to
+		 * set the is_parallelized flag. We don't want to apply double
+		 * parallelization for nested subplan.
+		 * This is for case like:
+		 * select A.i, A.j, (select sum(C.j) from C where C.j = A.j and C.i =
+		 * (select A.i from A where A.i = C.i)) from A;
+		 **/
+		SubPlan* new_sp = (SubPlan *)plan_tree_mutator(node, ParallelizeCorrelatedSubPlanMutator, ctx);
+		Assert(new_sp);
+		new_sp->is_parallelized = true;
+		return (Node *)new_sp;
 	}
 
 	/**
@@ -603,6 +631,7 @@ static Node* ParallelizeCorrelatedSubPlanMutator(Node *node, ParallelizeCorrelat
 	 */
 	if (IsA(node, Motion))
 	{
+		ctx->subPlanHasMotion = true;
 		Plan *plan = (Plan *) node;
 		node = (Node *) plan->lefttree;
 		Assert(node);
@@ -621,6 +650,7 @@ Plan* ParallelizeCorrelatedSubPlan(PlannerInfo *root, SubPlan *spExpr, Plan *pla
 	ctx.base.node = (Node *) root;
 	ctx.movement = m;
 	ctx.subPlanDistributed = subPlanDistributed;
+	ctx.subPlanHasMotion = false;
 	ctx.sp = spExpr;
 	ctx.rtable = root->glob->finalrtable;
 	return (Plan *) ParallelizeCorrelatedSubPlanMutator((Node *) plan, &ctx);
@@ -655,6 +685,7 @@ void ParallelizeSubplan(SubPlan *spExpr, PlanProfile *context)
 
 	bool containingPlanDistributed = (context->currentPlanFlow && context->currentPlanFlow->flotype == FLOW_PARTITIONED);
 	bool subPlanDistributed = (origPlan->flow && origPlan->flow->flotype == FLOW_PARTITIONED);
+	bool hasParParam = (list_length(spExpr->parParam) > 0);
 
 	/**
 	 * If containing plan is distributed then we must know the flow of the subplan.
@@ -693,7 +724,11 @@ void ParallelizeSubplan(SubPlan *spExpr, PlanProfile *context)
 
 		newPlan = materialize_subplan(context->root, newPlan);
 	}
-	else if (containingPlanDistributed || subPlanDistributed)
+	/* *
+	 * [JIRA: MPP-24563] Adding hasParParam check here, for the kind of cases in
+	 * JIRA, which has both focused parent plan and subplan.
+	 * */
+	else if(containingPlanDistributed || subPlanDistributed || hasParParam)
 	{
 		Movement reqMove = containingPlanDistributed ? MOVEMENT_BROADCAST : MOVEMENT_FOCUS;