You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by ml...@apache.org on 2016/08/01 02:54:34 UTC

[4/5] incubator-hawq git commit: HAWQ-936. Add GUC for array expansion in ORCA optimizer

HAWQ-936. Add GUC for array expansion in ORCA optimizer

Consider the query with the following pattern 'select * from foo where foo.a IN
(1,2,3,...)'. Currently, when the number of constants in the IN subquery is large,
the query optimization time is unacceptable. This is stopping customers from turning
Orca on by default since many of the queries are generated queries with such a
pattern. The root cause is due to the expansion of the IN subquery into an
expression in disjunctive normal form. The objective of this patch is to add a guc
parameter 'optimizer_array_expansion_threshold' to control the maximum
number of array elements in IN array list. By default, the GUC value is
set to 25, which means if the array size is larger than 25, the array
expansion is disabled.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/950065bb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/950065bb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/950065bb

Branch: refs/heads/master
Commit: 950065bb0db627992be6b96afb4671b3c5ab1c90
Parents: eae0585
Author: Haisheng Yuan and Lubomir Petrov <hy...@pivotal.io>
Authored: Mon Jul 18 17:57:51 2016 -0700
Committer: Ming LI <ml...@pivotal.io>
Committed: Mon Aug 1 10:42:07 2016 +0800

----------------------------------------------------------------------
 depends/thirdparty/gporca.commit      |  2 +-
 src/backend/gpopt/ivy.xml             |  2 +-
 src/backend/gpopt/utils/COptTasks.cpp |  5 ++++-
 src/backend/utils/misc/guc.c          | 14 ++++++++++++--
 src/include/utils/guc.h               |  5 +++--
 5 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/depends/thirdparty/gporca.commit
----------------------------------------------------------------------
diff --git a/depends/thirdparty/gporca.commit b/depends/thirdparty/gporca.commit
index d4192e1..cf4eaaa 100644
--- a/depends/thirdparty/gporca.commit
+++ b/depends/thirdparty/gporca.commit
@@ -1 +1 @@
-https://github.com/greenplum-db/gporca.git master 03be7066f58f2b3bd0ab5f866458ea526be96494
+https://github.com/greenplum-db/gporca.git master c5e40f283703b5fa4c2eb40f367ab7c1b1ab4d0d

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/gpopt/ivy.xml
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/ivy.xml b/src/backend/gpopt/ivy.xml
index 4d86feb..045c717 100644
--- a/src/backend/gpopt/ivy.xml
+++ b/src/backend/gpopt/ivy.xml
@@ -38,7 +38,7 @@ under the License.
     </configurations>
 
     <dependencies>
-      <dependency org="emc"             name="optimizer"       rev="1.637"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
+      <dependency org="emc"             name="optimizer"       rev="1.638"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
       <dependency org="emc"             name="libgpos"         rev="1.137"          conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
       <dependency org="xerces"          name="xerces-c"        rev="3.1.1-p1"       conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" />
     </dependencies>

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/gpopt/utils/COptTasks.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp
index 8511f3e..03de99e 100644
--- a/src/backend/gpopt/utils/COptTasks.cpp
+++ b/src/backend/gpopt/utils/COptTasks.cpp
@@ -764,6 +764,7 @@ COptTasks::PoconfCreate
 	ULONG ulCTEInliningCutoff =  (ULONG) optimizer_cte_inlining_bound;
 	ULONG ulPartsToForceSortOnInsert =  (ULONG) optimizer_parts_to_force_sort_on_insert;
 	ULONG ulJoinArityForAssociativityCommutativity =  (ULONG) optimizer_join_arity_for_associativity_commutativity;
+	ULONG ulArrayExpansionThreshold =  (ULONG) optimizer_array_expansion_threshold;
 
 	return GPOS_NEW(pmp) COptimizerConfig
 						(
@@ -771,7 +772,9 @@ COptTasks::PoconfCreate
 						GPOS_NEW(pmp) CStatisticsConfig(pmp, dDampingFactorFilter, dDampingFactorJoin, dDampingFactorGroupBy),
 						GPOS_NEW(pmp) CCTEConfig(ulCTEInliningCutoff),
 						pcm,
-						GPOS_NEW(pmp) CHint(ulPartsToForceSortOnInsert /* optimizer_parts_to_force_sort_on_insert */, ulJoinArityForAssociativityCommutativity)
+						GPOS_NEW(pmp) CHint(ulPartsToForceSortOnInsert /* optimizer_parts_to_force_sort_on_insert */,
+										ulJoinArityForAssociativityCommutativity,
+										ulArrayExpansionThreshold)
 						);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/utils/misc/guc.c
----------------------------------------------------------------------
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 48e09f2..4bd2aa5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -741,6 +741,9 @@ double 	optimizer_damping_factor_filter;
 double	optimizer_damping_factor_join;
 double 	optimizer_damping_factor_groupby;
 int		optimizer_segments;
+int		optimizer_parts_to_force_sort_on_insert;
+int		optimizer_join_arity_for_associativity_commutativity;
+int		optimizer_array_expansion_threshold;
 bool		optimizer_analyze_root_partition;
 bool		optimizer_analyze_midlevel_partition;
 bool		optimizer_enable_constant_expression_evaluation;
@@ -760,8 +763,6 @@ bool 		optimizer_multilevel_partitioning;
 bool        optimizer_enable_derive_stats_all_groups;
 bool		optimizer_explain_show_status;
 bool		optimizer_prefer_scalar_dqa_multistage_agg;
-int		optimizer_parts_to_force_sort_on_insert;
-int		optimizer_join_arity_for_associativity_commutativity;
 
 /* Security */
 bool		gp_reject_internal_tcp_conn = true;
@@ -6124,6 +6125,15 @@ static struct config_int ConfigureNamesInt[] =
 		43200000, 0, INT_MAX, NULL, NULL
 	},
 	{
+		{"optimizer_array_expansion_threshold", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Item limit for expansion of arrays in WHERE clause to disjunctive form."),
+			NULL,
+			GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
+		},
+		&optimizer_array_expansion_threshold,
+		25, 0, INT_MAX, NULL, NULL
+	},
+	{
 		{"memory_profiler_dataset_size", PGC_USERSET, DEVELOPER_OPTIONS,
 			gettext_noop("Set the size in GB"),
 			NULL,

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/include/utils/guc.h
----------------------------------------------------------------------
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 337d265..67183af 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -416,6 +416,9 @@ extern double optimizer_damping_factor_filter;
 extern double optimizer_damping_factor_join;
 extern double optimizer_damping_factor_groupby;
 extern int optimizer_segments;
+extern int optimizer_parts_to_force_sort_on_insert;
+extern int optimizer_join_arity_for_associativity_commutativity;
+extern int optimizer_array_expansion_threshold;
 extern bool optimizer_analyze_root_partition;
 extern bool optimizer_analyze_midlevel_partition;
 extern bool optimizer_enable_constant_expression_evaluation;
@@ -436,8 +439,6 @@ extern bool optimizer_multilevel_partitioning;
 extern bool optimizer_enable_derive_stats_all_groups;
 extern bool optimizer_explain_show_status;
 extern bool optimizer_prefer_scalar_dqa_multistage_agg;
-extern int  optimizer_parts_to_force_sort_on_insert;
-extern int  optimizer_join_arity_for_associativity_commutativity;
 
 /**
  * Enable logging of DPE match in optimizer.