You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by en...@apache.org on 2015/11/02 20:21:39 UTC
incubator-hawq git commit: HAWQ-80. Support dynamic relation
distribution type in gpopt
Repository: incubator-hawq
Updated Branches:
refs/heads/master ef20b220d -> 4375391dd
HAWQ-80. Support dynamic relation distribution type in gpopt
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/4375391d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/4375391d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/4375391d
Branch: refs/heads/master
Commit: 4375391dd3d9a201fc6759c2bc34f2d40c29868e
Parents: ef20b22
Author: Entong Shen <es...@pivotal.io>
Authored: Fri Oct 23 17:48:23 2015 -0700
Committer: Entong Shen <sh...@gmail.com>
Committed: Mon Nov 2 11:20:30 2015 -0800
----------------------------------------------------------------------
src/backend/cdb/cdbdatalocality.c | 7 +--
src/backend/gpopt/gpdbwrappers.cpp | 26 +++++++++
.../translate/CTranslatorRelcacheToDXL.cpp | 58 ++++++++++++++++++--
src/backend/optimizer/plan/planner.c | 3 +-
src/include/cdb/cdbdatalocality.h | 2 -
src/include/gpopt/gpdbwrappers.h | 7 +++
.../gpopt/translate/CTranslatorRelcacheToDXL.h | 4 ++
7 files changed, 94 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/cdb/cdbdatalocality.c
----------------------------------------------------------------------
diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c
index d4484fa..c9b6fd2 100644
--- a/src/backend/cdb/cdbdatalocality.c
+++ b/src/backend/cdb/cdbdatalocality.c
@@ -2531,9 +2531,8 @@ static bool allocate_hash_relation(Relation_Data* rel_data,
return true;
}
}
- /*for now orca doesn't support convert hash to random*/
else if((hash_to_random_flag == ENFORCE_HASH_TO_RANDOM ||
- (relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 && !optimizer))
+ (relationDatalocality < hash2RandomDatalocalityThreshold && relationDatalocality >= 0 ))
&& hash_to_random_flag != ENFORCE_KEEP_HASH){
log_context->totalDataSizePerRelation =0;
log_context->localDataSizePerRelation =0;
@@ -3716,7 +3715,6 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
result->relsType = lappend(result->relsType, relType);
MemoryContextSwitchTo(cur_memorycontext);
if (needToChangeHash2Random) {
- result->forbid_optimizer = true;
allocate_random_relation(rel_data, &log_context, &idMap, &assignment_context, context);
}
}
@@ -3730,7 +3728,6 @@ run_allocation_algorithm(SplitAllocResult *result, List *virtual_segments, Query
relType->isHash = false;
result->relsType = lappend(result->relsType, relType);
MemoryContextSwitchTo(cur_memorycontext);
- result->forbid_optimizer = true;
allocate_random_relation(rel_data, &log_context,&idMap, &assignment_context, context);
}
@@ -3837,7 +3834,7 @@ calculate_planner_segment_num(Query *query, QueryResourceLife resourceLife,
result = (SplitAllocResult *) palloc(sizeof(SplitAllocResult));
result->relsType = NIL;
result->datalocalityInfo = makeStringInfo();
- result->forbid_optimizer = false;
+
/* fake data locality */
if (debug_fake_datalocality) {
fp = fopen("/tmp/cdbdatalocality.result", "w+");
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/gpopt/gpdbwrappers.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/gpdbwrappers.cpp b/src/backend/gpopt/gpdbwrappers.cpp
index 23d984b..1f6a549 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -199,6 +199,8 @@
#define ALLOW_isMotionGather
#define ALLOW_estimate_rel_size
#define ALLOW_rel_partitioning_is_uniform
+#define ALLOW_GetActiveRelType
+#define ALLOW_GetActiveQueryResource
#define ALLOW_mdver_request_version
#define ALLOW_mdver_enabled
@@ -2364,6 +2366,30 @@ gpdb::Pdistrpolicy
return NULL;
}
+
+List *
+gpdb::PlActiveRelTypes(void) // wrapper: calls GetActiveRelType() between GP_WRAP_START and GP_WRAP_END
+{
+ GP_WRAP_START;
+ {
+ return GetActiveRelType(); // hand the callee's List pointer back to the caller unchanged
+ }
+ GP_WRAP_END;
+ return NULL; // NOTE(review): presumably reached only when the wrapped call does not return normally -- confirm against the GP_WRAP_* macros
+}
+
+QueryResource *
+gpdb::PqrActiveQueryResource(void) // wrapper: calls GetActiveQueryResource() between GP_WRAP_START and GP_WRAP_END
+{
+ GP_WRAP_START;
+ {
+ return GetActiveQueryResource(); // hand the callee's pointer back unchanged; FTreatAsRandom checks it against NULL
+ }
+ GP_WRAP_END;
+ return NULL; // NOTE(review): presumably reached only when the wrapped call does not return normally -- confirm against the GP_WRAP_* macros
+}
+
+
BOOL
gpdb::FChildPartDistributionMismatch
(
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
----------------------------------------------------------------------
diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 5e5580d..2800e8b 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -58,6 +58,7 @@
#include "catalog/pg_exttable.h"
#include "cdb/cdbpartition.h"
+#include "cdb/cdbdatalocality.h"
#include "catalog/namespace.h"
#include "catalog/pg_statistic.h"
@@ -504,6 +505,52 @@ CTranslatorRelcacheToDXL::CheckUnsupportedRelation
//---------------------------------------------------------------------------
// @function:
+// CTranslatorRelcacheToDXL::FTreatAsRandom
+//
+// @doc:
+// Whether a hash-distributed table needs to be treated as randomly distributed
+//
+//---------------------------------------------------------------------------
+BOOL
+CTranslatorRelcacheToDXL::FTreatAsRandom
+ (
+ OID oid, // relation OID, matched against relid of each active relation type entry
+ GpPolicy *pgppolicy // distribution policy of the relation; dereferenced below, so it must not be NULL
+ )
+{
+ QueryResource *resource = gpdb::PqrActiveQueryResource();
+ if (NULL == resource)
+ {
+ // No query resource has been allocated; per the original note this is the 'explain' case,
+ // so no hash-to-random conversion will happen and the table keeps its hash distribution
+ return false;
+ }
+
+ List *lRelsType = gpdb::PlActiveRelTypes();
+ ListCell *lc = NULL;
+ foreach(lc, lRelsType)
+ {
+ CurrentRelType *relType = (CurrentRelType *) lfirst(lc); // list cells are read as CurrentRelType entries
+ if (relType->relid == oid)
+ {
+ /* A hash-distributed table keeps its hash distribution only if both hold:
+ * 1. The active relation type entry for it is still marked as hash (isHash)
+ * 2. The bucketnum of its policy equals the length of resource->segments (the vSegs allocated)
+ */
+ if (relType->isHash && pgppolicy->bucketnum == list_length(resource->segments))
+ {
+ return false; // both conditions hold: keep hash distributed
+ }
+ break; // matching entry found but it does not qualify; stop scanning and fall through to 'return true'
+ }
+ }
+
+ return true; // no entry for this oid, or the entry failed the checks above: treat as random
+}
+
+
+//---------------------------------------------------------------------------
+// @function:
// CTranslatorRelcacheToDXL::Pmdrel
//
// @doc:
@@ -538,7 +585,7 @@ CTranslatorRelcacheToDXL::Pmdrel
DrgPmdid *pdrgpmdidIndexes = NULL;
DrgPmdid *pdrgpmdidTriggers = NULL;
DrgPul *pdrgpulPartKeys = NULL;
- BOOL fChildDistributionMismatch = false;
+ BOOL fConvertHashToRandom = false;
DrgPdrgPul *pdrgpdrgpulKeys = NULL;
DrgPmdid *pdrgpmdidCheckConstraints = NULL;
BOOL fTemporary = false;
@@ -562,14 +609,16 @@ CTranslatorRelcacheToDXL::Pmdrel
// get distribution policy
GpPolicy *pgppolicy = gpdb::Pdistrpolicy(rel);
ereldistribution = Ereldistribution(pgppolicy);
- fChildDistributionMismatch = gpdb::FChildPartDistributionMismatch(rel);
- // get distribution columns
+ // for a hash distributed table, determine if it should be treated as randomly distributed and get its distribution columns
if (IMDRelation::EreldistrHash == ereldistribution)
{
+ fConvertHashToRandom = FTreatAsRandom(oid, pgppolicy);
pdrpulDistrCols = PdrpulDistrCols(pmp, pgppolicy, pdrgpmdcol, ulMaxCols);
}
+ fConvertHashToRandom = fConvertHashToRandom || gpdb::FChildPartDistributionMismatch(rel);
+
// collect relation indexes
pdrgpmdidIndexes = PdrgpmdidRelIndexes(pmp, rel);
@@ -635,6 +684,7 @@ CTranslatorRelcacheToDXL::Pmdrel
ereldistribution,
pdrgpmdcol,
pdrpulDistrCols,
+ fConvertHashToRandom,
pdrgpdrgpulKeys,
pdrgpmdidIndexes,
pdrgpmdidTriggers,
@@ -660,7 +710,7 @@ CTranslatorRelcacheToDXL::Pmdrel
pdrgpmdcol,
pdrpulDistrCols,
pdrgpulPartKeys,
- fChildDistributionMismatch,
+ fConvertHashToRandom,
pdrgpdrgpulKeys,
pdrgpmdidIndexes,
pdrgpmdidTriggers,
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/backend/optimizer/plan/planner.c
----------------------------------------------------------------------
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a0d7ec3..e007a69 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -340,8 +340,7 @@ planner(Query *parse, int cursorOptions,
* then fall back to the planner.
* TODO: caragg 11/08/2013: Enable ORCA when running in utility mode (MPP-21841)
*/
- if (!ppResult->saResult.forbid_optimizer && optimizer
- && AmIMaster() && (GP_ROLE_UTILITY != Gp_role))
+ if (optimizer && AmIMaster() && (GP_ROLE_UTILITY != Gp_role))
{
if (gp_log_optimization_time)
{
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/cdb/cdbdatalocality.h
----------------------------------------------------------------------
diff --git a/src/include/cdb/cdbdatalocality.h b/src/include/cdb/cdbdatalocality.h
index 09c02fb..25d1ada 100644
--- a/src/include/cdb/cdbdatalocality.h
+++ b/src/include/cdb/cdbdatalocality.h
@@ -28,8 +28,6 @@ typedef struct SplitAllocResult
int planner_segments;
List *relsType;// relation type after datalocality changing
StringInfo datalocalityInfo;
- //orca currently doesn't support hash table to be processed as random table.
- bool forbid_optimizer;
} SplitAllocResult;
/*
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/gpopt/gpdbwrappers.h
----------------------------------------------------------------------
diff --git a/src/include/gpopt/gpdbwrappers.h b/src/include/gpopt/gpdbwrappers.h
index 490a4d7..552f71d 100644
--- a/src/include/gpopt/gpdbwrappers.h
+++ b/src/include/gpopt/gpdbwrappers.h
@@ -55,6 +55,7 @@ struct GpPolicy;
struct PartitionSelector;
struct SelectedParts;
struct Motion;
+struct QueryResource;
namespace gpdb {
@@ -488,6 +489,12 @@ namespace gpdb {
// and the parts are distributed differently, return Random distribution
GpPolicy *Pdistrpolicy(Relation rel);
+ // return active relation distribution types
+ List *PlActiveRelTypes(void);
+
+ // return active query resource
+ QueryResource *PqrActiveQueryResource(void);
+
// return true if the table is partitioned and hash-distributed, and one of
// the child partitions is randomly distributed
BOOL FChildPartDistributionMismatch(Relation rel);
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/4375391d/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
----------------------------------------------------------------------
diff --git a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
index 5d789b5..89a7ccd 100644
--- a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
@@ -307,6 +307,10 @@ namespace gpdxl
static
BOOL FDefaultPartition(List *plDefaultLevels, ULONG ulLevel);
+ // return true if a hash distributed table must be treated as random distributed
+ static
+ BOOL FTreatAsRandom(OID oid, GpPolicy *pgppolicy);
+
// retrieve part constraint for index
static
CMDPartConstraintGPDB *PmdpartcnstrIndex