You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/02/22 11:52:45 UTC
[hive] branch master updated: HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3c532c2 HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)
3c532c2 is described below
commit 3c532c2cd2603edc60c721282d45390e910a0358
Author: Alessandro Solimando <al...@gmail.com>
AuthorDate: Tue Feb 8 17:09:19 2022 +0100
HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)
Closes #3011
---
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 39 +++++--
.../hadoop/hive/ql/parse/CalcitePlanner.java | 24 ++++-
.../queries/clientpositive/excluded_rule_explain.q | 11 ++
.../llap/excluded_rule_explain.q.out | 112 +++++++++++++++++++++
.../llap/rule_exclusion_config.q.out | 8 ++
5 files changed, 184 insertions(+), 10 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index c59f44f..59f9044 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -101,8 +101,12 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
private static final Logger LOG = LoggerFactory.getLogger(ExplainTask.class.getName());
public static final String STAGE_DEPENDENCIES = "STAGE DEPENDENCIES";
+ private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: ";
private static final long serialVersionUID = 1L;
public static final String EXPL_COLUMN_NAME = "Explain";
+ private static final String CBO_INFO_JSON_LABEL = "cboInfo";
+ private static final String CBO_PLAN_JSON_LABEL = "CBOPlan";
+ private static final String CBO_PLAN_TEXT_LABEL = "CBO PLAN:";
private final Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
private boolean isLogical = false;
@@ -152,15 +156,22 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
return outJSONObject;
}
- public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) throws Exception {
+ public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) {
JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
boolean jsonOutput = work.isFormatted();
String cboPlan = work.getCboPlan();
if (cboPlan != null) {
+ String ruleExclusionRegex = getRuleExcludedRegex();
if (jsonOutput) {
- outJSONObject.put("CBOPlan", cboPlan);
+ outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan);
+ if (!ruleExclusionRegex.isEmpty()) {
+ outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+ }
} else {
- out.println("CBO PLAN:");
+ if (!ruleExclusionRegex.isEmpty()) {
+ out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n");
+ }
+ out.println(CBO_PLAN_TEXT_LABEL);
out.println(cboPlan);
}
}
@@ -272,6 +283,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
String cboPlan, String optimizedSQL, String stageIdRearrange) throws Exception {
+ String ruleExclusionRegex = getRuleExcludedRegex();
+
// If the user asked for a formatted output, dump the json output
// in the output stream
JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
@@ -282,9 +295,15 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
if (cboPlan != null) {
if (jsonOutput) {
- outJSONObject.put("CBOPlan", cboPlan);
+ outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan);
+ if (!ruleExclusionRegex.isEmpty()) {
+ outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+ }
} else {
- out.print("CBO PLAN:");
+ if (!ruleExclusionRegex.isEmpty()) {
+ out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+ }
+ out.print(CBO_PLAN_TEXT_LABEL);
out.println(cboPlan);
}
}
@@ -327,6 +346,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
}
if (!suppressOthersForVectorization) {
+ if (!jsonOutput && !ruleExclusionRegex.isEmpty()) {
+ out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n");
+ }
+
JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
if (out != null) {
@@ -335,7 +358,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
if (jsonOutput) {
if (cboInfo != null) {
- outJSONObject.put("cboInfo", cboInfo);
+ outJSONObject.put(CBO_INFO_JSON_LABEL, cboInfo);
}
outJSONObject.put(STAGE_DEPENDENCIES, jsonDependencies);
}
@@ -952,6 +975,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
return invokeFlag;
}
+ private String getRuleExcludedRegex() {
+ return conf == null ? "" : conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+ }
+
@VisibleForTesting
JSONObject outputPlan(Object work, PrintStream out,
boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 24c1671..bc55d0e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -355,6 +355,7 @@ import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMateri
public class CalcitePlanner extends SemanticAnalyzer {
+ private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: ";
/**
* {@link org.antlr.runtime.TokenRewriteStream} offers the opportunity of multiple rewrites of the same
* input text (in our case the sql query text). These rewrites are called programs and identified by a string.
@@ -575,7 +576,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
getQB().getParseInfo().setHintList(oldHints);
}
LOG.info("CBO Succeeded; optimized logical plan.");
- this.ctx.setCboInfo("Plan optimized by CBO.");
+
+ this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
} else {
// 1. Convert Plan to AST
@@ -628,7 +630,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
disableJoinMerge = defaultJoinMerge;
sinkOp = genPlan(getQB());
LOG.info("CBO Succeeded; optimized logical plan.");
- this.ctx.setCboInfo("Plan optimized by CBO.");
+
+ this.ctx.setCboInfo(getOptimizedByCboInfo());
this.ctx.setCboSucceeded(true);
if (this.ctx.isExplainPlan()) {
// Enrich explain with information derived from CBO
@@ -707,6 +710,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
return sinkOp;
}
+ private String getOptimizedByCboInfo() {
+ String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+ String cboInfo = "Plan optimized by CBO.";
+ if (!ruleExclusionRegex.isEmpty()) {
+ cboInfo = cboInfo + (" " + EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+ }
+ return cboInfo;
+ }
+
private ASTNode handleCreateViewDDL(ASTNode ast) throws SemanticException {
saveViewDefinition();
String originalText = createVwDesc.getViewOriginalText();
@@ -1957,8 +1969,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
final RelOptCluster optCluster = basePlan.getCluster();
final PerfLogger perfLogger = SessionState.getPerfLogger();
- final boolean useMaterializedViewsRegistry = !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname)
- .equals("DUMMY");
+ final boolean useMaterializedViewsRegistry =
+ !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY");
final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
final RelNode calcitePreMVRewritingPlan = basePlan;
final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan);
@@ -2026,6 +2038,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
// Optimize plan
if (!ruleExclusionRegex.isEmpty()) {
+ LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
+ ruleExclusionRegex);
planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
}
planner.setRoot(basePlan);
@@ -2451,6 +2465,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
if (!ruleExclusionRegex.isEmpty()) {
+ LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
+ ruleExclusionRegex);
planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
}
planner.setRoot(basePlan);
diff --git a/ql/src/test/queries/clientpositive/excluded_rule_explain.q b/ql/src/test/queries/clientpositive/excluded_rule_explain.q
new file mode 100644
index 0000000..245a735
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/excluded_rule_explain.q
@@ -0,0 +1,11 @@
+EXPLAIN CBO SELECT 1;
+EXPLAIN FORMATTED CBO SELECT 1;
+EXPLAIN SELECT 1;
+EXPLAIN FORMATTED SELECT 1;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule;
+
+EXPLAIN CBO SELECT 1;
+EXPLAIN FORMATTED CBO SELECT 1;
+EXPLAIN SELECT 1;
+EXPLAIN FORMATTED SELECT 1;
diff --git a/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out
new file mode 100644
index 0000000..0ada0c3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out
@@ -0,0 +1,112 @@
+PREHOOK: query: EXPLAIN CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(_o__c0=[1])
+ HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 129.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET [...]
+PREHOOK: query: EXPLAIN SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN FORMATTED SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__ [...]
+PREHOOK: query: EXPLAIN CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule
+
+CBO PLAN:
+HiveProject(_o__c0=[1])
+ HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 129.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET [...]
+PREHOOK: query: EXPLAIN SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule
+
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Select Operator
+ expressions: 1 (type: int)
+ outputColumnNames: _col0
+ ListSink
+
+PREHOOK: query: EXPLAIN FORMATTED SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"_dummy_database\",\n \"_dummy_table\"\n ],\n \"table:alias\": \"_dummy_table\",\n \"inputs\": [],\n \"rowCount\": 1.0,\n \"avgRowSize\": 0.0,\n \"rowType\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__ [...]
diff --git a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
index a656edf..298c4a7 100644
--- a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
+++ b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
@@ -47,6 +47,8 @@ WHERE src1.key > 10 and src1.key < 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule
+
CBO PLAN:
HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -79,6 +81,8 @@ WHERE src1.key > 10 and src1.key < 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HiveJoinAddNotNullRule
+
CBO PLAN:
HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -109,6 +113,8 @@ WHERE src1.key > 10 and src1.key < 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
+Excluded rules: HiveJoin.*Rule
+
CBO PLAN:
HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -139,6 +145,8 @@ WHERE src1.key > 10 and src1.key < 20
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
+Excluded rules: .*
+
CBO PLAN:
HiveProject(key=[$0], value=[$1], key1=[$6], value1=[$7], key2=[$12], value2=[$13])
HiveFilter(condition=[AND(>(CAST($0):DOUBLE, CAST(10):DOUBLE), <(CAST($0):DOUBLE, CAST(20):DOUBLE))])