You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/02/22 11:52:45 UTC

[hive] branch master updated: HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c532c2  HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)
3c532c2 is described below

commit 3c532c2cd2603edc60c721282d45390e910a0358
Author: Alessandro Solimando <al...@gmail.com>
AuthorDate: Tue Feb 8 17:09:19 2022 +0100

    HIVE-25938: Print excluded rules from CBO (Alessandro Solimando, reviewed by Stamatis Zampetakis, John Sherman)
    
    Closes #3011
---
 .../apache/hadoop/hive/ql/exec/ExplainTask.java    |  39 +++++--
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  24 ++++-
 .../queries/clientpositive/excluded_rule_explain.q |  11 ++
 .../llap/excluded_rule_explain.q.out               | 112 +++++++++++++++++++++
 .../llap/rule_exclusion_config.q.out               |   8 ++
 5 files changed, 184 insertions(+), 10 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index c59f44f..59f9044 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -101,8 +101,12 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
   private static final Logger LOG = LoggerFactory.getLogger(ExplainTask.class.getName());
 
   public static final String STAGE_DEPENDENCIES = "STAGE DEPENDENCIES";
+  private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: ";
   private static final long serialVersionUID = 1L;
   public static final String EXPL_COLUMN_NAME = "Explain";
+  private static final String CBO_INFO_JSON_LABEL = "cboInfo";
+  private static final String CBO_PLAN_JSON_LABEL = "CBOPlan";
+  private static final String CBO_PLAN_TEXT_LABEL = "CBO PLAN:";
   private final Set<Operator<?>> visitedOps = new HashSet<Operator<?>>();
   private boolean isLogical = false;
 
@@ -152,15 +156,22 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
     return outJSONObject;
   }
 
-  public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) throws Exception {
+  public JSONObject getJSONCBOPlan(PrintStream out, ExplainWork work) {
     JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
     boolean jsonOutput = work.isFormatted();
     String cboPlan = work.getCboPlan();
     if (cboPlan != null) {
+      String ruleExclusionRegex = getRuleExcludedRegex();
       if (jsonOutput) {
-        outJSONObject.put("CBOPlan", cboPlan);
+        outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan);
+        if (!ruleExclusionRegex.isEmpty()) {
+          outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+        }
       } else {
-        out.println("CBO PLAN:");
+        if (!ruleExclusionRegex.isEmpty()) {
+          out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n");
+        }
+        out.println(CBO_PLAN_TEXT_LABEL);
         out.println(cboPlan);
       }
     }
@@ -272,6 +283,8 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
       boolean jsonOutput, boolean isExtended, boolean appendTaskType, String cboInfo,
       String cboPlan, String optimizedSQL, String stageIdRearrange) throws Exception {
 
+    String ruleExclusionRegex = getRuleExcludedRegex();
+
     // If the user asked for a formatted output, dump the json output
     // in the output stream
     JSONObject outJSONObject = new JSONObject(new LinkedHashMap<>());
@@ -282,9 +295,15 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
 
     if (cboPlan != null) {
       if (jsonOutput) {
-        outJSONObject.put("CBOPlan", cboPlan);
+        outJSONObject.put(CBO_PLAN_JSON_LABEL, cboPlan);
+        if (!ruleExclusionRegex.isEmpty()) {
+          outJSONObject.put(CBO_INFO_JSON_LABEL, EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+        }
       } else {
-        out.print("CBO PLAN:");
+        if (!ruleExclusionRegex.isEmpty()) {
+          out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+        }
+        out.print(CBO_PLAN_TEXT_LABEL);
         out.println(cboPlan);
       }
     }
@@ -327,6 +346,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
     }
 
     if (!suppressOthersForVectorization) {
+      if (!jsonOutput && !ruleExclusionRegex.isEmpty()) {
+        out.println(EXCLUDED_RULES_PREFIX + ruleExclusionRegex + "\n");
+      }
+
       JSONObject jsonDependencies = outputDependencies(out, jsonOutput, appendTaskType, ordered);
 
       if (out != null) {
@@ -335,7 +358,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
 
       if (jsonOutput) {
         if (cboInfo != null) {
-          outJSONObject.put("cboInfo", cboInfo);
+          outJSONObject.put(CBO_INFO_JSON_LABEL, cboInfo);
         }
         outJSONObject.put(STAGE_DEPENDENCIES, jsonDependencies);
       }
@@ -952,6 +975,10 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
     return invokeFlag;
   }
 
+  private String getRuleExcludedRegex() {
+    return conf == null ? "" : conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+  }
+
   @VisibleForTesting
   JSONObject outputPlan(Object work, PrintStream out,
       boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 24c1671..bc55d0e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -355,6 +355,7 @@ import static org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMateri
 
 public class CalcitePlanner extends SemanticAnalyzer {
 
+  private static final String EXCLUDED_RULES_PREFIX = "Excluded rules: ";
   /**
    * {@link org.antlr.runtime.TokenRewriteStream} offers the opportunity of multiple rewrites of the same
    * input text (in our case the sql query text). These rewrites are called programs and identified by a string.
@@ -575,7 +576,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
               getQB().getParseInfo().setHintList(oldHints);
             }
             LOG.info("CBO Succeeded; optimized logical plan.");
-            this.ctx.setCboInfo("Plan optimized by CBO.");
+
+            this.ctx.setCboInfo(getOptimizedByCboInfo());
             this.ctx.setCboSucceeded(true);
           } else {
             // 1. Convert Plan to AST
@@ -628,7 +630,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
             disableJoinMerge = defaultJoinMerge;
             sinkOp = genPlan(getQB());
             LOG.info("CBO Succeeded; optimized logical plan.");
-            this.ctx.setCboInfo("Plan optimized by CBO.");
+
+            this.ctx.setCboInfo(getOptimizedByCboInfo());
             this.ctx.setCboSucceeded(true);
             if (this.ctx.isExplainPlan()) {
               // Enrich explain with information derived from CBO
@@ -707,6 +710,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
     return sinkOp;
   }
 
+  private String getOptimizedByCboInfo() {
+    String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+    String cboInfo = "Plan optimized by CBO.";
+    if (!ruleExclusionRegex.isEmpty()) {
+      cboInfo = cboInfo + (" " + EXCLUDED_RULES_PREFIX + ruleExclusionRegex);
+    }
+    return cboInfo;
+  }
+
   private ASTNode handleCreateViewDDL(ASTNode ast) throws SemanticException {
     saveViewDefinition();
     String originalText = createVwDesc.getViewOriginalText();
@@ -1957,8 +1969,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
       final RelOptCluster optCluster = basePlan.getCluster();
       final PerfLogger perfLogger = SessionState.getPerfLogger();
 
-      final boolean useMaterializedViewsRegistry = !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname)
-              .equals("DUMMY");
+      final boolean useMaterializedViewsRegistry =
+          !conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname).equals("DUMMY");
       final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
       final RelNode calcitePreMVRewritingPlan = basePlan;
       final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan);
@@ -2026,6 +2038,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       // Optimize plan
       if (!ruleExclusionRegex.isEmpty()) {
+        LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
+            ruleExclusionRegex);
         planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
       }
       planner.setRoot(basePlan);
@@ -2451,6 +2465,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
 
       if (!ruleExclusionRegex.isEmpty()) {
+        LOG.info("The CBO rules matching the following regex are excluded from planning: {}",
+            ruleExclusionRegex);
         planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
       }
       planner.setRoot(basePlan);
diff --git a/ql/src/test/queries/clientpositive/excluded_rule_explain.q b/ql/src/test/queries/clientpositive/excluded_rule_explain.q
new file mode 100644
index 0000000..245a735
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/excluded_rule_explain.q
@@ -0,0 +1,11 @@
+EXPLAIN CBO SELECT 1;
+EXPLAIN FORMATTED CBO SELECT 1;
+EXPLAIN SELECT 1;
+EXPLAIN FORMATTED SELECT 1;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule;
+
+EXPLAIN CBO SELECT 1;
+EXPLAIN FORMATTED CBO SELECT 1;
+EXPLAIN SELECT 1;
+EXPLAIN FORMATTED SELECT 1;
diff --git a/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out
new file mode 100644
index 0000000..0ada0c3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/excluded_rule_explain.q.out
@@ -0,0 +1,112 @@
+PREHOOK: query: EXPLAIN CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(_o__c0=[1])
+  HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n  \"rels\": [\n    {\n      \"id\": \"0\",\n      \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n      \"table\": [\n        \"_dummy_database\",\n        \"_dummy_table\"\n      ],\n      \"table:alias\": \"_dummy_table\",\n      \"inputs\": [],\n      \"rowCount\": 1.0,\n      \"avgRowSize\": 129.0,\n      \"rowType\": [\n        {\n          \"type\": \"BIGINT\",\n          \"nullable\": true,\n          \"name\": \"BLOCK__OFFSET [...]
+PREHOOK: query: EXPLAIN SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Select Operator
+            expressions: 1 (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: EXPLAIN FORMATTED SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n  \"rels\": [\n    {\n      \"id\": \"0\",\n      \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n      \"table\": [\n        \"_dummy_database\",\n        \"_dummy_table\"\n      ],\n      \"table:alias\": \"_dummy_table\",\n      \"inputs\": [],\n      \"rowCount\": 1.0,\n      \"avgRowSize\": 0.0,\n      \"rowType\": [\n        {\n          \"type\": \"BIGINT\",\n          \"nullable\": true,\n          \"name\": \"BLOCK__OFFSET__ [...]
+PREHOOK: query: EXPLAIN CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule
+
+CBO PLAN:
+HiveProject(_o__c0=[1])
+  HiveTableScan(table=[[_dummy_database, _dummy_table]], table:alias=[_dummy_table])
+
+PREHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED CBO SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n  \"rels\": [\n    {\n      \"id\": \"0\",\n      \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n      \"table\": [\n        \"_dummy_database\",\n        \"_dummy_table\"\n      ],\n      \"table:alias\": \"_dummy_table\",\n      \"inputs\": [],\n      \"rowCount\": 1.0,\n      \"avgRowSize\": 129.0,\n      \"rowType\": [\n        {\n          \"type\": \"BIGINT\",\n          \"nullable\": true,\n          \"name\": \"BLOCK__OFFSET [...]
+PREHOOK: query: EXPLAIN SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HivePreFilteringRule
+
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Select Operator
+            expressions: 1 (type: int)
+            outputColumnNames: _col0
+            ListSink
+
+PREHOOK: query: EXPLAIN FORMATTED SELECT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN FORMATTED SELECT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+{"CBOPlan":"{\n  \"rels\": [\n    {\n      \"id\": \"0\",\n      \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n      \"table\": [\n        \"_dummy_database\",\n        \"_dummy_table\"\n      ],\n      \"table:alias\": \"_dummy_table\",\n      \"inputs\": [],\n      \"rowCount\": 1.0,\n      \"avgRowSize\": 0.0,\n      \"rowType\": [\n        {\n          \"type\": \"BIGINT\",\n          \"nullable\": true,\n          \"name\": \"BLOCK__OFFSET__ [...]
diff --git a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
index a656edf..298c4a7 100644
--- a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
+++ b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
@@ -47,6 +47,8 @@ WHERE src1.key > 10 and src1.key < 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule
+
 CBO PLAN:
 HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
   HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -79,6 +81,8 @@ WHERE src1.key > 10 and src1.key < 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+Excluded rules: HiveJoinPushTransitivePredicatesRule|HiveJoinAddNotNullRule
+
 CBO PLAN:
 HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
   HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -109,6 +113,8 @@ WHERE src1.key > 10 and src1.key < 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+Excluded rules: HiveJoin.*Rule
+
 CBO PLAN:
 HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
   HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
@@ -139,6 +145,8 @@ WHERE src1.key > 10 and src1.key < 20
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
+Excluded rules: .*
+
 CBO PLAN:
 HiveProject(key=[$0], value=[$1], key1=[$6], value1=[$7], key2=[$12], value2=[$13])
   HiveFilter(condition=[AND(>(CAST($0):DOUBLE, CAST(10):DOUBLE), <(CAST($0):DOUBLE, CAST(20):DOUBLE))])