You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/04/02 20:37:43 UTC

[hive] branch master updated: HIVE-23094: Implement Explain CBO of Update and Delete statements (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7f9ce94  HIVE-23094: Implement Explain CBO of Update and Delete statements (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
7f9ce94 is described below

commit 7f9ce946de05940edcbfa1081048f6f314e8ca16
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Thu Apr 2 13:36:28 2020 -0700

    HIVE-23094: Implement Explain CBO of Update and Delete statements (Krisztian Kasa, reviewed by Jesus Camacho Rodriguez)
---
 ql/src/java/org/apache/hadoop/hive/ql/Context.java |  12 ++-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java     |   1 +
 ql/src/test/queries/clientpositive/sort_acid.q     |  15 +++
 .../results/clientpositive/llap/sort_acid.q.out    | 102 +++++++++++++++++++++
 4 files changed, 129 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/Context.java b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
index fd627c6..d618ef9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/Context.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -1105,7 +1105,17 @@ public class Context {
   }
 
   public String getCalcitePlan() {
-    return this.calcitePlan;
+    if (this.calcitePlan != null) {
+      return this.calcitePlan;
+    }
+
+    for (Context context : rewrittenStatementContexts) {
+      if (context.calcitePlan != null) {
+        return context.calcitePlan;
+      }
+    }
+
+    return null;
   }
 
   public void setCalcitePlan(String calcitePlan) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 179021e..41a6f61 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -203,6 +203,7 @@ public class UpdateDeleteSemanticAnalyzer extends RewriteSemanticAnalyzer {
 
     try {
       useSuper = true;
+      // Note: this will overwrite this.ctx with rewrittenCtx
       super.analyze(rewrittenTree, rewrittenCtx);
     } finally {
       useSuper = false;
diff --git a/ql/src/test/queries/clientpositive/sort_acid.q b/ql/src/test/queries/clientpositive/sort_acid.q
index 9d0b9d0..bfa14bd 100644
--- a/ql/src/test/queries/clientpositive/sort_acid.q
+++ b/ql/src/test/queries/clientpositive/sort_acid.q
@@ -12,20 +12,35 @@ explain cbo
 select a, 6 as c, b from acidtlb sort by a, c, b;
 select a, 6 as c, b from acidtlb sort by a, c, b;
 
+explain cbo
+update acidtlb set b=777;
 update acidtlb set b=777;
+
 select * from acidtlb;
 
 
+explain cbo
+update acidtlb set b=350
+where a in (select a from acidtlb where a = 30);
 update acidtlb set b=350
 where a in (select a from acidtlb where a = 30);
 
 select * from acidtlb;
 
+explain cbo
+update acidtlb set b=450
+where a in (select c from othertlb where c < 65);
 update acidtlb set b=450
 where a in (select c from othertlb where c < 65);
 
 select * from acidtlb;
 
+explain cbo
+delete from acidtlb
+where a in (
+    select a from acidtlb a
+             join othertlb o on a.a = o.c
+             where o.d = 21);
 delete from acidtlb
 where a in (
     select a from acidtlb a
diff --git a/ql/src/test/results/clientpositive/llap/sort_acid.q.out b/ql/src/test/results/clientpositive/llap/sort_acid.q.out
index ff0a5ad..46618e6 100644
--- a/ql/src/test/results/clientpositive/llap/sort_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/sort_acid.q.out
@@ -59,6 +59,21 @@ POSTHOOK: Input: default@acidtlb
 #### A masked pattern was here ####
 10	6	200
 30	6	500
+PREHOOK: query: explain cbo
+update acidtlb set b=777
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: explain cbo
+update acidtlb set b=777
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Output: default@acidtlb
+CBO PLAN:
+HiveSortExchange(distribution=[any], collation=[[0]])
+  HiveProject(row__id=[$4], a=[$0], _o__c2=[777])
+    HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+
 PREHOOK: query: update acidtlb set b=777
 PREHOOK: type: QUERY
 PREHOOK: Input: default@acidtlb
@@ -78,6 +93,32 @@ POSTHOOK: Input: default@acidtlb
 10	777
 30	777
 Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain cbo
+update acidtlb set b=350
+where a in (select a from acidtlb where a = 30)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: explain cbo
+update acidtlb set b=350
+where a in (select a from acidtlb where a = 30)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Output: default@acidtlb
+CBO PLAN:
+HiveSortExchange(distribution=[any], collation=[[0]])
+  HiveProject(row__id=[$0], a=[CAST(30):INTEGER], _o__c2=[350])
+    HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+      HiveProject(ROW__ID=[$4])
+        HiveFilter(condition=[=($0, 30)])
+          HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+      HiveProject($f0=[$0])
+        HiveAggregate(group=[{0}])
+          HiveProject($f0=[true])
+            HiveFilter(condition=[=($0, 30)])
+              HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+
+Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: update acidtlb set b=350
 where a in (select a from acidtlb where a = 30)
 PREHOOK: type: QUERY
@@ -98,6 +139,31 @@ POSTHOOK: Input: default@acidtlb
 #### A masked pattern was here ####
 10	777
 30	350
+PREHOOK: query: explain cbo
+update acidtlb set b=450
+where a in (select c from othertlb where c < 65)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Input: default@othertlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: explain cbo
+update acidtlb set b=450
+where a in (select c from othertlb where c < 65)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Input: default@othertlb
+POSTHOOK: Output: default@acidtlb
+CBO PLAN:
+HiveSortExchange(distribution=[any], collation=[[0]])
+  HiveProject(row__id=[$1], a=[$0], _o__c2=[450])
+    HiveSemiJoin(condition=[=($0, $2)], joinType=[semi])
+      HiveProject(a=[$0], ROW__ID=[$4])
+        HiveFilter(condition=[<($0, 65)])
+          HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+      HiveProject(c=[$0])
+        HiveFilter(condition=[<($0, 65)])
+          HiveTableScan(table=[[default, othertlb]], table:alias=[othertlb])
+
 PREHOOK: query: update acidtlb set b=450
 where a in (select c from othertlb where c < 65)
 PREHOOK: type: QUERY
@@ -120,6 +186,42 @@ POSTHOOK: Input: default@acidtlb
 #### A masked pattern was here ####
 10	450
 30	450
+PREHOOK: query: explain cbo
+delete from acidtlb
+where a in (
+    select a from acidtlb a
+             join othertlb o on a.a = o.c
+             where o.d = 21)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidtlb
+PREHOOK: Input: default@othertlb
+PREHOOK: Output: default@acidtlb
+POSTHOOK: query: explain cbo
+delete from acidtlb
+where a in (
+    select a from acidtlb a
+             join othertlb o on a.a = o.c
+             where o.d = 21)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidtlb
+POSTHOOK: Input: default@othertlb
+POSTHOOK: Output: default@acidtlb
+CBO PLAN:
+HiveSortExchange(distribution=[any], collation=[[0]])
+  HiveProject(ROW__ID=[$1])
+    HiveSemiJoin(condition=[=($0, $2)], joinType=[semi])
+      HiveProject(a=[$0], ROW__ID=[$4])
+        HiveFilter(condition=[IS NOT NULL($0)])
+          HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb])
+      HiveProject(a=[$0])
+        HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+          HiveProject(a=[$0])
+            HiveFilter(condition=[IS NOT NULL($0)])
+              HiveTableScan(table=[[default, acidtlb]], table:alias=[a])
+          HiveProject(c=[$0])
+            HiveFilter(condition=[AND(=($1, 21), IS NOT NULL($0))])
+              HiveTableScan(table=[[default, othertlb]], table:alias=[o])
+
 PREHOOK: query: delete from acidtlb
 where a in (
     select a from acidtlb a