You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/02/18 19:42:34 UTC

[hive] branch master updated: HIVE-22892 : Unable to compile query if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d34e62c  HIVE-22892 : Unable to compile query if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)
d34e62c is described below

commit d34e62cb074e198740e035b8dac9294acbba7805
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Feb 18 11:42:16 2020 -0800

    HIVE-22892 : Unable to compile query if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)
---
 .../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java |  14 ++
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  30 +--
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java     |  14 +-
 .../apache/hadoop/hive/ql/parse/SubQueryUtils.java |  12 +-
 ql/src/test/queries/clientpositive/cte_join.q      |  32 +++
 ql/src/test/results/clientpositive/cte_join.q.out  | 245 +++++++++++++++++++++
 6 files changed, 302 insertions(+), 45 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index f7ac6d3..444ec3d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -461,6 +461,7 @@ public abstract class BaseSemanticAnalyzer {
    */
   public static String getUnescapedUnqualifiedTableName(ASTNode node) throws SemanticException {
     assert node.getChildCount() <= 2;
+    assert node.getType() == HiveParser.TOK_TABNAME;
 
     if (node.getChildCount() == 2) {
       node = (ASTNode) node.getChild(1);
@@ -469,6 +470,19 @@ public abstract class BaseSemanticAnalyzer {
     return getUnescapedName(node);
   }
 
+  public static String getTableAlias(ASTNode node) throws SemanticException {
+    // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+    // partitionTableFunctionSource partitioningSpec? expression*)
+    // guranteed to have an alias here: check done in processJoin
+    if (node.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+      return unescapeIdentifier(node.getChild(1).getText().toLowerCase());
+    }
+    if (node.getChildCount() == 1) {
+      return getUnescapedUnqualifiedTableName((ASTNode) node.getChild(0)).toLowerCase();
+    }
+    return unescapeIdentifier(node.getChild(node.getChildCount() - 1).getText().toLowerCase());
+  }
+
 
   /**
    * Remove the encapsulating "`" pair from the identifier. We allow users to
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 8986315..c313d8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2867,14 +2867,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       if ((left.getToken().getType() == HiveParser.TOK_TABREF)
           || (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
           || (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
-        String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)).toLowerCase();
-        leftTableAlias = left.getChildCount() == 1 ? tableName : 
-            unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
-        // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
-        // partitionTableFunctionSource partitioningSpec? expression*)
-        // guranteed to have an lias here: check done in processJoin
-        leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? 
-            unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
+        leftTableAlias = getTableAlias(left);
         leftRel = aliasToRel.get(leftTableAlias);
       } else if (SemanticAnalyzer.isJoinToken(left)) {
         leftRel = genJoinLogicalPlan(left, aliasToRel);
@@ -2890,14 +2883,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       if ((right.getToken().getType() == HiveParser.TOK_TABREF)
           || (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
           || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
-        String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)).toLowerCase();
-        rightTableAlias = right.getChildCount() == 1 ? tableName : 
-            unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
-        // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
-        // partitionTableFunctionSource partitioningSpec? expression*)
-        // guranteed to have an lias here: check done in processJoin
-        rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? 
-            unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
+        rightTableAlias = getTableAlias(right);
         rightRel = aliasToRel.get(rightTableAlias);
       } else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
         rightRel = genLateralViewPlans(right, aliasToRel);
@@ -3258,17 +3244,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
         case HiveParser.TOK_TABREF:
         case HiveParser.TOK_SUBQUERY:
         case HiveParser.TOK_PTBLFUNCTION:
-          String inputTableName = getUnescapedUnqualifiedTableName((ASTNode) next.getChild(0)).toLowerCase();
-          String inputTableAlias;
-          if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
-            // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
-            // partitionTableFunctionSource partitioningSpec? expression*)
-            // ptf node guaranteed to have an alias here
-            inputTableAlias = unescapeIdentifier(next.getChild(1).getText().toLowerCase());
-          } else {
-            inputTableAlias = next.getChildCount() == 1 ? inputTableName :
-                unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
-          }
+          String inputTableAlias = getTableAlias(next);
           inputRel = aliasToRel.get(inputTableAlias);
           break;
         case HiveParser.TOK_LATERAL_VIEW:
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 33d3beb..961ddf4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9977,9 +9977,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     if (isValidLeftToken) {
-      String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0))
-          .toLowerCase();
-      String alias = extractJoinAlias(left, tableName);
+      String alias = extractJoinAlias(left);
       joinTree.setLeftAlias(alias);
       String[] leftAliases = new String[1];
       leftAliases[0] = alias;
@@ -10005,9 +10003,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     }
 
     if (isValidRightToken) {
-      String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0))
-          .toLowerCase();
-      String alias = extractJoinAlias(right, tableName);
+      String alias = extractJoinAlias(right);
       String[] rightAliases = new String[1];
       rightAliases[0] = alias;
       joinTree.setRightAliases(rightAliases);
@@ -10099,7 +10095,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         || (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION);
   }
 
-  private String extractJoinAlias(ASTNode node, String tableName) {
+  private String extractJoinAlias(ASTNode node) throws SemanticException {
     // ptf node form is:
     // ^(TOK_PTBLFUNCTION $name $alias? partitionTableFunctionSource partitioningSpec? expression*)
     // guaranteed to have an alias here: check done in processJoin
@@ -10107,14 +10103,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       return unescapeIdentifier(node.getChild(1).getText().toLowerCase());
     }
     if (node.getChildCount() == 1) {
-      return tableName;
+      return getUnescapedUnqualifiedTableName((ASTNode) node.getChild(0)).toLowerCase();
     }
     for (int i = node.getChildCount() - 1; i >= 1; i--) {
       if (node.getChild(i).getType() == HiveParser.Identifier) {
         return unescapeIdentifier(node.getChild(i).getText().toLowerCase());
       }
     }
-    return tableName;
+    throw new SemanticException("Unable to get join alias.");
   }
 
   private void parseStreamTables(QBJoinTree joinTree, QB qb) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index e7ad914..b6dc51e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.getTableAlias;
+
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
@@ -365,15 +367,7 @@ public class SubQueryUtils {
     if ((joinNode.getToken().getType() == HiveParser.TOK_TABREF)
         || (joinNode.getToken().getType() == HiveParser.TOK_SUBQUERY)
         || (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
-      String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName((ASTNode) joinNode.getChild(0))
-          .toLowerCase();
-      String alias = joinNode.getChildCount() == 1 ? tableName
-          : SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(joinNode.getChildCount() - 1)
-          .getText().toLowerCase());
-      alias = (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
-          SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(1).getText().toLowerCase()) :
-          alias;
-      aliases.add(alias);
+      aliases.add(getTableAlias(joinNode));
     } else {
       ASTNode left = (ASTNode) joinNode.getChild(0);
       ASTNode right = (ASTNode) joinNode.getChild(1);
diff --git a/ql/src/test/queries/clientpositive/cte_join.q b/ql/src/test/queries/clientpositive/cte_join.q
new file mode 100644
index 0000000..50819a7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_join.q
@@ -0,0 +1,32 @@
+CREATE TABLE t1 (a int, b varchar(100));
+
+EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
diff --git a/ql/src/test/results/clientpositive/cte_join.q.out b/ql/src/test/results/clientpositive/cte_join.q.out
new file mode 100644
index 0000000..4211b8b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cte_join.q.out
@@ -0,0 +1,245 @@
+PREHOOK: query: CREATE TABLE t1 (a int, b varchar(100))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE t1 (a int, b varchar(100))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+ABSTRACT SYNTAX TREE:
+  
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_SUBQUERY
+            TOK_QUERY
+               TOK_CTE
+                  TOK_SUBQUERY
+                     TOK_QUERY
+                        TOK_FROM
+                           TOK_TABREF
+                              TOK_TABNAME
+                                 t1
+                        TOK_INSERT
+                           TOK_DESTINATION
+                              TOK_DIR
+                                 TOK_TMP_FILE
+                           TOK_SELECT
+                              TOK_SELEXPR
+                                 TOK_TABLE_OR_COL
+                                    a
+                              TOK_SELEXPR
+                                 TOK_TABLE_OR_COL
+                                    b
+                           TOK_WHERE
+                              =
+                                 TOK_TABLE_OR_COL
+                                    b
+                                 'c'
+                     sub1
+               TOK_FROM
+                  TOK_TABREF
+                     TOK_TABNAME
+                        sub1
+               TOK_INSERT
+                  TOK_DESTINATION
+                     TOK_DIR
+                        TOK_TMP_FILE
+                  TOK_SELECT
+                     TOK_SELEXPR
+                        .
+                           TOK_TABLE_OR_COL
+                              sub1
+                           a
+                     TOK_SELEXPR
+                        .
+                           TOK_TABLE_OR_COL
+                              sub1
+                           b
+            S
+         TOK_TABREF
+            TOK_TABNAME
+               t1
+         =
+            .
+               TOK_TABLE_OR_COL
+                  S
+               a
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               a
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  S
+               a
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               a
+         TOK_SELEXPR
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               b
+
+PREHOOK: query: EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+  HiveProject(a=[$0])
+    HiveFilter(condition=[AND(=($1, _UTF-16LE'c'), IS NOT NULL($0))])
+      HiveTableScan(table=[[default, t1]], table:alias=[t1])
+  HiveProject(a=[$0], b=[$1])
+    HiveFilter(condition=[IS NOT NULL($0)])
+      HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            filterExpr: ((b = 'c') and a is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((b = 'c') and a is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  null sort order: z
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: t1
+            filterExpr: a is not null (type: boolean)
+            Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: a is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int), b (type: varchar(100))
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  null sort order: z
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: varchar(100))
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: int)
+            1 _col0 (type: int)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####