You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2020/02/18 19:42:34 UTC
[hive] branch master updated: HIVE-22892 : Unable to compile query
if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d34e62c HIVE-22892 : Unable to compile query if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)
d34e62c is described below
commit d34e62cb074e198740e035b8dac9294acbba7805
Author: Krisztian Kasa <kk...@cloudera.com>
AuthorDate: Tue Feb 18 11:42:16 2020 -0800
HIVE-22892 : Unable to compile query if CTE joined (Krisztian Kasa, reviewed by Vineet Garg)
---
.../hadoop/hive/ql/parse/BaseSemanticAnalyzer.java | 14 ++
.../hadoop/hive/ql/parse/CalcitePlanner.java | 30 +--
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 14 +-
.../apache/hadoop/hive/ql/parse/SubQueryUtils.java | 12 +-
ql/src/test/queries/clientpositive/cte_join.q | 32 +++
ql/src/test/results/clientpositive/cte_join.q.out | 245 +++++++++++++++++++++
6 files changed, 302 insertions(+), 45 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
index f7ac6d3..444ec3d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -461,6 +461,7 @@ public abstract class BaseSemanticAnalyzer {
*/
public static String getUnescapedUnqualifiedTableName(ASTNode node) throws SemanticException {
assert node.getChildCount() <= 2;
+ assert node.getType() == HiveParser.TOK_TABNAME;
if (node.getChildCount() == 2) {
node = (ASTNode) node.getChild(1);
@@ -469,6 +470,19 @@ public abstract class BaseSemanticAnalyzer {
return getUnescapedName(node);
}
+ public static String getTableAlias(ASTNode node) throws SemanticException {
+ // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
+ // partitionTableFunctionSource partitioningSpec? expression*)
+ // guranteed to have an alias here: check done in processJoin
+ if (node.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
+ return unescapeIdentifier(node.getChild(1).getText().toLowerCase());
+ }
+ if (node.getChildCount() == 1) {
+ return getUnescapedUnqualifiedTableName((ASTNode) node.getChild(0)).toLowerCase();
+ }
+ return unescapeIdentifier(node.getChild(node.getChildCount() - 1).getText().toLowerCase());
+ }
+
/**
* Remove the encapsulating "`" pair from the identifier. We allow users to
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 8986315..c313d8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -2867,14 +2867,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if ((left.getToken().getType() == HiveParser.TOK_TABREF)
|| (left.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0)).toLowerCase();
- leftTableAlias = left.getChildCount() == 1 ? tableName :
- unescapeIdentifier(left.getChild(left.getChildCount() - 1).getText().toLowerCase());
- // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
- // partitionTableFunctionSource partitioningSpec? expression*)
- // guranteed to have an lias here: check done in processJoin
- leftTableAlias = (left.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
- unescapeIdentifier(left.getChild(1).getText().toLowerCase()) : leftTableAlias;
+ leftTableAlias = getTableAlias(left);
leftRel = aliasToRel.get(leftTableAlias);
} else if (SemanticAnalyzer.isJoinToken(left)) {
leftRel = genJoinLogicalPlan(left, aliasToRel);
@@ -2890,14 +2883,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
if ((right.getToken().getType() == HiveParser.TOK_TABREF)
|| (right.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0)).toLowerCase();
- rightTableAlias = right.getChildCount() == 1 ? tableName :
- unescapeIdentifier(right.getChild(right.getChildCount() - 1).getText().toLowerCase());
- // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
- // partitionTableFunctionSource partitioningSpec? expression*)
- // guranteed to have an lias here: check done in processJoin
- rightTableAlias = (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
- unescapeIdentifier(right.getChild(1).getText().toLowerCase()) : rightTableAlias;
+ rightTableAlias = getTableAlias(right);
rightRel = aliasToRel.get(rightTableAlias);
} else if (right.getToken().getType() == HiveParser.TOK_LATERAL_VIEW) {
rightRel = genLateralViewPlans(right, aliasToRel);
@@ -3258,17 +3244,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
case HiveParser.TOK_TABREF:
case HiveParser.TOK_SUBQUERY:
case HiveParser.TOK_PTBLFUNCTION:
- String inputTableName = getUnescapedUnqualifiedTableName((ASTNode) next.getChild(0)).toLowerCase();
- String inputTableAlias;
- if (next.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) {
- // ptf node form is: ^(TOK_PTBLFUNCTION $name $alias?
- // partitionTableFunctionSource partitioningSpec? expression*)
- // ptf node guaranteed to have an alias here
- inputTableAlias = unescapeIdentifier(next.getChild(1).getText().toLowerCase());
- } else {
- inputTableAlias = next.getChildCount() == 1 ? inputTableName :
- unescapeIdentifier(next.getChild(next.getChildCount() - 1).getText().toLowerCase());
- }
+ String inputTableAlias = getTableAlias(next);
inputRel = aliasToRel.get(inputTableAlias);
break;
case HiveParser.TOK_LATERAL_VIEW:
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 33d3beb..961ddf4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9977,9 +9977,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (isValidLeftToken) {
- String tableName = getUnescapedUnqualifiedTableName((ASTNode) left.getChild(0))
- .toLowerCase();
- String alias = extractJoinAlias(left, tableName);
+ String alias = extractJoinAlias(left);
joinTree.setLeftAlias(alias);
String[] leftAliases = new String[1];
leftAliases[0] = alias;
@@ -10005,9 +10003,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
if (isValidRightToken) {
- String tableName = getUnescapedUnqualifiedTableName((ASTNode) right.getChild(0))
- .toLowerCase();
- String alias = extractJoinAlias(right, tableName);
+ String alias = extractJoinAlias(right);
String[] rightAliases = new String[1];
rightAliases[0] = alias;
joinTree.setRightAliases(rightAliases);
@@ -10099,7 +10095,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
|| (right.getToken().getType() == HiveParser.TOK_PTBLFUNCTION);
}
- private String extractJoinAlias(ASTNode node, String tableName) {
+ private String extractJoinAlias(ASTNode node) throws SemanticException {
// ptf node form is:
// ^(TOK_PTBLFUNCTION $name $alias? partitionTableFunctionSource partitioningSpec? expression*)
// guaranteed to have an alias here: check done in processJoin
@@ -10107,14 +10103,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return unescapeIdentifier(node.getChild(1).getText().toLowerCase());
}
if (node.getChildCount() == 1) {
- return tableName;
+ return getUnescapedUnqualifiedTableName((ASTNode) node.getChild(0)).toLowerCase();
}
for (int i = node.getChildCount() - 1; i >= 1; i--) {
if (node.getChild(i).getType() == HiveParser.Identifier) {
return unescapeIdentifier(node.getChild(i).getText().toLowerCase());
}
}
- return tableName;
+ throw new SemanticException("Unable to get join alias.");
}
private void parseStreamTables(QBJoinTree joinTree, QB qb) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index e7ad914..b6dc51e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.parse;
+import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.getTableAlias;
+
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
@@ -365,15 +367,7 @@ public class SubQueryUtils {
if ((joinNode.getToken().getType() == HiveParser.TOK_TABREF)
|| (joinNode.getToken().getType() == HiveParser.TOK_SUBQUERY)
|| (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) {
- String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName((ASTNode) joinNode.getChild(0))
- .toLowerCase();
- String alias = joinNode.getChildCount() == 1 ? tableName
- : SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(joinNode.getChildCount() - 1)
- .getText().toLowerCase());
- alias = (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ?
- SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(1).getText().toLowerCase()) :
- alias;
- aliases.add(alias);
+ aliases.add(getTableAlias(joinNode));
} else {
ASTNode left = (ASTNode) joinNode.getChild(0);
ASTNode right = (ASTNode) joinNode.getChild(1);
diff --git a/ql/src/test/queries/clientpositive/cte_join.q b/ql/src/test/queries/clientpositive/cte_join.q
new file mode 100644
index 0000000..50819a7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cte_join.q
@@ -0,0 +1,32 @@
+CREATE TABLE t1 (a int, b varchar(100));
+
+EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
+
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a;
diff --git a/ql/src/test/results/clientpositive/cte_join.q.out b/ql/src/test/results/clientpositive/cte_join.q.out
new file mode 100644
index 0000000..4211b8b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/cte_join.q.out
@@ -0,0 +1,245 @@
+PREHOOK: query: CREATE TABLE t1 (a int, b varchar(100))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: CREATE TABLE t1 (a int, b varchar(100))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN AST
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+ TOK_FROM
+ TOK_JOIN
+ TOK_SUBQUERY
+ TOK_QUERY
+ TOK_CTE
+ TOK_SUBQUERY
+ TOK_QUERY
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ t1
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ a
+ TOK_SELEXPR
+ TOK_TABLE_OR_COL
+ b
+ TOK_WHERE
+ =
+ TOK_TABLE_OR_COL
+ b
+ 'c'
+ sub1
+ TOK_FROM
+ TOK_TABREF
+ TOK_TABNAME
+ sub1
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ sub1
+ a
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ sub1
+ b
+ S
+ TOK_TABREF
+ TOK_TABNAME
+ t1
+ =
+ .
+ TOK_TABLE_OR_COL
+ S
+ a
+ .
+ TOK_TABLE_OR_COL
+ t1
+ a
+ TOK_INSERT
+ TOK_DESTINATION
+ TOK_DIR
+ TOK_TMP_FILE
+ TOK_SELECT
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ S
+ a
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ t1
+ a
+ TOK_SELEXPR
+ .
+ TOK_TABLE_OR_COL
+ t1
+ b
+
+PREHOOK: query: EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(a=[$0])
+ HiveFilter(condition=[AND(=($1, _UTF-16LE'c'), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject(a=[$0], b=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+
+PREHOOK: query: EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t1
+ filterExpr: ((b = 'c') and a is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((b = 'c') and a is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: t1
+ filterExpr: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: a is not null (type: boolean)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int), b (type: varchar(100))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: varchar(100))
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT S.a, t1.a, t1.b FROM (
+WITH
+ sub1 AS (SELECT a, b FROM t1 WHERE b = 'c')
+ SELECT sub1.a, sub1.b FROM sub1
+) S
+JOIN t1 ON S.a = t1.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####