You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mb...@apache.org on 2021/12/02 08:53:49 UTC
[hive] branch master updated: HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)
This is an automated email from the ASF dual-hosted git repository.
mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new cceee0a HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)
cceee0a is described below
commit cceee0a61a75274520178cd31cad26e3b1a25b12
Author: Marton Bod <mb...@cloudera.com>
AuthorDate: Thu Dec 2 09:53:32 2021 +0100
HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)
---
.../apache/hadoop/hive/ql/parse/ParseUtils.java | 96 ++++++++++++----------
ql/src/test/queries/clientpositive/udf_explode.q | 6 ++
.../results/clientpositive/llap/udf_explode.q.out | 52 ++++++++++++
3 files changed, 109 insertions(+), 45 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index eb46c22..e7c15ff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -445,7 +445,7 @@ public final class ParseUtils {
// Repeat the procedure for the new select.
}
- // Found the proper columns.
+ // Find the proper columns.
List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
HashSet<String> aliases = new HashSet<>();
for (int i = 0; i < select.getChildCount(); ++i) {
@@ -453,51 +453,57 @@ public final class ParseUtils {
if (selExpr.getType() == HiveParser.QUERY_HINT) continue;
assert selExpr.getType() == HiveParser.TOK_SELEXPR;
assert selExpr.getChildCount() > 0;
- // Examine the last child. It could be an alias.
- Tree child = selExpr.getChild(selExpr.getChildCount() - 1);
- switch (child.getType()) {
- case HiveParser.TOK_SETCOLREF:
- // We have a nested setcolref. Process that and start from scratch TODO: use stack?
- processSetColsNode((ASTNode)child, searcher, ctx);
- processSetColsNode(setCols, searcher, ctx);
- return;
- case HiveParser.TOK_ALLCOLREF:
- // We should find an alias of this insert and do (alias).*. This however won't fix e.g.
- // positional order by alias case, cause we'd still have a star on the top level. Bail.
- LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
- setCols.token.setType(HiveParser.TOK_ALLCOLREF);
- return;
- case HiveParser.TOK_TABLE_OR_COL:
- Tree idChild = child.getChild(0);
- assert idChild.getType() == HiveParser.Identifier : idChild;
- if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) {
- setCols.token.setType(HiveParser.TOK_ALLCOLREF);
- return;
- }
- break;
- case HiveParser.Identifier:
- if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) {
- setCols.token.setType(HiveParser.TOK_ALLCOLREF);
- return;
- }
- break;
- case HiveParser.DOT: {
- Tree colChild = child.getChild(child.getChildCount() - 1);
- assert colChild.getType() == HiveParser.Identifier : colChild;
- if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) {
- setCols.token.setType(HiveParser.TOK_ALLCOLREF);
- return;
+ // we can have functions which generate multiple aliases (e.g. explode(map(x, y)) as (key, val))
+ boolean isFunctionWithMultipleParameters =
+ selExpr.getChild(0).getType() == HiveParser.TOK_FUNCTION && selExpr.getChildCount() > 2;
+ // if so let's skip the function token but then examine all its parameters - otherwise check only the last item
+ int start = isFunctionWithMultipleParameters ? 1 : selExpr.getChildCount() - 1;
+ for (int j = start; j < selExpr.getChildCount(); ++j) {
+ Tree child = selExpr.getChild(j);
+ switch (child.getType()) {
+ case HiveParser.TOK_SETCOLREF:
+ // We have a nested setcolref. Process that and start from scratch TODO: use stack?
+ processSetColsNode((ASTNode) child, searcher, ctx);
+ processSetColsNode(setCols, searcher, ctx);
+ return;
+ case HiveParser.TOK_ALLCOLREF:
+ // We should find an alias of this insert and do (alias).*. This however won't fix e.g.
+ // positional order by alias case, cause we'd still have a star on the top level. Bail.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ case HiveParser.TOK_TABLE_OR_COL:
+ Tree idChild = child.getChild(0);
+ assert idChild.getType() == HiveParser.Identifier : idChild;
+ if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ case HiveParser.Identifier:
+ if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ case HiveParser.DOT: {
+ Tree colChild = child.getChild(child.getChildCount() - 1);
+ assert colChild.getType() == HiveParser.Identifier : colChild;
+ if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) {
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
+ }
+ break;
+ }
+ default:
+ // Not really sure how to refer to this (or if we can).
+ // TODO: We could find a different from branch for the union, that might have an alias?
+ // Or we could add an alias here to refer to, but that might break other branches.
+ LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
+ + child.getType() + " " + child.getText());
+ setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+ return;
}
- break;
- }
- default:
- // Not really sure how to refer to this (or if we can).
- // TODO: We could find a different from branch for the union, that might have an alias?
- // Or we could add an alias here to refer to, but that might break other branches.
- LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
- + child.getType() + " " + child.getText());
- setCols.token.setType(HiveParser.TOK_ALLCOLREF);
- return;
}
}
// Insert search in the beginning would have failed if these parents didn't exist.
diff --git a/ql/src/test/queries/clientpositive/udf_explode.q b/ql/src/test/queries/clientpositive/udf_explode.q
index 7825eca..2c017fe 100644
--- a/ql/src/test/queries/clientpositive/udf_explode.q
+++ b/ql/src/test/queries/clientpositive/udf_explode.q
@@ -23,3 +23,9 @@ INSERT OVERWRITE TABLE lazy_array_map select map(1, 'one', 2, 'two', 3, 'three')
SELECT array_col, myCol FROM lazy_array_map lateral view explode(array_col) X AS myCol ORDER BY array_col, myCol;
SELECT map_col, myKey, myValue FROM lazy_array_map lateral view explode(map_col) X AS myKey, myValue ORDER BY map_col, myKey, myValue;
+
+create table source1 (dt string, d1 int, d2 int) stored as orc;
+create table source2 (dt string, d1 int, d2 int) stored as orc;
+insert into source1 values ('20211107', 1, 2);
+insert into source2 values ('20211108', 11, 22);
+select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2;
diff --git a/ql/src/test/results/clientpositive/llap/udf_explode.q.out b/ql/src/test/results/clientpositive/llap/udf_explode.q.out
index 8cfdbe2..2577e2e 100644
--- a/ql/src/test/results/clientpositive/llap/udf_explode.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_explode.q.out
@@ -653,3 +653,55 @@ POSTHOOK: Input: default@lazy_array_map
{1:"one",2:"two",3:"three"} 1 one
{1:"one",2:"two",3:"three"} 2 two
{1:"one",2:"two",3:"three"} 3 three
+PREHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source1
+POSTHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source1
+PREHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source2
+POSTHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source2
+PREHOOK: query: insert into source1 values ('20211107', 1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source1
+POSTHOOK: query: insert into source1 values ('20211107', 1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source1
+POSTHOOK: Lineage: source1.d1 SCRIPT []
+POSTHOOK: Lineage: source1.d2 SCRIPT []
+POSTHOOK: Lineage: source1.dt SCRIPT []
+PREHOOK: query: insert into source2 values ('20211108', 11, 22)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source2
+POSTHOOK: query: insert into source2 values ('20211108', 11, 22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source2
+POSTHOOK: Lineage: source2.d1 SCRIPT []
+POSTHOOK: Lineage: source2.d2 SCRIPT []
+POSTHOOK: Lineage: source2.dt SCRIPT []
+PREHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source1
+PREHOOK: Input: default@source2
+#### A masked pattern was here ####
+POSTHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source1
+POSTHOOK: Input: default@source2
+#### A masked pattern was here ####
+D219 1
+D220 2
+D221 11
+D222 22