You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mb...@apache.org on 2021/12/02 08:53:49 UTC

[hive] branch master updated: HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)

This is an automated email from the ASF dual-hosted git repository.

mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cceee0a  HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)
cceee0a is described below

commit cceee0a61a75274520178cd31cad26e3b1a25b12
Author: Marton Bod <mb...@cloudera.com>
AuthorDate: Thu Dec 2 09:53:32 2021 +0100

    HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa)
---
 .../apache/hadoop/hive/ql/parse/ParseUtils.java    | 96 ++++++++++++----------
 ql/src/test/queries/clientpositive/udf_explode.q   |  6 ++
 .../results/clientpositive/llap/udf_explode.q.out  | 52 ++++++++++++
 3 files changed, 109 insertions(+), 45 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
index eb46c22..e7c15ff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java
@@ -445,7 +445,7 @@ public final class ParseUtils {
         // Repeat the procedure for the new select.
       }
 
-      // Found the proper columns.
+      // Find the proper columns.
       List<ASTNode> newChildren = new ArrayList<>(select.getChildCount());
       HashSet<String> aliases = new HashSet<>();
       for (int i = 0; i < select.getChildCount(); ++i) {
@@ -453,51 +453,57 @@ public final class ParseUtils {
         if (selExpr.getType() == HiveParser.QUERY_HINT) continue;
         assert selExpr.getType() == HiveParser.TOK_SELEXPR;
         assert selExpr.getChildCount() > 0;
-        // Examine the last child. It could be an alias.
-        Tree child = selExpr.getChild(selExpr.getChildCount() - 1);
-        switch (child.getType()) {
-        case HiveParser.TOK_SETCOLREF:
-          // We have a nested setcolref. Process that and start from scratch TODO: use stack?
-          processSetColsNode((ASTNode)child, searcher, ctx);
-          processSetColsNode(setCols, searcher, ctx);
-          return;
-        case HiveParser.TOK_ALLCOLREF:
-          // We should find an alias of this insert and do (alias).*. This however won't fix e.g.
-          // positional order by alias case, cause we'd still have a star on the top level. Bail.
-          LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
-          setCols.token.setType(HiveParser.TOK_ALLCOLREF);
-          return;
-        case HiveParser.TOK_TABLE_OR_COL:
-          Tree idChild = child.getChild(0);
-          assert idChild.getType() == HiveParser.Identifier : idChild;
-          if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) {
-            setCols.token.setType(HiveParser.TOK_ALLCOLREF);
-            return;
-          }
-          break;
-        case HiveParser.Identifier:
-          if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) {
-            setCols.token.setType(HiveParser.TOK_ALLCOLREF);
-            return;
-          }
-          break;
-        case HiveParser.DOT: {
-          Tree colChild = child.getChild(child.getChildCount() - 1);
-          assert colChild.getType() == HiveParser.Identifier : colChild;
-          if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) {
-            setCols.token.setType(HiveParser.TOK_ALLCOLREF);
-            return;
+        // we can have functions which generate multiple aliases (e.g. explode(map(x, y)) as (key, val))
+        boolean isFunctionWithMultipleParameters =
+            selExpr.getChild(0).getType() == HiveParser.TOK_FUNCTION && selExpr.getChildCount() > 2;
+        // if so let's skip the function token buth then examine all its parameters - otherwise check only the last item
+        int start = isFunctionWithMultipleParameters ? 1 : selExpr.getChildCount() - 1;
+        for (int j = start; j < selExpr.getChildCount(); ++j) {
+          Tree child = selExpr.getChild(j);
+          switch (child.getType()) {
+            case HiveParser.TOK_SETCOLREF:
+              // We have a nested setcolref. Process that and start from scratch TODO: use stack?
+              processSetColsNode((ASTNode) child, searcher, ctx);
+              processSetColsNode(setCols, searcher, ctx);
+              return;
+            case HiveParser.TOK_ALLCOLREF:
+              // We should find an alias of this insert and do (alias).*. This however won't fix e.g.
+              // positional order by alias case, cause we'd still have a star on the top level. Bail.
+              LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF");
+              setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+              return;
+            case HiveParser.TOK_TABLE_OR_COL:
+              Tree idChild = child.getChild(0);
+              assert idChild.getType() == HiveParser.Identifier : idChild;
+              if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) {
+                setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+                return;
+              }
+              break;
+            case HiveParser.Identifier:
+              if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) {
+                setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+                return;
+              }
+              break;
+            case HiveParser.DOT: {
+              Tree colChild = child.getChild(child.getChildCount() - 1);
+              assert colChild.getType() == HiveParser.Identifier : colChild;
+              if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) {
+                setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+                return;
+              }
+              break;
+            }
+            default:
+              // Not really sure how to refer to this (or if we can).
+              // TODO: We could find a different from branch for the union, that might have an alias?
+              //       Or we could add an alias here to refer to, but that might break other branches.
+              LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
+                  + child.getType() + " " + child.getText());
+              setCols.token.setType(HiveParser.TOK_ALLCOLREF);
+              return;
           }
-          break;
-        }
-        default:
-          // Not really sure how to refer to this (or if we can).
-          // TODO: We could find a different from branch for the union, that might have an alias?
-          //       Or we could add an alias here to refer to, but that might break other branches.
-          LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node "
-              + child.getType() + " " + child.getText());
-          setCols.token.setType(HiveParser.TOK_ALLCOLREF);
-          return;
         }
       }
       // Insert search in the beginning would have failed if these parents didn't exist.
diff --git a/ql/src/test/queries/clientpositive/udf_explode.q b/ql/src/test/queries/clientpositive/udf_explode.q
index 7825eca..2c017fe 100644
--- a/ql/src/test/queries/clientpositive/udf_explode.q
+++ b/ql/src/test/queries/clientpositive/udf_explode.q
@@ -23,3 +23,9 @@ INSERT OVERWRITE TABLE lazy_array_map select map(1, 'one', 2, 'two', 3, 'three')
 
 SELECT array_col, myCol FROM lazy_array_map lateral view explode(array_col) X AS myCol ORDER BY array_col, myCol;
 SELECT map_col, myKey, myValue FROM lazy_array_map lateral view explode(map_col) X AS myKey, myValue ORDER BY map_col, myKey, myValue;
+
+create table source1 (dt string, d1 int, d2 int) stored as orc;
+create table source2 (dt string, d1 int, d2 int) stored as orc;
+insert into source1 values ('20211107', 1, 2);
+insert into source2 values ('20211108', 11, 22);
+select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2;
diff --git a/ql/src/test/results/clientpositive/llap/udf_explode.q.out b/ql/src/test/results/clientpositive/llap/udf_explode.q.out
index 8cfdbe2..2577e2e 100644
--- a/ql/src/test/results/clientpositive/llap/udf_explode.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_explode.q.out
@@ -653,3 +653,55 @@ POSTHOOK: Input: default@lazy_array_map
 {1:"one",2:"two",3:"three"}	1	one
 {1:"one",2:"two",3:"three"}	2	two
 {1:"one",2:"two",3:"three"}	3	three
+PREHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source1
+POSTHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source1
+PREHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@source2
+POSTHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@source2
+PREHOOK: query: insert into source1 values ('20211107', 1, 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source1
+POSTHOOK: query: insert into source1 values ('20211107', 1, 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source1
+POSTHOOK: Lineage: source1.d1 SCRIPT []
+POSTHOOK: Lineage: source1.d2 SCRIPT []
+POSTHOOK: Lineage: source1.dt SCRIPT []
+PREHOOK: query: insert into source2 values ('20211108', 11, 22)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@source2
+POSTHOOK: query: insert into source2 values ('20211108', 11, 22)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@source2
+POSTHOOK: Lineage: source2.d1 SCRIPT []
+POSTHOOK: Lineage: source2.d2 SCRIPT []
+POSTHOOK: Lineage: source2.dt SCRIPT []
+PREHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source1
+PREHOOK: Input: default@source2
+#### A masked pattern was here ####
+POSTHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source1
+POSTHOOK: Input: default@source2
+#### A masked pattern was here ####
+D219	1
+D220	2
+D221	11
+D222	22