You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2020/03/10 19:24:29 UTC

[hive] branch master updated: HIVE-21660 : Wrong result when union all and lateral view with explode is used (Ganesha Shreedhara via Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 1a3b9bb  HIVE-21660 : Wrong result when union all and lateral view with explode is used (Ganesha Shreedhara via Jesus Camacho Rodriguez)
1a3b9bb is described below

commit 1a3b9bb973b173731a61584d8185d0db5d655141
Author: Ganesha Shreedhara <ga...@gmail.com>
AuthorDate: Tue Mar 10 12:23:22 2020 -0700

    HIVE-21660 : Wrong result when union all and lateral view with explode is used (Ganesha Shreedhara via Jesus Camacho Rodriguez)
    
    Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
---
 .../apache/hadoop/hive/ql/parse/GenTezUtils.java   |   3 +-
 .../queries/clientpositive/unionall_lateralview1.q |  26 +++++
 .../clientpositive/unionall_lateralview1.q.out     | 125 +++++++++++++++++++++
 3 files changed, 152 insertions(+), 2 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 78be42e..4441ea3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -312,9 +312,8 @@ public class GenTezUtils {
 
     while(!operators.isEmpty()) {
       Operator<?> current = operators.pop();
-      seen.add(current);
 
-      if (current instanceof FileSinkOperator) {
+      if (seen.add(current) && current instanceof FileSinkOperator) {
         FileSinkOperator fileSink = (FileSinkOperator)current;
 
         // remember it for additional processing later
diff --git a/ql/src/test/queries/clientpositive/unionall_lateralview1.q b/ql/src/test/queries/clientpositive/unionall_lateralview1.q
new file mode 100644
index 0000000..08b8032
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/unionall_lateralview1.q
@@ -0,0 +1,26 @@
+drop table if exists unionall_lv_src1;
+drop table if exists unionall_lv_src2;
+drop table if exists unionall_lv_dest;
+
+create table unionall_lv_src1(id int, dt string);
+insert into unionall_lv_src1 values (2, '2019-04-01');
+
+create table unionall_lv_src2( id int, dates array<string>);
+insert into unionall_lv_src2 select 1 as id, array('2019-01-01','2019-01-02','2019-01-03') as dates;
+
+create table unionall_lv_dest (id int) partitioned by (dt string);
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.dynamic.partition=true;
+
+insert overwrite table unionall_lv_dest partition (dt)
+select t.id, t.dt from (
+select id, dt from unionall_lv_src1
+union all
+select id, dts as dt from unionall_lv_src2 tt lateral view explode(tt.dates) dd as dts ) t;
+
+select * from unionall_lv_dest;
+
+drop table unionall_lv_src1;
+drop table unionall_lv_src2;
+drop table unionall_lv_dest;
diff --git a/ql/src/test/results/clientpositive/unionall_lateralview1.q.out b/ql/src/test/results/clientpositive/unionall_lateralview1.q.out
new file mode 100644
index 0000000..ff8dd70
--- /dev/null
+++ b/ql/src/test/results/clientpositive/unionall_lateralview1.q.out
@@ -0,0 +1,125 @@
+PREHOOK: query: drop table if exists unionall_lv_src1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists unionall_lv_src1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists unionall_lv_src2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists unionall_lv_src2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists unionall_lv_dest
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists unionall_lv_dest
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table unionall_lv_src1(id int, dt string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@unionall_lv_src1
+POSTHOOK: query: create table unionall_lv_src1(id int, dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@unionall_lv_src1
+PREHOOK: query: insert into unionall_lv_src1 values (2, '2019-04-01')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@unionall_lv_src1
+POSTHOOK: query: insert into unionall_lv_src1 values (2, '2019-04-01')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@unionall_lv_src1
+POSTHOOK: Lineage: unionall_lv_src1.dt SCRIPT []
+POSTHOOK: Lineage: unionall_lv_src1.id SCRIPT []
+PREHOOK: query: create table unionall_lv_src2( id int, dates array<string>)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@unionall_lv_src2
+POSTHOOK: query: create table unionall_lv_src2( id int, dates array<string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@unionall_lv_src2
+PREHOOK: query: insert into unionall_lv_src2 select 1 as id, array('2019-01-01','2019-01-02','2019-01-03') as dates
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@unionall_lv_src2
+POSTHOOK: query: insert into unionall_lv_src2 select 1 as id, array('2019-01-01','2019-01-02','2019-01-03') as dates
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@unionall_lv_src2
+POSTHOOK: Lineage: unionall_lv_src2.dates EXPRESSION []
+POSTHOOK: Lineage: unionall_lv_src2.id SIMPLE []
+PREHOOK: query: create table unionall_lv_dest (id int) partitioned by (dt string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@unionall_lv_dest
+POSTHOOK: query: create table unionall_lv_dest (id int) partitioned by (dt string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@unionall_lv_dest
+PREHOOK: query: insert overwrite table unionall_lv_dest partition (dt)
+select t.id, t.dt from (
+select id, dt from unionall_lv_src1
+union all
+select id, dts as dt from unionall_lv_src2 tt lateral view explode(tt.dates) dd as dts ) t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@unionall_lv_src1
+PREHOOK: Input: default@unionall_lv_src2
+PREHOOK: Output: default@unionall_lv_dest
+POSTHOOK: query: insert overwrite table unionall_lv_dest partition (dt)
+select t.id, t.dt from (
+select id, dt from unionall_lv_src1
+union all
+select id, dts as dt from unionall_lv_src2 tt lateral view explode(tt.dates) dd as dts ) t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@unionall_lv_src1
+POSTHOOK: Input: default@unionall_lv_src2
+POSTHOOK: Output: default@unionall_lv_dest@dt=2019-01-01
+POSTHOOK: Output: default@unionall_lv_dest@dt=2019-01-02
+POSTHOOK: Output: default@unionall_lv_dest@dt=2019-01-03
+POSTHOOK: Output: default@unionall_lv_dest@dt=2019-04-01
+POSTHOOK: Lineage: unionall_lv_dest PARTITION(dt=2019-01-01).id EXPRESSION [(unionall_lv_src1)unionall_lv_src1.FieldSchema(name:id, type:int, comment:null), (unionall_lv_src2)tt.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: unionall_lv_dest PARTITION(dt=2019-01-02).id EXPRESSION [(unionall_lv_src1)unionall_lv_src1.FieldSchema(name:id, type:int, comment:null), (unionall_lv_src2)tt.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: unionall_lv_dest PARTITION(dt=2019-01-03).id EXPRESSION [(unionall_lv_src1)unionall_lv_src1.FieldSchema(name:id, type:int, comment:null), (unionall_lv_src2)tt.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: unionall_lv_dest PARTITION(dt=2019-04-01).id EXPRESSION [(unionall_lv_src1)unionall_lv_src1.FieldSchema(name:id, type:int, comment:null), (unionall_lv_src2)tt.FieldSchema(name:id, type:int, comment:null), ]
+PREHOOK: query: select * from unionall_lv_dest
+PREHOOK: type: QUERY
+PREHOOK: Input: default@unionall_lv_dest
+PREHOOK: Input: default@unionall_lv_dest@dt=2019-01-01
+PREHOOK: Input: default@unionall_lv_dest@dt=2019-01-02
+PREHOOK: Input: default@unionall_lv_dest@dt=2019-01-03
+PREHOOK: Input: default@unionall_lv_dest@dt=2019-04-01
+#### A masked pattern was here ####
+POSTHOOK: query: select * from unionall_lv_dest
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@unionall_lv_dest
+POSTHOOK: Input: default@unionall_lv_dest@dt=2019-01-01
+POSTHOOK: Input: default@unionall_lv_dest@dt=2019-01-02
+POSTHOOK: Input: default@unionall_lv_dest@dt=2019-01-03
+POSTHOOK: Input: default@unionall_lv_dest@dt=2019-04-01
+#### A masked pattern was here ####
+1	2019-01-01
+1	2019-01-02
+1	2019-01-03
+2	2019-04-01
+PREHOOK: query: drop table unionall_lv_src1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@unionall_lv_src1
+PREHOOK: Output: default@unionall_lv_src1
+POSTHOOK: query: drop table unionall_lv_src1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@unionall_lv_src1
+POSTHOOK: Output: default@unionall_lv_src1
+PREHOOK: query: drop table unionall_lv_src2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@unionall_lv_src2
+PREHOOK: Output: default@unionall_lv_src2
+POSTHOOK: query: drop table unionall_lv_src2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@unionall_lv_src2
+POSTHOOK: Output: default@unionall_lv_src2
+PREHOOK: query: drop table unionall_lv_dest
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@unionall_lv_dest
+PREHOOK: Output: default@unionall_lv_dest
+POSTHOOK: query: drop table unionall_lv_dest
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@unionall_lv_dest
+POSTHOOK: Output: default@unionall_lv_dest