You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2019/07/08 19:51:27 UTC

[hive] branch master updated: HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg)

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d7214ea  HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg)
d7214ea is described below

commit d7214eae068b8c76fd84bb4bcfbc6de7c22da335
Author: Vineet Garg <vg...@apache.org>
AuthorDate: Mon Jul 8 12:50:35 2019 -0700

    HIVE-21915: Hive with TEZ UNION ALL and UDTF results in data loss (Wei Zhang, reviewed by Vineet Garg)
---
 .../test/resources/testconfiguration.properties    |   3 +-
 .../apache/hadoop/hive/ql/parse/GenTezUtils.java   |   6 +-
 .../test/queries/clientpositive/tez_union_udtf.q   |  22 +++
 .../clientpositive/tez/tez_union_udtf.q.out        | 153 +++++++++++++++++++++
 4 files changed, 182 insertions(+), 2 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 7e3294f..6d6e5db 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -44,7 +44,8 @@ minitez.query.files=acid_vectorization_original_tez.q,\
   hybridgrace_hashjoin_2.q,\
   multi_count_distinct.q,\
   tez-tag.q,\
-  tez_union_with_udf.q
+  tez_union_with_udf.q,\
+  tez_union_udtf.q
 
 
 minillap.shared.query.files=insert_into1.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index d64f983..2877479 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -318,7 +318,11 @@ public class GenTezUtils {
         FileSinkOperator fileSink = (FileSinkOperator)current;
 
         // remember it for additional processing later
-        context.fileSinkSet.add(fileSink);
+        if (context.fileSinkSet.contains(fileSink)) {
+          continue;
+        } else {
+          context.fileSinkSet.add(fileSink);
+        }
 
         FileSinkDesc desc = fileSink.getConf();
         Path path = desc.getDirName();
diff --git a/ql/src/test/queries/clientpositive/tez_union_udtf.q b/ql/src/test/queries/clientpositive/tez_union_udtf.q
new file mode 100644
index 0000000..ed58cfd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/tez_union_udtf.q
@@ -0,0 +1,22 @@
+--! qt:dataset:src1
+--! qt:dataset:src
+set hive.merge.tezfiles=true;
+-- SORT_BEFORE_DIFF
+
+EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238';
+
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238';
+
+SELECT * FROM x;
+
diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out
new file mode 100644
index 0000000..cfe4481
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out
@@ -0,0 +1,153 @@
+PREHOOK: query: EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: EXPLAIN
+CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+Plan not optimized by CBO because the statement has lateral views
+
+Vertex dependency in root stage
+Map 1 <- Union 2 (CONTAINS)
+Map 4 <- Union 2 (CONTAINS)
+Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+  Stats Work{}
+    Stage-9
+      Create Table{"name:":"default.x"}
+        Stage-0
+          Move Operator
+            Stage-5(CONDITIONAL)
+              Move Operator
+                Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+                  Conditional Operator
+                    Stage-1
+                      Reducer 3
+                      File Output Operator [FS_23]
+                        Group By Operator [GBY_21] (rows=1 width=880)
+                          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+                        <-Union 2 [CUSTOM_SIMPLE_EDGE]
+                          <-Map 1 [CONTAINS]
+                            File Output Operator [FS_31]
+                              table:{"name:":"default.x"}
+                              Select Operator [SEL_30] (rows=6 width=91)
+                                Output:["_col0","_col1"]
+                                Select Operator [SEL_28] (rows=2 width=91)
+                                  Output:["_col1"]
+                                  Filter Operator [FIL_27] (rows=2 width=87)
+                                    predicate:(key = '238')
+                                    TableScan [TS_26] (rows=500 width=87)
+                                      Output:["key"]
+                            Reduce Output Operator [RS_34]
+                              Group By Operator [GBY_33] (rows=1 width=864)
+                                Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"]
+                                Select Operator [SEL_32] (rows=6 width=91)
+                                  Output:["col1","col2"]
+                                   Please refer to the previous Select Operator [SEL_30]
+                          <-Map 4 [CONTAINS]
+                            File Output Operator [FS_45]
+                              table:{"name:":"default.x"}
+                              Select Operator [SEL_44] (rows=6 width=91)
+                                Output:["_col0","_col1"]
+                                Select Operator [SEL_42] (rows=4 width=87)
+                                  Output:["_col1"]
+                                  Lateral View Join Operator [LVJ_40] (rows=4 width=239)
+                                    Output:["_col5"]
+                                    Select Operator [SEL_38] (rows=2 width=431)
+                                      Lateral View Forward [LVF_37] (rows=2 width=86)
+                                        Filter Operator [FIL_36] (rows=2 width=86)
+                                          predicate:(key = '238')
+                                          TableScan [TS_35] (rows=25 width=86)
+                                            Output:["key"]
+                            Reduce Output Operator [RS_48]
+                              Group By Operator [GBY_47] (rows=1 width=864)
+                                Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"]
+                                Select Operator [SEL_46] (rows=6 width=91)
+                                  Output:["col1","col2"]
+                                   Please refer to the previous Select Operator [SEL_44]
+                            File Output Operator [FS_45]
+                              table:{"name:":"default.x"}
+                              Select Operator [SEL_44] (rows=6 width=91)
+                                Output:["_col0","_col1"]
+                                Select Operator [SEL_42] (rows=4 width=87)
+                                  Output:["_col1"]
+                                  Lateral View Join Operator [LVJ_40] (rows=4 width=239)
+                                    Output:["_col5"]
+                                    UDTF Operator [UDTF_41] (rows=2 width=48)
+                                      function name:explode
+                                      Select Operator [SEL_39] (rows=2 width=48)
+                                        Output:["_col0"]
+                                         Please refer to the previous Lateral View Forward [LVF_37]
+                            Reduce Output Operator [RS_48]
+                              Group By Operator [GBY_47] (rows=1 width=864)
+                                Output:["_col0","_col1"],aggregations:["compute_stats(col1, 'hll')","compute_stats(col2, 'hll')"]
+                                Select Operator [SEL_46] (rows=6 width=91)
+                                  Output:["col1","col2"]
+                                   Please refer to the previous Select Operator [SEL_44]
+            Stage-4(CONDITIONAL)
+              File Merge
+                 Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+            Stage-7
+              Move Operator
+                Stage-6(CONDITIONAL)
+                  File Merge
+                     Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6)
+        Stage-2
+          Dependency Collection{}
+             Please refer to the previous Stage-5(CONDITIONAL)
+             Please refer to the previous Stage-4(CONDITIONAL)
+             Please refer to the previous Stage-7
+
+PREHOOK: query: CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@x
+POSTHOOK: query: CREATE TABLE x AS
+  SELECT key, 1 as tag FROM src WHERE key = '238'
+  UNION ALL
+  SELECT key, tag FROM src1
+  LATERAL VIEW EXPLODE(array(2)) tf as tag
+  WHERE key = '238'
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@x
+POSTHOOK: Lineage: x.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src1)src1.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: x.tag EXPRESSION []
+PREHOOK: query: SELECT * FROM x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@x
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT * FROM x
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@x
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+238	1
+238	1
+238	2