Posted to commits@hive.apache.org by ha...@apache.org on 2018/10/12 18:07:03 UTC
[1/2] hive git commit: HIVE-20590 : Allow merge statement to have column schema (Miklos Gergely via Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 24f7d2473 -> beccce398
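
The change lets the INSERT branch of a MERGE statement name its target
columns explicitly, in any order, with omitted columns taking their
DEFAULT values. A minimal sketch of the new syntax, modeled directly on
the test cases in the diff below (table and column names are taken from
those tests, not invented):

    -- t2(a int, b int, c int default 1) is a transactional (ACID) table;
    -- upd_t2_4(a int, b int) is the source table.
    merge into t2 as t using upd_t2_4 as u ON t.a = u.a
    WHEN MATCHED THEN UPDATE SET b = 96
    -- the column list may reorder target columns and use DEFAULT;
    -- here c receives its declared default of 1
    WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a);

Previously only the positional form, INSERT VALUES(u.a, u.b, ...), was
accepted, as in the tests updated at the top of the diff.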
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
index 02aa87a..cd178cf 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge_stats.q.out
@@ -458,7 +458,7 @@ Storage Desc Params:
serialization.format 1
PREHOOK: query: merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN DELETE
-WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b)
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
PREHOOK: type: QUERY
PREHOOK: Input: default@t
PREHOOK: Input: default@upd_t
@@ -467,7 +467,7 @@ PREHOOK: Output: default@t
PREHOOK: Output: default@t
POSTHOOK: query: merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN DELETE
-WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b)
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
POSTHOOK: Input: default@upd_t
@@ -522,3 +522,2064 @@ Bucket Columns: [a]
Sort Columns: []
Storage Desc Params:
serialization.format 1
+PREHOOK: query: create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t2_1
+POSTHOOK: query: create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t2_1
+PREHOOK: query: create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t2_2
+POSTHOOK: query: create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t2_2
+PREHOOK: query: create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t2_3
+POSTHOOK: query: create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t2_3
+PREHOOK: query: create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t2_4
+POSTHOOK: query: create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t2_4
+PREHOOK: query: desc formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: desc formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name data_type comment
+a int
+b int
+c int
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+ bucketing_version 2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+ transactional true
+ transactional_properties default
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+Compressed: No
+Num Buckets: 2
+Bucket Columns: [a]
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+
+# Constraints
+
+# Default Constraints
+Table: default.t2
+Constraint Name: #### A masked pattern was here ####
+Column Name:c Default Value:1
+
+PREHOOK: query: insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.a SCRIPT []
+POSTHOOK: Lineage: t2.b SCRIPT []
+POSTHOOK: Lineage: t2.c SIMPLE []
+PREHOOK: query: insert into upd_t2_1 values (1,1),(2,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t2_1
+POSTHOOK: query: insert into upd_t2_1 values (1,1),(2,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t2_1
+POSTHOOK: Lineage: upd_t2_1.a SCRIPT []
+POSTHOOK: Lineage: upd_t2_1.b SCRIPT []
+PREHOOK: query: insert into upd_t2_2 values (3,3),(4,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t2_2
+POSTHOOK: query: insert into upd_t2_2 values (3,3),(4,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t2_2
+POSTHOOK: Lineage: upd_t2_2.a SCRIPT []
+POSTHOOK: Lineage: upd_t2_2.b SCRIPT []
+PREHOOK: query: insert into upd_t2_3 values (5,5),(6,6)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t2_3
+POSTHOOK: query: insert into upd_t2_3 values (5,5),(6,6)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t2_3
+POSTHOOK: Lineage: upd_t2_3.a SCRIPT []
+POSTHOOK: Lineage: upd_t2_3.b SCRIPT []
+PREHOOK: query: insert into upd_t2_4 values (7,7),(8,8)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t2_4
+POSTHOOK: query: insert into upd_t2_4 values (7,7),(8,8)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t2_4
+POSTHOOK: Lineage: upd_t2_4.a SCRIPT []
+POSTHOOK: Lineage: upd_t2_4.b SCRIPT []
+PREHOOK: query: explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_1
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_1
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: u
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: b (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col3 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col5
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 99 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 1 (type: int)
+ outputColumnNames: a, b, c
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b, c
+ Column Types: int, int, int
+ Table: default.t2
+
+PREHOOK: query: merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_1
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_1
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_1)u.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_1)u.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.c SIMPLE []
+PREHOOK: query: explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_2
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_2
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: u
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: b (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Statistics: Num rows: 4 Data size: 284 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col3 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col5
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 98 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 1 (type: int)
+ outputColumnNames: a, b, c
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b, c
+ Column Types: int, int, int
+ Table: default.t2
+
+PREHOOK: query: merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_2
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_2
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_2)u.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_2)u.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.c SIMPLE []
+PREHOOK: query: explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_3
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_3
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: u
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: b (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col3 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col5
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 97 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 1 (type: int)
+ outputColumnNames: a, b, c
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b, c
+ Column Types: int, int, int
+ Table: default.t2
+
+PREHOOK: query: merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_3
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_3
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_3)u.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_3)u.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.c SIMPLE []
+PREHOOK: query: explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_4
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_4
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-2
+ Stage-0 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: c (type: int), ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: u
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: b (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col2, _col5, _col6, _col7
+ Statistics: Num rows: 5 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), _col0 (type: int), _col2 (type: int)
+ outputColumnNames: _col0, _col1, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: int), _col3 (type: int)
+ Filter Operator
+ predicate: (_col0 = _col6) (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col5
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col5 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: int), _col7 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col0 (type: int), 96 (type: int), VALUE._col1 (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), 1 (type: int)
+ outputColumnNames: a, b, c
+ Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll'), compute_stats(c, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: UPDATE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t2
+ Write Type: INSERT
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b, c
+ Column Types: int, int, int
+ Table: default.t2
+
+PREHOOK: query: merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@upd_t2_4
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@upd_t2_4
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t2)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+POSTHOOK: Lineage: t2.a SIMPLE [(upd_t2_4)u.FieldSchema(name:a, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.b SIMPLE [(upd_t2_4)u.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: t2.c SIMPLE []
+PREHOOK: query: select * from t2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+2 2 1
+1 99 1
+4 4 1
+3 98 1
+6 6 1
+5 97 1
+8 8 1
+7 96 1
+PREHOOK: query: create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t3
+POSTHOOK: query: create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t3
+PREHOOK: query: insert into t3 values (1,2), (2,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t3
+POSTHOOK: query: insert into t3 values (1,2), (2,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t3
+POSTHOOK: Lineage: t3.a SCRIPT []
+POSTHOOK: Lineage: t3.b SCRIPT []
+PREHOOK: query: insert into upd_t3 values (1,3), (3,5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t3
+POSTHOOK: query: insert into upd_t3 values (1,3), (3,5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t3
+POSTHOOK: Lineage: upd_t3.a SCRIPT []
+POSTHOOK: Lineage: upd_t3.b SCRIPT []
+PREHOOK: query: explain merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@upd_t3
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t3
+PREHOOK: Output: default@t3
+POSTHOOK: query: explain merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@upd_t3
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t3
+POSTHOOK: Output: default@t3
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-4 depends on stages: Stage-3
+ Stage-0 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-0
+ Stage-2 depends on stages: Stage-4
+ Stage-6 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-7 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-3
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
+ Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: ROW__ID (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: u
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: a (type: int)
+ sort order: +
+ Map-reduce partition columns: a (type: int)
+ Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: b (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: may be used (ACID table)
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Right Outer Join 0 to 1
+ keys:
+ 0 a (type: int)
+ 1 a (type: int)
+ outputColumnNames: _col0, _col4, _col5, _col6
+ Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col5) (type: boolean)
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: UDFToInteger(_col0) (type: int)
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col0 = _col5) (type: boolean)
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col4
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ keys: _col4 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
+ Filter Operator
+ predicate: _col0 is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col6 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t3
+ Write Type: DELETE
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ keys: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (_col1 > 1L) (type: boolean)
+ Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: cardinality_violation(_col0) (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(val, 'hll')
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), 1 (type: int)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t3
+ Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), 1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(a, 'hll'), compute_stats(b, 'hll')
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 848 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
+ Reducer 6
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-4
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t3
+ Write Type: DELETE
+
+ Stage: Stage-5
+ Stats Work
+ Basic Stats Work:
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.merge_tmp_table
+
+ Stage: Stage-6
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
+ serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
+ name: default.t3
+ Write Type: INSERT
+
+ Stage: Stage-7
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, int
+ Table: default.t3
+
+PREHOOK: query: merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+PREHOOK: Input: default@upd_t3
+PREHOOK: Output: default@merge_tmp_table
+PREHOOK: Output: default@t3
+PREHOOK: Output: default@t3
+POSTHOOK: query: merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+POSTHOOK: Input: default@upd_t3
+POSTHOOK: Output: default@merge_tmp_table
+POSTHOOK: Output: default@t3
+POSTHOOK: Output: default@t3
+POSTHOOK: Lineage: merge_tmp_table.val EXPRESSION [(t3)t.FieldSchema(name:ROW__ID, type:struct<writeId:bigint,bucketId:int,rowId:bigint>, comment:), ]
+POSTHOOK: Lineage: t3.a SIMPLE [(upd_t3)u.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: t3.b SIMPLE []
+PREHOOK: query: select * from t3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+2 4
+5 1
+PREHOOK: query: create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t4
+POSTHOOK: query: create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t4
+PREHOOK: query: create table upd_t4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t4
+POSTHOOK: query: create table upd_t4(a int, b int) cl
<TRUNCATED>
[2/2] hive git commit: HIVE-20590 : Allow merge statement to have column schema (Miklos Gergely via Ashutosh Chauhan)
Posted by ha...@apache.org.
HIVE-20590 : Allow merge statement to have column schema (Miklos Gergely via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/beccce39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/beccce39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/beccce39
Branch: refs/heads/master
Commit: beccce3987ae409c65deb810cd571ba06088bae1
Parents: 24f7d24
Author: Miklos Gergely <mg...@hortonworks.com>
Authored: Tue Oct 2 06:22:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Oct 12 11:06:18 2018 -0700
----------------------------------------------------------------------
.../apache/hadoop/hive/ql/parse/HiveParser.g | 4 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 62 +-
.../ql/parse/UpdateDeleteSemanticAnalyzer.java | 88 +-
.../generic/GenericUDFCardinalityViolation.java | 19 +-
.../clientnegative/merge_column_mismatch.q | 15 +
.../queries/clientpositive/sqlmerge_stats.q | 83 +-
.../clientnegative/merge_column_mismatch.q.out | 37 +
.../clientpositive/llap/sqlmerge_stats.q.out | 2065 +++++++++++++++++-
8 files changed, 2288 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
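In short: the WHEN NOT MATCHED branch of MERGE now accepts an optional target column list, so an INSERT may name only a subset of the target's columns (in any order) and let the remaining columns fall back to their DEFAULT constraints. A minimal sketch of the new syntax, adapted from the sqlmerge_stats.q queries in this patch:

merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN DELETE
WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);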
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 78bc87c..bc95c46 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2999,8 +2999,8 @@ whenNotMatchedClause
@init { pushMsg("WHEN NOT MATCHED clause", state); }
@after { popMsg(state); }
:
- KW_WHEN KW_NOT KW_MATCHED (KW_AND expression)? KW_THEN KW_INSERT KW_VALUES valueRowConstructor ->
- ^(TOK_NOT_MATCHED ^(TOK_INSERT valueRowConstructor) expression?)
+ KW_WHEN KW_NOT KW_MATCHED (KW_AND expression)? KW_THEN KW_INSERT (targetCols=columnParenthesesList)? KW_VALUES valueRowConstructor ->
+ ^(TOK_NOT_MATCHED ^(TOK_INSERT $targetCols? valueRowConstructor) expression?)
;
whenMatchedAndClause
@init { pushMsg("WHEN MATCHED AND clause", state); }
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 31bc38e..6a6e6c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -726,17 +726,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
* @throws SemanticException
*/
private static List<String> getDefaultConstraints(Table tbl, List<String> targetSchema) throws SemanticException{
- Map<String, String> colNameToDefaultVal = null;
- try {
- DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName());
- colNameToDefaultVal = dc.getColNameToDefaultValueMap();
- } catch (Exception e) {
- if (e instanceof SemanticException) {
- throw (SemanticException) e;
- } else {
- throw (new RuntimeException(e));
- }
- }
+ Map<String, String> colNameToDefaultVal = getColNameToDefaultValueMap(tbl);
List<String> defaultConstraints = new ArrayList<>();
if(targetSchema != null) {
for (String colName : targetSchema) {
@@ -751,6 +741,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return defaultConstraints;
}
+ protected static Map<String, String> getColNameToDefaultValueMap(Table tbl) throws SemanticException {
+ Map<String, String> colNameToDefaultVal = null;
+ try {
+ DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName());
+ colNameToDefaultVal = dc.getColNameToDefaultValueMap();
+ } catch (Exception e) {
+ if (e instanceof SemanticException) {
+ throw (SemanticException) e;
+ } else {
+ throw (new RuntimeException(e));
+ }
+ }
+ return colNameToDefaultVal;
+ }
+
/**
* Constructs an AST for given DEFAULT string
* @param newValue
@@ -772,28 +777,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return newNode;
}
- public static String replaceDefaultKeywordForMerge(String valueClause,Table targetTable)
- throws SemanticException {
- List<String> defaultConstraints = null;
- String[] values = valueClause.trim().split(",");
- StringBuilder newValueClause = new StringBuilder();
- for (int i = 0; i < values.length; i++) {
- if (values[i].trim().toLowerCase().equals("`default`")) {
- if (defaultConstraints == null) {
- defaultConstraints = getDefaultConstraints(targetTable, null);
- }
- newValueClause.append(defaultConstraints.get(i));
- }
- else {
- newValueClause.append(values[i]);
- }
- if(i != values.length-1) {
- newValueClause.append(",");
- }
- }
- return newValueClause.toString();
- }
-
/**
* This method replaces ASTNode corresponding to DEFAULT keyword with either DEFAULT constraint
* expression if exists or NULL otherwise
@@ -4634,17 +4617,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
// see if we need to fetch default constraints from metastore
if(targetCol2Projection.size() < targetTableColNames.size()) {
- try {
- DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(target.getDbName(), target.getTableName());
- colNameToDefaultVal = dc.getColNameToDefaultValueMap();
- } catch (Exception e) {
- if (e instanceof SemanticException) {
- throw (SemanticException) e;
- } else {
- throw (new RuntimeException(e));
- }
- }
-
+ colNameToDefaultVal = getColNameToDefaultValueMap(target);
}
for (int i = 0; i < targetTableColNames.size(); i++) {
String f = targetTableColNames.get(i);
@@ -6396,7 +6369,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
ASTNode value = entry.getValue();
- ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
// 0 is the function name
for (int i = 1; i < value.getChildCount(); i++) {
ASTNode paraExpr = (ASTNode) value.getChild(i);
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index e8823e1..8651afd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -32,6 +32,8 @@ import java.util.UUID;
import org.antlr.runtime.TokenRewriteStream;
import org.antlr.runtime.tree.Tree;
+import org.apache.commons.collections.MapUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -993,7 +995,6 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0/*skip cardinality violation clause*/);
insClauseIdx++, whenClauseIdx++) {
//we've added Insert clauses in order or WHEN items in whenClauses
- ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
case HiveParser.TOK_INSERT:
rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
@@ -1185,7 +1186,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
String targetName = getSimpleTableName(target);
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
- rewrittenQueryStr.append(" -- update clause\n select ");
+ rewrittenQueryStr.append(" -- update clause\n SELECT ");
if (hintStr != null) {
rewrittenQueryStr.append(hintStr);
}
@@ -1226,7 +1227,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
if(deleteExtraPredicate != null) {
rewrittenQueryStr.append(" AND NOT(").append(deleteExtraPredicate).append(")");
}
- rewrittenQueryStr.append("\n sort by ");
+ rewrittenQueryStr.append("\n SORT BY ");
rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
setUpAccessControlInfoForUpdate(targetTable, setColsExprs);
@@ -1249,7 +1250,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
addPartitionColsToInsert(partCols, rewrittenQueryStr);
- rewrittenQueryStr.append(" -- delete clause\n select ");
+ rewrittenQueryStr.append(" -- delete clause\n SELECT ");
if (hintStr != null) {
rewrittenQueryStr.append(hintStr);
}
@@ -1264,7 +1265,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
if(updateExtraPredicate != null) {
rewrittenQueryStr.append(" AND NOT(").append(updateExtraPredicate).append(")");
}
- rewrittenQueryStr.append("\n sort by ");
+ rewrittenQueryStr.append("\n SORT BY ");
rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
return extraPredicate;
}
@@ -1353,7 +1354,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
*/
private String getWhenClausePredicate(ASTNode whenClause) {
if(!(whenClause.getType() == HiveParser.TOK_MATCHED || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
- throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
+ throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
}
if(whenClause.getChildCount() == 2) {
return getMatchedText((ASTNode)whenClause.getChild(1));
@@ -1366,33 +1367,80 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
* @throws SemanticException
*/
private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr, ASTNode target,
- ASTNode onClause, Table targetTable,
- String targetTableNameInSourceQuery, String onClauseAsString, String hintStr) throws SemanticException {
+ ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery,
+ String onClauseAsString, String hintStr) throws SemanticException {
+ ASTNode whenClauseOperation = getWhenClauseOperation(whenNotMatchedClause);
assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
- assert getWhenClauseOperation(whenNotMatchedClause).getType() == HiveParser.TOK_INSERT;
- List<FieldSchema> partCols = targetTable.getPartCols();
- String valuesClause = getMatchedText((ASTNode)getWhenClauseOperation(whenNotMatchedClause).getChild(0));
- valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')'
- valuesClause = SemanticAnalyzer.replaceDefaultKeywordForMerge(valuesClause, targetTable);
+ assert whenClauseOperation.getType() == HiveParser.TOK_INSERT;
+
+ // identify the node that contains the values to insert and the optional column list node
+ ArrayList<Node> children = whenClauseOperation.getChildren();
+ ASTNode valuesNode =
+ (ASTNode)children.stream().filter(n -> ((ASTNode)n).getType() == HiveParser.TOK_FUNCTION).findFirst().get();
+ ASTNode columnListNode =
+ (ASTNode)children.stream().filter(n -> ((ASTNode)n).getType() == HiveParser.TOK_TABCOLNAME).findFirst()
+ .orElse(null);
+
+ // if a column list is specified, it has to have the same number of elements as the values
+ // (valuesNode has one child for the struct function itself; the rest are the values)
+ if (columnListNode != null && columnListNode.getChildCount() != (valuesNode.getChildCount() - 1)) {
+ throw new SemanticException(String.format("Column schema must have the same length as values (%d vs %d)",
+ columnListNode.getChildCount(), valuesNode.getChildCount() - 1));
+ }
rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
- addPartitionColsToInsert(partCols, rewrittenQueryStr);
+ if (columnListNode != null) {
+ rewrittenQueryStr.append(' ').append(getMatchedText(columnListNode));
+ }
+ addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
- OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
- conf, onClauseAsString);
- oca.analyze();
- rewrittenQueryStr.append(" -- insert clause\n select ");
+ rewrittenQueryStr.append(" -- insert clause\n SELECT ");
if (hintStr != null) {
rewrittenQueryStr.append(hintStr);
}
+
+ OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
+ conf, onClauseAsString);
+ oca.analyze();
+
+ String valuesClause = getMatchedText(valuesNode);
+ valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')'
+ valuesClause = replaceDefaultKeywordForMerge(valuesClause, targetTable, columnListNode);
rewrittenQueryStr.append(valuesClause).append("\n WHERE ").append(oca.getPredicate());
+
String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
- if(extraPredicate != null) {
+ if (extraPredicate != null) {
//we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT
rewrittenQueryStr.append(" AND ")
.append(getMatchedText(((ASTNode)whenNotMatchedClause.getChild(1)))).append('\n');
}
}
+
+ private String replaceDefaultKeywordForMerge(String valueClause, Table table, ASTNode columnListNode)
+ throws SemanticException {
+ if (!valueClause.toLowerCase().contains("`default`")) {
+ return valueClause;
+ }
+
+ Map<String, String> colNameToDefaultConstraint = getColNameToDefaultValueMap(table);
+ String[] values = valueClause.trim().split(",");
+ String[] replacedValues = new String[values.length];
+
+ // the list of target column names may be given explicitly in the query
+ String[] columnNames = columnListNode == null ?
+ table.getAllCols().stream().map(f -> f.getName()).toArray(size -> new String[size]) :
+ columnListNode.getChildren().stream().map(n -> ((ASTNode)n).toString()).toArray(size -> new String[size]);
+
+ for (int i = 0; i < values.length; i++) {
+ if (values[i].trim().toLowerCase().equals("`default`")) {
+ replacedValues[i] = MapUtils.getString(colNameToDefaultConstraint, columnNames[i], "null");
+ } else {
+ replacedValues[i] = values[i];
+ }
+ }
+ return StringUtils.join(replacedValues, ',');
+ }
+
/**
* Suppose the input Merge statement has ON target.a = source.b and c = d. Assume, that 'c' is from
* target table and 'd' is from source expression. In order to properly
@@ -1503,7 +1551,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
List<String> targetCols = table2column.get(targetTableNameInSourceQuery.toLowerCase());
if(targetCols == null) {
/*e.g. ON source.t=1
- * this is not strictly speaking invlaid but it does ensure that all columns from target
+ * this is not strictly speaking invalid but it does ensure that all columns from target
* table are all NULL for every row. This would make any WHEN MATCHED clause invalid since
* we don't have a ROW__ID. The WHEN NOT MATCHED could be meaningful but it's just data from
* source satisfying source.t=1... not worth the effort to support this*/
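The net effect of replaceDefaultKeywordForMerge is that each `default` in the VALUES row is resolved against the column it is actually bound to — taken from the explicit column list when one is given, or from the table's full column order otherwise — and replaced with that column's DEFAULT constraint, or the literal null when the column has none. A sketch using the t3 tables from the test output above, where b carries DEFAULT 1:

merge into t3 as t using upd_t3 as u ON t.a = u.a
WHEN MATCHED THEN DELETE
WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
-- `default` is bound to column b here, so the rewritten insert supplies 1 for b,
-- which is why the unmatched source row (3,5) shows up in t3 as (5, 1).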
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
index b688447..b3c1a06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
@@ -18,29 +18,18 @@
package org.apache.hadoop.hive.ql.udf.generic;
-import java.util.ArrayList;
-
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
-import org.apache.logging.log4j.core.layout.StringBuilderEncoder;
/**
- * GenericUDFArray.
- *
+ * Function intended to fail. It is placed in query branches that should never produce any rows, so evaluating it signals the problem.
*/
@Description(name = "cardinality_violation",
value = "_FUNC_(n0, n1...) - raises Cardinality Violation")
public class GenericUDFCardinalityViolation extends GenericUDF {
- private transient Converter[] converters;
- private transient ArrayList<Object> ret = new ArrayList<Object>();
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -50,8 +39,10 @@ public class GenericUDFCardinalityViolation extends GenericUDF {
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
StringBuilder nonUniqueKey = new StringBuilder();
- for(DeferredObject t : arguments) {
- if(nonUniqueKey.length() > 0) {nonUniqueKey.append(','); }
+ for (DeferredObject t : arguments) {
+ if (nonUniqueKey.length() > 0) {
+ nonUniqueKey.append(',');
+ }
nonUniqueKey.append(t.get());
}
throw new RuntimeException("Cardinality Violation in Merge statement: " + nonUniqueKey);
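For context on where this UDF fires: the rewritten MERGE plans a branch that groups the matched target ROW__IDs and calls cardinality_violation whenever some ROW__ID matched more than one source row (the count > 1 filter visible in the explain plans above), provided hive.merge.cardinality.check=true. A hypothetical trigger, assuming the t/upd_t pair from merge_column_mismatch.q below plus one extra duplicate-key row:

insert into upd_t values (1, 9);  -- upd_t now holds two rows with a = 1
merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN UPDATE SET b = u.b;
-- fails at runtime with "Cardinality Violation in Merge statement: <row id>"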
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/queries/clientnegative/merge_column_mismatch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/merge_column_mismatch.q b/ql/src/test/queries/clientnegative/merge_column_mismatch.q
new file mode 100644
index 0000000..5f78ea4
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/merge_column_mismatch.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t values (1,2), (2,4);
+insert into upd_t values (1,3), (3,5);
+
+merge into t as t using upd_t as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (a) VALUES(u.a, default);
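The final MERGE above is the intended failure case: the column list names a single column (a) while the VALUES row supplies two expressions, which the new length check in handleInsert rejects (see the q.out at the end of this mail). A corrected sketch would name both target columns:

merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN UPDATE SET b = default
WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, default);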
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/queries/clientpositive/sqlmerge_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/sqlmerge_stats.q b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
index c480eb6..453060e 100644
--- a/ql/src/test/queries/clientpositive/sqlmerge_stats.q
+++ b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
@@ -29,10 +29,89 @@ desc formatted t;
merge into t as t using upd_t as u ON t.a = u.a
WHEN MATCHED THEN DELETE
-WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b);
-
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
select assert_true(count(1) = 0) from t group by a>-1;
-- rownum is 0 because the orc writer can keep track of deltas
desc formatted t;
+create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+
+desc formatted t2;
+
+insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7);
+insert into upd_t2_1 values (1,1),(2,2);
+insert into upd_t2_2 values (3,3),(4,4);
+insert into upd_t2_3 values (5,5),(6,6);
+insert into upd_t2_4 values (7,7),(8,8);
+
+explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default);
+
+merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default);
+
+explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
+
+merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
+
+explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default);
+
+merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default);
+
+explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a);
+
+merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a);
+
+select * from t2;
+
+create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t3 values (1,2), (2,4);
+insert into upd_t3 values (1,3), (3,5);
+
+explain merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+select * from t3;
+
+create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t4 values (1,2), (2,4);
+insert into upd_t4 values (1,3), (3,5);
+
+explain merge into t4 as t using upd_t4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+merge into t4 as t using upd_t4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+select * from t4;
+
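The t4 block rounds out the coverage by using the DEFAULT keyword on the update path as well. A sketch of the expected end state, assuming the declared DEFAULT 1 on b: the matched row (1,2) becomes (1,1) via SET b = default, row (2,4) is untouched, and the unmatched source row (3,5) is inserted as (5,1) because u.b feeds column a while b takes its default:

select * from t4;
-- 1	1
-- 2	4
-- 5	1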
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/merge_column_mismatch.q.out b/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
new file mode 100644
index 0000000..844986e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
@@ -0,0 +1,37 @@
+PREHOOK: query: create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t
+POSTHOOK: query: create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t
+PREHOOK: query: insert into t values (1,2), (2,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (1,2), (2,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+POSTHOOK: Lineage: t.b SCRIPT []
+PREHOOK: query: insert into upd_t values (1,3), (3,5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t
+POSTHOOK: query: insert into upd_t values (1,3), (3,5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t
+POSTHOOK: Lineage: upd_t.a SCRIPT []
+POSTHOOK: Lineage: upd_t.b SCRIPT []
+FAILED: SemanticException Column schema must have the same length as values (1 vs 2)