You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/01/31 09:42:44 UTC
[hive] branch master updated: HIVE-26980: CTAS and CMV fails if target table is Iceberg and source table has unsupported column type (Krisztian Kasa, reviewed by Denys Kuzmenko, Zsolt Miskolczi)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8fe73e6d1e8 HIVE-26980: CTAS and CMV fails if target table is Iceberg and source table has unsupported column type (Krisztian Kasa, reviewed by Denys Kuzmenko, Zsolt Miskolczi)
8fe73e6d1e8 is described below
commit 8fe73e6d1e87edc5d8897ff610b166ee29469c02
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Tue Jan 31 10:42:27 2023 +0100
HIVE-26980: CTAS and CMV fails if target table is Iceberg and source table has unsupported column type (Krisztian Kasa, reviewed by Denys Kuzmenko, Zsolt Miskolczi)
---
.../src/test/queries/positive/ctas_iceberg_orc.q | 39 ++-
.../test/results/positive/ctas_iceberg_orc.q.out | 347 ++++++++++++++++++---
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 19 +-
3 files changed, 357 insertions(+), 48 deletions(-)
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_orc.q
index 0ab773daed3..05a1e6fd17c 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_orc.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/ctas_iceberg_orc.q
@@ -1,15 +1,44 @@
set hive.explain.user=false;
+set iceberg.mr.schema.auto.conversion=true;
-create table source(a int, b string, c int);
+create table source(
+ i int,
+ s string,
+ vc varchar(256),
+ c char(10),
+ t tinyint,
+ si smallint);
-insert into source values (1, 'one', 3);
-insert into source values (1, 'two', 4);
+insert into source values (1, 'one', 'one_1', 'ch_1', 10, 11);
+insert into source values (1, 'two', 'two_2', 'ch_2', 20, 22);
explain
create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source;
+select i, s, vc,c, t, si from source;
create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source;
+select i, s, vc,c, t, si from source;
+
+select * from tbl_ice;
+
+
+-- Test insert - select
+explain
+insert into tbl_ice
+select * from source;
+
+insert into tbl_ice
+select * from source;
+
+select * from tbl_ice;
+
+
+-- Test insert overwrite
+explain
+insert overwrite table tbl_ice
+select * from source;
+
+insert overwrite table tbl_ice
+select * from source;
select * from tbl_ice;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_orc.q.out
index a110715c9c1..4619a3984d0 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_orc.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/ctas_iceberg_orc.q.out
@@ -1,36 +1,54 @@
-PREHOOK: query: create table source(a int, b string, c int)
+PREHOOK: query: create table source(
+ i int,
+ s string,
+ vc varchar(256),
+ c char(10),
+ t tinyint,
+ si smallint)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@source
-POSTHOOK: query: create table source(a int, b string, c int)
+POSTHOOK: query: create table source(
+ i int,
+ s string,
+ vc varchar(256),
+ c char(10),
+ t tinyint,
+ si smallint)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@source
-PREHOOK: query: insert into source values (1, 'one', 3)
+PREHOOK: query: insert into source values (1, 'one', 'one_1', 'ch_1', 10, 11)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@source
-POSTHOOK: query: insert into source values (1, 'one', 3)
+POSTHOOK: query: insert into source values (1, 'one', 'one_1', 'ch_1', 10, 11)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@source
-POSTHOOK: Lineage: source.a SCRIPT []
-POSTHOOK: Lineage: source.b SCRIPT []
POSTHOOK: Lineage: source.c SCRIPT []
-PREHOOK: query: insert into source values (1, 'two', 4)
+POSTHOOK: Lineage: source.i SCRIPT []
+POSTHOOK: Lineage: source.s SCRIPT []
+POSTHOOK: Lineage: source.si SCRIPT []
+POSTHOOK: Lineage: source.t SCRIPT []
+POSTHOOK: Lineage: source.vc SCRIPT []
+PREHOOK: query: insert into source values (1, 'two', 'two_2', 'ch_2', 20, 22)
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@source
-POSTHOOK: query: insert into source values (1, 'two', 4)
+POSTHOOK: query: insert into source values (1, 'two', 'two_2', 'ch_2', 20, 22)
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@source
-POSTHOOK: Lineage: source.a SCRIPT []
-POSTHOOK: Lineage: source.b SCRIPT []
POSTHOOK: Lineage: source.c SCRIPT []
+POSTHOOK: Lineage: source.i SCRIPT []
+POSTHOOK: Lineage: source.s SCRIPT []
+POSTHOOK: Lineage: source.si SCRIPT []
+POSTHOOK: Lineage: source.t SCRIPT []
+POSTHOOK: Lineage: source.vc SCRIPT []
PREHOOK: query: explain
create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source
+select i, s, vc,c, t, si from source
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@source
PREHOOK: Output: database:default
@@ -38,7 +56,7 @@ PREHOOK: Output: default@tbl_ice
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain
create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source
+select i, s, vc,c, t, si from source
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@source
POSTHOOK: Output: database:default
@@ -54,7 +72,7 @@ STAGE DEPENDENCIES:
STAGE PLANS:
Stage: Stage-4
Create Table
- columns: a int, b string, c int
+ columns: i int, s string, vc varchar(256), c char(10), t tinyint, si smallint
name: default.tbl_ice
input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
location: hdfs://### HDFS PATH ###
@@ -79,50 +97,50 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: source
- Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: a (type: int), b (type: string), c (type: int)
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: i (type: int), s (type: string), CAST( vc AS STRING) (type: string), CAST( c AS STRING) (type: string), UDFToInteger(t) (type: int), UDFToInteger(si) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
name: default.tbl_ice
Select Operator
- expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int)
- outputColumnNames: col1, col2, col3
- Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ outputColumnNames: col1, col2, col3, col4, col5, col6
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector_hll(col1), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector_hll(col2), min(col3), max(col3), count(col3), compute_bit_vector_hll(col3)
+ aggregations: min(col1), max(col1), count(1), count(col1), compute_bit_vector_hll(col1), max(length(col2)), avg(COALESCE(length(col2),0)), count(col2), compute_bit_vector_hll(col2), max(length(col3)), avg(COALESCE(length(col3),0)), count(col3), compute_bit_vector_hll(col3), max(length(col4)), avg(COALESCE(length(col4),0)), count(col4), compute_bit_vector_hll(col4), min(col5), max(col5), count(col5), compute_bit_vector_hll(col5), min(col6), max(col6), count(col6), [...]
minReductionHashAggr: 0.5
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary)
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), [...]
Execution mode: vectorized
Reducer 2
Execution mode: vectorized
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12)
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
- Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (typ [...]
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 794 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -135,8 +153,8 @@ STAGE PLANS:
Stats Work
Basic Stats Work:
Column Stats Desc:
- Columns: a, b, c
- Column Types: int, string, int
+ Columns: i, s, vc, c, t, si
+ Column Types: int, string, string, string, int, int
Table: default.tbl_ice
Stage: Stage-0
@@ -146,22 +164,25 @@ STAGE PLANS:
destination: hdfs://### HDFS PATH ###
PREHOOK: query: create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source
+select i, s, vc,c, t, si from source
PREHOOK: type: CREATETABLE_AS_SELECT
PREHOOK: Input: default@source
PREHOOK: Output: database:default
PREHOOK: Output: default@tbl_ice
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: create external table tbl_ice stored by iceberg stored as orc tblproperties ('format-version'='2') as
-select a, b, c from source
+select i, s, vc,c, t, si from source
POSTHOOK: type: CREATETABLE_AS_SELECT
POSTHOOK: Input: default@source
POSTHOOK: Output: database:default
POSTHOOK: Output: default@tbl_ice
POSTHOOK: Output: hdfs://### HDFS PATH ###
-POSTHOOK: Lineage: tbl_ice.a SIMPLE [(source)source.FieldSchema(name:a, type:int, comment:null), ]
-POSTHOOK: Lineage: tbl_ice.b SIMPLE [(source)source.FieldSchema(name:b, type:string, comment:null), ]
-POSTHOOK: Lineage: tbl_ice.c SIMPLE [(source)source.FieldSchema(name:c, type:int, comment:null), ]
+POSTHOOK: Lineage: tbl_ice.c EXPRESSION [(source)source.FieldSchema(name:c, type:char(10), comment:null), ]
+POSTHOOK: Lineage: tbl_ice.i SIMPLE [(source)source.FieldSchema(name:i, type:int, comment:null), ]
+POSTHOOK: Lineage: tbl_ice.s SIMPLE [(source)source.FieldSchema(name:s, type:string, comment:null), ]
+POSTHOOK: Lineage: tbl_ice.si EXPRESSION [(source)source.FieldSchema(name:si, type:smallint, comment:null), ]
+POSTHOOK: Lineage: tbl_ice.t EXPRESSION [(source)source.FieldSchema(name:t, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: tbl_ice.vc EXPRESSION [(source)source.FieldSchema(name:vc, type:varchar(256), comment:null), ]
PREHOOK: query: select * from tbl_ice
PREHOOK: type: QUERY
PREHOOK: Input: default@tbl_ice
@@ -170,5 +191,247 @@ POSTHOOK: query: select * from tbl_ice
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tbl_ice
POSTHOOK: Output: hdfs://### HDFS PATH ###
-1 one 3
-1 two 4
+1 one one_1 ch_1 10 11
+1 two two_2 ch_2 20 22
+PREHOOK: query: explain
+insert into tbl_ice
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: explain
+insert into tbl_ice
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source
+POSTHOOK: Output: default@tbl_ice
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: source
+ Statistics: Num rows: 2 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), s (type: string), CAST( vc AS STRING) (type: string), CAST( c AS STRING) (type: string), UDFToInteger(t) (type: int), UDFToInteger(si) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.tbl_ice
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ outputColumnNames: i, s, vc, c, t, si
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(i), max(i), count(1), count(i), compute_bit_vector_hll(i), max(length(s)), avg(COALESCE(length(s),0)), count(s), compute_bit_vector_hll(s), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector_hll(vc), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector_hll(c), min(t), max(t), count(t), compute_bit_vector_hll(t), min(si), max(si), count(si), compute_bit_vector_hll(si)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), [...]
+ Execution mode: vectorized
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.tbl_ice
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: i, s, vc, c, t, si
+ Column Types: int, string, string, string, int, int
+ Table: default.tbl_ice
+
+PREHOOK: query: insert into tbl_ice
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: select * from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 one one_1 ch_1 10 11
+1 two two_2 ch_2 20 22
+1 one one_1 ch_1 10 11
+1 two two_2 ch_2 20 22
+PREHOOK: query: explain
+insert overwrite table tbl_ice
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: explain
+insert overwrite table tbl_ice
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source
+POSTHOOK: Output: default@tbl_ice
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: source
+ Statistics: Num rows: 2 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: i (type: int), s (type: string), CAST( vc AS STRING) (type: string), CAST( c AS STRING) (type: string), UDFToInteger(t) (type: int), UDFToInteger(si) (type: int)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.tbl_ice
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: int), _col5 (type: int)
+ outputColumnNames: i, s, vc, c, t, si
+ Statistics: Num rows: 2 Data size: 934 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(i), max(i), count(1), count(i), compute_bit_vector_hll(i), max(length(s)), avg(COALESCE(length(s),0)), count(s), compute_bit_vector_hll(s), max(length(vc)), avg(COALESCE(length(vc),0)), count(vc), compute_bit_vector_hll(vc), max(length(c)), avg(COALESCE(length(c),0)), count(c), compute_bit_vector_hll(c), min(t), max(t), count(t), compute_bit_vector_hll(t), min(si), max(si), count(si), compute_bit_vector_hll(si)
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: struct<count:bigint,sum:double,input:int>), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), [...]
+ Execution mode: vectorized
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), max(VALUE._col9), avg(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16), min(VALUE._col17), max(VALUE._col18), count(VALUE._col19), comp [...]
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE( [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1590 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.tbl_ice
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: i, s, vc, c, t, si
+ Column Types: int, string, string, string, int, int
+ Table: default.tbl_ice
+
+PREHOOK: query: insert overwrite table tbl_ice
+select * from source
+PREHOOK: type: QUERY
+PREHOOK: Input: default@source
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert overwrite table tbl_ice
+select * from source
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@source
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: select * from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 one one_1 ch_1 10 11
+1 two two_2 ch_2 20 22
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e460ae1df8c..c709209d9d8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -7889,6 +7889,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
throw new SemanticException("Unknown destination type: " + destType);
}
+ if (!(destType == QBMetaData.DEST_DFS_FILE && qb.getIsQuery())
+ && destinationTable != null && destinationTable.getStorageHandler() != null) {
+ try {
+ input = genConversionSelectOperator(
+ dest, qb, input, tableDescriptor.getDeserializer(conf), dpCtx, null, destinationTable);
+ } catch (Exception e) {
+ throw new SemanticException(e);
+ }
+ }
inputRR = opParseCtx.get(input).getRowResolver();
@@ -8592,7 +8601,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
List<ColumnInfo> rowFields = opParseCtx.get(input).getRowResolver().getColumnInfos();
int inColumnCnt = rowFields.size();
int outColumnCnt = tableFields.size();
- if (dynPart && dpCtx != null) {
+
+ // If the target table's storage handler reports it as always unpartitioned, the output object inspector
+ // already includes the partition columns, so the dynamic partition column count must not be added again.
+ boolean alreadyContainsPartCols = Optional.ofNullable(table)
+ .map(Table::getStorageHandler)
+ .map(HiveStorageHandler::alwaysUnpartitioned)
+ .orElse(Boolean.FALSE);
+
+ if (dynPart && dpCtx != null && !alreadyContainsPartCols) {
outColumnCnt += dpCtx.getNumDPCols();
}