You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kr...@apache.org on 2023/01/20 03:17:45 UTC
[hive] branch master updated: HIVE-26925: MV with iceberg storage format fails when it contains 'PARTITIONED ON' clause due to column number/types difference. (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new ed3261f3a4e HIVE-26925: MV with iceberg storage format fails when it contains 'PARTITIONED ON' clause due to column number/types difference. (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis)
ed3261f3a4e is described below
commit ed3261f3a4e2600d3519b0d4326eb765705a20fb
Author: Krisztian Kasa <ka...@gmail.com>
AuthorDate: Fri Jan 20 04:17:38 2023 +0100
HIVE-26925: MV with iceberg storage format fails when it contains 'PARTITIONED ON' clause due to column number/types difference. (Krisztian Kasa, reviewed by Aman Sinha, Stamatis Zampetakis)
---
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 5 +-
.../queries/positive/mv_iceberg_partitioned_orc.q | 21 +++
.../queries/positive/mv_iceberg_partitioned_orc2.q | 21 +++
.../positive/mv_iceberg_partitioned_orc.q.out | 173 ++++++++++++++++++++
.../positive/mv_iceberg_partitioned_orc2.q.out | 175 +++++++++++++++++++++
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 2 +
.../org/apache/hadoop/hive/ql/ddl/DDLUtils.java | 31 ++++
.../hive/ql/ddl/table/create/CreateTableDesc.java | 26 +--
.../set/AlterTableSetPartitionSpecAnalyzer.java | 7 +-
.../view/create/CreateMaterializedViewDesc.java | 10 +-
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 78 ++++-----
.../hadoop/hive/ql/session/SessionStateUtil.java | 6 +
12 files changed, 481 insertions(+), 74 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 53bbb7d149e..c44ca3132ff 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -363,10 +363,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
db.dropPartitions(hmsTable.getDbName(), hmsTable.getTableName(), EMPTY_FILTER, DROP_OPTIONS);
List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
- if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
- throw new MetaException("Query state attached to Session state must be not null. " +
- "Partition transform metadata cannot be saved.");
- }
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec);
hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
hmsTable.setPartitionKeysIsSet(false);
}
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc.q
new file mode 100644
index 00000000000..e3eefc6881c
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc.q
@@ -0,0 +1,21 @@
+-- MV data is stored by partitioned iceberg testing the existing Hive syntax (also used by native mv) to specify partition cols.
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+-- SORT_QUERY_RESULTS
+
+drop materialized view if exists mat1;
+drop table if exists tbl_ice;
+
+create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
+
+create materialized view mat1 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52;
+
+describe formatted mat1;
+
+select * from mat1;
+
+create materialized view mat2 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52;
+
+describe formatted mat2;
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc2.q b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc2.q
new file mode 100644
index 00000000000..191a404b068
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/mv_iceberg_partitioned_orc2.q
@@ -0,0 +1,21 @@
+-- MV data is stored by partitioned iceberg with partition spec
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+-- SORT_QUERY_RESULTS
+
+drop materialized view if exists mat1;
+drop table if exists tbl_ice;
+
+create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54);
+
+create materialized view mat1 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52;
+
+describe formatted mat1;
+
+select * from mat1;
+
+create materialized view mat2 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52;
+
+describe formatted mat2;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out
new file mode 100644
index 00000000000..fc85cee962d
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc.q.out
@@ -0,0 +1,173 @@
+PREHOOK: query: drop materialized view if exists mat1
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: query: drop materialized view if exists mat1
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create materialized view mat1 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+PREHOOK: Output: default@mat1
+POSTHOOK: query: create materialized view mat1 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: mat1.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
+PREHOOK: query: describe formatted mat1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat1
+POSTHOOK: query: describe formatted mat1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat1
+# col_name data_type comment
+c int
+b string
+
+# Partition Transform Information
+# col_name transform_type
+b IDENTITY
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 1
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 2
+ numRows 2
+ previous_metadata_location hdfs://### HDFS PATH ###
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 1282
+#### A masked pattern was here ####
+ uuid #Masked#
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+
+# Materialized View Information
+Original Query: select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+Expanded Query: SELECT `c`, `b` FROM (select `tbl_ice`.`b`, `tbl_ice`.`c` from `default`.`tbl_ice` where `tbl_ice`.`c` > 52) `mat1`
+Rewrite Enabled: Yes
+Outdated for Rewriting: Unknown
+
+# Materialized View Source table information
+Table name I/U/D since last rebuild
+hive.default.tbl_ice 0/0/0
+PREHOOK: query: select * from mat1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from mat1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+53 four
+54 five
+PREHOOK: query: create materialized view mat2 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat2
+PREHOOK: Output: default@mat2
+POSTHOOK: query: create materialized view mat2 partitioned on (b) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat2
+POSTHOOK: Output: default@mat2
+POSTHOOK: Lineage: mat2.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: mat2.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
+PREHOOK: query: describe formatted mat2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat2
+POSTHOOK: query: describe formatted mat2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat2
+# col_name data_type comment
+c int
+b string
+
+# Partition Transform Information
+# col_name transform_type
+b IDENTITY
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 2
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 2
+ numRows 2
+ previous_metadata_location hdfs://### HDFS PATH ###
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 1282
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+
+# Materialized View Information
+Original Query: select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+Expanded Query: SELECT `c`, `b` FROM (select `tbl_ice`.`b`, `tbl_ice`.`c` from `default`.`tbl_ice` where `tbl_ice`.`c` > 52) `mat2`
+Rewrite Enabled: Yes
+Outdated for Rewriting: Unknown
+
+# Materialized View Source table information
+Table name I/U/D since last rebuild
+hive.default.tbl_ice 0/0/0
diff --git a/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out
new file mode 100644
index 00000000000..9632d48ca39
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/mv_iceberg_partitioned_orc2.q.out
@@ -0,0 +1,175 @@
+PREHOOK: query: drop materialized view if exists mat1
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: query: drop materialized view if exists mat1
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create table tbl_ice(a int, b string, c int) stored by iceberg stored as orc tblproperties ('format-version'='1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: create materialized view mat1 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: create materialized view mat1 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='1') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: Lineage: mat1.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: mat1.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
+PREHOOK: query: describe formatted mat1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat1
+POSTHOOK: query: describe formatted mat1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat1
+# col_name data_type comment
+b string
+c int
+
+# Partition Transform Information
+# col_name transform_type
+b BUCKET[16]
+c TRUNCATE[3]
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 1
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 2
+ numRows 2
+ previous_metadata_location hdfs://### HDFS PATH ###
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 1282
+#### A masked pattern was here ####
+ uuid #Masked#
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+
+# Materialized View Information
+Original Query: select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+Expanded Query: select `tbl_ice`.`b`, `tbl_ice`.`c` from `default`.`tbl_ice` where `tbl_ice`.`c` > 52
+Rewrite Enabled: Yes
+Outdated for Rewriting: Unknown
+
+# Materialized View Source table information
+Table name I/U/D since last rebuild
+hive.default.tbl_ice 0/0/0
+PREHOOK: query: select * from mat1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from mat1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+five 54
+four 53
+PREHOOK: query: create materialized view mat2 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: create materialized view mat2 partitioned on spec (bucket(16, b), truncate(3, c)) stored by iceberg stored as orc tblproperties ('format-version'='2') as
+select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: Lineage: mat2.b SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:b, type:string, comment:null), ]
+POSTHOOK: Lineage: mat2.c SIMPLE [(tbl_ice)tbl_ice.FieldSchema(name:c, type:int, comment:null), ]
+PREHOOK: query: describe formatted mat2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@mat2
+POSTHOOK: query: describe formatted mat2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@mat2
+# col_name data_type comment
+b string
+c int
+
+# Partition Transform Information
+# col_name transform_type
+b BUCKET[16]
+c TRUNCATE[3]
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MATERIALIZED_VIEW
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"c\":\"true\"}}
+ bucketing_version 2
+ engine.hive.enabled true
+ format-version 2
+ iceberg.orc.files.only false
+ metadata_location hdfs://### HDFS PATH ###
+ numFiles 2
+ numRows 2
+ previous_metadata_location hdfs://### HDFS PATH ###
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize 1282
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+
+# Materialized View Information
+Original Query: select tbl_ice.b, tbl_ice.c from tbl_ice where tbl_ice.c > 52
+Expanded Query: select `tbl_ice`.`b`, `tbl_ice`.`c` from `default`.`tbl_ice` where `tbl_ice`.`c` > 52
+Rewrite Enabled: Yes
+Outdated for Rewriting: Unknown
+
+# Materialized View Source table information
+Table name I/U/D since last rebuild
+hive.default.tbl_ice 0/0/0
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 93755f7caf4..12816c77665 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -1685,6 +1685,8 @@ viewPartition
@after { popMsg(state); }
: KW_PARTITIONED KW_ON LPAREN columnNameList RPAREN
-> ^(TOK_VIEWPARTCOLS columnNameList)
+ | KW_PARTITIONED KW_ON KW_SPEC LPAREN (spec = partitionTransformSpec) RPAREN
+ -> ^(TOK_TABLEPARTCOLSBYSPEC $spec)
;
viewOrganization
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java
index 00f31c5fbeb..0b9830a7b50 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/DDLUtils.java
@@ -18,7 +18,10 @@
package org.apache.hadoop.hive.ql.ddl;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import org.apache.hadoop.hive.common.TableName;
@@ -26,16 +29,22 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.hooks.Entity.Type;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.PartitionTransform;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionStateUtil;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
@@ -186,4 +195,26 @@ public final class DDLUtils {
table.setTemporary(isTemporary);
outputs.add(new WriteEntity(table, WriteEntity.WriteType.DDL_NO_LOCK));
}
+
+ public static void setColumnsAndStorePartitionTransformSpecOfTable(
+ List<FieldSchema> columns, List<FieldSchema> partitionColumns,
+ HiveConf conf, Table tbl) {
+ Optional<List<FieldSchema>> cols = Optional.ofNullable(columns);
+ Optional<List<FieldSchema>> partCols = Optional.ofNullable(partitionColumns);
+ HiveStorageHandler storageHandler = tbl.getStorageHandler();
+
+ if (storageHandler != null && storageHandler.alwaysUnpartitioned()) {
+ tbl.getSd().setCols(new ArrayList<>());
+ cols.ifPresent(c -> tbl.getSd().getCols().addAll(c));
+ if (partCols.isPresent() && !partCols.get().isEmpty()) {
+ // Add the partition columns to the normal columns and save the transform to the session state
+ tbl.getSd().getCols().addAll(partCols.get());
+ List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(partCols.get());
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec);
+ }
+ } else {
+ cols.ifPresent(tbl::setFields);
+ partCols.ifPresent(tbl::setPartCols);
+ }
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index 5703cc3a104..a228cca5045 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
-import java.util.Optional;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
@@ -55,8 +54,6 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PartitionTransform;
-import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.parse.ReplicationSpec;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.Explain;
@@ -64,7 +61,6 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ValidationUtility;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
-import org.apache.hadoop.hive.ql.session.SessionStateUtil;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -72,6 +68,8 @@ import org.apache.hadoop.mapred.OutputFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.hive.ql.ddl.DDLUtils.setColumnsAndStorePartitionTransformSpecOfTable;
+
/**
* DDL task description for CREATE TABLE commands.
*/
@@ -820,25 +818,7 @@ public class CreateTableDesc implements DDLDesc, Serializable {
}
}
- Optional<List<FieldSchema>> cols = Optional.ofNullable(getCols());
- Optional<List<FieldSchema>> partCols = Optional.ofNullable(getPartCols());
-
- if (storageHandler != null && storageHandler.alwaysUnpartitioned()) {
- tbl.getSd().setCols(new ArrayList<>());
- cols.ifPresent(c -> tbl.getSd().getCols().addAll(c));
- if (partCols.isPresent() && !partCols.get().isEmpty()) {
- // Add the partition columns to the normal columns and save the transform to the session state
- tbl.getSd().getCols().addAll(partCols.get());
- List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(partCols.get());
- if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
- throw new HiveException("Query state attached to Session state must be not null. " +
- "Partition transform metadata cannot be saved.");
- }
- }
- } else {
- cols.ifPresent(c -> tbl.setFields(c));
- partCols.ifPresent(c -> tbl.setPartCols(c));
- }
+ setColumnsAndStorePartitionTransformSpecOfTable(getCols(), getPartCols(), conf, tbl);
if (getBucketCols() != null) {
tbl.setBucketCols(getBucketCols());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/set/AlterTableSetPartitionSpecAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/set/AlterTableSetPartitionSpecAnalyzer.java
index 4764eedf561..92c0b169052 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/set/AlterTableSetPartitionSpecAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/set/AlterTableSetPartitionSpecAnalyzer.java
@@ -56,11 +56,8 @@ public class AlterTableSetPartitionSpecAnalyzer extends AbstractAlterTableAnalyz
inputs.add(new ReadEntity(table));
List<TransformSpec> partitionTransformSpec =
PartitionTransform.getPartitionTransformSpec(command);
- if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
- partitionTransformSpec)) {
- throw new SemanticException("Query state attached to Session state must be not null. " +
- "Partition transform metadata cannot be saved.");
- }
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
+ partitionTransformSpec);
AlterTableSetPartitionSpecDesc desc = new AlterTableSetPartitionSpecDesc(tableName, partitionSpec);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateMaterializedViewDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateMaterializedViewDesc.java
index 0d9ebff5274..631a8868f10 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateMaterializedViewDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/view/create/CreateMaterializedViewDesc.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.hive.ql.ddl.DDLUtils.setColumnsAndStorePartitionTransformSpecOfTable;
+
/**
* DDL task description for CREATE VIEW commands.
*/
@@ -312,7 +314,7 @@ public class CreateMaterializedViewDesc implements DDLDesc, Serializable {
tbl.setTableType(TableType.MATERIALIZED_VIEW);
tbl.setSerializationLib(null);
tbl.clearSerDeInfo();
- tbl.setFields(getSchema());
+
if (getComment() != null) {
tbl.setProperty("comment", getComment());
}
@@ -321,10 +323,6 @@ public class CreateMaterializedViewDesc implements DDLDesc, Serializable {
tbl.getParameters().putAll(tblProps);
}
- if (!CollectionUtils.isEmpty(partCols)) {
- tbl.setPartCols(partCols);
- }
-
if (!CollectionUtils.isEmpty(sortColNames)) {
tbl.setProperty(Constants.MATERIALIZED_VIEW_SORT_COLUMNS,
Utilities.encodeColumnNames(sortColNames));
@@ -353,6 +351,8 @@ public class CreateMaterializedViewDesc implements DDLDesc, Serializable {
}
HiveStorageHandler storageHandler = tbl.getStorageHandler();
+ setColumnsAndStorePartitionTransformSpecOfTable(getSchema(), getPartCols(), conf, tbl);
+
/*
* If the user didn't specify a SerDe, we use the default.
*/
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 41a947dc985..f5750b646c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -13720,15 +13720,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
break;
case HiveParser.TOK_TABLEPARTCOLSBYSPEC:
- List<TransformSpec> partitionTransformSpec =
- PartitionTransform.getPartitionTransformSpec(child);
-
- if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
- partitionTransformSpec)) {
- throw new SemanticException("Query state attached to Session state must be not null. " +
- "Partition transform metadata cannot be saved.");
- }
-
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
+ PartitionTransform.getPartitionTransformSpec(child));
partitionTransformSpecExists = true;
break;
case HiveParser.TOK_TABLEPARTCOLNAMES:
@@ -13783,30 +13776,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
}
- HiveStorageHandler handler;
- try {
- handler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler());
- } catch (HiveException e) {
- throw new SemanticException("Failed to load storage handler: " + e.getMessage());
- }
-
- if (handler != null) {
- if (partitionTransformSpecExists && !handler.supportsPartitionTransform()) {
- throw new SemanticException("Partition transform is not supported for " + handler.getClass().getName());
- }
-
- String fileFormatPropertyKey = handler.getFileFormatPropertyKey();
- if (fileFormatPropertyKey != null) {
- if (tblProps != null && tblProps.containsKey(fileFormatPropertyKey) && storageFormat.getSerdeProps() != null &&
- storageFormat.getSerdeProps().containsKey(fileFormatPropertyKey)) {
- String fileFormat = tblProps.get(fileFormatPropertyKey);
- throw new SemanticException(
- "Provide only one of the following: STORED BY " + fileFormat + " or WITH SERDEPROPERTIES('" +
- fileFormatPropertyKey + "'='" + fileFormat + "') or" + " TBLPROPERTIES('" + fileFormatPropertyKey
- + "'='" + fileFormat + "')");
- }
- }
- }
+ validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists);
if (command_type == CREATE_TABLE || command_type == CTLT || command_type == CTT || command_type == CTLF) {
queryState.setCommandType(HiveOperation.CREATETABLE);
@@ -13926,10 +13896,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
throw new SemanticException(e);
}
}
- if (!SessionStateUtil.addResource(conf, META_TABLE_LOCATION, tblLocation)) {
- throw new SemanticException(
- "Query state attached to Session state must be not null. Table location cannot be saved.");
- }
+ SessionStateUtil.addResourceOrThrow(conf, META_TABLE_LOCATION, tblLocation);
break;
case CTT: // CREATE TRANSACTIONAL TABLE
if (isExt && !isDefaultTableTypeChanged) {
@@ -14078,6 +14045,35 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
return null;
}
+ private void validateStorageFormat(
+ StorageFormat storageFormat, Map<String, String> tblProps, boolean partitionTransformSpecExists)
+ throws SemanticException {
+ HiveStorageHandler handler;
+ try {
+ handler = HiveUtils.getStorageHandler(conf, storageFormat.getStorageHandler());
+ } catch (HiveException e) {
+ throw new SemanticException("Failed to load storage handler: " + e.getMessage());
+ }
+
+ if (handler != null) {
+ if (partitionTransformSpecExists && !handler.supportsPartitionTransform()) {
+ throw new SemanticException("Partition transform is not supported for " + handler.getClass().getName());
+ }
+
+ String fileFormatPropertyKey = handler.getFileFormatPropertyKey();
+ if (fileFormatPropertyKey != null) {
+ if (tblProps != null && tblProps.containsKey(fileFormatPropertyKey) && storageFormat.getSerdeProps() != null &&
+ storageFormat.getSerdeProps().containsKey(fileFormatPropertyKey)) {
+ String fileFormat = tblProps.get(fileFormatPropertyKey);
+ throw new SemanticException(
+ "Provide only one of the following: STORED BY " + fileFormat + " or WITH SERDEPROPERTIES('" +
+ fileFormatPropertyKey + "'='" + fileFormat + "') or" + " TBLPROPERTIES('" + fileFormatPropertyKey
+ + "'='" + fileFormat + "')");
+ }
+ }
+ }
+ }
+
/** Adds entities for create table/create view. */
private void addDbAndTabToOutputs(String[] qualifiedTabName, TableType type,
boolean isTemporary, Map<String, String> tblProps, StorageFormat storageFormat) throws SemanticException {
@@ -14126,6 +14122,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
String location = null;
RowFormatParams rowFormatParams = new RowFormatParams();
StorageFormat storageFormat = new StorageFormat(conf);
+ boolean partitionTransformSpecExists = false;
LOG.info("Creating view " + dbDotTable + " position="
+ ast.getCharPositionInLine());
@@ -14190,11 +14187,18 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
storageFormat.getSerdeProps());
}
break;
+ case HiveParser.TOK_TABLEPARTCOLSBYSPEC:
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC,
+ PartitionTransform.getPartitionTransformSpec(child));
+ partitionTransformSpecExists = true;
+ break;
default:
assert false;
}
}
+ validateStorageFormat(storageFormat, tblProps, partitionTransformSpecExists);
+
storageFormat.fillDefaultStorageFormat(false, true);
if (!ifNotExists) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
index c400ace1224..b7791fe3625 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionStateUtil.java
@@ -72,6 +72,12 @@ public class SessionStateUtil {
}
}
+ public static void addResourceOrThrow(Configuration conf, String key, Object resource) {
+ getQueryState(conf)
+ .orElseThrow(() -> new IllegalStateException("Query state is missing; failed to add resource for " + key))
+ .addResource(key, resource);
+ }
+
/**
* @param conf Configuration object used for getting the query state, should contain the query id
* @param tableName Name of the table for which the commit info should be retrieved