You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2023/01/30 17:41:32 UTC
[hive] branch master updated: HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new e5c4c2bf5e0 HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
e5c4c2bf5e0 is described below
commit e5c4c2bf5e09674c5a1ca3d8f4d70616f0cdc0fe
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Mon Jan 30 23:11:18 2023 +0530
HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 25 ++++
.../src/test/queries/positive/ctlt_iceberg.q | 23 ++++
.../src/test/results/positive/ctlt_iceberg.q.out | 136 +++++++++++++++++++++
.../create/like/CreateTableLikeOperation.java | 19 ++-
.../hive/ql/metadata/HiveStorageHandler.java | 10 ++
.../org/apache/hadoop/hive/ql/metadata/Table.java | 12 ++
6 files changed, 219 insertions(+), 6 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index e01a2587cfe..fc54f826e63 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
+import org.apache.hadoop.hive.ql.parse.PartitionTransform;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
@@ -1127,5 +1128,29 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
tbl.getParameters().put("TRANSLATED_TO_EXTERNAL", "TRUE");
desc.setIsExternal(true);
}
+
+ // If the source is an Iceberg table, set the schema and the partition spec
+ if ("ICEBERG".equalsIgnoreCase(origParams.get("table_type"))) {
+ tbl.getParameters()
+ .put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA));
+ tbl.getParameters()
+ .put(InputFormatConfig.PARTITION_SPEC, origParams.get(InputFormatConfig.PARTITION_SPEC));
+ } else {
+ // if the source is a partitioned non-iceberg table, set the partition transform spec and mark the table as
+ // unpartitioned
+ List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(tbl.getPartitionKeys());
+ SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec);
+ tbl.getSd().getCols().addAll(tbl.getPartitionKeys());
+ tbl.getTTable().setPartitionKeysIsSet(false);
+ }
+ }
+
+ @Override
+ public Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) {
+ Table origTable = IcebergTableUtil.getTable(conf, table.getTTable());
+ Map<String, String> props = Maps.newHashMap();
+ props.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(origTable.schema()));
+ props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(origTable.spec()));
+ return props;
}
}
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
index b235ca9c2df..8d77812a729 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
@@ -22,3 +22,26 @@ select a from target order by a;
delete from target where a=2;
select a from target order by a;
+
+--create a partitioned iceberg table
+create table emp_iceberg (id int) partitioned by (company string) stored by iceberg;
+
+show create table emp_iceberg;
+
+-- CTLT with the source as the partitioned iceberg table
+create table emp_like1 like emp_iceberg stored by iceberg;
+
+-- Partition column should be there
+show create table emp_like1;
+
+--create a partitioned non iceberg table
+create table emp (id int) partitioned by (company string);
+
+show create table emp;
+
+-- CTLT with the source as the partitioned non-iceberg table
+create table emp_like2 like emp stored by iceberg;
+
+-- Partition column should be there
+show create table emp_like2;
+
diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
index 157c018222c..389f59df599 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
@@ -95,3 +95,139 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@target
POSTHOOK: Output: hdfs://### HDFS PATH ###
1
+PREHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_iceberg
+POSTHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_iceberg
+PREHOOK: query: show create table emp_iceberg
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_iceberg
+POSTHOOK: query: show create table emp_iceberg
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_iceberg
+CREATE TABLE `emp_iceberg`(
+ `id` int,
+ `company` string)
+PARTITIONED BY SPEC (
+company)
+ROW FORMAT SERDE
+ 'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
+STORED BY
+ 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
+
+LOCATION
+ 'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+ 'bucketing_version'='2',
+ 'engine.hive.enabled'='true',
+ 'iceberg.orc.files.only'='false',
+ 'metadata_location'='hdfs://### HDFS PATH ###',
+ 'serialization.format'='1',
+ 'table_type'='ICEBERG',
+#### A masked pattern was here ####
+ 'uuid'='#Masked#')
+PREHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_like1
+POSTHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_like1
+PREHOOK: query: show create table emp_like1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_like1
+POSTHOOK: query: show create table emp_like1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_like1
+CREATE EXTERNAL TABLE `emp_like1`(
+ `id` int,
+ `company` string)
+PARTITIONED BY SPEC (
+company)
+ROW FORMAT SERDE
+ 'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
+STORED BY
+ 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
+
+LOCATION
+ 'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+ 'TRANSLATED_TO_EXTERNAL'='TRUE',
+ 'bucketing_version'='2',
+ 'created_with_ctlt'='true',
+ 'engine.hive.enabled'='true',
+ 'iceberg.orc.files.only'='false',
+ 'metadata_location'='hdfs://### HDFS PATH ###',
+ 'table_type'='ICEBERG',
+#### A masked pattern was here ####
+ 'uuid'='#Masked#')
+PREHOOK: query: create table emp (id int) partitioned by (company string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp
+POSTHOOK: query: create table emp (id int) partitioned by (company string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp
+PREHOOK: query: show create table emp
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp
+POSTHOOK: query: show create table emp
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp
+CREATE TABLE `emp`(
+ `id` int)
+PARTITIONED BY (
+ `company` string)
+ROW FORMAT SERDE
+ 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+ 'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+ 'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+ 'bucketing_version'='2',
+#### A masked pattern was here ####
+PREHOOK: query: create table emp_like2 like emp stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_like2
+POSTHOOK: query: create table emp_like2 like emp stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_like2
+PREHOOK: query: show create table emp_like2
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_like2
+POSTHOOK: query: show create table emp_like2
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_like2
+CREATE EXTERNAL TABLE `emp_like2`(
+ `id` int,
+ `company` string)
+PARTITIONED BY SPEC (
+company)
+ROW FORMAT SERDE
+ 'org.apache.iceberg.mr.hive.HiveIcebergSerDe'
+STORED BY
+ 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
+
+LOCATION
+ 'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+ 'TRANSLATED_TO_EXTERNAL'='TRUE',
+ 'bucketing_version'='2',
+ 'created_with_ctlt'='true',
+ 'engine.hive.enabled'='true',
+ 'iceberg.orc.files.only'='false',
+ 'metadata_location'='hdfs://### HDFS PATH ###',
+ 'table_type'='ICEBERG',
+#### A masked pattern was here ####
+ 'uuid'='#Masked#')
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
index cde116b83f8..6534d70e326 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
@@ -55,7 +55,13 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
if (oldTable.getTableType() == TableType.VIRTUAL_VIEW || oldTable.getTableType() == TableType.MATERIALIZED_VIEW) {
tbl = createViewLikeTable(oldTable);
} else {
- tbl = createTableLikeTable(oldTable);
+ Map<String, String> originalProperties = new HashMap<>();
+ // Get the storage handler without caching, since the storage handler can get changed when copying the
+ // properties of the target table, and the original handler is needed to fetch the source table's native properties
+ if (oldTable.getStorageHandlerWithoutCaching() != null) {
+ originalProperties = new HashMap<>(oldTable.getStorageHandlerWithoutCaching().getNativeProperties(oldTable));
+ }
+ tbl = createTableLikeTable(oldTable, originalProperties);
}
// If location is specified - ensure that it is a full qualified name
@@ -104,7 +110,8 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
return table;
}
- private Table createTableLikeTable(Table table) throws SemanticException, HiveException {
+ private Table createTableLikeTable(Table table, Map<String, String> originalProperties)
+ throws SemanticException, HiveException {
String[] names = Utilities.getDbTableName(desc.getTableName());
table.setDbName(names[0]);
table.setTableName(names[1]);
@@ -112,7 +119,7 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
setUserSpecifiedLocation(table);
- setTableParameters(table);
+ setTableParameters(table, originalProperties);
if (desc.isUserStorageFormat() || (table.getInputFormatClass() == null) || (table.getOutputFormatClass() == null)) {
setStorage(table);
@@ -139,18 +146,18 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
}
}
- private void setTableParameters(Table tbl) throws HiveException {
+ private void setTableParameters(Table tbl, Map<String, String> originalProperties) throws HiveException {
// With Hive-25813, we'll not copy over table properties from the source.
// CTLT should copy column schema but not table properties. It is also consistent
// with other query engines like mysql, redshift.
- Map<String, String> origParams = new HashMap<>(tbl.getParameters());
+ originalProperties.putAll(tbl.getParameters());
tbl.getParameters().clear();
if (desc.getTblProps() != null) {
tbl.setParameters(desc.getTblProps());
}
HiveStorageHandler storageHandler = tbl.getStorageHandler();
if (storageHandler != null) {
- storageHandler.setTableParametersForCTLT(tbl, desc, origParams);
+ storageHandler.setTableParametersForCTLT(tbl, desc, originalProperties);
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 15a3c823118..7f765270c03 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
@@ -281,6 +282,15 @@ public interface HiveStorageHandler extends Configurable {
Map<String, String> origParams) {
}
+ /**
+ * Extract the native properties of the table which aren't stored in the HMS
+ * @param table the table
+ * @return map with native table level properties
+ */
+ default Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) {
+ return new HashMap<>();
+ }
+
enum AcidSupportType {
NONE,
WITH_TRANSACTIONS,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 493cf627f4c..57e02cf3699 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -368,6 +368,18 @@ public class Table implements Serializable {
return storageHandler;
}
+ public HiveStorageHandler getStorageHandlerWithoutCaching() {
+ if (storageHandler != null || !isNonNative()) {
+ return storageHandler;
+ }
+ try {
+ return HiveUtils.getStorageHandler(SessionState.getSessionConf(),
+ getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
public void setStorageHandler(HiveStorageHandler sh){
storageHandler = sh;
}