Posted to commits@hive.apache.org by ay...@apache.org on 2023/01/30 17:41:32 UTC

[hive] branch master updated: HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new e5c4c2bf5e0 HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
e5c4c2bf5e0 is described below

commit e5c4c2bf5e09674c5a1ca3d8f4d70616f0cdc0fe
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Mon Jan 30 23:11:18 2023 +0530

    HIVE-26974: Iceberg: CTL from iceberg table should copy partition fields correctly. (#3980). (Ayush Saxena, reviewed by Ramesh Kumar Thangarajan)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  25 ++++
 .../src/test/queries/positive/ctlt_iceberg.q       |  23 ++++
 .../src/test/results/positive/ctlt_iceberg.q.out   | 136 +++++++++++++++++++++
 .../create/like/CreateTableLikeOperation.java      |  19 ++-
 .../hive/ql/metadata/HiveStorageHandler.java       |  10 ++
 .../org/apache/hadoop/hive/ql/metadata/Table.java  |  12 ++
 6 files changed, 219 insertions(+), 6 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index e01a2587cfe..fc54f826e63 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -63,6 +63,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
+import org.apache.hadoop.hive.ql.parse.PartitionTransform;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
@@ -1127,5 +1128,29 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
       tbl.getParameters().put("TRANSLATED_TO_EXTERNAL", "TRUE");
       desc.setIsExternal(true);
     }
+
+    // If the source is an Iceberg table, set the schema and the partition spec
+    if ("ICEBERG".equalsIgnoreCase(origParams.get("table_type"))) {
+      tbl.getParameters()
+          .put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA));
+      tbl.getParameters()
+          .put(InputFormatConfig.PARTITION_SPEC, origParams.get(InputFormatConfig.PARTITION_SPEC));
+    } else {
+      // If the source is a partitioned non-Iceberg table, set the partition transform spec
+      // and mark the table itself as unpartitioned
+      List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(tbl.getPartitionKeys());
+      SessionStateUtil.addResourceOrThrow(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec);
+      tbl.getSd().getCols().addAll(tbl.getPartitionKeys());
+      tbl.getTTable().setPartitionKeysIsSet(false);
+    }
+  }
+
+  @Override
+  public Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) {
+    Table origTable = IcebergTableUtil.getTable(conf, table.getTTable());
+    Map<String, String> props = Maps.newHashMap();
+    props.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(origTable.schema()));
+    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(origTable.spec()));
+    return props;
   }
 }
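
For reference, a minimal standalone sketch (not part of this patch; the class name
is illustrative) of the JSON round-trip the new getNativeProperties implementation
relies on, using Iceberg's SchemaParser and PartitionSpecParser:

    import org.apache.iceberg.PartitionSpec;
    import org.apache.iceberg.PartitionSpecParser;
    import org.apache.iceberg.Schema;
    import org.apache.iceberg.SchemaParser;
    import org.apache.iceberg.types.Types;

    public class SpecRoundTrip {
      public static void main(String[] args) {
        // Schema matching the emp_iceberg test table below: (id int) partitioned by (company string)
        Schema schema = new Schema(
            Types.NestedField.required(1, "id", Types.IntegerType.get()),
            Types.NestedField.required(2, "company", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).identity("company").build();

        // These are the JSON strings the handler stores under InputFormatConfig.TABLE_SCHEMA
        // and InputFormatConfig.PARTITION_SPEC in the target table's parameters.
        String schemaJson = SchemaParser.toJson(schema);
        String specJson = PartitionSpecParser.toJson(spec);

        // Round-trip: the CTLT target can rebuild the identical schema and spec.
        Schema copiedSchema = SchemaParser.fromJson(schemaJson);
        PartitionSpec copiedSpec = PartitionSpecParser.fromJson(copiedSchema, specJson);
        System.out.println(copiedSpec.equals(spec)); // true: the partition fields survive the copy
      }
    }
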
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
index b235ca9c2df..8d77812a729 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/ctlt_iceberg.q
@@ -22,3 +22,26 @@ select a from target order by a;
 delete from target where a=2;
 
 select a from target order by a;
+
+-- create a partitioned iceberg table
+create table emp_iceberg (id int) partitioned by (company string) stored by iceberg;
+
+show create table emp_iceberg;
+
+-- CTLT with the source as the partitioned iceberg table
+create table emp_like1 like emp_iceberg stored by iceberg;
+
+-- Partition column should be present
+show create table emp_like1;
+
+-- create a partitioned non-iceberg table
+create table emp (id int) partitioned by (company string);
+
+show create table emp;
+
+-- CTLT with the source as the partitioned non-iceberg table
+create table emp_like2 like emp stored by iceberg;
+
+-- Partition column should be present
+show create table emp_like2;
+
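
The second scenario above (emp -> emp_like2) exercises the non-iceberg source path,
where the handler converts the source's partition keys into a partition transform
spec. A minimal sketch of that conversion, reusing the same call the handler makes
(the class name is illustrative):

    import java.util.Collections;
    import java.util.List;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.ql.parse.PartitionTransform;
    import org.apache.hadoop.hive.ql.parse.TransformSpec;

    public class TransformSpecSketch {
      public static void main(String[] args) {
        // Mirrors the single partition column of the emp test table.
        List<FieldSchema> partitionKeys =
            Collections.singletonList(new FieldSchema("company", "string", null));
        // Each Hive partition column becomes an identity transform in the spec.
        List<TransformSpec> spec = PartitionTransform.getPartitionTransformSpec(partitionKeys);
        System.out.println(spec.size()); // 1: the identity transform for "company"
      }
    }
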
diff --git a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
index 157c018222c..389f59df599 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/ctlt_iceberg.q.out
@@ -95,3 +95,139 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@target
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 1
+PREHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_iceberg
+POSTHOOK: query: create table emp_iceberg (id int) partitioned by (company string) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_iceberg
+PREHOOK: query: show create table emp_iceberg
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_iceberg
+POSTHOOK: query: show create table emp_iceberg
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_iceberg
+CREATE TABLE `emp_iceberg`(
+  `id` int, 
+  `company` string)
+PARTITIONED BY SPEC ( 
+company)
+ROW FORMAT SERDE 
+  'org.apache.iceberg.mr.hive.HiveIcebergSerDe' 
+STORED BY 
+  'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' 
+
+LOCATION
+  'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+  'engine.hive.enabled'='true', 
+  'iceberg.orc.files.only'='false', 
+  'metadata_location'='hdfs://### HDFS PATH ###', 
+  'serialization.format'='1', 
+  'table_type'='ICEBERG', 
+#### A masked pattern was here ####
+  'uuid'='#Masked#')
+PREHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_like1
+POSTHOOK: query: create table emp_like1 like emp_iceberg stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_like1
+PREHOOK: query: show create table emp_like1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_like1
+POSTHOOK: query: show create table emp_like1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_like1
+CREATE EXTERNAL TABLE `emp_like1`(
+  `id` int, 
+  `company` string)
+PARTITIONED BY SPEC ( 
+company)
+ROW FORMAT SERDE 
+  'org.apache.iceberg.mr.hive.HiveIcebergSerDe' 
+STORED BY 
+  'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' 
+
+LOCATION
+  'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+  'TRANSLATED_TO_EXTERNAL'='TRUE', 
+  'bucketing_version'='2', 
+  'created_with_ctlt'='true', 
+  'engine.hive.enabled'='true', 
+  'iceberg.orc.files.only'='false', 
+  'metadata_location'='hdfs://### HDFS PATH ###', 
+  'table_type'='ICEBERG', 
+#### A masked pattern was here ####
+  'uuid'='#Masked#')
+PREHOOK: query: create table emp (id int) partitioned by (company string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp
+POSTHOOK: query: create table emp (id int) partitioned by (company string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp
+PREHOOK: query: show create table emp
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp
+POSTHOOK: query: show create table emp
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp
+CREATE TABLE `emp`(
+  `id` int)
+PARTITIONED BY ( 
+  `company` string)
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+  'bucketing_version'='2', 
+#### A masked pattern was here ####
+PREHOOK: query: create table emp_like2 like emp stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp_like2
+POSTHOOK: query: create table emp_like2 like emp stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp_like2
+PREHOOK: query: show create table emp_like2
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: default@emp_like2
+POSTHOOK: query: show create table emp_like2
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: default@emp_like2
+CREATE EXTERNAL TABLE `emp_like2`(
+  `id` int, 
+  `company` string)
+PARTITIONED BY SPEC ( 
+company)
+ROW FORMAT SERDE 
+  'org.apache.iceberg.mr.hive.HiveIcebergSerDe' 
+STORED BY 
+  'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' 
+
+LOCATION
+  'hdfs://### HDFS PATH ###'
+TBLPROPERTIES (
+  'TRANSLATED_TO_EXTERNAL'='TRUE', 
+  'bucketing_version'='2', 
+  'created_with_ctlt'='true', 
+  'engine.hive.enabled'='true', 
+  'iceberg.orc.files.only'='false', 
+  'metadata_location'='hdfs://### HDFS PATH ###', 
+  'table_type'='ICEBERG', 
+#### A masked pattern was here ####
+  'uuid'='#Masked#')
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
index cde116b83f8..6534d70e326 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/like/CreateTableLikeOperation.java
@@ -55,7 +55,13 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
     if (oldTable.getTableType() == TableType.VIRTUAL_VIEW || oldTable.getTableType() == TableType.MATERIALIZED_VIEW) {
       tbl = createViewLikeTable(oldTable);
     } else {
-      tbl = createTableLikeTable(oldTable);
+      Map<String, String> originalProperties = new HashMap<>();
+      // Get the storage handler without caching, since the cached handler can get changed while the
+      // properties of the target table are copied, so the native properties must be read first
+      if (oldTable.getStorageHandlerWithoutCaching() != null) {
+        originalProperties = new HashMap<>(oldTable.getStorageHandlerWithoutCaching().getNativeProperties(oldTable));
+      }
+      tbl = createTableLikeTable(oldTable, originalProperties);
     }
 
     // If location is specified - ensure that it is a full qualified name
@@ -104,7 +110,8 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
     return table;
   }
 
-  private Table createTableLikeTable(Table table) throws SemanticException, HiveException {
+  private Table createTableLikeTable(Table table, Map<String, String> originalProperties)
+      throws SemanticException, HiveException {
     String[] names = Utilities.getDbTableName(desc.getTableName());
     table.setDbName(names[0]);
     table.setTableName(names[1]);
@@ -112,7 +119,7 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
 
     setUserSpecifiedLocation(table);
 
-    setTableParameters(table);
+    setTableParameters(table, originalProperties);
 
     if (desc.isUserStorageFormat() || (table.getInputFormatClass() == null) || (table.getOutputFormatClass() == null)) {
       setStorage(table);
@@ -139,18 +146,18 @@ public class CreateTableLikeOperation extends DDLOperation<CreateTableLikeDesc>
     }
   }
 
-  private void setTableParameters(Table tbl) throws HiveException {
+  private void setTableParameters(Table tbl, Map<String, String> originalProperties) throws HiveException {
    // With HIVE-25813, we'll not copy over table properties from the source.
    // CTLT should copy the column schema but not the table properties. This is also consistent
    // with other query engines like MySQL and Redshift.
-    Map<String, String> origParams = new HashMap<>(tbl.getParameters());
+    originalProperties.putAll(tbl.getParameters());
     tbl.getParameters().clear();
     if (desc.getTblProps() != null) {
       tbl.setParameters(desc.getTblProps());
     }
     HiveStorageHandler storageHandler = tbl.getStorageHandler();
     if (storageHandler != null) {
-      storageHandler.setTableParametersForCTLT(tbl, desc, origParams);
+      storageHandler.setTableParametersForCTLT(tbl, desc, originalProperties);
     }
   }
 
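Condensed, the parameter handling in setTableParameters now follows a strict order:
gather native properties first (via the non-cached handler), merge in the HMS-level
parameters, clear the target, then let the target's handler pick what it needs. A
self-contained simulation of that ordering with plain maps (property keys shortened
for illustration):

    import java.util.HashMap;
    import java.util.Map;

    public class CtltParamFlowSketch {
      public static void main(String[] args) {
        // Step 1: native (non-HMS) properties, read before anything is cleared.
        Map<String, String> originalProperties = new HashMap<>();
        originalProperties.put("schema", "...schema json...");

        // Step 2: merge the source's HMS parameters, then start the target clean.
        Map<String, String> hmsParams = new HashMap<>();
        hmsParams.put("table_type", "ICEBERG");
        originalProperties.putAll(hmsParams);
        Map<String, String> targetParams = new HashMap<>();

        // Step 3: the target handler copies what it needs (for Iceberg sources,
        // the schema and partition spec JSON), as in setTableParametersForCTLT.
        if ("ICEBERG".equalsIgnoreCase(originalProperties.get("table_type"))) {
          targetParams.put("schema", originalProperties.get("schema"));
        }
        System.out.println(targetParams); // {schema=...schema json...}
      }
    }
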
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 15a3c823118..7f765270c03 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.OutputFormat;
 
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
@@ -281,6 +282,15 @@ public interface HiveStorageHandler extends Configurable {
       Map<String, String> origParams) {
   }
 
+  /**
+   * Extracts the native properties of the table, i.e. those that aren't stored in the HMS.
+   * @param table the table
+   * @return a map of the native table-level properties
+   */
+  default Map<String, String> getNativeProperties(org.apache.hadoop.hive.ql.metadata.Table table) {
+    return new HashMap<>();
+  }
+
   enum AcidSupportType {
     NONE,
     WITH_TRANSACTIONS,
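
To illustrate the new hook, a hypothetical handler (not part of this patch; names
are placeholders) could override the default to surface metadata that lives in its
native layer rather than in the HMS:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
    import org.apache.hadoop.hive.ql.metadata.Table;

    public class ExampleNativeStorageHandler extends DefaultStorageHandler {
      @Override
      public Map<String, String> getNativeProperties(Table table) {
        Map<String, String> props = new HashMap<>();
        // Expose whatever the native layer tracks outside the HMS, so that
        // CreateTableLikeOperation can pass it on to setTableParametersForCTLT.
        props.put("example.native.schema", "{}"); // placeholder key and value
        return props;
      }
    }

CreateTableLikeOperation calls this through getStorageHandlerWithoutCaching before
the target's parameters are cleared, so the values are available when
setTableParametersForCTLT runs.
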
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index 493cf627f4c..57e02cf3699 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -368,6 +368,18 @@ public class Table implements Serializable {
     return storageHandler;
   }
 
+  /**
+   * Resolves the storage handler directly from the table metadata instead of returning the
+   * cached instance, so callers like CTLT can query the source handler before it is replaced.
+   */
+  public HiveStorageHandler getStorageHandlerWithoutCaching() {
+    if (storageHandler != null || !isNonNative()) {
+      return storageHandler;
+    }
+    try {
+      return HiveUtils.getStorageHandler(SessionState.getSessionConf(),
+          getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE));
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
   public void setStorageHandler(HiveStorageHandler sh){
     storageHandler = sh;
   }