You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mb...@apache.org on 2022/01/24 10:22:43 UTC

[hive] branch master updated: HIVE-25890: Fix truncate problem with Iceberg CTAS tables (#2963) (Marton Bod, reviewed by Peter Vary)

This is an automated email from the ASF dual-hosted git repository.

mbod pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d7ac388  HIVE-25890: Fix truncate problem with Iceberg CTAS tables (#2963) (Marton Bod, reviewed by Peter Vary)
d7ac388 is described below

commit d7ac38890311b7568da6531b4a7c58e345cffb2d
Author: Marton Bod <mb...@cloudera.com>
AuthorDate: Mon Jan 24 11:22:32 2022 +0100

    HIVE-25890: Fix truncate problem with Iceberg CTAS tables (#2963) (Marton Bod, reviewed by Peter Vary)
---
 .../apache/iceberg/mr/hive/HiveIcebergMetaHook.java | 18 +++++++++---------
 .../apache/iceberg/mr/hive/HiveIcebergSerDe.java    |  9 +++++++--
 .../apache/iceberg/mr/hive/TestHiveIcebergCTAS.java | 21 +++++++++++++++++++++
 3 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 39b02f4..21b45d7 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -89,6 +89,10 @@ import org.slf4j.LoggerFactory;
 
 public class HiveIcebergMetaHook implements HiveMetaHook {
   private static final Logger LOG = LoggerFactory.getLogger(HiveIcebergMetaHook.class);
+  public static final Map<String, String> COMMON_HMS_PROPERTIES = ImmutableMap.of(
+      BaseMetastoreTableOperations.TABLE_TYPE_PROP, BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase(),
+      InputFormatConfig.EXTERNAL_TABLE_PURGE, "TRUE"
+  );
   private static final Set<String> PARAMETERS_TO_REMOVE = ImmutableSet
       .of(InputFormatConfig.TABLE_SCHEMA, Catalogs.LOCATION, Catalogs.NAME, InputFormatConfig.PARTITION_SPEC);
   private static final Set<String> PROPERTIES_TO_REMOVE = ImmutableSet
@@ -452,24 +456,20 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
   }
 
   private void setCommonHmsTablePropertiesForIceberg(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
-    // Set the table type even for non HiveCatalog based tables
-    hmsTable.getParameters().put(BaseMetastoreTableOperations.TABLE_TYPE_PROP,
-        BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase());
-
-    // Allow purging table data if the table is created now and not set otherwise
-    hmsTable.getParameters().putIfAbsent(InputFormatConfig.EXTERNAL_TABLE_PURGE, "TRUE");
-
     // If the table is not managed by Hive catalog then the location should be set
     if (!Catalogs.hiveCatalog(conf, catalogProperties)) {
       Preconditions.checkArgument(hmsTable.getSd() != null && hmsTable.getSd().getLocation() != null,
           "Table location not set");
     }
 
+    Map<String, String> hmsParams = hmsTable.getParameters();
+    COMMON_HMS_PROPERTIES.forEach(hmsParams::putIfAbsent);
+
     // Remove null values from hms table properties
-    hmsTable.getParameters().entrySet().removeIf(e -> e.getKey() == null || e.getValue() == null);
+    hmsParams.entrySet().removeIf(e -> e.getKey() == null || e.getValue() == null);
 
     // Remove creation related properties
-    PARAMETERS_TO_REMOVE.forEach(hmsTable.getParameters()::remove);
+    PARAMETERS_TO_REMOVE.forEach(hmsParams::remove);
   }
 
   /**
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
index c365a9e..dc799cc 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergSerDe.java
@@ -154,9 +154,7 @@ public class HiveIcebergSerDe extends AbstractSerDe {
   }
 
   private void createTableForCTAS(Configuration configuration, Properties serDeProperties) {
-    serDeProperties.setProperty(TableProperties.ENGINE_HIVE_ENABLED, "true");
     serDeProperties.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(tableSchema));
-
     // build partition spec, if any
     if (!getPartitionColumnNames().isEmpty()) {
       List<FieldSchema> partitionFields = IntStream.range(0, getPartitionColumnNames().size())
@@ -182,6 +180,8 @@ public class HiveIcebergSerDe extends AbstractSerDe {
 
   private Properties getCTASTableCreationProperties(Properties serDeProperties) {
     Properties tblProps = (Properties) serDeProperties.clone();
+
+    // remove the serialization-only related props
     tblProps.remove(serdeConstants.LIST_PARTITION_COLUMNS);
     tblProps.remove(serdeConstants.LIST_PARTITION_COLUMN_TYPES);
     tblProps.remove(serdeConstants.LIST_PARTITION_COLUMN_COMMENTS);
@@ -193,6 +193,11 @@ public class HiveIcebergSerDe extends AbstractSerDe {
     tblProps.remove(serdeConstants.COLUMN_NAME_DELIMITER);
     tblProps.remove(serdeConstants.SERIALIZATION_LIB);
     tblProps.remove(hive_metastoreConstants.TABLE_IS_CTAS);
+
+    // add the commonly-needed table properties
+    HiveIcebergMetaHook.COMMON_HMS_PROPERTIES.forEach(tblProps::putIfAbsent);
+    tblProps.setProperty(TableProperties.ENGINE_HIVE_ENABLED, "true");
+
     return tblProps;
   }
 
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
index 4c3e642..b5a3269 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergCTAS.java
@@ -139,4 +139,25 @@ public class TestHiveIcebergCTAS extends HiveIcebergStorageHandlerWithEngineBase
       Assert.assertThrows(NoSuchTableException.class, () -> testTables.loadTable(target));
     }
   }
+
+  @Test
+  public void testCTASFollowedByTruncate() throws IOException {
+    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
+
+    testTables.createTable(shell, "source", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
+        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
+
+    shell.executeStatement(String.format(
+        "CREATE TABLE target STORED BY ICEBERG STORED AS %s %s AS SELECT * FROM source",
+        fileFormat, testTables.locationForCreateTableSQL(TableIdentifier.of("default", "target"))));
+
+    List<Object[]> objects = shell.executeStatement("SELECT * FROM target ORDER BY customer_id");
+    HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
+        HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, objects), 0);
+
+    shell.executeStatement("TRUNCATE TABLE target");
+
+    objects = shell.executeStatement("SELECT * FROM target");
+    Assert.assertTrue(objects.isEmpty());
+  }
 }