You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2021/08/11 09:42:58 UTC

[hive] branch master updated: HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d22eca1  HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)
d22eca1 is described below

commit d22eca1877bc79b5f3e17972684b0e985469fac7
Author: Adam Szita <40...@users.noreply.github.com>
AuthorDate: Wed Aug 11 11:42:45 2021 +0200

    HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)
---
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 35 ++++++++++++++++++----
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  | 30 +++++++++++++++++++
 .../hive/ql/parse/PartitionTransformSpec.java      |  2 +-
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 6301f93..8d773b9 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -60,6 +60,8 @@ import org.apache.iceberg.Table;
 import org.apache.iceberg.TableMetadata;
 import org.apache.iceberg.TableMetadataParser;
 import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.Transaction;
+import org.apache.iceberg.UpdatePartitionSpec;
 import org.apache.iceberg.UpdateProperties;
 import org.apache.iceberg.UpdateSchema;
 import org.apache.iceberg.catalog.TableIdentifier;
@@ -103,6 +105,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
   private boolean canMigrateHiveTable;
   private PreAlterTableProperties preAlterTableProperties;
   private UpdateSchema updateSchema;
+  private UpdatePartitionSpec updatePartitionSpec;
+  private Transaction transaction;
   private AlterTableType currentAlterTableOp;
 
   public HiveIcebergMetaHook(Configuration conf) {
@@ -293,8 +297,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         case REPLACE_COLUMNS:
         case RENAME_COLUMN:
         case ADDCOLS:
-          if (updateSchema != null) {
-            updateSchema.commit();
+          if (transaction != null) {
+            transaction.commitTransaction();
           }
           break;
         case ADDPROPS:
@@ -497,12 +501,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         HiveSchemaUtil.getSchemaDiff(hmsTable.getSd().getCols(), HiveSchemaUtil.convert(icebergTable.schema()), false)
             .getMissingFromSecond();
     if (!addedCols.isEmpty()) {
-      updateSchema = icebergTable.updateSchema();
+      transaction = icebergTable.newTransaction();
+      updateSchema = transaction.updateSchema();
     }
     for (FieldSchema addedCol : addedCols) {
       updateSchema.addColumn(addedCol.getName(),
           HiveSchemaUtil.convert(TypeInfoUtils.getTypeInfoFromTypeString(addedCol.getType())), addedCol.getComment());
     }
+    updateSchema.commit();
   }
 
   private void handleReplaceColumns(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException {
@@ -531,12 +537,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
           "mismatches between HMS and Iceberg, please consider the UPDATE COLUMNS command.");
     }
 
-    updateSchema = icebergTable.updateSchema();
+    transaction = icebergTable.newTransaction();
+    updateSchema = transaction.updateSchema();
     LOG.info("handleReplaceColumns: Dropping the following columns for Iceberg table {}, cols: {}",
         hmsTable.getTableName(), schemaDifference.getMissingFromFirst());
     for (FieldSchema droppedCol : schemaDifference.getMissingFromFirst()) {
       updateSchema.deleteColumn(droppedCol.getName());
     }
+    updateSchema.commit();
   }
 
   private void handleChangeColumn(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException {
@@ -555,7 +563,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         renameMapping);
 
     if (!schemaDifference.isEmpty() || outOfOrder != null) {
-      updateSchema = icebergTable.updateSchema();
+      transaction = icebergTable.newTransaction();
+      updateSchema = transaction.updateSchema();
     } else {
       // we should get here if the user didn't change anything about the column
       // i.e. no changes to the name, type, comment or order
@@ -597,6 +606,22 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         updateSchema.moveFirst(outOfOrder.first());
       }
     }
+    updateSchema.commit();
+
+    handlePartitionRename(schemaDifference);
+  }
+
+  private void handlePartitionRename(HiveSchemaUtil.SchemaDifference schemaDifference) {
+    // in case a partition column has been renamed, spec needs to be adjusted too
+    if (!schemaDifference.getMissingFromSecond().isEmpty()) {
+      FieldSchema oldField = schemaDifference.getMissingFromFirst().get(0);
+      FieldSchema updatedField = schemaDifference.getMissingFromSecond().get(0);
+      if (icebergTable.spec().fields().stream().anyMatch(pf -> pf.name().equals(oldField.getName()))) {
+        updatePartitionSpec = transaction.updateSpec();
+        updatePartitionSpec.renameField(oldField.getName(), updatedField.getName());
+        updatePartitionSpec.commit();
+      }
+    }
   }
 
   private Type.PrimitiveType getPrimitiveTypeOrThrow(FieldSchema field) throws MetaException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 4d01c5d..be04719 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -40,6 +40,7 @@ import org.apache.iceberg.AssertHelpers;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.PartitionSpecParser;
 import org.apache.iceberg.Schema;
@@ -949,6 +950,35 @@ public class TestHiveIcebergStorageHandlerNoScan {
   }
 
   @Test
+  public void testAlterTableRenamePartitionColumn() throws Exception {
+    TableIdentifier identifier = TableIdentifier.of("default", "customers");
+
+    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC,
+        FileFormat.PARQUET, ImmutableList.of());
+    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (last_name)");
+
+    // Renaming (and reordering) a partition column
+    shell.executeStatement("ALTER TABLE default.customers CHANGE last_name family_name string FIRST");
+    List<PartitionField> partitionFields = testTables.loadTable(identifier).spec().fields();
+    Assert.assertEquals(1, partitionFields.size());
+    Assert.assertEquals("family_name", partitionFields.get(0).name());
+
+    // Addign new columns, assigning them as partition columns then removing 1 partition column
+    shell.executeStatement("ALTER TABLE default.customers ADD COLUMNS (p1 string, p2 string)");
+    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (family_name, p1, p2)");
+
+    shell.executeStatement("ALTER TABLE default.customers CHANGE p1 region string");
+    shell.executeStatement("ALTER TABLE default.customers CHANGE p2 city string");
+
+    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (region, city)");
+
+    List<Object[]> result = shell.executeStatement("DESCRIBE default.customers");
+    Assert.assertArrayEquals(new String[] {"family_name", "VOID", null}, result.get(8));
+    Assert.assertArrayEquals(new String[] {"region", "IDENTITY", null}, result.get(9));
+    Assert.assertArrayEquals(new String[] {"city", "IDENTITY", null}, result.get(10));
+  }
+
+  @Test
   public void testAlterTableReplaceColumns() throws TException, InterruptedException {
     TableIdentifier identifier = TableIdentifier.of("default", "customers");
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
index 108a006..268660f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
@@ -22,7 +22,7 @@ import java.util.Optional;
 public class PartitionTransformSpec {
 
   public enum TransformType {
-    IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET
+    IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET, VOID
   }
 
   private String columnName;