You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2021/08/11 09:42:58 UTC
[hive] branch master updated: HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)
This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d22eca1 HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)
d22eca1 is described below
commit d22eca1877bc79b5f3e17972684b0e985469fac7
Author: Adam Szita <40...@users.noreply.github.com>
AuthorDate: Wed Aug 11 11:42:45 2021 +0200
HIVE-25375: Partition column rename support for Iceberg tables (Adam Szita, reviewed by Marta Kuczora)
---
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 35 ++++++++++++++++++----
.../hive/TestHiveIcebergStorageHandlerNoScan.java | 30 +++++++++++++++++++
.../hive/ql/parse/PartitionTransformSpec.java | 2 +-
3 files changed, 61 insertions(+), 6 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 6301f93..8d773b9 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -60,6 +60,8 @@ import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadataParser;
import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.Transaction;
+import org.apache.iceberg.UpdatePartitionSpec;
import org.apache.iceberg.UpdateProperties;
import org.apache.iceberg.UpdateSchema;
import org.apache.iceberg.catalog.TableIdentifier;
@@ -103,6 +105,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
private boolean canMigrateHiveTable;
private PreAlterTableProperties preAlterTableProperties;
private UpdateSchema updateSchema;
+ private UpdatePartitionSpec updatePartitionSpec;
+ private Transaction transaction;
private AlterTableType currentAlterTableOp;
public HiveIcebergMetaHook(Configuration conf) {
@@ -293,8 +297,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
case REPLACE_COLUMNS:
case RENAME_COLUMN:
case ADDCOLS:
- if (updateSchema != null) {
- updateSchema.commit();
+ if (transaction != null) {
+ transaction.commitTransaction();
}
break;
case ADDPROPS:
@@ -497,12 +501,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
HiveSchemaUtil.getSchemaDiff(hmsTable.getSd().getCols(), HiveSchemaUtil.convert(icebergTable.schema()), false)
.getMissingFromSecond();
if (!addedCols.isEmpty()) {
- updateSchema = icebergTable.updateSchema();
+ transaction = icebergTable.newTransaction();
+ updateSchema = transaction.updateSchema();
}
for (FieldSchema addedCol : addedCols) {
updateSchema.addColumn(addedCol.getName(),
HiveSchemaUtil.convert(TypeInfoUtils.getTypeInfoFromTypeString(addedCol.getType())), addedCol.getComment());
}
+ updateSchema.commit();
}
private void handleReplaceColumns(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException {
@@ -531,12 +537,14 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
"mismatches between HMS and Iceberg, please consider the UPDATE COLUMNS command.");
}
- updateSchema = icebergTable.updateSchema();
+ transaction = icebergTable.newTransaction();
+ updateSchema = transaction.updateSchema();
LOG.info("handleReplaceColumns: Dropping the following columns for Iceberg table {}, cols: {}",
hmsTable.getTableName(), schemaDifference.getMissingFromFirst());
for (FieldSchema droppedCol : schemaDifference.getMissingFromFirst()) {
updateSchema.deleteColumn(droppedCol.getName());
}
+ updateSchema.commit();
}
private void handleChangeColumn(org.apache.hadoop.hive.metastore.api.Table hmsTable) throws MetaException {
@@ -555,7 +563,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
renameMapping);
if (!schemaDifference.isEmpty() || outOfOrder != null) {
- updateSchema = icebergTable.updateSchema();
+ transaction = icebergTable.newTransaction();
+ updateSchema = transaction.updateSchema();
} else {
// we should get here if the user didn't change anything about the column
// i.e. no changes to the name, type, comment or order
@@ -597,6 +606,22 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
updateSchema.moveFirst(outOfOrder.first());
}
}
+ updateSchema.commit();
+
+ handlePartitionRename(schemaDifference);
+ }
+
+ private void handlePartitionRename(HiveSchemaUtil.SchemaDifference schemaDifference) {
+ // in case a partition column has been renamed, spec needs to be adjusted too
+ if (!schemaDifference.getMissingFromSecond().isEmpty()) {
+ FieldSchema oldField = schemaDifference.getMissingFromFirst().get(0);
+ FieldSchema updatedField = schemaDifference.getMissingFromSecond().get(0);
+ if (icebergTable.spec().fields().stream().anyMatch(pf -> pf.name().equals(oldField.getName()))) {
+ updatePartitionSpec = transaction.updateSpec();
+ updatePartitionSpec.renameField(oldField.getName(), updatedField.getName());
+ updatePartitionSpec.commit();
+ }
+ }
}
private Type.PrimitiveType getPrimitiveTypeOrThrow(FieldSchema field) throws MetaException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index 4d01c5d..be04719 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -40,6 +40,7 @@ import org.apache.iceberg.AssertHelpers;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
@@ -949,6 +950,35 @@ public class TestHiveIcebergStorageHandlerNoScan {
}
@Test
+ public void testAlterTableRenamePartitionColumn() throws Exception {
+ TableIdentifier identifier = TableIdentifier.of("default", "customers");
+
+ testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC,
+ FileFormat.PARQUET, ImmutableList.of());
+ shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (last_name)");
+
+ // Renaming (and reordering) a partition column
+ shell.executeStatement("ALTER TABLE default.customers CHANGE last_name family_name string FIRST");
+ List<PartitionField> partitionFields = testTables.loadTable(identifier).spec().fields();
+ Assert.assertEquals(1, partitionFields.size());
+ Assert.assertEquals("family_name", partitionFields.get(0).name());
+
+ // Adding new columns, assigning them as partition columns, then removing 1 partition column
+ shell.executeStatement("ALTER TABLE default.customers ADD COLUMNS (p1 string, p2 string)");
+ shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (family_name, p1, p2)");
+
+ shell.executeStatement("ALTER TABLE default.customers CHANGE p1 region string");
+ shell.executeStatement("ALTER TABLE default.customers CHANGE p2 city string");
+
+ shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (region, city)");
+
+ List<Object[]> result = shell.executeStatement("DESCRIBE default.customers");
+ Assert.assertArrayEquals(new String[] {"family_name", "VOID", null}, result.get(8));
+ Assert.assertArrayEquals(new String[] {"region", "IDENTITY", null}, result.get(9));
+ Assert.assertArrayEquals(new String[] {"city", "IDENTITY", null}, result.get(10));
+ }
+
+ @Test
public void testAlterTableReplaceColumns() throws TException, InterruptedException {
TableIdentifier identifier = TableIdentifier.of("default", "customers");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
index 108a006..268660f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PartitionTransformSpec.java
@@ -22,7 +22,7 @@ import java.util.Optional;
public class PartitionTransformSpec {
public enum TransformType {
- IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET
+ IDENTITY, YEAR, MONTH, DAY, HOUR, TRUNCATE, BUCKET, VOID
}
private String columnName;