Posted to commits@hive.apache.org by pv...@apache.org on 2022/03/04 13:38:50 UTC

[hive] branch master updated: HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)

This is an automated email from the ASF dual-hosted git repository.

pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 274a21d  HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)
274a21d is described below

commit 274a21da875f9a37e717e282580d2c2dfc7174bb
Author: pvary <pv...@cloudera.com>
AuthorDate: Fri Mar 4 14:37:47 2022 +0100

    HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)
---
 .../org/apache/iceberg/hive/TestHiveMetastore.java |  6 ++++
 .../iceberg/mr/hive/HiveIcebergMetaHook.java       | 42 ++++++++++++++++------
 .../iceberg/mr/hive/TestHiveIcebergMigration.java  |  7 ++++
 .../hive/TestHiveIcebergStorageHandlerNoScan.java  |  2 +-
 .../apache/hadoop/hive/metastore/HiveMetaHook.java |  4 +--
 .../hadoop/hive/metastore/HiveMetaStoreClient.java |  6 ++--
 .../apache/hadoop/hive/metastore/ObjectStore.java  |  4 +--
 7 files changed, 51 insertions(+), 20 deletions(-)
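
The bug in a nutshell: migrating a partitioned Hive table to Iceberg converted
the table but left the old partition entries behind in the Hive Metastore
(HMS), even though an Iceberg table tracks its own partitioning. This commit
makes the Iceberg meta hook capture the partition list during preAlterTable
(the data files still need to be imported into the new Iceberg table) and then
drop the HMS partitions without deleting any data. A minimal check for the
symptom, assuming an IMetaStoreClient connected to the metastore of the
migrated table (database and table names are illustrative):

    // Java: list the HMS partitions of a freshly migrated table
    List<String> parts = client.listPartitionNames("default", "tbl", (short) -1);
    // before this fix the old Hive partitions were still listed here;
    // after it, the list is expected to be empty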

diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index c359277..0d86d6f 100644
--- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -29,11 +29,13 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HMSHandler;
 import org.apache.hadoop.hive.metastore.IHMSHandler;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.RetryingHMSHandler;
 import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
+import org.apache.iceberg.ClientPool;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.common.DynConstructors;
 import org.apache.iceberg.common.DynMethods;
@@ -201,6 +203,10 @@ public class TestHiveMetastore {
     return getTable(identifier.namespace().toString(), identifier.name());
   }
 
+  public <R> R run(ClientPool.Action<R, IMetaStoreClient, TException> action) throws InterruptedException, TException {
+    return clientPool.run(action, false);
+  }
+
   private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception {
     HiveConf serverConf = new HiveConf(conf);
     serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true");
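
The run(...) method added above exposes the embedded test metastore's client
pool, so tests can issue raw IMetaStoreClient calls against it. A usage sketch,
mirroring the validatePartitions check added further down in
TestHiveIcebergMigration.java (the metastore variable stands for a running
TestHiveMetastore instance):

    // Java: run an arbitrary client action through the test metastore
    List<String> partitions = metastore.run(client ->
        client.listPartitionNames("default", tableName, (short) -1));
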
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 21b45d7..cb036dd 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.PartitionDropOptions;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -44,10 +45,14 @@ import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.PartitionTransform;
 import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionStateUtil;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.util.StringUtils;
 import org.apache.iceberg.BaseMetastoreTableOperations;
 import org.apache.iceberg.BaseTable;
 import org.apache.iceberg.CatalogUtil;
@@ -84,6 +89,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.types.Type;
 import org.apache.iceberg.util.Pair;
+import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -108,6 +114,9 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
       FileFormat.PARQUET.name().toLowerCase(),
       FileFormat.ORC.name().toLowerCase(),
       FileFormat.AVRO.name().toLowerCase());
+  private static final PartitionDropOptions DROP_OPTIONS = new PartitionDropOptions().deleteData(false).ifExists(true);
+  private static final List<org.apache.commons.lang3.tuple.Pair<Integer, byte[]>> EMPTY_FILTER =
+      Lists.newArrayList(org.apache.commons.lang3.tuple.Pair.of(1, new byte[0]));
   static final String MIGRATED_TO_ICEBERG = "MIGRATED_TO_ICEBERG";
 
   private final Configuration conf;
@@ -271,15 +280,27 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
 
       context.getProperties().put(HiveMetaHook.ALLOW_PARTITION_KEY_CHANGE, "true");
       // If there are partition keys specified remove them from the HMS table and add them to the column list
-      if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
-        List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
-        if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
-          throw new MetaException("Query state attached to Session state must be not null. " +
-              "Partition transform metadata cannot be saved.");
+      try {
+        Hive db = SessionState.get().getHiveDb();
+        preAlterTableProperties.partitionSpecProxy = db.getMSC().listPartitionSpecs(hmsTable.getCatName(),
+            hmsTable.getDbName(), hmsTable.getTableName(), Integer.MAX_VALUE);
+        if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
+          db.dropPartitions(hmsTable.getDbName(), hmsTable.getTableName(), EMPTY_FILTER, DROP_OPTIONS);
+
+          List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
+          if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
+            throw new MetaException("Query state attached to Session state must be not null. " +
+                "Partition transform metadata cannot be saved.");
+          }
+          hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
+          hmsTable.setPartitionKeysIsSet(false);
         }
-        hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
-        hmsTable.setPartitionKeysIsSet(false);
+      } catch (MetaException me) {
+        throw me;
+      } catch (HiveException | TException e) {
+        throw new MetaException(StringUtils.stringifyException(e));
       }
+
       preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, hmsTable);
 
       sd.setInputFormat(HiveIcebergInputFormat.class.getCanonicalName());
@@ -342,8 +363,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
   }
 
   @Override
-  public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context,
-      PartitionSpecProxy partitionSpecProxy) throws MetaException {
+  public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
+      throws MetaException {
     if (isTableMigration) {
       catalogProperties = getCatalogProperties(hmsTable);
       catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema));
@@ -353,7 +374,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
         catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
       }
       HiveTableUtil.importFiles(preAlterTableProperties.tableLocation, preAlterTableProperties.format,
-          partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
+          preAlterTableProperties.partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
     } else if (currentAlterTableOp != null) {
       switch (currentAlterTableOp) {
         case REPLACE_COLUMNS:
@@ -696,5 +717,6 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
     private Schema schema;
     private PartitionSpec spec;
     private List<FieldSchema> partitionKeys;
+    private PartitionSpecProxy partitionSpecProxy;
   }
 }
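
Two details of the hook change above are worth spelling out. First, the
partition specs are now captured in preAlterTable and stashed in
preAlterTableProperties, because commitAlterTable no longer receives a
PartitionSpecProxy argument (see the HiveMetaHook change below). Second, the
HMS partitions are dropped with a deliberately empty filter expression. A
sketch of that drop, isolated from the surrounding hook code (Pair is the
commons-lang3 Pair used above):

    // Java: the "match everything" partition drop performed during migration
    PartitionDropOptions options = new PartitionDropOptions()
        .deleteData(false)  // keep the data files; Iceberg imports them afterwards
        .ifExists(true);    // no error if the partitions are already gone
    db.dropPartitions(hmsTable.getDbName(), hmsTable.getTableName(),
        Lists.newArrayList(Pair.of(1, new byte[0])),  // empty expr selects all
        options);

The empty byte[] only works because of the ObjectStore change at the end of
this diff, which treats a zero-length expression as a filter matching every
partition. deleteData(false) is essential: HiveTableUtil.importFiles still
needs the files in place to register them with the new Iceberg table.
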
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
index ed9962f..5962583 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
@@ -187,6 +187,13 @@ public class TestHiveIcebergMigration extends HiveIcebergStorageHandlerWithEngin
     Table hmsTable = shell.metastore().getTable("default", tableName);
     validateSd(hmsTable, "iceberg");
     validateTblProps(hmsTable, true);
+    validatePartitions(tableName);
+  }
+
+  private void validatePartitions(String tableName) throws TException, InterruptedException {
+    List<String> partitions = shell.metastore().run(client ->
+        client.listPartitionNames("default", tableName, (short) -1));
+    Assert.assertTrue(partitions.isEmpty());
   }
 
   private void validateMigrationRollback(String tableName) throws TException, InterruptedException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index edaf123..a4b7554 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -1318,7 +1318,7 @@ public class TestHiveIcebergStorageHandlerNoScan {
     EnvironmentContext environmentContext = new EnvironmentContext(new HashMap<>());
 
     metaHook.preAlterTable(hmsTable, environmentContext);
-    metaHook.commitAlterTable(hmsTable, environmentContext, null);
+    metaHook.commitAlterTable(hmsTable, environmentContext);
   }
 
   @Test
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index b9372a9..e8945b5 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -23,7 +23,6 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import com.google.common.collect.ImmutableList;
 
 import java.util.List;
@@ -144,9 +143,8 @@ public interface HiveMetaHook {
    * Called after a table is altered in the metastore during ALTER TABLE.
    * @param table new table definition
    * @param context environment context, containing information about the alter operation type
-   * @param partitionSpecProxy list of partitions wrapped in {@link PartitionSpecProxy}
    */
-  default void commitAlterTable(Table table, EnvironmentContext context, PartitionSpecProxy partitionSpecProxy) throws MetaException {
+  default void commitAlterTable(Table table, EnvironmentContext context) throws MetaException {
     // Do nothing
   }
 
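
For hook implementors, the interface change above means commitAlterTable no
longer hands over the table's partitions; a hook that needs them must fetch
them itself, as HiveIcebergMetaHook now does in preAlterTable. A minimal
skeleton against the new signature (class name illustrative; the six abstract
create/drop callbacks are stubbed out):

    // Java: hook built against the new two-argument commitAlterTable
    public class LoggingMetaHook implements HiveMetaHook {
      @Override public void preCreateTable(Table table) throws MetaException { }
      @Override public void rollbackCreateTable(Table table) throws MetaException { }
      @Override public void commitCreateTable(Table table) throws MetaException { }
      @Override public void preDropTable(Table table) throws MetaException { }
      @Override public void rollbackDropTable(Table table) throws MetaException { }
      @Override public void commitDropTable(Table table, boolean deleteData) throws MetaException { }

      @Override
      public void commitAlterTable(Table table, EnvironmentContext context) throws MetaException {
        // react to the committed ALTER TABLE; partition data is no longer passed in
      }
    }
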
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index dbfac3c..7878e6d 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -520,8 +520,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
     try {
       client.alter_table_req(req);
       if (hook != null) {
-        PartitionSpecProxy partitionSpecProxy = listPartitionSpecs(dbname, tbl_name, Integer.MAX_VALUE);
-        hook.commitAlterTable(new_tbl, envContext, partitionSpecProxy);
+        hook.commitAlterTable(new_tbl, envContext);
       }
       success = true;
     } finally {
@@ -565,8 +564,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
     try {
       client.alter_table_req(req);
       if (hook != null) {
-        PartitionSpecProxy partitionSpecProxy = listPartitionSpecs(catName, dbName, tbl_name, Integer.MAX_VALUE);
-        hook.commitAlterTable(new_tbl, envContext, partitionSpecProxy);
+        hook.commitAlterTable(new_tbl, envContext);
       }
       success = true;
     } finally {
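
A side effect of the two call-site changes above: alter_table no longer pays
for a listPartitionSpecs round trip on every hooked ALTER TABLE, whether or
not the hook cared about partitions. A hook that does need the specs can load
them on demand, as the Iceberg hook now does:

    // Java: fetch partition specs explicitly instead of receiving them
    PartitionSpecProxy specs = client.listPartitionSpecs(
        table.getCatName(), table.getDbName(), table.getTableName(), Integer.MAX_VALUE);
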
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index f5a78a2..90664e2 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -3971,8 +3971,8 @@ public class ObjectStore implements RawStore, Configurable {
       boolean allowSql, boolean allowJdo) throws TException {
     assert result != null;
 
-    final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr,
-                                                    getDefaultPartitionName(defaultPartitionName), conf);
+    final ExpressionTree exprTree = expr.length != 0 ? PartFilterExprUtil.makeExpressionTree(
+          expressionProxy, expr, getDefaultPartitionName(defaultPartitionName), conf) : ExpressionTree.EMPTY_TREE;
     final AtomicBoolean hasUnknownPartitions = new AtomicBoolean(false);
 
     catName = normalizeIdentifier(catName);
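
This last hunk is what gives the empty filter its meaning: makeExpressionTree
was previously invoked unconditionally, and a zero-length expression byte[] is
not a valid serialized filter, so expression-based partition APIs had no way
to say "everything". Now an empty expr short-circuits to
ExpressionTree.EMPTY_TREE, which matches all partitions. A sketch of the
resulting client-side semantics (standard IMetaStoreClient expression API;
names illustrative):

    // Java: an empty serialized expression now behaves as "no filter"
    List<Partition> result = Lists.newArrayList();
    client.listPartitionsByExpr("default", "tbl", new byte[0],
        null /* default partition name */, (short) -1, result);
    // result is expected to hold every partition of default.tbl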