You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2022/03/04 13:38:50 UTC
[hive] branch master updated: HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)
This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 274a21d HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)
274a21d is described below
commit 274a21da875f9a37e717e282580d2c2dfc7174bb
Author: pvary <pv...@cloudera.com>
AuthorDate: Fri Mar 4 14:37:47 2022 +0100
HIVE-25894: Table migration to Iceberg doesn't remove HMS partitions (Peter Vary reviewed by Marton Bod and Laszlo Pinter) (#3061)
---
.../org/apache/iceberg/hive/TestHiveMetastore.java | 6 ++++
.../iceberg/mr/hive/HiveIcebergMetaHook.java | 42 ++++++++++++++++------
.../iceberg/mr/hive/TestHiveIcebergMigration.java | 7 ++++
.../hive/TestHiveIcebergStorageHandlerNoScan.java | 2 +-
.../apache/hadoop/hive/metastore/HiveMetaHook.java | 4 +--
.../hadoop/hive/metastore/HiveMetaStoreClient.java | 6 ++--
.../apache/hadoop/hive/metastore/ObjectStore.java | 4 +--
7 files changed, 51 insertions(+), 20 deletions(-)
diff --git a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
index c359277..0d86d6f 100644
--- a/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
+++ b/iceberg/iceberg-catalog/src/test/java/org/apache/iceberg/hive/TestHiveMetastore.java
@@ -29,11 +29,13 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HMSHandler;
import org.apache.hadoop.hive.metastore.IHMSHandler;
+import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.RetryingHMSHandler;
import org.apache.hadoop.hive.metastore.TSetIpAddressProcessor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
+import org.apache.iceberg.ClientPool;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.common.DynConstructors;
import org.apache.iceberg.common.DynMethods;
@@ -201,6 +203,10 @@ public class TestHiveMetastore {
return getTable(identifier.namespace().toString(), identifier.name());
}
+ public <R> R run(ClientPool.Action<R, IMetaStoreClient, TException> action) throws InterruptedException, TException {
+ return clientPool.run(action, false);
+ }
+
private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf conf) throws Exception {
HiveConf serverConf = new HiveConf(conf);
serverConf.set(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname, "jdbc:derby:" + getDerbyPath() + ";create=true");
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
index 21b45d7..cb036dd 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergMetaHook.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -44,10 +45,14 @@ import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.PartitionTransform;
import org.apache.hadoop.hive.ql.parse.PartitionTransformSpec;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionStateUtil;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.util.StringUtils;
import org.apache.iceberg.BaseMetastoreTableOperations;
import org.apache.iceberg.BaseTable;
import org.apache.iceberg.CatalogUtil;
@@ -84,6 +89,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.util.Pair;
+import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -108,6 +114,9 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
FileFormat.PARQUET.name().toLowerCase(),
FileFormat.ORC.name().toLowerCase(),
FileFormat.AVRO.name().toLowerCase());
+ private static final PartitionDropOptions DROP_OPTIONS = new PartitionDropOptions().deleteData(false).ifExists(true);
+ private static final List<org.apache.commons.lang3.tuple.Pair<Integer, byte[]>> EMPTY_FILTER =
+ Lists.newArrayList(org.apache.commons.lang3.tuple.Pair.of(1, new byte[0]));
static final String MIGRATED_TO_ICEBERG = "MIGRATED_TO_ICEBERG";
private final Configuration conf;
@@ -271,15 +280,27 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
context.getProperties().put(HiveMetaHook.ALLOW_PARTITION_KEY_CHANGE, "true");
// If there are partition keys specified remove them from the HMS table and add them to the column list
- if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
- List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
- if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
- throw new MetaException("Query state attached to Session state must be not null. " +
- "Partition transform metadata cannot be saved.");
+ try {
+ Hive db = SessionState.get().getHiveDb();
+ preAlterTableProperties.partitionSpecProxy = db.getMSC().listPartitionSpecs(hmsTable.getCatName(),
+ hmsTable.getDbName(), hmsTable.getTableName(), Integer.MAX_VALUE);
+ if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
+ db.dropPartitions(hmsTable.getDbName(), hmsTable.getTableName(), EMPTY_FILTER, DROP_OPTIONS);
+
+ List<PartitionTransformSpec> spec = PartitionTransform.getPartitionTransformSpec(hmsTable.getPartitionKeys());
+ if (!SessionStateUtil.addResource(conf, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC, spec)) {
+ throw new MetaException("Query state attached to Session state must be not null. " +
+ "Partition transform metadata cannot be saved.");
+ }
+ hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
+ hmsTable.setPartitionKeysIsSet(false);
}
- hmsTable.getSd().getCols().addAll(hmsTable.getPartitionKeys());
- hmsTable.setPartitionKeysIsSet(false);
+ } catch (MetaException me) {
+ throw me;
+ } catch (HiveException | TException e) {
+ throw new MetaException(StringUtils.stringifyException(e));
}
+
preAlterTableProperties.spec = spec(conf, preAlterTableProperties.schema, hmsTable);
sd.setInputFormat(HiveIcebergInputFormat.class.getCanonicalName());
@@ -342,8 +363,8 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
}
@Override
- public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context,
- PartitionSpecProxy partitionSpecProxy) throws MetaException {
+ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
+ throws MetaException {
if (isTableMigration) {
catalogProperties = getCatalogProperties(hmsTable);
catalogProperties.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(preAlterTableProperties.schema));
@@ -353,7 +374,7 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
catalogProperties.put(TableProperties.ENGINE_HIVE_ENABLED, true);
}
HiveTableUtil.importFiles(preAlterTableProperties.tableLocation, preAlterTableProperties.format,
- partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
+ preAlterTableProperties.partitionSpecProxy, preAlterTableProperties.partitionKeys, catalogProperties, conf);
} else if (currentAlterTableOp != null) {
switch (currentAlterTableOp) {
case REPLACE_COLUMNS:
@@ -696,5 +717,6 @@ public class HiveIcebergMetaHook implements HiveMetaHook {
private Schema schema;
private PartitionSpec spec;
private List<FieldSchema> partitionKeys;
+ private PartitionSpecProxy partitionSpecProxy;
}
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
index ed9962f..5962583 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergMigration.java
@@ -187,6 +187,13 @@ public class TestHiveIcebergMigration extends HiveIcebergStorageHandlerWithEngin
Table hmsTable = shell.metastore().getTable("default", tableName);
validateSd(hmsTable, "iceberg");
validateTblProps(hmsTable, true);
+ validatePartitions(tableName);
+ }
+
+ private void validatePartitions(String tableName) throws TException, InterruptedException {
+ List<String> partitions = shell.metastore().run(client ->
+ client.listPartitionNames("default", tableName, (short) -1));
+ Assert.assertTrue(partitions.isEmpty());
}
private void validateMigrationRollback(String tableName) throws TException, InterruptedException {
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
index edaf123..a4b7554 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java
@@ -1318,7 +1318,7 @@ public class TestHiveIcebergStorageHandlerNoScan {
EnvironmentContext environmentContext = new EnvironmentContext(new HashMap<>());
metaHook.preAlterTable(hmsTable, environmentContext);
- metaHook.commitAlterTable(hmsTable, environmentContext, null);
+ metaHook.commitAlterTable(hmsTable, environmentContext);
}
@Test
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
index b9372a9..e8945b5 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaHook.java
@@ -23,7 +23,6 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import com.google.common.collect.ImmutableList;
import java.util.List;
@@ -144,9 +143,8 @@ public interface HiveMetaHook {
* Called after a table is altered in the metastore during ALTER TABLE.
* @param table new table definition
* @param context environment context, containing information about the alter operation type
- * @param partitionSpecProxy list of partitions wrapped in {@link PartitionSpecProxy}
*/
- default void commitAlterTable(Table table, EnvironmentContext context, PartitionSpecProxy partitionSpecProxy) throws MetaException {
+ default void commitAlterTable(Table table, EnvironmentContext context) throws MetaException {
// Do nothing
}
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index dbfac3c..7878e6d 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -520,8 +520,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
try {
client.alter_table_req(req);
if (hook != null) {
- PartitionSpecProxy partitionSpecProxy = listPartitionSpecs(dbname, tbl_name, Integer.MAX_VALUE);
- hook.commitAlterTable(new_tbl, envContext, partitionSpecProxy);
+ hook.commitAlterTable(new_tbl, envContext);
}
success = true;
} finally {
@@ -565,8 +564,7 @@ public class HiveMetaStoreClient implements IMetaStoreClient, AutoCloseable {
try {
client.alter_table_req(req);
if (hook != null) {
- PartitionSpecProxy partitionSpecProxy = listPartitionSpecs(catName, dbName, tbl_name, Integer.MAX_VALUE);
- hook.commitAlterTable(new_tbl, envContext, partitionSpecProxy);
+ hook.commitAlterTable(new_tbl, envContext);
}
success = true;
} finally {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index f5a78a2..90664e2 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -3971,8 +3971,8 @@ public class ObjectStore implements RawStore, Configurable {
boolean allowSql, boolean allowJdo) throws TException {
assert result != null;
- final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr,
- getDefaultPartitionName(defaultPartitionName), conf);
+ final ExpressionTree exprTree = expr.length != 0 ? PartFilterExprUtil.makeExpressionTree(
+ expressionProxy, expr, getDefaultPartitionName(defaultPartitionName), conf) : ExpressionTree.EMPTY_TREE;
final AtomicBoolean hasUnknownPartitions = new AtomicBoolean(false);
catName = normalizeIdentifier(catName);