Posted to commits@hive.apache.org by mg...@apache.org on 2019/09/11 07:01:00 UTC
[hive] branch master updated: HIVE-22174 Clean up Drop Partition
(Miklos Gergely reviewed by Jesus Camacho Rodriguez)
This is an automated email from the ASF dual-hosted git repository.
mgergely pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 32615f9 HIVE-22174 Clean up Drop Partition (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
32615f9 is described below
commit 32615f9d231861682f77c8e7766d27b9a0778920
Author: miklosgergely <mg...@cloudera.com>
AuthorDate: Fri Sep 6 08:01:57 2019 +0200
HIVE-22174 Clean up Drop Partition (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
---
.../AlterTableDropPartitionOperation.java | 29 +++-
.../org/apache/hadoop/hive/ql/metadata/Hive.java | 193 +++------------------
2 files changed, 41 insertions(+), 181 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
index e7cc6d3..13c9416 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
@@ -23,9 +23,12 @@ import java.util.List;
import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.utils.ObjectPair;
import org.apache.hadoop.hive.ql.ddl.DDLOperation;
import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
@@ -46,16 +49,16 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
@Override
public int execute() throws HiveException {
// We need to fetch the table before it is dropped so that it can be passed to post-execution hook
- Table tbl = null;
+ Table table = null;
try {
- tbl = context.getDb().getTable(desc.getTableName());
+ table = context.getDb().getTable(desc.getTableName());
} catch (InvalidTableException e) {
// drop table is idempotent
}
ReplicationSpec replicationSpec = desc.getReplicationSpec();
if (replicationSpec.isInReplicationScope()) {
- dropPartitionForReplication(tbl, replicationSpec);
+ dropPartitionForReplication(table, replicationSpec);
} else {
dropPartitions();
}
@@ -63,7 +66,7 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
return 0;
}
- private void dropPartitionForReplication(Table tbl, ReplicationSpec replicationSpec) throws HiveException {
+ private void dropPartitionForReplication(Table table, ReplicationSpec replicationSpec) throws HiveException {
/**
* ALTER TABLE DROP PARTITION ... FOR REPLICATION(x) behaves as a DROP PARTITION IF OLDER THAN x
*
@@ -79,7 +82,7 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
// to the metastore to let it decide whether or not to drop a partition, depending on a Predicate on the
// parameter key values.
- if (tbl == null) {
+ if (table == null) {
// If the table is missing, then its partitions would've been dropped as well. Just no-op.
return;
}
@@ -87,9 +90,9 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
for (AlterTableDropPartitionDesc.PartitionDesc partSpec : desc.getPartSpecs()) {
List<Partition> partitions = new ArrayList<>();
try {
- context.getDb().getPartitionsByExpr(tbl, partSpec.getPartSpec(), context.getConf(), partitions);
+ context.getDb().getPartitionsByExpr(table, partSpec.getPartSpec(), context.getConf(), partitions);
for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
- context.getDb().dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
+ context.getDb().dropPartition(table.getDbName(), table.getTableName(), p.getValues(), true);
}
} catch (NoSuchObjectException e) {
// ignore NSOE because that means there's nothing to drop.
@@ -101,9 +104,17 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
private void dropPartitions() throws HiveException {
// ifExists is currently verified in DDLSemanticAnalyzer
- List<Partition> droppedParts = context.getDb().dropPartitions(desc.getTableName(), desc.getPartSpecs(),
+ String[] names = Utilities.getDbTableName(desc.getTableName());
+
+ List<ObjectPair<Integer, byte[]>> partitionExpressions = new ArrayList<>(desc.getPartSpecs().size());
+ for (AlterTableDropPartitionDesc.PartitionDesc partSpec : desc.getPartSpecs()) {
+ partitionExpressions.add(new ObjectPair<>(partSpec.getPrefixLength(),
+ SerializationUtilities.serializeExpressionToKryo(partSpec.getPartSpec())));
+ }
+
+ List<Partition> droppedPartitions = context.getDb().dropPartitions(names[0], names[1], partitionExpressions,
PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(desc.getIfPurge()));
- for (Partition partition : droppedParts) {
+ for (Partition partition : droppedPartitions) {
context.getConsole().printInfo("Dropped the partition " + partition.getName());
// We have already locked the table, don't lock the partitions.
DDLUtils.addIfAbsentByName(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK), context);
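For reference, the rewritten dropPartitions() above now does the name resolution and expression
serialization that Hive.java used to do internally. A minimal sketch of the new call path, using
only names that appear in the diff (here `db`, `tableName`, `partSpecs` and `ifPurge` stand in for
context.getDb(), desc.getTableName(), desc.getPartSpecs() and desc.getIfPurge()):

    // Minimal sketch of the new drop path, assuming the surrounding DDLOperation context.
    String[] names = Utilities.getDbTableName(tableName);
    List<ObjectPair<Integer, byte[]>> expressions = new ArrayList<>(partSpecs.size());
    for (AlterTableDropPartitionDesc.PartitionDesc spec : partSpecs) {
      // Each spec carries its prefix length plus a drop predicate serialized to Kryo.
      expressions.add(new ObjectPair<>(spec.getPrefixLength(),
          SerializationUtilities.serializeExpressionToKryo(spec.getPartSpec())));
    }
    List<Partition> dropped = db.dropPartitions(names[0], names[1], expressions,
        PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(ifPurge));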
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 0730ca6..522c20a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -31,7 +31,6 @@ import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_
import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
import static org.apache.hadoop.hive.ql.io.AcidUtils.getFullTableName;
-import static org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.makeBinaryPredicate;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
@@ -89,7 +88,15 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hive.common.*;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
import org.apache.hadoop.hive.common.log.InPlaceUpdate;
@@ -114,7 +121,6 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.ddl.table.partition.AlterTableDropPartitionDesc;
import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator;
import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.FunctionUtils;
@@ -129,8 +135,6 @@ import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
@@ -138,8 +142,6 @@ import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.InputFormat;
@@ -1478,21 +1480,6 @@ public class Hive {
}
}
- private List<Table> getTableObjects(String dbName, List<String> tableNames) throws HiveException {
- try {
- return Lists.transform(getMSC().getTableObjectsByName(dbName, tableNames),
- new com.google.common.base.Function<org.apache.hadoop.hive.metastore.api.Table, Table>() {
- @Override
- public Table apply(org.apache.hadoop.hive.metastore.api.Table table) {
- return new Table(table);
- }
- }
- );
- } catch (Exception e) {
- throw new HiveException(e);
- }
- }
-
/**
* Returns all existing tables from default database which match the given
* pattern. The matching occurs as per Java regular expressions
@@ -1838,20 +1825,6 @@ public class Hive {
}
/**
- * Get materialized views for the specified database that have enabled rewriting.
- * @param dbName
- * @return List of materialized view table objects
- * @throws HiveException
- */
- private List<String> getMaterializedViewsForRewriting(String dbName) throws HiveException {
- try {
- return getMSC().getMaterializedViewsForRewriting(dbName);
- } catch (Exception e) {
- throw new HiveException(e);
- }
- }
-
- /**
* Get all existing database names.
*
* @return List of database names.
@@ -2388,13 +2361,6 @@ public class Hive {
return destPath;
}
- private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) {
- // For Acid IUD, add partition is a meta data only operation. So need to add the new files added
- // information into the TXN_WRITE_NOTIFICATION_LOG table.
- return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() &&
- ((null != oldPart) || AcidUtils.isTransactionalTable(tbl));
- }
-
public static void listFilesInsideAcidDirectory(Path acidDir, FileSystem srcFs, List<Path> newFiles)
throws IOException {
// list out all the files/directory in the path
@@ -3168,10 +3134,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
String partPath) throws HiveException, InvalidOperationException {
alterPartitionSpecInMemory(tbl, partSpec, tpart, inheritTableSpecs, partPath);
- String fullName = tbl.getTableName();
- if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) {
- fullName = tbl.getFullyQualifiedName();
- }
alterPartition(tbl.getCatalogName(), tbl.getDbName(), tbl.getTableName(),
new Partition(tbl, tpart), null, true);
}
@@ -3343,22 +3305,15 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- public boolean dropPartition(String tblName, List<String> part_vals, boolean deleteData)
+ public boolean dropPartition(String dbName, String tableName, List<String> partitionValues, boolean deleteData)
throws HiveException {
- String[] names = Utilities.getDbTableName(tblName);
- return dropPartition(names[0], names[1], part_vals, deleteData);
- }
-
- public boolean dropPartition(String db_name, String tbl_name,
- List<String> part_vals, boolean deleteData) throws HiveException {
- return dropPartition(db_name, tbl_name, part_vals,
- PartitionDropOptions.instance().deleteData(deleteData));
+ return dropPartition(dbName, tableName, partitionValues, PartitionDropOptions.instance().deleteData(deleteData));
}
- public boolean dropPartition(String dbName, String tableName, List<String> partVals, PartitionDropOptions options)
- throws HiveException {
+ public boolean dropPartition(String dbName, String tableName, List<String> partitionValues,
+ PartitionDropOptions options) throws HiveException {
try {
- return getMSC().dropPartition(dbName, tableName, partVals, options);
+ return getMSC().dropPartition(dbName, tableName, partitionValues, options);
} catch (NoSuchObjectException e) {
throw new HiveException("Partition or table doesn't exist.", e);
} catch (Exception e) {
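With the single-string overload gone, callers that used to pass a "db.table" name to dropPartition
now split it themselves, as the removed overload did. A hypothetical usage sketch (the table name
and partition values below are illustrative, not taken from the commit):

    // Hypothetical caller migration; assumes java.util.Arrays and a Hive handle `db`.
    String[] names = Utilities.getDbTableName("mydb.mytable");
    db.dropPartition(names[0], names[1], Arrays.asList("2019", "09"), /* deleteData */ true);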
@@ -3366,116 +3321,14 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
- /**
- * drop the partitions specified as directory names associated with the table.
- *
- * @param table object for which partition is needed
- * @param partDirNames partition directories that need to be dropped
- * @param deleteData whether data should be deleted from file system
- * @param ifExists check for existence before attempting delete
- *
- * @return list of partition objects that were deleted
- *
- * @throws HiveException
- */
- public List<Partition> dropPartitions(Table table, List<String>partDirNames,
- boolean deleteData, boolean ifExists) throws HiveException {
- // partitions to be dropped in this batch
- List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs = new ArrayList<>(partDirNames.size());
-
- // parts of the partition
- String[] parts = null;
-
- // Expression splits of each part of the partition
- String[] partExprParts = null;
-
- // Column Types of all partitioned columns. Used for generating partition specification
- Map<String, String> colTypes = new HashMap<String, String>();
- for (FieldSchema fs : table.getPartitionKeys()) {
- colTypes.put(fs.getName(), fs.getType());
- }
-
- // Key to be used to save the partition to be dropped in partSpecs
- int partSpecKey = 0;
-
- for (String partDir : partDirNames) {
- // The expression to identify the partition to be dropped
- ExprNodeGenericFuncDesc expr = null;
-
- // Split by "/" to identify partition parts
- parts = partDir.split("/");
-
- // Loop through the partitions and form the expression
- for (String part : parts) {
- // Split the partition predicate to identify column and value
- partExprParts = part.split("=");
-
- // Only two elements expected in partExprParts partition column name and partition value
- assert partExprParts.length == 2;
-
- // Partition Column
- String partCol = partExprParts[0];
-
- // Column Type
- PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(colTypes.get(partCol));
-
- // Form the expression node corresponding to column
- ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, partCol, null, true);
-
- // Build the expression based on the partition predicate
- ExprNodeGenericFuncDesc op =
- makeBinaryPredicate("=", column, new ExprNodeConstantDesc(pti, partExprParts[1]));
-
- // the multiple parts to partition predicate are joined using and
- expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op);
- }
-
- // Add the expression to partition specification
- partSpecs.add(new AlterTableDropPartitionDesc.PartitionDesc(expr, partSpecKey));
-
- // Increment dropKey to get a new key for hash map
- ++partSpecKey;
- }
-
- String[] names = Utilities.getDbTableName(table.getFullyQualifiedName());
- return dropPartitions(names[0], names[1], partSpecs, deleteData, ifExists);
- }
-
- public List<Partition> dropPartitions(String tblName, List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs,
- boolean deleteData, boolean ifExists) throws HiveException {
- String[] names = Utilities.getDbTableName(tblName);
- return dropPartitions(names[0], names[1], partSpecs, deleteData, ifExists);
- }
-
- public List<Partition> dropPartitions(String dbName, String tblName,
- List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs, boolean deleteData,
- boolean ifExists) throws HiveException {
- return dropPartitions(dbName, tblName, partSpecs,
- PartitionDropOptions.instance()
- .deleteData(deleteData)
- .ifExists(ifExists));
- }
-
- public List<Partition> dropPartitions(String tblName, List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs,
- PartitionDropOptions dropOptions) throws HiveException {
- String[] names = Utilities.getDbTableName(tblName);
- return dropPartitions(names[0], names[1], partSpecs, dropOptions);
- }
-
- public List<Partition> dropPartitions(String dbName, String tblName,
- List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs, PartitionDropOptions dropOptions)
- throws HiveException {
+ public List<Partition> dropPartitions(String dbName, String tableName,
+ List<org.apache.hadoop.hive.metastore.utils.ObjectPair<Integer, byte[]>> partitionExpressions,
+ PartitionDropOptions dropOptions) throws HiveException {
try {
- Table tbl = getTable(dbName, tblName);
- List<org.apache.hadoop.hive.metastore.utils.ObjectPair<Integer, byte[]>> partExprs =
- new ArrayList<>(partSpecs.size());
- for (AlterTableDropPartitionDesc.PartitionDesc partSpec : partSpecs) {
- partExprs.add(new org.apache.hadoop.hive.metastore.utils.ObjectPair<>(partSpec.getPrefixLength(),
- SerializationUtilities.serializeExpressionToKryo(partSpec.getPartSpec())));
- }
- List<org.apache.hadoop.hive.metastore.api.Partition> tParts = getMSC().dropPartitions(
- dbName, tblName, partExprs, dropOptions);
- return convertFromMetastore(tbl, tParts);
+ Table table = getTable(dbName, tableName);
+ List<org.apache.hadoop.hive.metastore.api.Partition> partitions = getMSC().dropPartitions(dbName, tableName,
+ partitionExpressions, dropOptions);
+ return convertFromMetastore(table, partitions);
} catch (NoSuchObjectException e) {
throw new HiveException("Partition or table doesn't exist.", e);
} catch (Exception e) {
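After this hunk, the partition-drop API on Hive collapses from eight overloads to three. As a
summary of what the diff leaves in place (signatures copied from the hunks above; ObjectPair is
the org.apache.hadoop.hive.metastore.utils variant):

    public boolean dropPartition(String dbName, String tableName,
        List<String> partitionValues, boolean deleteData) throws HiveException;
    public boolean dropPartition(String dbName, String tableName,
        List<String> partitionValues, PartitionDropOptions options) throws HiveException;
    public List<Partition> dropPartitions(String dbName, String tableName,
        List<ObjectPair<Integer, byte[]>> partitionExpressions,
        PartitionDropOptions dropOptions) throws HiveException;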
@@ -4271,7 +4124,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
throw new HiveException(e.getMessage(), e);
}
- HdfsUtils.HadoopFileStatus destStatus = null;
String configuredOwner = HiveConf.getVar(conf, ConfVars.HIVE_LOAD_DATA_OWNER);
// If source path is a subdirectory of the destination path (or the other way around):
@@ -4286,7 +4138,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
try {
if (replace) {
try{
- destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
//if destf is an existing directory:
//if replace is true, delete followed by rename(mv) is equivalent to replace
//if replace is false, rename (mv) actually move the src under dest dir
@@ -4299,7 +4150,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
} catch (FileNotFoundException ignore) {
}
}
- final HdfsUtils.HadoopFileStatus desiredStatus = destStatus;
final SessionState parentSession = SessionState.get();
if (isSrcLocal) {
// For local src file, copy to hdfs
@@ -5996,4 +5846,3 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
}
-