Posted to commits@hive.apache.org by mg...@apache.org on 2019/09/11 07:01:00 UTC

[hive] branch master updated: HIVE-22174 Clean up Drop Partition (Miklos Gergely reviewed by Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

mgergely pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 32615f9  HIVE-22174 Clean up Drop Partition (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
32615f9 is described below

commit 32615f9d231861682f77c8e7766d27b9a0778920
Author: miklosgergely <mg...@cloudera.com>
AuthorDate: Fri Sep 6 08:01:57 2019 +0200

    HIVE-22174 Clean up Drop Partition (Miklos Gergely reviewed by Jesus Camacho Rodriguez)
---
 .../AlterTableDropPartitionOperation.java          |  29 +++-
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   | 193 +++------------------
 2 files changed, 41 insertions(+), 181 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
index e7cc6d3..13c9416 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/partition/AlterTableDropPartitionOperation.java
@@ -23,9 +23,12 @@ import java.util.List;
 
 import org.apache.hadoop.hive.metastore.PartitionDropOptions;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.utils.ObjectPair;
 import org.apache.hadoop.hive.ql.ddl.DDLOperation;
 import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
 import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.InvalidTableException;
@@ -46,16 +49,16 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
   @Override
   public int execute() throws HiveException {
     // We need to fetch the table before it is dropped so that it can be passed to post-execution hook
-    Table tbl = null;
+    Table table = null;
     try {
-      tbl = context.getDb().getTable(desc.getTableName());
+      table = context.getDb().getTable(desc.getTableName());
     } catch (InvalidTableException e) {
       // drop table is idempotent
     }
 
     ReplicationSpec replicationSpec = desc.getReplicationSpec();
     if (replicationSpec.isInReplicationScope()) {
-      dropPartitionForReplication(tbl, replicationSpec);
+      dropPartitionForReplication(table, replicationSpec);
     } else {
       dropPartitions();
     }
@@ -63,7 +66,7 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
     return 0;
   }
 
-  private void dropPartitionForReplication(Table tbl, ReplicationSpec replicationSpec) throws HiveException {
+  private void dropPartitionForReplication(Table table, ReplicationSpec replicationSpec) throws HiveException {
     /**
      * ALTER TABLE DROP PARTITION ... FOR REPLICATION(x) behaves as a DROP PARTITION IF OLDER THAN x
      *
@@ -79,7 +82,7 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
 // to the metastore to allow it to decide whether to drop a partition or not, depending on a Predicate on the
     // parameter key values.
 
-    if (tbl == null) {
+    if (table == null) {
       // If the table is missing, then the partitions would've been dropped as well. Just no-op.
       return;
     }
@@ -87,9 +90,9 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
     for (AlterTableDropPartitionDesc.PartitionDesc partSpec : desc.getPartSpecs()) {
       List<Partition> partitions = new ArrayList<>();
       try {
-        context.getDb().getPartitionsByExpr(tbl, partSpec.getPartSpec(), context.getConf(), partitions);
+        context.getDb().getPartitionsByExpr(table, partSpec.getPartSpec(), context.getConf(), partitions);
         for (Partition p : Iterables.filter(partitions, replicationSpec.allowEventReplacementInto())) {
-          context.getDb().dropPartition(tbl.getDbName(), tbl.getTableName(), p.getValues(), true);
+          context.getDb().dropPartition(table.getDbName(), table.getTableName(), p.getValues(), true);
         }
       } catch (NoSuchObjectException e) {
         // ignore NSOE because that means there's nothing to drop.
@@ -101,9 +104,17 @@ public class AlterTableDropPartitionOperation extends DDLOperation<AlterTableDro
 
   private void dropPartitions() throws HiveException {
     // ifExists is currently verified in DDLSemanticAnalyzer
-    List<Partition> droppedParts = context.getDb().dropPartitions(desc.getTableName(), desc.getPartSpecs(),
+    String[] names = Utilities.getDbTableName(desc.getTableName());
+
+    List<ObjectPair<Integer, byte[]>> partitionExpressions = new ArrayList<>(desc.getPartSpecs().size());
+    for (AlterTableDropPartitionDesc.PartitionDesc partSpec : desc.getPartSpecs()) {
+      partitionExpressions.add(new ObjectPair<>(partSpec.getPrefixLength(),
+          SerializationUtilities.serializeExpressionToKryo(partSpec.getPartSpec())));
+    }
+
+    List<Partition> droppedPartitions = context.getDb().dropPartitions(names[0], names[1], partitionExpressions,
         PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(desc.getIfPurge()));
-    for (Partition partition : droppedParts) {
+    for (Partition partition : droppedPartitions) {
       context.getConsole().printInfo("Dropped the partition " + partition.getName());
       // We have already locked the table, don't lock the partitions.
       DDLUtils.addIfAbsentByName(new WriteEntity(partition, WriteEntity.WriteType.DDL_NO_LOCK), context);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 0730ca6..522c20a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -31,7 +31,6 @@ import static org.apache.hadoop.hive.conf.Constants.MATERIALIZED_VIEW_REWRITING_
 import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_STORAGE;
 import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDefaultCatalog;
 import static org.apache.hadoop.hive.ql.io.AcidUtils.getFullTableName;
-import static org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.makeBinaryPredicate;
 import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_FORMAT;
 import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;
 
@@ -89,7 +88,15 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
-import org.apache.hadoop.hive.common.*;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
+import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.common.StatsSetupConst;
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.common.ValidTxnWriteIdList;
+import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
 import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable;
 import org.apache.hadoop.hive.common.log.InPlaceUpdate;
@@ -114,7 +121,6 @@ import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils;
 import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.ErrorMsg;
-import org.apache.hadoop.hive.ql.ddl.table.partition.AlterTableDropPartitionDesc;
 import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.FunctionUtils;
@@ -129,8 +135,6 @@ import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveAugmentMaterializationRule;
 import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc.LoadFileType;
 import org.apache.hadoop.hive.ql.session.CreateTableAutomaticGrant;
@@ -138,8 +142,6 @@ import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.mapred.InputFormat;
@@ -1478,21 +1480,6 @@ public class Hive {
     }
   }
 
-  private List<Table> getTableObjects(String dbName, List<String> tableNames) throws HiveException {
-    try {
-      return Lists.transform(getMSC().getTableObjectsByName(dbName, tableNames),
-        new com.google.common.base.Function<org.apache.hadoop.hive.metastore.api.Table, Table>() {
-          @Override
-          public Table apply(org.apache.hadoop.hive.metastore.api.Table table) {
-            return new Table(table);
-          }
-        }
-      );
-    } catch (Exception e) {
-      throw new HiveException(e);
-    }
-  }
-
   /**
    * Returns all existing tables from default database which match the given
    * pattern. The matching occurs as per Java regular expressions
@@ -1838,20 +1825,6 @@ public class Hive {
   }
 
   /**
-   * Get materialized views for the specified database that have enabled rewriting.
-   * @param dbName
-   * @return List of materialized view table objects
-   * @throws HiveException
-   */
-  private List<String> getMaterializedViewsForRewriting(String dbName) throws HiveException {
-    try {
-      return getMSC().getMaterializedViewsForRewriting(dbName);
-    } catch (Exception e) {
-      throw new HiveException(e);
-    }
-  }
-
-  /**
    * Get all existing database names.
    *
    * @return List of database names.
@@ -2388,13 +2361,6 @@ public class Hive {
     return destPath;
   }
 
-  private boolean areEventsForDmlNeeded(Table tbl, Partition oldPart) {
-    // For Acid IUD, add partition is a meta data only operation. So need to add the new files added
-    // information into the TXN_WRITE_NOTIFICATION_LOG table.
-    return conf.getBoolVar(ConfVars.FIRE_EVENTS_FOR_DML) && !tbl.isTemporary() &&
-            ((null != oldPart) || AcidUtils.isTransactionalTable(tbl));
-  }
-
   public static void listFilesInsideAcidDirectory(Path acidDir, FileSystem srcFs, List<Path> newFiles)
           throws IOException {
     // list out all the files/directory in the path
@@ -3168,10 +3134,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
                                   String partPath) throws HiveException, InvalidOperationException {
 
     alterPartitionSpecInMemory(tbl, partSpec, tpart, inheritTableSpecs, partPath);
-    String fullName = tbl.getTableName();
-    if (!org.apache.commons.lang.StringUtils.isEmpty(tbl.getDbName())) {
-      fullName = tbl.getFullyQualifiedName();
-    }
     alterPartition(tbl.getCatalogName(), tbl.getDbName(), tbl.getTableName(),
         new Partition(tbl, tpart), null, true);
   }
@@ -3343,22 +3305,15 @@ private void constructOneLBLocationMap(FileStatus fSta,
     }
   }
 
-  public boolean dropPartition(String tblName, List<String> part_vals, boolean deleteData)
+  public boolean dropPartition(String dbName, String tableName, List<String> partitionValues, boolean deleteData)
       throws HiveException {
-    String[] names = Utilities.getDbTableName(tblName);
-    return dropPartition(names[0], names[1], part_vals, deleteData);
-  }
-
-  public boolean dropPartition(String db_name, String tbl_name,
-      List<String> part_vals, boolean deleteData) throws HiveException {
-    return dropPartition(db_name, tbl_name, part_vals,
-                         PartitionDropOptions.instance().deleteData(deleteData));
+    return dropPartition(dbName, tableName, partitionValues, PartitionDropOptions.instance().deleteData(deleteData));
   }
 
-  public boolean dropPartition(String dbName, String tableName, List<String> partVals, PartitionDropOptions options)
-      throws HiveException {
+  public boolean dropPartition(String dbName, String tableName, List<String> partitionValues,
+      PartitionDropOptions options) throws HiveException {
     try {
-      return getMSC().dropPartition(dbName, tableName, partVals, options);
+      return getMSC().dropPartition(dbName, tableName, partitionValues, options);
     } catch (NoSuchObjectException e) {
       throw new HiveException("Partition or table doesn't exist.", e);
     } catch (Exception e) {
@@ -3366,116 +3321,14 @@ private void constructOneLBLocationMap(FileStatus fSta,
     }
   }
 
-  /**
-   * drop the partitions specified as directory names associated with the table.
-   *
-   * @param table object for which partition is needed
-   * @param partDirNames partition directories that need to be dropped
-   * @param deleteData whether data should be deleted from file system
-   * @param ifExists check for existence before attempting delete
-   *
-   * @return list of partition objects that were deleted
-   *
-   * @throws HiveException
-   */
-  public List<Partition> dropPartitions(Table table, List<String>partDirNames,
-      boolean deleteData, boolean ifExists) throws HiveException {
-    // partitions to be dropped in this batch
-    List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs = new ArrayList<>(partDirNames.size());
-
-    // parts of the partition
-    String[] parts = null;
-
-    // Expression splits of each part of the partition
-    String[] partExprParts = null;
-
-    // Column Types of all partitioned columns.  Used for generating partition specification
-    Map<String, String> colTypes = new HashMap<String, String>();
-    for (FieldSchema fs : table.getPartitionKeys()) {
-      colTypes.put(fs.getName(), fs.getType());
-    }
-
-    // Key to be used to save the partition to be dropped in partSpecs
-    int partSpecKey = 0;
-
-    for (String partDir : partDirNames) {
-      // The expression to identify the partition to be dropped
-      ExprNodeGenericFuncDesc expr = null;
-
-      // Split by "/" to identify partition parts
-      parts = partDir.split("/");
-
-      // Loop through the partitions and form the expression
-      for (String part : parts) {
-        // Split the partition predicate to identify column and value
-        partExprParts = part.split("=");
-
-        // Only two elements expected in partExprParts partition column name and partition value
-        assert partExprParts.length == 2;
-
-        // Partition Column
-        String partCol = partExprParts[0];
-
-        // Column Type
-        PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(colTypes.get(partCol));
-
-        // Form the expression node corresponding to column
-        ExprNodeColumnDesc column = new ExprNodeColumnDesc(pti, partCol, null, true);
-
-        // Build the expression based on the partition predicate
-        ExprNodeGenericFuncDesc op =
-            makeBinaryPredicate("=", column, new ExprNodeConstantDesc(pti, partExprParts[1]));
-
-        // the multiple parts to partition predicate are joined using and
-        expr = (expr == null) ? op : makeBinaryPredicate("and", expr, op);
-      }
-
-      // Add the expression to partition specification
-      partSpecs.add(new AlterTableDropPartitionDesc.PartitionDesc(expr, partSpecKey));
-
-      // Increment dropKey to get a new key for hash map
-      ++partSpecKey;
-    }
-
-    String[] names = Utilities.getDbTableName(table.getFullyQualifiedName());
-    return dropPartitions(names[0], names[1], partSpecs, deleteData, ifExists);
-  }
-
-  public List<Partition> dropPartitions(String tblName, List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs,
-      boolean deleteData, boolean ifExists) throws HiveException {
-    String[] names = Utilities.getDbTableName(tblName);
-    return dropPartitions(names[0], names[1], partSpecs, deleteData, ifExists);
-  }
-
-  public List<Partition> dropPartitions(String dbName, String tblName,
-      List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs,  boolean deleteData,
-      boolean ifExists) throws HiveException {
-    return dropPartitions(dbName, tblName, partSpecs,
-                          PartitionDropOptions.instance()
-                                              .deleteData(deleteData)
-                                              .ifExists(ifExists));
-  }
-
-  public List<Partition> dropPartitions(String tblName, List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs,
-                                        PartitionDropOptions dropOptions) throws HiveException {
-    String[] names = Utilities.getDbTableName(tblName);
-    return dropPartitions(names[0], names[1], partSpecs, dropOptions);
-  }
-
-  public List<Partition> dropPartitions(String dbName, String tblName,
-      List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs, PartitionDropOptions dropOptions)
-      throws HiveException {
+  public List<Partition> dropPartitions(String dbName, String tableName,
+      List<org.apache.hadoop.hive.metastore.utils.ObjectPair<Integer, byte[]>> partitionExpressions,
+      PartitionDropOptions dropOptions) throws HiveException {
     try {
-      Table tbl = getTable(dbName, tblName);
-      List<org.apache.hadoop.hive.metastore.utils.ObjectPair<Integer, byte[]>> partExprs =
-          new ArrayList<>(partSpecs.size());
-      for (AlterTableDropPartitionDesc.PartitionDesc partSpec : partSpecs) {
-        partExprs.add(new org.apache.hadoop.hive.metastore.utils.ObjectPair<>(partSpec.getPrefixLength(),
-            SerializationUtilities.serializeExpressionToKryo(partSpec.getPartSpec())));
-      }
-      List<org.apache.hadoop.hive.metastore.api.Partition> tParts = getMSC().dropPartitions(
-          dbName, tblName, partExprs, dropOptions);
-      return convertFromMetastore(tbl, tParts);
+      Table table = getTable(dbName, tableName);
+      List<org.apache.hadoop.hive.metastore.api.Partition> partitions = getMSC().dropPartitions(dbName, tableName,
+          partitionExpressions, dropOptions);
+      return convertFromMetastore(table, partitions);
     } catch (NoSuchObjectException e) {
       throw new HiveException("Partition or table doesn't exist.", e);
     } catch (Exception e) {
@@ -4271,7 +4124,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
       throw new HiveException(e.getMessage(), e);
     }
 
-    HdfsUtils.HadoopFileStatus destStatus = null;
     String configuredOwner = HiveConf.getVar(conf, ConfVars.HIVE_LOAD_DATA_OWNER);
 
     // If source path is a subdirectory of the destination path (or the other way around):
@@ -4286,7 +4138,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
     try {
       if (replace) {
         try{
-          destStatus = new HdfsUtils.HadoopFileStatus(conf, destFs, destf);
           //if destf is an existing directory:
           //if replace is true, delete followed by rename(mv) is equivalent to replace
           //if replace is false, rename (mv) actually move the src under dest dir
@@ -4299,7 +4150,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
         } catch (FileNotFoundException ignore) {
         }
       }
-      final HdfsUtils.HadoopFileStatus desiredStatus = destStatus;
       final SessionState parentSession = SessionState.get();
       if (isSrcLocal) {
         // For local src file, copy to hdfs
@@ -5996,4 +5846,3 @@ private void constructOneLBLocationMap(FileStatus fSta,
     }
   }
 }
-
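
For context, the sketch below is not part of the commit; the wrapper class, method name and the purge flag are hypothetical. It shows how a caller is now expected to drive the single remaining Hive.dropPartitions overload: split the qualified table name, serialize each partition expression to a Kryo byte[] paired with its prefix length, and carry the drop behaviour in PartitionDropOptions, mirroring the new AlterTableDropPartitionOperation.dropPartitions above.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.PartitionDropOptions;
import org.apache.hadoop.hive.metastore.utils.ObjectPair;
import org.apache.hadoop.hive.ql.ddl.table.partition.AlterTableDropPartitionDesc;
import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;

/** Hypothetical caller sketch; not part of this commit. */
public class DropPartitionsSketch {

  static List<Partition> dropMatchingPartitions(Hive db, String qualifiedTableName,
      List<AlterTableDropPartitionDesc.PartitionDesc> partSpecs, boolean purge) throws HiveException {
    // The consolidated API takes the database and table names separately.
    String[] names = Utilities.getDbTableName(qualifiedTableName);

    // Each partition spec becomes (prefix length, Kryo-serialized expression).
    List<ObjectPair<Integer, byte[]>> expressions = new ArrayList<>(partSpecs.size());
    for (AlterTableDropPartitionDesc.PartitionDesc spec : partSpecs) {
      expressions.add(new ObjectPair<>(spec.getPrefixLength(),
          SerializationUtilities.serializeExpressionToKryo(spec.getPartSpec())));
    }

    // deleteData / ifExists / purgeData now always travel in the options object.
    return db.dropPartitions(names[0], names[1], expressions,
        PartitionDropOptions.instance().deleteData(true).ifExists(true).purgeData(purge));
  }
}

With the convenience overloads that accepted a combined "db.table" string or raw boolean flags removed from Hive.java, building the serialized expressions now lives with the only caller that needs it.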