You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pv...@apache.org on 2018/06/14 08:11:38 UTC

hive git commit: HIVE-19718: Adding partitions in bulk also fetches table for each partition (Peter Vary, reviewed by Vihang Karajgaonkar and Alexander Kolbasov)

Repository: hive
Updated Branches:
  refs/heads/master 1bcf40329 -> bcbd2d529


HIVE-19718: Adding partitions in bulk also fetches table for each partition (Peter Vary, reviewed by Vihang Karajgaonkar and Alexander Kolbasov)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bcbd2d52
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bcbd2d52
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bcbd2d52

Branch: refs/heads/master
Commit: bcbd2d5299ddc937f1488a97faec23145b22cded
Parents: 1bcf403
Author: Peter Vary <pv...@cloudera.com>
Authored: Thu Jun 14 10:09:05 2018 +0200
Committer: Peter Vary <pv...@cloudera.com>
Committed: Thu Jun 14 10:09:05 2018 +0200

----------------------------------------------------------------------
 .../listener/DummyRawStoreFailEvent.java        |  5 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    | 11 ++-
 .../hadoop/hive/metastore/ObjectStore.java      | 97 +++++++++++++++-----
 .../apache/hadoop/hive/metastore/RawStore.java  |  4 +-
 .../hive/metastore/cache/CachedStore.java       |  7 +-
 .../DummyRawStoreControlledCommit.java          |  5 +-
 .../DummyRawStoreForJdoConnection.java          |  3 +-
 7 files changed, 96 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
----------------------------------------------------------------------
diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
index aa66c84..8f9a03f 100644
--- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
+++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java
@@ -779,8 +779,9 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable {
 
   @Override
   public boolean doesPartitionExist(String catName, String dbName, String tableName,
-                                    List<String> partVals) throws MetaException, NoSuchObjectException {
-    return objectStore.doesPartitionExist(catName, dbName, tableName, partVals);
+                                    List<FieldSchema> partKeys, List<String> partVals)
+      throws MetaException, NoSuchObjectException {
+    return objectStore.doesPartitionExist(catName, dbName, tableName, partKeys, partVals);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 278c58c..9241e29 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -3304,7 +3304,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
             throw new MetaException("Partition value cannot be null.");
           }
 
-          boolean shouldAdd = startAddPartition(ms, part, ifNotExists);
+          boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), ifNotExists);
           if (!shouldAdd) {
             existingParts.add(part);
             LOG.info("Not adding partition {} as it already exists", part);
@@ -3570,7 +3570,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
             throw new MetaException("The partition values cannot be null or empty.");
           }
 
-          boolean shouldAdd = startAddPartition(ms, part, ifNotExists);
+          boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), ifNotExists);
           if (!shouldAdd) {
             LOG.info("Not adding partition {} as it already exists", part);
             continue;
@@ -3678,11 +3678,12 @@ public class HiveMetaStore extends ThriftHiveMetastore {
     }
 
     private boolean startAddPartition(
-        RawStore ms, Partition part, boolean ifNotExists) throws TException {
+        RawStore ms, Partition part, List<FieldSchema> partitionKeys, boolean ifNotExists)
+        throws TException {
       MetaStoreUtils.validatePartitionNameCharacters(part.getValues(),
           partitionValidationPattern);
       boolean doesExist = ms.doesPartitionExist(part.getCatName(),
-          part.getDbName(), part.getTableName(), part.getValues());
+          part.getDbName(), part.getTableName(), partitionKeys, part.getValues());
       if (doesExist && !ifNotExists) {
         throw new AlreadyExistsException("Partition already exists: " + part);
       }
@@ -3805,7 +3806,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         if (part.getValues() == null || part.getValues().isEmpty()) {
           throw new MetaException("The partition values cannot be null or empty.");
         }
-        boolean shouldAdd = startAddPartition(ms, part, false);
+        boolean shouldAdd = startAddPartition(ms, part, tbl.getPartitionKeys(), false);
         assert shouldAdd; // start would throw if it already existed here
         boolean madeDir = createLocationForAddedPartition(tbl, part);
         try {

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 191c535..660b119 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -1934,7 +1934,6 @@ public class ObjectStore implements RawStore, Configurable {
         lowered_tbl_names.add(normalizeIdentifier(t));
       }
       query = pm.newQuery(MTable.class);
-//<<<<<<< HEAD
       query.setFilter("database.name == db && database.catalogName == cat && tbl_names.contains(tableName)");
       query.declareParameters("java.lang.String db, java.lang.String cat, java.util.Collection tbl_names");
       Collection mtables = (Collection) query.execute(db, catName, lowered_tbl_names);
@@ -2331,7 +2330,7 @@ public class ObjectStore implements RawStore, Configurable {
           throw new MetaException("Partition does not belong to target table "
               + dbName + "." + tblName + ": " + part);
         }
-        MPartition mpart = convertToMPart(part, true);
+        MPartition mpart = convertToMPart(part, table, true);
         toPersist.add(mpart);
         int now = (int)(System.currentTimeMillis()/1000);
         if (tabGrants != null) {
@@ -2367,11 +2366,11 @@ public class ObjectStore implements RawStore, Configurable {
   }
 
   private boolean isValidPartition(
-      Partition part, boolean ifNotExists) throws MetaException {
+      Partition part, List<FieldSchema> partitionKeys, boolean ifNotExists) throws MetaException {
     MetaStoreUtils.validatePartitionNameCharacters(part.getValues(),
         partitionValidationPattern);
     boolean doesExist = doesPartitionExist(part.getCatName(),
-        part.getDbName(), part.getTableName(), part.getValues());
+        part.getDbName(), part.getTableName(), partitionKeys, part.getValues());
     if (doesExist && !ifNotExists) {
       throw new MetaException("Partition already exists: " + part);
     }
@@ -2402,11 +2401,12 @@ public class ObjectStore implements RawStore, Configurable {
 
       int now = (int)(System.currentTimeMillis()/1000);
 
+      List<FieldSchema> partitionKeys = convertToFieldSchemas(table.getPartitionKeys());
       while (iterator.hasNext()) {
         Partition part = iterator.next();
 
-        if (isValidPartition(part, ifNotExists)) {
-          MPartition mpart = convertToMPart(part, true);
+        if (isValidPartition(part, partitionKeys, ifNotExists)) {
+          MPartition mpart = convertToMPart(part, table, true);
           pm.makePersistent(mpart);
           if (tabGrants != null) {
             for (MTablePrivilege tab : tabGrants) {
@@ -2506,26 +2506,62 @@ public class ObjectStore implements RawStore, Configurable {
     return part;
   }
 
+  /**
+   * Gets the MPartition object. Use this method only if the partition name is not available,
+   * because it has to query the table to obtain the partition keys.
+   * @param catName The catalogue
+   * @param dbName The database
+   * @param tableName The table
+   * @param part_vals The values defining the partition
+   * @return The MPartition object in the backend database
+   * @throws MetaException
+   */
   private MPartition getMPartition(String catName, String dbName, String tableName, List<String> part_vals)
       throws MetaException {
-    List<MPartition> mparts = null;
-    MPartition ret = null;
-    boolean commited = false;
-    Query query = null;
+    catName = normalizeIdentifier(catName);
+    dbName = normalizeIdentifier(dbName);
+    tableName = normalizeIdentifier(tableName);
+    boolean committed = false;
+    MPartition result = null;
     try {
       openTransaction();
-      catName = normalizeIdentifier(catName);
-      dbName = normalizeIdentifier(dbName);
-      tableName = normalizeIdentifier(tableName);
       MTable mtbl = getMTable(catName, dbName, tableName);
       if (mtbl == null) {
-        commited = commitTransaction();
         return null;
       }
       // Change the query to use part_vals instead of the name which is
       // redundant TODO: callers of this often get part_vals out of name for no reason...
       String name =
           Warehouse.makePartName(convertToFieldSchemas(mtbl.getPartitionKeys()), part_vals);
+      result = getMPartition(catName, dbName, tableName, name);
+      committed = commitTransaction();
+    } finally {
+      rollbackAndCleanup(committed, (Query)null);
+    }
+    return result;
+  }
+
+  /**
+   * Gets the MPartition object. Use this method if the partition name is already available, so
+   * that the table object is not queried again.
+   * @param catName The catalogue
+   * @param dbName The database
+   * @param tableName The table
+   * @param name The partition name
+   * @return The MPartition object in the backend database
+   * @throws MetaException
+   */
+  private MPartition getMPartition(String catName, String dbName, String tableName,
+      String name) throws MetaException {
+    catName = normalizeIdentifier(catName);
+    dbName = normalizeIdentifier(dbName);
+    tableName = normalizeIdentifier(tableName);
+    List<MPartition> mparts = null;
+    MPartition ret = null;
+    boolean commited = false;
+    Query query = null;
+    try {
+      openTransaction();
       query =
           pm.newQuery(MPartition.class,
               "table.tableName == t1 && table.database.name == t2 && partitionName == t3 " +
@@ -2566,7 +2602,7 @@ public class ObjectStore implements RawStore, Configurable {
    * to the same one as the table's storage descriptor.
    * @param part the partition to convert
    * @param useTableCD whether to try to use the parent table's column descriptor.
-   * @return the model partition object
+   * @return the model partition object, or null if the input partition is null.
    * @throws InvalidObjectException
    * @throws MetaException
    */
@@ -2576,6 +2612,26 @@ public class ObjectStore implements RawStore, Configurable {
       return null;
     }
     MTable mt = getMTable(part.getCatName(), part.getDbName(), part.getTableName());
+    return convertToMPart(part, mt, useTableCD);
+  }
+
+  /**
+   * Convert a Partition object into an MPartition, which is an object backed by the db
+   * If the Partition's set of columns is the same as the parent table's AND useTableCD
+   * is true, then this partition's storage descriptor's column descriptor will point
+   * to the same one as the table's storage descriptor.
+   * @param part the partition to convert
+   * @param mt the parent table object
+   * @param useTableCD whether to try to use the parent table's column descriptor.
+   * @return the model partition object, or null if the input partition is null.
+   * @throws InvalidObjectException
+   * @throws MetaException
+   */
+  private MPartition convertToMPart(Partition part, MTable mt, boolean useTableCD)
+      throws InvalidObjectException, MetaException {
+    if (part == null) {
+      return null;
+    }
     if (mt == null) {
       throw new InvalidObjectException(
           "Partition doesn't have a valid table or database name");
@@ -9212,14 +9268,11 @@ public class ObjectStore implements RawStore, Configurable {
   }
 
   @Override
-  public boolean doesPartitionExist(String catName, String dbName, String tableName, List<String>
-      partVals)
+  public boolean doesPartitionExist(String catName, String dbName, String tableName,
+                                    List<FieldSchema> partKeys, List<String> partVals)
       throws MetaException {
-    try {
-      return this.getPartition(catName, dbName, tableName, partVals) != null;
-    } catch (NoSuchObjectException e) {
-      return false;
-    }
+    String name = Warehouse.makePartName(partKeys, partVals);
+    return this.getMPartition(catName, dbName, tableName, name) != null;
   }
 
   private void debugLog(String message) {

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
index b2d4283..bbbdf21 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java
@@ -323,13 +323,15 @@ public interface RawStore extends Configurable {
    * @param catName catalog name.
    * @param dbName database name.
    * @param tableName table name.
+   * @param partKeys list of partition keys used to generate the partition name.
    * @param part_vals list of partition values.
    * @return true if the partition exists, false otherwise.
    * @throws MetaException failure reading RDBMS
    * @throws NoSuchObjectException this is never thrown.
    */
   boolean doesPartitionExist(String catName, String dbName, String tableName,
-      List<String> part_vals) throws MetaException, NoSuchObjectException;
+      List<FieldSchema> partKeys, List<String> part_vals)
+      throws MetaException, NoSuchObjectException;
 
   /**
    * Drop a partition.

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index 2f31c68..7c3588d 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -971,17 +971,18 @@ public class CachedStore implements RawStore, Configurable {
 
   @Override
   public boolean doesPartitionExist(String catName, String dbName, String tblName,
-      List<String> part_vals) throws MetaException, NoSuchObjectException {
+      List<FieldSchema> partKeys, List<String> part_vals)
+      throws MetaException, NoSuchObjectException {
     catName = normalizeIdentifier(catName);
     dbName = StringUtils.normalizeIdentifier(dbName);
     tblName = StringUtils.normalizeIdentifier(tblName);
     if (!shouldCacheTable(catName, dbName, tblName)) {
-      return rawStore.doesPartitionExist(catName, dbName, tblName, part_vals);
+      return rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, part_vals);
     }
     Table tbl = sharedCache.getTableFromCache(catName, dbName, tblName);
     if (tbl == null) {
       // The table containing the partition is not yet loaded in cache
-      return rawStore.doesPartitionExist(catName, dbName, tblName, part_vals);
+      return rawStore.doesPartitionExist(catName, dbName, tblName, partKeys, part_vals);
     }
     return sharedCache.existPartitionFromCache(catName, dbName, tblName, part_vals);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
index 58af0df..7c7429d 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
@@ -740,8 +740,9 @@ public class DummyRawStoreControlledCommit implements RawStore, Configurable {
 
   @Override
   public boolean doesPartitionExist(String catName, String dbName, String tableName,
-      List<String> partVals) throws MetaException, NoSuchObjectException {
-    return objectStore.doesPartitionExist(catName, dbName, tableName, partVals);
+      List<FieldSchema> partKeys, List<String> partVals)
+      throws MetaException, NoSuchObjectException {
+    return objectStore.doesPartitionExist(catName, dbName, tableName, partKeys, partVals);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hive/blob/bcbd2d52/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
index 1823430..e4f2a17 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
@@ -750,7 +750,8 @@ public class DummyRawStoreForJdoConnection implements RawStore {
 
   @Override
   public boolean doesPartitionExist(String catName, String dbName, String tableName,
-      List<String> partVals) throws MetaException, NoSuchObjectException {
+      List<FieldSchema> partKeys, List<String> partVals)
+      throws MetaException, NoSuchObjectException {
     return false;
   }