You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ma...@apache.org on 2020/03/13 04:41:08 UTC

[hive] branch master updated: HIVE-22998 : Dump partition info if hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha Medhi, reviewed by Mahesh Kumar Behera)

This is an automated email from the ASF dual-hosted git repository.

mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 109ecb9  HIVE-22998 : Dump partition info if hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
109ecb9 is described below

commit 109ecb99e50ac63add05a8a3b513b51e3e78a1cb
Author: Aasha Medhi <aa...@gmail.com>
AuthorDate: Fri Mar 13 10:09:47 2020 +0530

    HIVE-22998 : Dump partition info if hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
    
    Signed-off-by: Mahesh Kumar Behera <ma...@apache.org>
---
 .../TestReplicationScenariosExternalTables.java    |  7 ++++++-
 ...icationScenariosExternalTablesMetaDataOnly.java | 11 +++++++++--
 .../hadoop/hive/ql/exec/repl/ReplDumpTask.java     |  2 +-
 .../hadoop/hive/ql/parse/ReplicationSpec.java      | 12 ++++++++++++
 .../hive/ql/parse/repl/dump/HiveWrapper.java       |  3 ++-
 .../hive/ql/parse/repl/dump/TableExport.java       | 22 ++++++++++++++++------
 .../hadoop/hive/ql/parse/repl/dump/Utils.java      |  9 ++++++---
 .../repl/dump/events/AbstractEventHandler.java     |  2 +-
 .../parse/repl/dump/events/AlterTableHandler.java  |  3 ++-
 .../parse/repl/dump/events/CreateTableHandler.java |  3 ++-
 .../repl/dump/events/UpdatePartColStatHandler.java |  4 ++--
 .../dump/events/UpdateTableColStatHandler.java     |  3 ++-
 12 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
index 81feaf5..1ba8003 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
@@ -167,7 +167,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
         .run("select country from t2 where country = 'us'")
         .verifyResult("us")
         .run("select country from t2 where country = 'france'")
-        .verifyResult("france");
+        .verifyResult("france")
+        .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"});
 
     String hiveDumpLocation = tuple.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR;
     // Ckpt should be set on bootstrapped db.
@@ -343,6 +344,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
         .verifyResults(new String[] { "bangalore", "pune", "mumbai" })
         .run("select place from t2 where country='australia'")
         .verifyResults(new String[] { "sydney" })
+        .run("show partitions t2")
+        .verifyResults(new String[] {"country=australia", "country=india"})
         .verifyReplTargetProperty(replicatedDbName);
 
     Path customPartitionLocation =
@@ -364,6 +367,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
         .run("use " + replicatedDbName)
         .run("select place from t2 where country='france'")
         .verifyResults(new String[] { "paris" })
+        .run("show partitions t2")
+        .verifyResults(new String[] {"country=australia", "country=france", "country=india"})
         .verifyReplTargetProperty(replicatedDbName);
 
     // change the location of the partition via alter command
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
index 624f29b..c260a7d 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
@@ -166,7 +166,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
         .run("select country from t2 where country = 'us'")
         .verifyResult(null)
         .run("select country from t2 where country = 'france'")
-        .verifyResult(null);
+        .verifyResult(null)
+        .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"});
 
     // Ckpt should be set on bootstrapped db.
     String hiveDumpLocation = tuple.dumpLocation + File.separator + REPL_HIVE_BASE_DIR;
@@ -279,7 +280,9 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
         .verifyResults(new String[] {"t2"})
         .run("select place from t2")
         .verifyResults(new String[] {})
-        .verifyReplTargetProperty(replicatedDbName);
+        .verifyReplTargetProperty(replicatedDbName)
+        .run("show partitions t2")
+        .verifyResults(new String[] {"country=india"});
 
     // add new  data externally, to a partition, but under the table level top directory
     Path partitionDir = new Path(externalTableLocation, "country=india");
@@ -302,6 +305,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
         .verifyResults(new String[] {})
         .run("select place from t2 where country='australia'")
         .verifyResults(new String[] {})
+        .run("show partitions t2")
+        .verifyResults(new String[] {"country=australia", "country=india"})
         .verifyReplTargetProperty(replicatedDbName);
 
     Path customPartitionLocation =
@@ -323,6 +328,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
         .run("use " + replicatedDbName)
         .run("select place from t2 where country='france'")
         .verifyResults(new String[] {})
+        .run("show partitions t2")
+        .verifyResults(new String[] {"country=australia", "country=france", "country=india"})
         .verifyReplTargetProperty(replicatedDbName);
 
     // change the location of the partition via alter command
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index 92e45b4..aa59457 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -615,7 +615,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
             exportPaths, tableSpec, tuple.replicationSpec, hiveDb, distCpDoAsUser, conf, mmCtx).write(false);
     replLogger.tableLog(tblName, tableSpec.tableHandle.getTableType());
     if (tableSpec.tableHandle.getTableType().equals(TableType.EXTERNAL_TABLE)
-            || Utils.shouldDumpMetaDataOnly(tuple.object, conf)) {
+            || Utils.shouldDumpMetaDataOnly(conf)) {
       return;
     }
     for (ReplPathMapping replPathMapping: replPathMappings) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
index 13e4a8c..5c8d0ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
@@ -52,6 +52,7 @@ public class ReplicationSpec {
   private boolean needDupCopyCheck = false;
   //Determine if replication is done using repl or export-import
   private boolean isRepl = false;
+  private boolean isMetadataOnlyForExternalTables = false;
 
   // Key definitions related to replication.
   public enum KEY {
@@ -280,6 +281,17 @@ public class ReplicationSpec {
   }
 
   /**
+   * @return true if this statement refers to a metadata-only operation on an external table or a view.
+   */
+  public boolean isMetadataOnlyForExternalTables() {
+    return isMetadataOnlyForExternalTables;
+  }
+
+  public void setMetadataOnlyForExternalTables(boolean metadataOnlyForExternalTables) {
+    isMetadataOnlyForExternalTables = metadataOnlyForExternalTables;
+  }
+
+  /**
    * @return true if this statement refers to insert-into or insert-overwrite operation.
    */
   public boolean isReplace(){ return isReplace; }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
index f9648c8..a1f2fb9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
@@ -57,7 +57,8 @@ public class HiveWrapper {
 
   public Tuple<Table> table(final String tableName, HiveConf conf) throws HiveException {
     // Column statistics won't be accurate if we are dumping only metadata
-    boolean getColStats = !Utils.shouldDumpMetaDataOnly(db.getTable(dbName, tableName), conf);
+    boolean getColStats = !Utils.shouldDumpMetaDataOnlyForExternalTables(db.getTable(dbName, tableName), conf)
+            && !Utils.shouldDumpMetaDataOnly(conf);
     return new Tuple<>(functionForSpec, () -> db.getTable(dbName, tableName, true, false,
             getColStats));
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
index a384c7e..a26b159 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
@@ -75,11 +75,20 @@ public class TableExport {
         ? null
         : tableSpec;
     this.replicationSpec = replicationSpec;
-    if (this.tableSpec != null && this.tableSpec.tableHandle!=null && (this.tableSpec.tableHandle.isView() ||
-            Utils.shouldDumpMetaDataOnly(this.tableSpec.tableHandle, conf))) {
-      this.replicationSpec.setIsMetadataOnly(true);
-
-      this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+    if (this.tableSpec != null && this.tableSpec.tableHandle!=null) {
+      // If the table is a view, or the dump-metadata-only flag (used by DAS) is set,
+      // enable isMetadataOnly.
+      if (this.tableSpec.tableHandle.isView() || Utils.shouldDumpMetaDataOnly(conf)) {
+        this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+        this.replicationSpec.setIsMetadataOnly(true);
+      }
+      // If the table is a view, or the dump-metadata-only-for-external-tables flag applies to it,
+      // enable isMetadataOnlyForExternalTables.
+      if (this.tableSpec.tableHandle.isView()
+              || Utils.shouldDumpMetaDataOnlyForExternalTables(this.tableSpec.tableHandle, conf)) {
+        this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+        this.replicationSpec.setMetadataOnlyForExternalTables(true);
+      }
     }
     this.db = db;
     this.distCpDoAsUser = distCpDoAsUser;
@@ -110,6 +119,7 @@ public class TableExport {
       if (tableSpec != null && tableSpec.tableHandle != null && tableSpec.tableHandle.isPartitioned()) {
         if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) {
           // TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
+          //For metadata only external tables, we still need the partition info
           if (replicationSpec.isMetadataOnly()) {
             return null;
           } else {
@@ -315,7 +325,7 @@ public class TableExport {
     AuthEntities authEntities = new AuthEntities();
     try {
       // Return if metadata-only
-      if (replicationSpec.isMetadataOnly()) {
+      if (replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) {
         return authEntities;
       }
       PartitionIterable partitions = getPartitions();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
index 6f8912b..5cb6f1b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
@@ -272,9 +272,12 @@ public class Utils {
     }
   }
 
-  public static boolean shouldDumpMetaDataOnly(Table table, HiveConf conf) {
-    return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY) ||
-            (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) &&
+  public static boolean shouldDumpMetaDataOnly(HiveConf conf) {
+    return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY);
+  }
+
+  public static boolean shouldDumpMetaDataOnlyForExternalTables(Table table, HiveConf conf) {
+    return (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) &&
                     table.getTableType().equals(TableType.EXTERNAL_TABLE) &&
                     conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE));
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
index 8046077..049c06b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
@@ -94,7 +94,7 @@ abstract class AbstractEventHandler<T extends EventMessage> implements EventHand
           throws IOException, LoginException, MetaException, HiveFatalException {
     HiveConf hiveConf = withinContext.hiveConf;
     String distCpDoAsUser = hiveConf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER);
-    if (!Utils.shouldDumpMetaDataOnly(table, withinContext.hiveConf)) {
+    if (!Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)) {
       Path dataPath = new Path(withinContext.dumpRoot.toString(), EximUtil.DATA_PATH_NAME);
       List<ReplChangeManager.FileInfo> filePaths = new ArrayList<>();
       String[] decodedURISplits = ReplChangeManager.decodeFileUri(file);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
index aedf698..0d5d4ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
@@ -229,7 +229,8 @@ class AlterTableHandler extends AbstractEventHandler<AlterTableMessage> {
       // If we are not dumping metadata about a table, we shouldn't be dumping basic statistics
       // as well, since that won't be accurate. So reset them to what they would look like for an
       // empty table.
-      if (Utils.shouldDumpMetaDataOnly(qlMdTableAfter, withinContext.hiveConf)) {
+      if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)
+              || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTableAfter, withinContext.hiveConf)) {
         qlMdTableAfter.setStatsStateLikeNewTable();
       }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
index c853223..7a6ddf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
@@ -65,7 +65,8 @@ class CreateTableHandler extends AbstractEventHandler<CreateTableMessage> {
     // If we are not dumping data about a table, we shouldn't be dumping basic statistics
     // as well, since that won't be accurate. So reset them to what they would look like for an
     // empty table.
-    if (Utils.shouldDumpMetaDataOnly(qlMdTable, withinContext.hiveConf)) {
+    if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)
+            || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) {
       qlMdTable.setStatsStateLikeNewTable();
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
index 432dd44..ba550e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
@@ -46,9 +46,9 @@ class UpdatePartColStatHandler extends AbstractEventHandler<UpdatePartitionColum
               event.getEventType());
       return;
     }
-
     // Statistics without any data does not make sense.
-    if (withinContext.replicationSpec.isMetadataOnly()) {
+    if (withinContext.replicationSpec.isMetadataOnly()
+            || Utils.shouldDumpMetaDataOnlyForExternalTables(new Table(tableObj), withinContext.hiveConf)) {
       return;
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
index 75ee41f..69d7330 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
@@ -43,7 +43,8 @@ class UpdateTableColStatHandler extends AbstractEventHandler<UpdateTableColumnSt
     }
 
     // Statistics without data doesn't make sense.
-    if (withinContext.replicationSpec.isMetadataOnly()) {
+    if (withinContext.replicationSpec.isMetadataOnly()
+            || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) {
       return;
     }