You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ma...@apache.org on 2020/03/13 04:41:08 UTC
[hive] branch master updated: HIVE-22998 : Dump partition info if
hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha
Medhi, reviewed by Mahesh Kumar Behera)
This is an automated email from the ASF dual-hosted git repository.
mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 109ecb9 HIVE-22998 : Dump partition info if hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
109ecb9 is described below
commit 109ecb99e50ac63add05a8a3b513b51e3e78a1cb
Author: Aasha Medhi <aa...@gmail.com>
AuthorDate: Fri Mar 13 10:09:47 2020 +0530
HIVE-22998 : Dump partition info if hive.repl.dump.metadata.only.for.external.table conf is enabled. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
Signed-off-by: Mahesh Kumar Behera <ma...@apache.org>
---
.../TestReplicationScenariosExternalTables.java | 7 ++++++-
...icationScenariosExternalTablesMetaDataOnly.java | 11 +++++++++--
.../hadoop/hive/ql/exec/repl/ReplDumpTask.java | 2 +-
.../hadoop/hive/ql/parse/ReplicationSpec.java | 12 ++++++++++++
.../hive/ql/parse/repl/dump/HiveWrapper.java | 3 ++-
.../hive/ql/parse/repl/dump/TableExport.java | 22 ++++++++++++++++------
.../hadoop/hive/ql/parse/repl/dump/Utils.java | 9 ++++++---
.../repl/dump/events/AbstractEventHandler.java | 2 +-
.../parse/repl/dump/events/AlterTableHandler.java | 3 ++-
.../parse/repl/dump/events/CreateTableHandler.java | 3 ++-
.../repl/dump/events/UpdatePartColStatHandler.java | 4 ++--
.../dump/events/UpdateTableColStatHandler.java | 3 ++-
12 files changed, 61 insertions(+), 20 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
index 81feaf5..1ba8003 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
@@ -167,7 +167,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
.run("select country from t2 where country = 'us'")
.verifyResult("us")
.run("select country from t2 where country = 'france'")
- .verifyResult("france");
+ .verifyResult("france")
+ .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"});
String hiveDumpLocation = tuple.dumpLocation + File.separator + ReplUtils.REPL_HIVE_BASE_DIR;
// Ckpt should be set on bootstrapped db.
@@ -343,6 +344,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
.verifyResults(new String[] { "bangalore", "pune", "mumbai" })
.run("select place from t2 where country='australia'")
.verifyResults(new String[] { "sydney" })
+ .run("show partitions t2")
+ .verifyResults(new String[] {"country=australia", "country=india"})
.verifyReplTargetProperty(replicatedDbName);
Path customPartitionLocation =
@@ -364,6 +367,8 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
.run("use " + replicatedDbName)
.run("select place from t2 where country='france'")
.verifyResults(new String[] { "paris" })
+ .run("show partitions t2")
+ .verifyResults(new String[] {"country=australia", "country=france", "country=india"})
.verifyReplTargetProperty(replicatedDbName);
// change the location of the partition via alter command
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
index 624f29b..c260a7d 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTablesMetaDataOnly.java
@@ -166,7 +166,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
.run("select country from t2 where country = 'us'")
.verifyResult(null)
.run("select country from t2 where country = 'france'")
- .verifyResult(null);
+ .verifyResult(null)
+ .run("show partitions t2").verifyResults(new String[] {"country=france", "country=india", "country=us"});
// Ckpt should be set on bootstrapped db.
String hiveDumpLocation = tuple.dumpLocation + File.separator + REPL_HIVE_BASE_DIR;
@@ -279,7 +280,9 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
.verifyResults(new String[] {"t2"})
.run("select place from t2")
.verifyResults(new String[] {})
- .verifyReplTargetProperty(replicatedDbName);
+ .verifyReplTargetProperty(replicatedDbName)
+ .run("show partitions t2")
+ .verifyResults(new String[] {"country=india"});
// add new data externally, to a partition, but under the table level top directory
Path partitionDir = new Path(externalTableLocation, "country=india");
@@ -302,6 +305,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
.verifyResults(new String[] {})
.run("select place from t2 where country='australia'")
.verifyResults(new String[] {})
+ .run("show partitions t2")
+ .verifyResults(new String[] {"country=australia", "country=india"})
.verifyReplTargetProperty(replicatedDbName);
Path customPartitionLocation =
@@ -323,6 +328,8 @@ public class TestReplicationScenariosExternalTablesMetaDataOnly extends BaseRepl
.run("use " + replicatedDbName)
.run("select place from t2 where country='france'")
.verifyResults(new String[] {})
+ .run("show partitions t2")
+ .verifyResults(new String[] {"country=australia", "country=france", "country=india"})
.verifyReplTargetProperty(replicatedDbName);
// change the location of the partition via alter command
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
index 92e45b4..aa59457 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/ReplDumpTask.java
@@ -615,7 +615,7 @@ public class ReplDumpTask extends Task<ReplDumpWork> implements Serializable {
exportPaths, tableSpec, tuple.replicationSpec, hiveDb, distCpDoAsUser, conf, mmCtx).write(false);
replLogger.tableLog(tblName, tableSpec.tableHandle.getTableType());
if (tableSpec.tableHandle.getTableType().equals(TableType.EXTERNAL_TABLE)
- || Utils.shouldDumpMetaDataOnly(tuple.object, conf)) {
+ || Utils.shouldDumpMetaDataOnly(conf)) {
return;
}
for (ReplPathMapping replPathMapping: replPathMappings) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
index 13e4a8c..5c8d0ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSpec.java
@@ -52,6 +52,7 @@ public class ReplicationSpec {
private boolean needDupCopyCheck = false;
//Determine if replication is done using repl or export-import
private boolean isRepl = false;
+ private boolean isMetadataOnlyForExternalTables = false;
// Key definitions related to replication.
public enum KEY {
@@ -280,6 +281,17 @@ public class ReplicationSpec {
}
/**
+ * @return true if this statement refers to a metadata-only operation for external tables.
+ */
+ public boolean isMetadataOnlyForExternalTables() {
+ return isMetadataOnlyForExternalTables;
+ }
+
+ public void setMetadataOnlyForExternalTables(boolean metadataOnlyForExternalTables) {
+ isMetadataOnlyForExternalTables = metadataOnlyForExternalTables;
+ }
+
+ /**
* @return true if this statement refers to insert-into or insert-overwrite operation.
*/
public boolean isReplace(){ return isReplace; }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
index f9648c8..a1f2fb9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/HiveWrapper.java
@@ -57,7 +57,8 @@ public class HiveWrapper {
public Tuple<Table> table(final String tableName, HiveConf conf) throws HiveException {
// Column statistics won't be accurate if we are dumping only metadata
- boolean getColStats = !Utils.shouldDumpMetaDataOnly(db.getTable(dbName, tableName), conf);
+ boolean getColStats = !Utils.shouldDumpMetaDataOnlyForExternalTables(db.getTable(dbName, tableName), conf)
+ && !Utils.shouldDumpMetaDataOnly(conf);
return new Tuple<>(functionForSpec, () -> db.getTable(dbName, tableName, true, false,
getColStats));
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
index a384c7e..a26b159 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/TableExport.java
@@ -75,11 +75,20 @@ public class TableExport {
? null
: tableSpec;
this.replicationSpec = replicationSpec;
- if (this.tableSpec != null && this.tableSpec.tableHandle!=null && (this.tableSpec.tableHandle.isView() ||
- Utils.shouldDumpMetaDataOnly(this.tableSpec.tableHandle, conf))) {
- this.replicationSpec.setIsMetadataOnly(true);
-
- this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+ if (this.tableSpec != null && this.tableSpec.tableHandle!=null) {
+ //If the table is a view, or the metadata-only dump flag (used by DAS) is set to true,
+ //enable isMetadataOnly.
+ if (this.tableSpec.tableHandle.isView() || Utils.shouldDumpMetaDataOnly(conf)) {
+ this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+ this.replicationSpec.setIsMetadataOnly(true);
+ }
+ //If the table is a view, or the metadata-only-for-external-tables dump flag is set to true,
+ //enable isMetadataOnlyForExternalTables.
+ if (this.tableSpec.tableHandle.isView()
+ || Utils.shouldDumpMetaDataOnlyForExternalTables(this.tableSpec.tableHandle, conf)) {
+ this.tableSpec.tableHandle.setStatsStateLikeNewTable();
+ this.replicationSpec.setMetadataOnlyForExternalTables(true);
+ }
}
this.db = db;
this.distCpDoAsUser = distCpDoAsUser;
@@ -110,6 +119,7 @@ public class TableExport {
if (tableSpec != null && tableSpec.tableHandle != null && tableSpec.tableHandle.isPartitioned()) {
if (tableSpec.specType == TableSpec.SpecType.TABLE_ONLY) {
// TABLE-ONLY, fetch partitions if regular export, don't if metadata-only
+ //For metadata only external tables, we still need the partition info
if (replicationSpec.isMetadataOnly()) {
return null;
} else {
@@ -315,7 +325,7 @@ public class TableExport {
AuthEntities authEntities = new AuthEntities();
try {
// Return if metadata-only
- if (replicationSpec.isMetadataOnly()) {
+ if (replicationSpec.isMetadataOnly() || replicationSpec.isMetadataOnlyForExternalTables()) {
return authEntities;
}
PartitionIterable partitions = getPartitions();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
index 6f8912b..5cb6f1b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/Utils.java
@@ -272,9 +272,12 @@ public class Utils {
}
}
- public static boolean shouldDumpMetaDataOnly(Table table, HiveConf conf) {
- return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY) ||
- (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) &&
+ public static boolean shouldDumpMetaDataOnly(HiveConf conf) {
+ return conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY);
+ }
+
+ public static boolean shouldDumpMetaDataOnlyForExternalTables(Table table, HiveConf conf) {
+ return (conf.getBoolVar(HiveConf.ConfVars.REPL_INCLUDE_EXTERNAL_TABLES) &&
table.getTableType().equals(TableType.EXTERNAL_TABLE) &&
conf.getBoolVar(HiveConf.ConfVars.REPL_DUMP_METADATA_ONLY_FOR_EXTERNAL_TABLE));
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
index 8046077..049c06b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AbstractEventHandler.java
@@ -94,7 +94,7 @@ abstract class AbstractEventHandler<T extends EventMessage> implements EventHand
throws IOException, LoginException, MetaException, HiveFatalException {
HiveConf hiveConf = withinContext.hiveConf;
String distCpDoAsUser = hiveConf.getVar(HiveConf.ConfVars.HIVE_DISTCP_DOAS_USER);
- if (!Utils.shouldDumpMetaDataOnly(table, withinContext.hiveConf)) {
+ if (!Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)) {
Path dataPath = new Path(withinContext.dumpRoot.toString(), EximUtil.DATA_PATH_NAME);
List<ReplChangeManager.FileInfo> filePaths = new ArrayList<>();
String[] decodedURISplits = ReplChangeManager.decodeFileUri(file);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
index aedf698..0d5d4ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/AlterTableHandler.java
@@ -229,7 +229,8 @@ class AlterTableHandler extends AbstractEventHandler<AlterTableMessage> {
// If we are not dumping metadata about a table, we shouldn't be dumping basic statistics
// as well, since that won't be accurate. So reset them to what they would look like for an
// empty table.
- if (Utils.shouldDumpMetaDataOnly(qlMdTableAfter, withinContext.hiveConf)) {
+ if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)
+ || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTableAfter, withinContext.hiveConf)) {
qlMdTableAfter.setStatsStateLikeNewTable();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
index c853223..7a6ddf9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/CreateTableHandler.java
@@ -65,7 +65,8 @@ class CreateTableHandler extends AbstractEventHandler<CreateTableMessage> {
// If we are not dumping data about a table, we shouldn't be dumping basic statistics
// as well, since that won't be accurate. So reset them to what they would look like for an
// empty table.
- if (Utils.shouldDumpMetaDataOnly(qlMdTable, withinContext.hiveConf)) {
+ if (Utils.shouldDumpMetaDataOnly(withinContext.hiveConf)
+ || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) {
qlMdTable.setStatsStateLikeNewTable();
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
index 432dd44..ba550e4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdatePartColStatHandler.java
@@ -46,9 +46,9 @@ class UpdatePartColStatHandler extends AbstractEventHandler<UpdatePartitionColum
event.getEventType());
return;
}
-
// Statistics without any data does not make sense.
- if (withinContext.replicationSpec.isMetadataOnly()) {
+ if (withinContext.replicationSpec.isMetadataOnly()
+ || Utils.shouldDumpMetaDataOnlyForExternalTables(new Table(tableObj), withinContext.hiveConf)) {
return;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
index 75ee41f..69d7330 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/repl/dump/events/UpdateTableColStatHandler.java
@@ -43,7 +43,8 @@ class UpdateTableColStatHandler extends AbstractEventHandler<UpdateTableColumnSt
}
// Statistics without data doesn't make sense.
- if (withinContext.replicationSpec.isMetadataOnly()) {
+ if (withinContext.replicationSpec.isMetadataOnly()
+ || Utils.shouldDumpMetaDataOnlyForExternalTables(qlMdTable, withinContext.hiveConf)) {
return;
}