You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2021/09/23 05:33:36 UTC
[hive] branch master updated: HIVE-25535: Control cleaning obsolete
directories/files of a table via property (Ashish Sharma,
reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)
This is an automated email from the ASF dual-hosted git repository.
sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 817ff27 HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)
817ff27 is described below
commit 817ff27e96ecfce6c70c5850830e55a6e6f37da6
Author: Ashish Kumar Sharma <as...@gmail.com>
AuthorDate: Thu Sep 23 11:03:18 2021 +0530
HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)
Signed-off-by: Sankar Hariappan <sa...@apache.org>
Closes (#2651)
---
.../hadoop/hive/ql/txn/compactor/Cleaner.java | 14 +++
.../hive/ql/txn/compactor/CompactorTest.java | 7 +-
.../hadoop/hive/ql/txn/compactor/TestCleaner.java | 113 +++++++++++++++++++++
.../thrift/gen-cpp/hive_metastore_constants.cpp | 4 +
.../gen/thrift/gen-cpp/hive_metastore_constants.h | 2 +
.../metastore/api/hive_metastoreConstants.java | 3 +
.../src/gen/thrift/gen-php/metastore/Constant.php | 12 +++
.../gen/thrift/gen-py/hive_metastore/constants.py | 2 +
.../gen/thrift/gen-rb/hive_metastore_constants.rb | 4 +
.../hive/metastore/utils/MetaStoreUtils.java | 8 ++
.../src/main/thrift/hive_metastore.thrift | 2 +
11 files changed, 170 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
index 13fc8bc..8149494 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.metrics.PerfLogger;
import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.io.AcidDirectory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -179,6 +180,13 @@ public class Cleaner extends MetaStoreCompactorThread {
txnHandler.markCleaned(ci);
return;
}
+ if (MetaStoreUtils.isNoCleanUpSet(t.getParameters())) {
+ // The table was marked no clean up true.
+ LOG.info("Skipping table " + ci.getFullTableName() + " clean up, as NO_CLEANUP set to true");
+ txnHandler.markCleaned(ci);
+ return;
+ }
+
Partition p = null;
if (ci.partName != null) {
p = resolvePartition(ci);
@@ -189,6 +197,12 @@ public class Cleaner extends MetaStoreCompactorThread {
txnHandler.markCleaned(ci);
return;
}
+ if (MetaStoreUtils.isNoCleanUpSet(p.getParameters())) {
+ // The partition was marked no clean up true.
+ LOG.info("Skipping partition " + ci.getFullPartitionName() + " clean up, as NO_CLEANUP set to true");
+ txnHandler.markCleaned(ci);
+ return;
+ }
}
StorageDescriptor sd = resolveStorageDescriptor(t, p);
final String location = sd.getLocation();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 66335c9..b3788e4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -201,12 +201,17 @@ public abstract class CompactorTest {
}
protected Partition newPartition(Table t, String value, List<Order> sortCols) throws Exception {
+ return newPartition(t, value, sortCols, new HashMap<>());
+ }
+
+ protected Partition newPartition(Table t, String value, List<Order> sortCols, Map<String, String> parameters)
+ throws Exception {
Partition part = new Partition();
part.addToValues(value);
part.setDbName(t.getDbName());
part.setTableName(t.getTableName());
part.setSd(newStorageDescriptor(getLocation(t.getTableName(), value), sortCols));
- part.setParameters(new HashMap<String, String>());
+ part.setParameters(parameters);
ms.add_partition(part);
return part;
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
index 3c02606..665d47c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
@@ -24,6 +24,8 @@ import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
import org.apache.hadoop.hive.metastore.api.CompactionRequest;
import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.api.GetPartitionRequest;
+import org.apache.hadoop.hive.metastore.api.GetTableRequest;
import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest;
import org.apache.hadoop.hive.metastore.api.FindNextCompactRequest;
import org.apache.hadoop.hive.metastore.api.Partition;
@@ -43,7 +45,9 @@ import org.junit.Test;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RETENTION_TIME;
@@ -604,4 +608,113 @@ public class TestCleaner extends CompactorTest {
compactorTestCleanup();
}
+ @Test
+ public void NoCleanupAfterMajorCompaction() throws Exception {
+ Map<String, String> parameters = new HashMap<>();
+
+ //With no cleanup true
+ parameters.put("no_cleanup", "true");
+ Table t = newTable("default", "dcamc", false, parameters);
+
+ addBaseFile(t, null, 20L, 20);
+ addDeltaFile(t, null, 21L, 22L, 2);
+ addDeltaFile(t, null, 23L, 24L, 2);
+ addBaseFile(t, null, 25L, 25);
+
+ burnThroughTransactions("default", "dcamc", 25);
+
+ CompactionRequest rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR);
+ compactInTxn(rqst);
+
+ startCleaner();
+ // Check there are no compactions requests left.
+ ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+ Assert.assertEquals(1, rsp.getCompactsSize());
+ Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+ // Check that the files are not removed
+ List<Path> paths = getDirectories(conf, t, null);
+ Assert.assertEquals(4, paths.size());
+
+ //With no clean up false
+ t = ms.getTable(new GetTableRequest("default", "dcamc"));
+ t.getParameters().put("no_cleanup", "false");
+ ms.alter_table("default", "dcamc", t);
+ rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR);
+ compactInTxn(rqst);
+
+ startCleaner();
+ // Check there are no compactions requests left.
+ rsp = txnHandler.showCompact(new ShowCompactRequest());
+ Assert.assertEquals(2, rsp.getCompactsSize());
+ Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+ // Check that the files are not removed
+ paths = getDirectories(conf, t, null);
+ Assert.assertEquals(1, paths.size());
+ Assert.assertEquals("base_25", paths.get(0).getName());
+ }
+
+ @Test
+ public void noCleanupAfterMinorCompactionOnPartition() throws Exception {
+ Map<String, String> parameters = new HashMap<>();
+ parameters.put("NO_CLEANUP", "True");
+ Table t = newTable("default", "dcamicop", true);
+ Partition p = newPartition(t, "today", null, parameters);
+
+ addBaseFile(t, p, 20L, 20);
+ addDeltaFile(t, p, 21L, 22L, 2);
+ addDeltaFile(t, p, 23L, 24L, 2);
+ addDeltaFile(t, p, 21L, 24L, 4);
+
+ burnThroughTransactions("default", "dcamicop", 25);
+
+ CompactionRequest rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR);
+ rqst.setPartitionname("ds=today");
+ compactInTxn(rqst);
+
+ startCleaner();
+
+ // Check there are no compactions requests left.
+ ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+ Assert.assertEquals(1, rsp.getCompactsSize());
+ Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+ // Check that the files are not removed
+ List<Path> paths = getDirectories(conf, t, p);
+ Assert.assertEquals(4, paths.size());
+
+ // compaction with no cleanup false
+ ArrayList<String> list = new ArrayList<>();
+ list.add("ds=today");
+ p = ms.getPartition("default", "dcamicop", "ds=today");
+ p.getParameters().put("NO_CLEANUP", "false");
+ ms.alter_partition("default", "dcamicop", p);
+ rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR);
+ rqst.setPartitionname("ds=today");
+ compactInTxn(rqst);
+
+ startCleaner();
+
+ // Check there are no compactions requests left.
+ rsp = txnHandler.showCompact(new ShowCompactRequest());
+ Assert.assertEquals(2, rsp.getCompactsSize());
+ Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+ // Check that the files are removed
+ paths = getDirectories(conf, t, p);
+ Assert.assertEquals(2, paths.size());
+ boolean sawBase = false, sawDelta = false;
+ for (Path path : paths) {
+ if (path.getName().equals("base_20")) {
+ sawBase = true;
+ } else if (path.getName().equals(makeDeltaDirNameCompacted(21, 24))) {
+ sawDelta = true;
+ } else {
+ Assert.fail("Unexpected file " + path.getName());
+ }
+ }
+ Assert.assertTrue(sawBase);
+ Assert.assertTrue(sawDelta);
+ }
}
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
index eee3633..1b981f5 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
@@ -77,6 +77,10 @@ hive_metastoreConstants::hive_metastoreConstants() {
PARTITION_TRANSFORM_SPEC = "partition_transform_spec";
+ NO_CLEANUP = "no_cleanup";
+
+ CTAS_LEGACY_CONFIG = "create_table_as_external";
+
}
}}} // namespace
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
index 284f082..d5f7cdf 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
@@ -48,6 +48,8 @@ class hive_metastoreConstants {
std::string JDBC_CONFIG_PREFIX;
std::string TABLE_IS_CTAS;
std::string PARTITION_TRANSFORM_SPEC;
+ std::string NO_CLEANUP;
+ std::string CTAS_LEGACY_CONFIG;
};
extern const hive_metastoreConstants g_hive_metastore_constants;
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
index 1a42b52..2851b22 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
@@ -75,5 +75,8 @@ package org.apache.hadoop.hive.metastore.api;
public static final java.lang.String PARTITION_TRANSFORM_SPEC = "partition_transform_spec";
+ public static final java.lang.String NO_CLEANUP = "no_cleanup";
+
public static final java.lang.String CTAS_LEGACY_CONFIG = "create_table_as_external";
+
}
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
index 1335e99..b6f4e80 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
@@ -51,6 +51,8 @@ final class Constant extends \Thrift\Type\TConstant
static protected $JDBC_CONFIG_PREFIX;
static protected $TABLE_IS_CTAS;
static protected $PARTITION_TRANSFORM_SPEC;
+ static protected $NO_CLEANUP;
+ static protected $CTAS_LEGACY_CONFIG;
protected static function init_DDL_TIME()
{
@@ -216,4 +218,14 @@ final class Constant extends \Thrift\Type\TConstant
{
return "partition_transform_spec";
}
+
+ protected static function init_NO_CLEANUP()
+ {
+ return "no_cleanup";
+ }
+
+ protected static function init_CTAS_LEGACY_CONFIG()
+ {
+ return "create_table_as_external";
+ }
}
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
index 9b7332b..ef5a2b1 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
@@ -45,3 +45,5 @@ DRUID_CONFIG_PREFIX = "druid."
JDBC_CONFIG_PREFIX = "hive.sql."
TABLE_IS_CTAS = "created_with_ctas"
PARTITION_TRANSFORM_SPEC = "partition_transform_spec"
+NO_CLEANUP = "no_cleanup"
+CTAS_LEGACY_CONFIG = "create_table_as_external"
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
index 042e65e..d441f3a 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
@@ -73,3 +73,7 @@ TABLE_IS_CTAS = %q"created_with_ctas"
PARTITION_TRANSFORM_SPEC = %q"partition_transform_spec"
+NO_CLEANUP = %q"no_cleanup"
+
+CTAS_LEGACY_CONFIG = %q"create_table_as_external"
+
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index cb2f425..48d2bb1 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -1119,4 +1119,12 @@ public class MetaStoreUtils {
*/
SOURCE, TARGET;
}
+
+ public static boolean isNoCleanUpSet(Map<String, String> parameters) {
+ String noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP);
+ if (noCleanUp == null) {
+ noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP.toUpperCase());
+ }
+ return noCleanUp != null && noCleanUp.equalsIgnoreCase("true");
+ }
}
diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
index c81ba2e..941fe58 100644
--- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
+++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
@@ -3083,3 +3083,5 @@ const string DRUID_CONFIG_PREFIX = "druid.",
const string JDBC_CONFIG_PREFIX = "hive.sql.",
const string TABLE_IS_CTAS = "created_with_ctas",
const string PARTITION_TRANSFORM_SPEC = "partition_transform_spec",
+const string NO_CLEANUP = "no_cleanup",
+const string CTAS_LEGACY_CONFIG = "create_table_as_external",
\ No newline at end of file