You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sa...@apache.org on 2021/09/23 05:33:36 UTC

[hive] branch master updated: HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)

This is an automated email from the ASF dual-hosted git repository.

sankarh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 817ff27  HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)
817ff27 is described below

commit 817ff27e96ecfce6c70c5850830e55a6e6f37da6
Author: Ashish Kumar Sharma <as...@gmail.com>
AuthorDate: Thu Sep 23 11:03:18 2021 +0530

    HIVE-25535: Control cleaning obsolete directories/files of a table via property (Ashish Sharma, reviewed by Denys Kuzmenko, Adesh Rao, Sankar Hariappan)
    
    Signed-off-by: Sankar Hariappan <sa...@apache.org>
    Closes (#2651)
---
 .../hadoop/hive/ql/txn/compactor/Cleaner.java      |  14 +++
 .../hive/ql/txn/compactor/CompactorTest.java       |   7 +-
 .../hadoop/hive/ql/txn/compactor/TestCleaner.java  | 113 +++++++++++++++++++++
 .../thrift/gen-cpp/hive_metastore_constants.cpp    |   4 +
 .../gen/thrift/gen-cpp/hive_metastore_constants.h  |   2 +
 .../metastore/api/hive_metastoreConstants.java     |   3 +
 .../src/gen/thrift/gen-php/metastore/Constant.php  |  12 +++
 .../gen/thrift/gen-py/hive_metastore/constants.py  |   2 +
 .../gen/thrift/gen-rb/hive_metastore_constants.rb  |   4 +
 .../hive/metastore/utils/MetaStoreUtils.java       |   8 ++
 .../src/main/thrift/hive_metastore.thrift          |   2 +
 11 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
index 13fc8bc..8149494 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Cleaner.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.metastore.metrics.PerfLogger;
 import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils;
 import org.apache.hadoop.hive.metastore.txn.TxnStore;
 import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.io.AcidDirectory;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -179,6 +180,13 @@ public class Cleaner extends MetaStoreCompactorThread {
         txnHandler.markCleaned(ci);
         return;
       }
+      if (MetaStoreUtils.isNoCleanUpSet(t.getParameters())) {
+        // The table was marked no clean up true.
+        LOG.info("Skipping table " + ci.getFullTableName() + " clean up, as NO_CLEANUP set to true");
+        txnHandler.markCleaned(ci);
+        return;
+      }
+
       Partition p = null;
       if (ci.partName != null) {
         p = resolvePartition(ci);
@@ -189,6 +197,12 @@ public class Cleaner extends MetaStoreCompactorThread {
           txnHandler.markCleaned(ci);
           return;
         }
+        if (MetaStoreUtils.isNoCleanUpSet(p.getParameters())) {
+          // The partition was marked no clean up true.
+          LOG.info("Skipping partition " + ci.getFullPartitionName() + " clean up, as NO_CLEANUP set to true");
+          txnHandler.markCleaned(ci);
+          return;
+        }
       }
       StorageDescriptor sd = resolveStorageDescriptor(t, p);
       final String location = sd.getLocation();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
index 66335c9..b3788e4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/CompactorTest.java
@@ -201,12 +201,17 @@ public abstract class CompactorTest {
   }
 
   protected Partition newPartition(Table t, String value, List<Order> sortCols) throws Exception {
+    return newPartition(t, value, sortCols, new HashMap<>());
+  }
+
+  protected Partition newPartition(Table t, String value, List<Order> sortCols, Map<String, String> parameters)
+      throws Exception {
     Partition part = new Partition();
     part.addToValues(value);
     part.setDbName(t.getDbName());
     part.setTableName(t.getTableName());
     part.setSd(newStorageDescriptor(getLocation(t.getTableName(), value), sortCols));
-    part.setParameters(new HashMap<String, String>());
+    part.setParameters(parameters);
     ms.add_partition(part);
     return part;
   }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
index 3c02606..665d47c 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestCleaner.java
@@ -24,6 +24,8 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.CommitTxnRequest;
 import org.apache.hadoop.hive.metastore.api.CompactionRequest;
 import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.api.GetPartitionRequest;
+import org.apache.hadoop.hive.metastore.api.GetTableRequest;
 import org.apache.hadoop.hive.metastore.api.GetValidWriteIdsRequest;
 import org.apache.hadoop.hive.metastore.api.FindNextCompactRequest;
 import org.apache.hadoop.hive.metastore.api.Partition;
@@ -43,7 +45,9 @@ import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
 import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_COMPACTOR_CLEANER_RETENTION_TIME;
@@ -604,4 +608,113 @@ public class TestCleaner extends CompactorTest {
     compactorTestCleanup();
   }
 
+  @Test
+  public void NoCleanupAfterMajorCompaction() throws Exception {
+    Map<String, String> parameters = new HashMap<>();
+
+    //With no cleanup true
+    parameters.put("no_cleanup", "true");
+    Table t = newTable("default", "dcamc", false, parameters);
+
+    addBaseFile(t, null, 20L, 20);
+    addDeltaFile(t, null, 21L, 22L, 2);
+    addDeltaFile(t, null, 23L, 24L, 2);
+    addBaseFile(t, null, 25L, 25);
+
+    burnThroughTransactions("default", "dcamc", 25);
+
+    CompactionRequest rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR);
+    compactInTxn(rqst);
+
+    startCleaner();
+    // Check there are no compactions requests left.
+    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals(1, rsp.getCompactsSize());
+    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+    // Check that the files are not removed
+    List<Path> paths = getDirectories(conf, t, null);
+    Assert.assertEquals(4, paths.size());
+
+    //With no clean up false
+    t = ms.getTable(new GetTableRequest("default", "dcamc"));
+    t.getParameters().put("no_cleanup", "false");
+    ms.alter_table("default", "dcamc", t);
+    rqst = new CompactionRequest("default", "dcamc", CompactionType.MAJOR);
+    compactInTxn(rqst);
+
+    startCleaner();
+    // Check there are no compactions requests left.
+    rsp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals(2, rsp.getCompactsSize());
+    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+    // Check that the files are not removed
+    paths = getDirectories(conf, t, null);
+    Assert.assertEquals(1, paths.size());
+    Assert.assertEquals("base_25", paths.get(0).getName());
+  }
+
+  @Test
+  public void noCleanupAfterMinorCompactionOnPartition() throws Exception {
+    Map<String, String> parameters = new HashMap<>();
+    parameters.put("NO_CLEANUP", "True");
+    Table t = newTable("default", "dcamicop", true);
+    Partition p = newPartition(t, "today", null, parameters);
+
+    addBaseFile(t, p, 20L, 20);
+    addDeltaFile(t, p, 21L, 22L, 2);
+    addDeltaFile(t, p, 23L, 24L, 2);
+    addDeltaFile(t, p, 21L, 24L, 4);
+
+    burnThroughTransactions("default", "dcamicop", 25);
+
+    CompactionRequest rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR);
+    rqst.setPartitionname("ds=today");
+    compactInTxn(rqst);
+
+    startCleaner();
+
+    // Check there are no compactions requests left.
+    ShowCompactResponse rsp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals(1, rsp.getCompactsSize());
+    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+    // Check that the files are not removed
+    List<Path> paths = getDirectories(conf, t, p);
+    Assert.assertEquals(4, paths.size());
+
+    // compaction with no cleanup false
+    ArrayList<String> list = new ArrayList<>();
+    list.add("ds=today");
+    p = ms.getPartition("default", "dcamicop", "ds=today");
+    p.getParameters().put("NO_CLEANUP", "false");
+    ms.alter_partition("default", "dcamicop", p);
+    rqst = new CompactionRequest("default", "dcamicop", CompactionType.MINOR);
+    rqst.setPartitionname("ds=today");
+    compactInTxn(rqst);
+
+    startCleaner();
+
+    // Check there are no compactions requests left.
+    rsp = txnHandler.showCompact(new ShowCompactRequest());
+    Assert.assertEquals(2, rsp.getCompactsSize());
+    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, rsp.getCompacts().get(0).getState());
+
+    // Check that the files are removed
+    paths = getDirectories(conf, t, p);
+    Assert.assertEquals(2, paths.size());
+    boolean sawBase = false, sawDelta = false;
+    for (Path path : paths) {
+      if (path.getName().equals("base_20")) {
+        sawBase = true;
+      } else if (path.getName().equals(makeDeltaDirNameCompacted(21, 24))) {
+        sawDelta = true;
+      } else {
+        Assert.fail("Unexpected file " + path.getName());
+      }
+    }
+    Assert.assertTrue(sawBase);
+    Assert.assertTrue(sawDelta);
+  }
 }
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
index eee3633..1b981f5 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.cpp
@@ -77,6 +77,10 @@ hive_metastoreConstants::hive_metastoreConstants() {
 
   PARTITION_TRANSFORM_SPEC = "partition_transform_spec";
 
+  NO_CLEANUP = "no_cleanup";
+
+  CTAS_LEGACY_CONFIG = "create_table_as_external";
+
 }
 
 }}} // namespace
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
index 284f082..d5f7cdf 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-cpp/hive_metastore_constants.h
@@ -48,6 +48,8 @@ class hive_metastoreConstants {
   std::string JDBC_CONFIG_PREFIX;
   std::string TABLE_IS_CTAS;
   std::string PARTITION_TRANSFORM_SPEC;
+  std::string NO_CLEANUP;
+  std::string CTAS_LEGACY_CONFIG;
 };
 
 extern const hive_metastoreConstants g_hive_metastore_constants;
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
index 1a42b52..2851b22 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
@@ -75,5 +75,8 @@ package org.apache.hadoop.hive.metastore.api;
 
   public static final java.lang.String PARTITION_TRANSFORM_SPEC = "partition_transform_spec";
 
+  public static final java.lang.String NO_CLEANUP = "no_cleanup";
+
   public static final java.lang.String CTAS_LEGACY_CONFIG = "create_table_as_external";
+
 }
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
index 1335e99..b6f4e80 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Constant.php
@@ -51,6 +51,8 @@ final class Constant extends \Thrift\Type\TConstant
     static protected $JDBC_CONFIG_PREFIX;
     static protected $TABLE_IS_CTAS;
     static protected $PARTITION_TRANSFORM_SPEC;
+    static protected $NO_CLEANUP;
+    static protected $CTAS_LEGACY_CONFIG;
 
     protected static function init_DDL_TIME()
     {
@@ -216,4 +218,14 @@ final class Constant extends \Thrift\Type\TConstant
     {
         return "partition_transform_spec";
     }
+
+    protected static function init_NO_CLEANUP()
+    {
+        return "no_cleanup";
+    }
+
+    protected static function init_CTAS_LEGACY_CONFIG()
+    {
+        return "create_table_as_external";
+    }
 }
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
index 9b7332b..ef5a2b1 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/constants.py
@@ -45,3 +45,5 @@ DRUID_CONFIG_PREFIX = "druid."
 JDBC_CONFIG_PREFIX = "hive.sql."
 TABLE_IS_CTAS = "created_with_ctas"
 PARTITION_TRANSFORM_SPEC = "partition_transform_spec"
+NO_CLEANUP = "no_cleanup"
+CTAS_LEGACY_CONFIG = "create_table_as_external"
diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
index 042e65e..d441f3a 100644
--- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
+++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_constants.rb
@@ -73,3 +73,7 @@ TABLE_IS_CTAS = %q"created_with_ctas"
 
 PARTITION_TRANSFORM_SPEC = %q"partition_transform_spec"
 
+NO_CLEANUP = %q"no_cleanup"
+
+CTAS_LEGACY_CONFIG = %q"create_table_as_external"
+
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
index cb2f425..48d2bb1 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java
@@ -1119,4 +1119,12 @@ public class MetaStoreUtils {
      */
     SOURCE, TARGET;
   }
+
+  public static boolean isNoCleanUpSet(Map<String, String> parameters) {
+    String noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP);
+    if (noCleanUp == null) {
+      noCleanUp = parameters.get(hive_metastoreConstants.NO_CLEANUP.toUpperCase());
+    }
+    return noCleanUp != null && noCleanUp.equalsIgnoreCase("true");
+  }
 }
diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
index c81ba2e..941fe58 100644
--- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
+++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
@@ -3083,3 +3083,5 @@ const string DRUID_CONFIG_PREFIX = "druid.",
 const string JDBC_CONFIG_PREFIX = "hive.sql.",
 const string TABLE_IS_CTAS = "created_with_ctas",
 const string PARTITION_TRANSFORM_SPEC = "partition_transform_spec",
+const string NO_CLEANUP = "no_cleanup",
+const string CTAS_LEGACY_CONFIG = "create_table_as_external",
\ No newline at end of file