Posted to commits@hive.apache.org by as...@apache.org on 2022/03/26 13:53:56 UTC

[hive] branch master updated: HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 1662a90  HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
1662a90 is described below

commit 1662a902c98404946582709acba42de3252c4ac0
Author: Antal Sinkovits <as...@cloudera.com>
AuthorDate: Sat Mar 26 14:53:38 2022 +0100

    HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
    
    Closes #3128
---
 .../hive/ql/txn/compactor/TestCompactor.java       | 78 ++++++++++++++++++++++
 .../org/apache/hadoop/hive/ql/io/AcidUtils.java    | 17 ++++-
 .../org/apache/hadoop/hive/ql/metadata/Hive.java   |  2 +
 3 files changed, 96 insertions(+), 1 deletion(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index f2da316..16b5957 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -2517,6 +2517,84 @@ public class TestCompactor {
     Assert.assertEquals("2\t55\t66", valuesReadFromHiveDriver.get(1));
   }
 
+  @Test
+  public void testAcidDirCacheOnDropTable() throws Exception {
+    int cacheDurationInMinutes = 10;
+    AcidUtils.initDirCache(cacheDurationInMinutes);
+    HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+    String dbName = "default";
+    String tblName = "adc_table";
+
+    // First phase, populate the cache
+    executeStatementOnDriver("drop table if exists " + tblName, driver);
+    executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " +
+            "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('a')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('b')", driver);
+    runMajorCompaction(dbName, tblName);
+    runCleaner(conf);
+
+    HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+    executeStatementOnDriver("select * from " + tblName + " order by a", driver);
+
+    // Second phase, the previous data should be cleaned
+    executeStatementOnDriver("drop table if exists " + tblName, driver);
+    executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " +
+            "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('c')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('d')", driver);
+    runMajorCompaction(dbName, tblName);
+    runCleaner(conf);
+
+    HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+    List<String> rs = execSelectAndDumpData("select * from " + tblName + " order by a", driver, "select");
+    Assert.assertEquals(2, rs.size());
+    Assert.assertEquals("c", rs.get(0));
+    Assert.assertEquals("d", rs.get(1));
+  }
+
+  @Test
+  public void testAcidDirCacheOnDropPartitionedTable() throws Exception {
+    int cacheDurationInMinutes = 10;
+    AcidUtils.initDirCache(cacheDurationInMinutes);
+    HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+    String dbName = "default";
+    String tblName = "adc_part_table";
+
+    // First phase, populate the cache
+    executeStatementOnDriver("drop table if exists " + tblName, driver);
+    executeStatementOnDriver("create table " + tblName + " (a string) PARTITIONED BY (p string) stored as orc " +
+            "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('a', 'p1')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('b', 'p1')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('a', 'p2')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('b', 'p2')", driver);
+    runMajorCompaction(dbName, tblName, "p=p1", "p=p2");
+    runCleaner(conf);
+
+    HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+    executeStatementOnDriver("select a from " + tblName + " order by a", driver);
+
+    // Second phase, the previous data should be cleaned
+    executeStatementOnDriver("drop table if exists " + tblName, driver);
+    executeStatementOnDriver("create table " + tblName + " (a string) PARTITIONED BY (p string) stored as orc " +
+            "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('c', 'p1')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('d', 'p1')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('c', 'p2')", driver);
+    executeStatementOnDriver("insert into " + tblName + " values ('d', 'p2')", driver);
+    runMajorCompaction(dbName, tblName, "p=p1", "p=p2");
+    runCleaner(conf);
+
+    HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+    List<String> rs = execSelectAndDumpData("select a from " + tblName + " order by a", driver, "select");
+    Assert.assertEquals(4, rs.size());
+    Assert.assertEquals("c", rs.get(0));
+    Assert.assertEquals("c", rs.get(1));
+    Assert.assertEquals("d", rs.get(2));
+    Assert.assertEquals("d", rs.get(2));
+  }
+
   private List<ShowCompactResponseElement> getCompactionList() throws Exception {
     conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
     runInitiator(conf);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index c5ed434..018c3e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3151,7 +3151,8 @@ public class AcidUtils {
       .noneMatch(pattern -> astSearcher.simpleBreadthFirstSearch(tree, pattern) != null));
   }
 
-  private static void initDirCache(int durationInMts) {
+  @VisibleForTesting
+  public static void initDirCache(int durationInMts) {
     if (dirCacheInited.get()) {
       LOG.debug("DirCache got initialized already");
       return;
@@ -3251,6 +3252,20 @@ public class AcidUtils {
     return value.getDirInfo();
   }
 
+  public static void tryInvalidateDirCache(org.apache.hadoop.hive.metastore.api.Table table) {
+    if (dirCacheInited.get()) {
+      String key = getFullTableName(table.getDbName(), table.getTableName()) + "_" + table.getSd().getLocation();
+      boolean partitioned = table.getPartitionKeys() != null && !table.getPartitionKeys().isEmpty();
+      if (!partitioned) {
+        dirCache.invalidate(key);
+      } else {
+        // Invalidate all partitions as the difference in the key is only the partition part at the end of the path.
+        dirCache.invalidateAll(
+          dirCache.asMap().keySet().stream().filter(k -> k.startsWith(key)).collect(Collectors.toSet()));
+      }
+    }
+  }
+
   static class DirInfoValue {
     private String txnString;
     private AcidDirectory dirInfo;
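
The new tryInvalidateDirCache above keys the cache on the table's full name plus its base location and, for a partitioned table, clears every entry sharing that table-level prefix, since partition keys only differ in the trailing partition path. Below is a minimal, standalone sketch of that prefix-invalidation pattern; it assumes dirCache is a Guava Cache and uses illustrative key strings and class names, not the actual AcidUtils internals.

    import com.google.common.cache.Cache;
    import com.google.common.cache.CacheBuilder;

    import java.util.Set;
    import java.util.concurrent.TimeUnit;
    import java.util.stream.Collectors;

    public class DirCachePrefixInvalidationSketch {
      public static void main(String[] args) {
        // Stand-in for the ACID directory cache: entries expire after a fixed duration.
        Cache<String, String> dirCache = CacheBuilder.newBuilder()
            .expireAfterWrite(10, TimeUnit.MINUTES)
            .build();

        // Unpartitioned table: a single entry keyed by table name + base location.
        dirCache.put("default.adc_table_/warehouse/adc_table", "dirInfo");
        // Partitioned table: one entry per partition, all sharing the table-level prefix.
        dirCache.put("default.adc_part_table_/warehouse/adc_part_table/p=p1", "dirInfoP1");
        dirCache.put("default.adc_part_table_/warehouse/adc_part_table/p=p2", "dirInfoP2");

        // Drop of an unpartitioned table: invalidate the exact key.
        dirCache.invalidate("default.adc_table_/warehouse/adc_table");

        // Drop of a partitioned table: the keys differ only in the partition suffix,
        // so collect every key starting with the table-level prefix and drop them all.
        String prefix = "default.adc_part_table_/warehouse/adc_part_table";
        Set<String> partitionKeys = dirCache.asMap().keySet().stream()
            .filter(k -> k.startsWith(prefix))
            .collect(Collectors.toSet());
        dirCache.invalidateAll(partitionKeys);

        System.out.println("Remaining cached entries: " + dirCache.asMap().keySet()); // empty
      }
    }
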
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index a479023..773d2b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1435,6 +1435,8 @@ public class Hive {
       getMSC().dropTable(table, deleteData, ignoreUnknownTab, ifPurge);
     } catch (Exception e) {
       throw new HiveException(e);
+    } finally {
+      AcidUtils.tryInvalidateDirCache(table);
     }
   }
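
The Hive.java change places the invalidation in a finally block, so the cached directory snapshot is evicted whether or not the metastore drop succeeds, erring on the side of discarding possibly stale entries. A condensed, self-contained sketch of that pattern follows; the MetastoreOps interface and the in-memory map are hypothetical stand-ins, not Hive APIs.

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;

    public class DropWithCacheEvictionSketch {

      // Hypothetical stand-in for the metastore client call made by Hive.dropTable.
      interface MetastoreOps {
        void dropTable(String db, String table) throws Exception;
      }

      // Hypothetical stand-in for the ACID directory cache keyed by "<db>.<table>_<location>".
      private final ConcurrentMap<String, Object> dirCache = new ConcurrentHashMap<>();

      void dropTable(MetastoreOps msc, String db, String table) throws Exception {
        try {
          msc.dropTable(db, table);
        } finally {
          // Runs on success and on failure alike, so a partially completed drop
          // cannot leave a stale directory snapshot behind in the cache.
          dirCache.keySet().removeIf(k -> k.startsWith(db + "." + table + "_"));
        }
      }
    }
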