You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by as...@apache.org on 2022/03/26 13:53:56 UTC
[hive] branch master updated: HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
asinkovits pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1662a90 HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
1662a90 is described below
commit 1662a902c98404946582709acba42de3252c4ac0
Author: Antal Sinkovits <as...@cloudera.com>
AuthorDate: Sat Mar 26 14:53:38 2022 +0100
HIVE-26060: Invalidate acid table directory cache on drop table (Antal Sinkovits, reviewed by Denys Kuzmenko)
Closes #3128
---
.../hive/ql/txn/compactor/TestCompactor.java | 78 ++++++++++++++++++++++
.../org/apache/hadoop/hive/ql/io/AcidUtils.java | 17 ++++-
.../org/apache/hadoop/hive/ql/metadata/Hive.java | 2 +
3 files changed, 96 insertions(+), 1 deletion(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index f2da316..16b5957 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -2517,6 +2517,84 @@ public class TestCompactor {
Assert.assertEquals("2\t55\t66", valuesReadFromHiveDriver.get(1));
}
+ /**
+ * HIVE-26060: verify that dropping a non-partitioned ACID table invalidates its
+ * entry in the AcidUtils directory cache, so that a recreated table with the
+ * same name does not serve stale (pre-drop) directory contents.
+ */
+ @Test
+ public void testAcidDirCacheOnDropTable() throws Exception {
+ int cacheDurationInMinutes = 10;
+ // Ensure the dir cache exists before the test runs (normally lazily initialized).
+ AcidUtils.initDirCache(cacheDurationInMinutes);
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+ String dbName = "default";
+ String tblName = "adc_table";
+
+ // First phase, populate the cache
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " +
+ "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('a')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('b')", driver);
+ runMajorCompaction(dbName, tblName);
+ runCleaner(conf);
+
+ // Enabling the cache duration makes the select below store the table's
+ // directory listing in the ACID dir cache.
+ HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+ executeStatementOnDriver("select * from " + tblName + " order by a", driver);
+
+ // Second phase, the previous data should be cleaned
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("create table " + tblName + " (a string) stored as orc " +
+ "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('c')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('d')", driver);
+ runMajorCompaction(dbName, tblName);
+ runCleaner(conf);
+
+ // If the drop did not invalidate the cache entry, this select would return
+ // the first-phase rows ('a', 'b') instead of the recreated table's data.
+ HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+ List<String> rs = execSelectAndDumpData("select * from " + tblName + " order by a", driver, "select");
+ Assert.assertEquals(2, rs.size());
+ Assert.assertEquals("c", rs.get(0));
+ Assert.assertEquals("d", rs.get(1));
+ }
+
+ /**
+ * HIVE-26060: verify that dropping a partitioned ACID table invalidates every
+ * per-partition entry in the AcidUtils directory cache, so that a recreated
+ * table with the same name does not serve stale (pre-drop) directory contents.
+ */
+ @Test
+ public void testAcidDirCacheOnDropPartitionedTable() throws Exception {
+ int cacheDurationInMinutes = 10;
+ // Ensure the dir cache exists before the test runs (normally lazily initialized).
+ AcidUtils.initDirCache(cacheDurationInMinutes);
+ HiveConf.setBoolVar(conf, ConfVars.HIVE_COMPACTOR_GATHER_STATS, false);
+ String dbName = "default";
+ String tblName = "adc_part_table";
+
+ // First phase, populate the cache
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("create table " + tblName + " (a string) PARTITIONED BY (p string) stored as orc " +
+ "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('a', 'p1')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('b', 'p1')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('a', 'p2')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('b', 'p2')", driver);
+ runMajorCompaction(dbName, tblName, "p=p1", "p=p2");
+ runCleaner(conf);
+
+ // Enabling the cache duration makes the select below store each partition's
+ // directory listing in the ACID dir cache.
+ HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+ executeStatementOnDriver("select a from " + tblName + " order by a", driver);
+
+ // Second phase, the previous data should be cleaned
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+ executeStatementOnDriver("create table " + tblName + " (a string) PARTITIONED BY (p string) stored as orc " +
+ "TBLPROPERTIES ('transactional'='true', 'hive.exec.orc.split.strategy'='BI')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('c', 'p1')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('d', 'p1')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('c', 'p2')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values ('d', 'p2')", driver);
+ runMajorCompaction(dbName, tblName, "p=p1", "p=p2");
+ runCleaner(conf);
+
+ // If the drop did not invalidate the per-partition cache entries, this select
+ // would return first-phase rows ('a','b') instead of the recreated table's data.
+ HiveConf.setIntVar(driver.getConf(), ConfVars.HIVE_TXN_ACID_DIR_CACHE_DURATION, cacheDurationInMinutes);
+ List<String> rs = execSelectAndDumpData("select a from " + tblName + " order by a", driver, "select");
+ Assert.assertEquals(4, rs.size());
+ Assert.assertEquals("c", rs.get(0));
+ Assert.assertEquals("c", rs.get(1));
+ Assert.assertEquals("d", rs.get(2));
+ // Fixed: the original asserted rs.get(2) twice, leaving the last row unchecked.
+ Assert.assertEquals("d", rs.get(3));
+ }
+
private List<ShowCompactResponseElement> getCompactionList() throws Exception {
conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_DELTA_NUM_THRESHOLD, 0);
runInitiator(conf);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
index c5ed434..018c3e8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/AcidUtils.java
@@ -3151,7 +3151,8 @@ public class AcidUtils {
.noneMatch(pattern -> astSearcher.simpleBreadthFirstSearch(tree, pattern) != null));
}
- private static void initDirCache(int durationInMts) {
+ @VisibleForTesting
+ public static void initDirCache(int durationInMts) {
if (dirCacheInited.get()) {
LOG.debug("DirCache got initialized already");
return;
@@ -3251,6 +3252,20 @@ public class AcidUtils {
return value.getDirInfo();
}
+ /**
+ * Removes the cached ACID directory listing(s) for the given table, if the dir
+ * cache has been initialized. Invoked on drop table so that a recreated table
+ * with the same name and location does not see stale directory info.
+ */
+ public static void tryInvalidateDirCache(org.apache.hadoop.hive.metastore.api.Table table) {
+ if (dirCacheInited.get()) {
+ // NOTE(review): assumes cache keys are built as "<db.table>_<location>" — keep
+ // in sync with the key construction used where entries are inserted.
+ String key = getFullTableName(table.getDbName(), table.getTableName()) + "_" + table.getSd().getLocation();
+ boolean partitioned = table.getPartitionKeys() != null && !table.getPartitionKeys().isEmpty();
+ if (!partitioned) {
+ // Unpartitioned table: exactly one cache entry, keyed by the table location.
+ dirCache.invalidate(key);
+ } else {
+ // Invalidate all partitions as the difference in the key is only the partition part at the end of the path.
+ // NOTE(review): prefix matching relies on partition dirs living under the table
+ // location; the snapshot into a Set avoids mutating the key set while streaming it.
+ dirCache.invalidateAll(
+ dirCache.asMap().keySet().stream().filter(k -> k.startsWith(key)).collect(Collectors.toSet()));
+ }
+ }
+ }
+
static class DirInfoValue {
private String txnString;
private AcidDirectory dirInfo;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index a479023..773d2b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1435,6 +1435,8 @@ public class Hive {
getMSC().dropTable(table, deleteData, ignoreUnknownTab, ifPurge);
} catch (Exception e) {
throw new HiveException(e);
+ } finally {
+ AcidUtils.tryInvalidateDirCache(table);
}
}