You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ta...@apache.org on 2022/11/20 23:04:26 UTC

[hbase] branch branch-2 updated: HBASE-27495 Improve HFileLinkCleaner to validate back reference links ahead the next traverse (#4887)

This is an automated email from the ASF dual-hosted git repository.

taklwu pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 66aedcae021 HBASE-27495 Improve HFileLinkCleaner to validate back reference links ahead the next traverse (#4887)
66aedcae021 is described below

commit 66aedcae021ff26517137c48ab284cf6aa4b4e1f
Author: Tak Lon (Stephen) Wu <ta...@apache.org>
AuthorDate: Sun Nov 20 14:44:39 2022 -0800

    HBASE-27495 Improve HFileLinkCleaner to validate back reference links ahead the next traverse (#4887)
    
    Signed-off-by: Peter Somogyi <ps...@apache.org>
    Signed-off-by: Wellington Ramos Chevreuil <wc...@apache.org>
---
 .../hbase/master/cleaner/HFileLinkCleaner.java     |  19 ++-
 .../hbase/master/cleaner/TestHFileLinkCleaner.java | 128 ++++++++++++++-------
 2 files changed, 105 insertions(+), 42 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java
index b550f39e2d7..3b137ebc1a9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/cleaner/HFileLinkCleaner.java
@@ -90,10 +90,23 @@ public class HFileLinkCleaner extends BaseHFileCleanerDelegate {
       }
 
       // HFile is deletable only if has no links
-      Path backRefDir = null;
+      Path backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
       try {
-        backRefDir = HFileLink.getBackReferencesDir(parentDir, filePath.getName());
-        return CommonFSUtils.listStatus(fs, backRefDir) == null;
+        FileStatus[] fileStatuses = CommonFSUtils.listStatus(fs, backRefDir);
+        // for empty reference directory, retain the logic to be deletable
+        if (fileStatuses == null) {
+          return true;
+        }
+        // reuse the found back reference files, check if the forward reference exists.
+        // with this optimization, the chore could save one round compute time if we're visiting
+        // the archive HFile earlier than the HFile Link
+        for (FileStatus fileStatus : fileStatuses) {
+          if (!isFileDeletable(fileStatus)) {
+            return false;
+          }
+        }
+        // all the found back reference files are clear, we can delete it.
+        return true;
       } catch (IOException e) {
         if (LOG.isDebugEnabled()) {
           LOG.debug("Couldn't get the references, not deleting file, just in case. filePath="
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java
index 87d34dca868..2ad014ffa3f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileLinkCleaner.java
@@ -43,7 +43,9 @@ import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -61,9 +63,28 @@ public class TestHFileLinkCleaner {
   public static final HBaseClassTestRule CLASS_RULE =
     HBaseClassTestRule.forClass(TestHFileLinkCleaner.class);
 
+  private Configuration conf;
+  private Path rootDir;
+  private FileSystem fs;
+  private TableName tableName;
+  private TableName tableLinkName;
+  private String hfileName;
+  private String familyName;
+  private RegionInfo hri;
+  private RegionInfo hriLink;
+  private Path archiveDir;
+  private Path archiveStoreDir;
+  private Path familyPath;
+  private Path hfilePath;
+  private Path familyLinkPath;
+  private String hfileLinkName;
+  private Path linkBackRefDir;
+  private Path linkBackRef;
+  private FileStatus[] backRefs;
+  private HFileCleaner cleaner;
   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
-
   private static DirScanPool POOL;
+  private static final long TTL = 1000;
 
   @Rule
   public TestName name = new TestName();
@@ -78,49 +99,71 @@ public class TestHFileLinkCleaner {
     POOL.shutdownNow();
   }
 
-  @Test
-  public void testHFileLinkCleaning() throws Exception {
-    Configuration conf = TEST_UTIL.getConfiguration();
+  @Before
+  public void configureDirectoriesAndLinks() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
     CommonFSUtils.setRootDir(conf, TEST_UTIL.getDataTestDir());
     conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, HFileLinkCleaner.class.getName());
-    Path rootDir = CommonFSUtils.getRootDir(conf);
-    FileSystem fs = FileSystem.get(conf);
+    rootDir = CommonFSUtils.getRootDir(conf);
+    fs = FileSystem.get(conf);
 
-    final TableName tableName = TableName.valueOf(name.getMethodName());
-    final TableName tableLinkName = TableName.valueOf(name.getMethodName() + "-link");
-    final String hfileName = "1234567890";
-    final String familyName = "cf";
+    tableName = TableName.valueOf(name.getMethodName());
+    tableLinkName = TableName.valueOf(name.getMethodName() + "-link");
+    hfileName = "1234567890";
+    familyName = "cf";
 
-    RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build();
-    RegionInfo hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build();
+    hri = RegionInfoBuilder.newBuilder(tableName).build();
+    hriLink = RegionInfoBuilder.newBuilder(tableLinkName).build();
 
-    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
-    Path archiveStoreDir =
+    archiveDir = HFileArchiveUtil.getArchivePath(conf);
+    archiveStoreDir =
       HFileArchiveUtil.getStoreArchivePath(conf, tableName, hri.getEncodedName(), familyName);
 
     // Create hfile /hbase/table-link/region/cf/getEncodedName.HFILE(conf);
-    Path familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName);
+    familyPath = getFamilyDirPath(archiveDir, tableName, hri.getEncodedName(), familyName);
     fs.mkdirs(familyPath);
-    Path hfilePath = new Path(familyPath, hfileName);
+    hfilePath = new Path(familyPath, hfileName);
     fs.createNewFile(hfilePath);
 
+    createLink(true);
+
+    // Initialize cleaner
+    conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TTL);
+    Server server = new DummyServer();
+    cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL);
+  }
+
+  private void createLink(boolean createBackReference) throws IOException {
     // Create link to hfile
-    Path familyLinkPath =
-      getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName);
+    familyLinkPath = getFamilyDirPath(rootDir, tableLinkName, hriLink.getEncodedName(), familyName);
     fs.mkdirs(familyLinkPath);
-    HFileLink.create(conf, fs, familyLinkPath, hri, hfileName);
-    Path linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName);
+    hfileLinkName = HFileLink.create(conf, fs, familyLinkPath, hri, hfileName, createBackReference);
+    linkBackRefDir = HFileLink.getBackReferencesDir(archiveStoreDir, hfileName);
     assertTrue(fs.exists(linkBackRefDir));
-    FileStatus[] backRefs = fs.listStatus(linkBackRefDir);
+    backRefs = fs.listStatus(linkBackRefDir);
     assertEquals(1, backRefs.length);
-    Path linkBackRef = backRefs[0].getPath();
+    linkBackRef = backRefs[0].getPath();
+  }
 
-    // Initialize cleaner
-    final long ttl = 1000;
-    conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl);
-    Server server = new DummyServer();
-    HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archiveDir, POOL);
+  @After
+  public void cleanup() throws IOException, InterruptedException {
+    // HFile can be removed
+    Thread.sleep(TTL * 2);
+    cleaner.chore();
+    assertFalse("HFile should be deleted", fs.exists(hfilePath));
+    // Remove everything
+    for (int i = 0; i < 4; ++i) {
+      Thread.sleep(TTL * 2);
+      cleaner.chore();
+    }
+    assertFalse("HFile should be deleted",
+      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName)));
+    assertFalse("Link should be deleted",
+      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName)));
+  }
 
+  @Test
+  public void testHFileLinkCleaning() throws Exception {
     // Link backref cannot be removed
     cleaner.chore();
     assertTrue(fs.exists(linkBackRef));
@@ -131,21 +174,28 @@ public class TestHFileLinkCleaner {
       CommonFSUtils.getTableDir(archiveDir, tableLinkName));
     cleaner.chore();
     assertFalse("Link should be deleted", fs.exists(linkBackRef));
+  }
 
-    // HFile can be removed
-    Thread.sleep(ttl * 2);
+  @Test
+  public void testHFileLinkByRemovingReference() throws Exception {
+    // Link backref cannot be removed
     cleaner.chore();
-    assertFalse("HFile should be deleted", fs.exists(hfilePath));
+    assertTrue(fs.exists(linkBackRef));
+    assertTrue(fs.exists(hfilePath));
 
-    // Remove everything
-    for (int i = 0; i < 4; ++i) {
-      Thread.sleep(ttl * 2);
-      cleaner.chore();
-    }
-    assertFalse("HFile should be deleted",
-      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableName)));
-    assertFalse("Link should be deleted",
-      fs.exists(CommonFSUtils.getTableDir(archiveDir, tableLinkName)));
+    // simulate after removing the reference in data directory, the Link backref can be removed
+    fs.delete(new Path(familyLinkPath, hfileLinkName), false);
+    cleaner.chore();
+    assertFalse("Link should be deleted", fs.exists(linkBackRef));
+  }
+
+  @Test
+  public void testHFileLinkEmptyBackReferenceDirectory() throws Exception {
+    // simulate and remove the back reference
+    fs.delete(linkBackRef, false);
+    assertTrue("back reference directory still exists", fs.exists(linkBackRefDir));
+    cleaner.chore();
+    assertFalse("back reference directory should be deleted", fs.exists(linkBackRefDir));
   }
 
   private static Path getFamilyDirPath(final Path rootDir, final TableName table,