Posted to commits@hbase.apache.org by bu...@apache.org on 2020/03/03 18:43:16 UTC

[hbase] branch branch-2.1 updated: HBASE-23553 Snapshot referenced data files are deleted in some case

This is an automated email from the ASF dual-hosted git repository.

busbey pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new dbbfdda  HBASE-23553 Snapshot referenced data files are deleted in some case
dbbfdda is described below

commit dbbfddacd1d47fb41322b8ecd9a898000608a6f2
Author: meiyi <my...@gmail.com>
AuthorDate: Wed Dec 11 09:31:11 2019 +0800

    HBASE-23553 Snapshot referenced data files are deleted in some case
    
    Backport to branch-2.1 via HBASE-23915 by Karthik P. Differs from
    the original by skipping the "turn compaction on/off" feature, which
    the original needed only for its test.
    Closes #1230
    
    Co-authored-by: Karthik Palanisamy <kp...@hortonworks.com>
    Signed-off-by: Josh Elser <el...@apache.org>
    Signed-off-by: Sean Busbey <bu...@apache.org>
---
 .../hadoop/hbase/regionserver/HRegionServer.java   |   5 +
 .../hbase/snapshot/SnapshotReferenceUtil.java      |  10 ++
 .../hbase/client/TestTableSnapshotScanner.java     | 132 +++++++++++++++++++++
 3 files changed, 147 insertions(+)
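
Background for the hunks below: before this patch, SnapshotReferenceUtil recorded only two kinds of store file when collecting the names a snapshot still depends on, so a Reference file left behind by a region merge or split was recorded only under its own name and the parent region's data file stayed invisible to the snapshot. A minimal sketch of the pre-patch logic, simplified from the context lines of the SnapshotReferenceUtil hunk below (the example file names are made up):

    // Pre-patch logic: a Reference such as "d41d8cd98f.0c9e3410aa" is not an
    // HFileLink, so it fell through to the plain-file branch and only its own
    // name was recorded; the parent region's "d41d8cd98f" was never listed,
    // leaving that file eligible for deletion by the HFile cleaner.
    String hfile = storeFile.getName();
    if (HFileLink.isHFileLink(hfile)) {
      names.add(HFileLink.getReferencedHFileName(hfile));
    } else {
      names.add(hfile);
    }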

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 08b3a98..733fd52 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -3845,4 +3845,9 @@ public class HRegionServer extends HasThread implements
       Runtime.getRuntime().halt(1);
     }
   }
+
+  @VisibleForTesting
+  public CompactedHFilesDischarger getCompactedHFilesDischarger() {
+    return compactedFileDischarger;
+  }
 }
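
The getter above only exposes the region server's existing compactedFileDischarger chore so that tests can trigger it on demand. The test added below drives it roughly like this (a sketch; the cluster variable stands in for UTIL.getMiniHBaseCluster()):

    // Force each region server to discharge compacted-away store files now,
    // instead of waiting for the chore interval, so the merged region drops
    // its references to the parent regions' files.
    for (RegionServerThread t : cluster.getRegionServerThreads()) {
      t.getRegionServer().getCompactedHFilesDischarger().chore();
    }
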
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java
index b157d01..b6efb7c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotReferenceUtil.java
@@ -352,6 +352,16 @@ public final class SnapshotReferenceUtil {
         String hfile = storeFile.getName();
         if (HFileLink.isHFileLink(hfile)) {
           names.add(HFileLink.getReferencedHFileName(hfile));
+        } else if (StoreFileInfo.isReference(hfile)) {
+          Path refPath = StoreFileInfo.getReferredToFile(new Path(new Path(
+              new Path(new Path(regionInfo.getTable().getNamespaceAsString(),
+                  regionInfo.getTable().getQualifierAsString()), regionInfo.getEncodedName()),
+              family), hfile));
+          names.add(hfile);
+          names.add(refPath.getName());
+          if (HFileLink.isHFileLink(refPath.getName())) {
+            names.add(HFileLink.getReferencedHFileName(refPath.getName()));
+          }
         } else {
           names.add(hfile);
         }
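
The new branch records up to three names for a Reference file: the reference itself, the store file it points back to, and, when that target is itself an HFileLink, the final linked file. A rough illustration of the resolution with made-up region and file names, relying on HBase's "<hfile>.<parent region encoded name>" convention for reference files:

    // Hypothetical layout: a merge left region "39ab0e5cd078" holding a
    // reference to file "d41d8cd98f" in parent region "f2cd6f52e371".
    Path ref = new Path("ns/tbl/39ab0e5cd078/f/d41d8cd98f.f2cd6f52e371");
    StoreFileInfo.isReference(ref.getName());        // true
    Path referred = StoreFileInfo.getReferredToFile(ref);
    // referred is ns/tbl/f2cd6f52e371/f/d41d8cd98f, so both
    // "d41d8cd98f.f2cd6f52e371" and "d41d8cd98f" land in `names` and the
    // HFile cleaner keeps the parent region's file alive.
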
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
index 408953a..2a735c2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestTableSnapshotScanner.java
@@ -17,10 +17,13 @@
  */
 package org.apache.hadoop.hbase.client;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
@@ -29,13 +32,19 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner;
 import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
 import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.HFileArchiveUtil;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.ClassRule;
@@ -303,4 +312,127 @@ public class TestTableSnapshotScanner {
     }
   }
 
+  @Test
+  public void testMergeRegion() throws Exception {
+    setupCluster();
+    TableName tableName = TableName.valueOf("testMergeRegion");
+    String snapshotName = tableName.getNameAsString() + "_snapshot";
+    Configuration conf = UTIL.getConfiguration();
+    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+    long timeout = 20000; // 20s
+    try (Admin admin = UTIL.getAdmin()) {
+      // create table with 3 regions
+      Table table = UTIL.createTable(tableName, FAMILIES, 1, bbb, yyy, 3);
+      List<RegionInfo> regions = admin.getRegions(tableName);
+      Assert.assertEquals(3, regions.size());
+      RegionInfo region0 = regions.get(0);
+      RegionInfo region1 = regions.get(1);
+      RegionInfo region2 = regions.get(2);
+      // put some data in the table
+      UTIL.loadTable(table, FAMILIES);
+      admin.flush(tableName);
+      // wait until the flush is finished
+      UTIL.waitFor(timeout, () -> {
+        try {
+          Path tableDir = FSUtils.getTableDir(rootDir, tableName);
+          for (RegionInfo region : regions) {
+            Path regionDir = new Path(tableDir, region.getEncodedName());
+            for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
+              if (fs.listStatus(familyDir).length != 1) {
+                return false;
+              }
+            }
+          }
+          return true;
+        } catch (IOException e) {
+          LOG.warn("Failed check if flush is finished", e);
+          return false;
+        }
+      });
+      // merge 2 regions
+      admin.mergeRegionsAsync(region0.getEncodedNameAsBytes(), region1.getEncodedNameAsBytes(),
+        true);
+      UTIL.waitFor(timeout, () -> admin.getRegions(tableName).size() == 2);
+      List<RegionInfo> mergedRegions = admin.getRegions(tableName);
+      RegionInfo mergedRegion =
+          mergedRegions.get(0).getEncodedName().equals(region2.getEncodedName())
+              ? mergedRegions.get(1)
+              : mergedRegions.get(0);
+      // snapshot
+      admin.snapshot(snapshotName, tableName);
+      Assert.assertEquals(1, admin.listSnapshots().size());
+      // major compact
+      admin.majorCompactRegion(mergedRegion.getRegionName());
+      // wait until merged region has no reference
+      UTIL.waitFor(timeout, () -> {
+        try {
+          for (RegionServerThread regionServerThread : UTIL.getMiniHBaseCluster()
+              .getRegionServerThreads()) {
+            HRegionServer regionServer = regionServerThread.getRegionServer();
+            for (HRegion subRegion : regionServer.getRegions(tableName)) {
+              if (subRegion.getRegionInfo().getEncodedName()
+                  .equals(mergedRegion.getEncodedName())) {
+                regionServer.getCompactedHFilesDischarger().chore();
+              }
+            }
+          }
+          Path tableDir = FSUtils.getTableDir(rootDir, tableName);
+          HRegionFileSystem regionFs = HRegionFileSystem
+              .openRegionFromFileSystem(UTIL.getConfiguration(), fs,
+                  tableDir, mergedRegion, true);
+          return !regionFs.hasReferences(admin.getDescriptor(tableName));
+        } catch (IOException e) {
+          LOG.warn("Failed check merged region has no reference", e);
+          return false;
+        }
+      });
+      // run catalog janitor to clean and wait until parent regions are archived
+      UTIL.getMiniHBaseCluster().getMaster().getCatalogJanitor().choreForTesting();
+      UTIL.waitFor(timeout, () -> {
+        try {
+          Path tableDir = FSUtils.getTableDir(rootDir, tableName);
+          for (FileStatus fileStatus : fs.listStatus(tableDir)) {
+            String name = fileStatus.getPath().getName();
+            if (name.equals(region0.getEncodedName()) || name.equals(region1.getEncodedName())) {
+              return false;
+            }
+          }
+          return true;
+        } catch (IOException e) {
+          LOG.warn("Check if parent regions are archived error", e);
+          return false;
+        }
+      });
+      // set file modification times back past the TTL and then run the cleaner
+      long cleanerTtl = conf.getLong("hbase.master.hfilecleaner.ttl",
+          TimeToLiveHFileCleaner.DEFAULT_TTL);
+      long time = System.currentTimeMillis() - cleanerTtl * 1000;
+      traverseAndSetFileTime(HFileArchiveUtil.getArchivePath(conf), time);
+      UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().runCleaner();
+      // scan snapshot
+      try (TableSnapshotScanner scanner = new TableSnapshotScanner(conf,
+          UTIL.getDataTestDirOnTestFS(snapshotName), snapshotName, new Scan(bbb, yyy))) {
+        verifyScanner(scanner, bbb, yyy);
+      }
+    } finally {
+      tearDownCluster();
+    }
+  }
+
+  private void traverseAndSetFileTime(Path path, long time) throws IOException {
+    fs.setTimes(path, time, -1);
+    if (fs.isDirectory(path)) {
+      List<FileStatus> allPaths = Arrays.asList(fs.listStatus(path));
+      List<FileStatus> subDirs =
+          allPaths.stream().filter(FileStatus::isDirectory).collect(Collectors.toList());
+      List<FileStatus> files =
+          allPaths.stream().filter(FileStatus::isFile).collect(Collectors.toList());
+      for (FileStatus subDir : subDirs) {
+        traverseAndSetFileTime(subDir.getPath(), time);
+      }
+      for (FileStatus file : files) {
+        fs.setTimes(file.getPath(), time, -1);
+      }
+    }
+  }
 }
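
One non-obvious step in the test above is back-dating the archive: TimeToLiveHFileCleaner only deletes archived files older than hbase.master.hfilecleaner.ttl, so without rewinding modification times the forced cleaner run would be a no-op and the test could pass even without the fix. The relevant lines, pulled out of the test for emphasis:

    // Make every archived file look older than the cleaner TTL, then force a
    // cleaner pass; with the fix, the parent files that the snapshot still
    // references survive because SnapshotReferenceUtil now reports them.
    long cleanerTtl = conf.getLong("hbase.master.hfilecleaner.ttl",
        TimeToLiveHFileCleaner.DEFAULT_TTL);
    traverseAndSetFileTime(HFileArchiveUtil.getArchivePath(conf),
        System.currentTimeMillis() - cleanerTtl * 1000);
    UTIL.getMiniHBaseCluster().getMaster().getHFileCleaner().runCleaner();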