You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2016/09/09 18:11:00 UTC

hbase git commit: HBASE-16464 archive folder grows bigger and bigger due to corrupt snapshot under tmp dir (Heng Chen)

Repository: hbase
Updated Branches:
  refs/heads/HBASE-7912 76d7acdfa -> aa94a89d2


HBASE-16464 archive folder grows bigger and bigger due to corrupt snapshot under tmp dir (Heng Chen)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/aa94a89d
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/aa94a89d
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/aa94a89d

Branch: refs/heads/HBASE-7912
Commit: aa94a89d2ce558e83d4630142915d3fb27b2ab13
Parents: 76d7acd
Author: tedyu <yu...@gmail.com>
Authored: Fri Sep 9 11:10:45 2016 -0700
Committer: tedyu <yu...@gmail.com>
Committed: Fri Sep 9 11:10:45 2016 -0700

----------------------------------------------------------------------
 .../master/snapshot/SnapshotFileCache.java      | 17 ++++++-
 .../master/snapshot/TakeSnapshotHandler.java    |  2 +-
 .../snapshot/SnapshotDescriptionUtils.java      | 14 ++++++
 .../hbase/snapshot/SnapshotManifestV2.java      | 18 +++++---
 .../snapshot/TestSnapshotHFileCleaner.java      | 47 ++++++++++++++++++--
 .../hbase/snapshot/SnapshotTestingUtils.java    | 13 ++++++
 6 files changed, 100 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
index dfd3cb5..5b367c5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/SnapshotFileCache.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Stoppable;
+import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.util.FSUtils;
 
@@ -300,7 +301,21 @@ public class SnapshotFileCache implements Stoppable {
     FileStatus[] running = FSUtils.listStatus(fs, snapshotTmpDir);
     if (running != null) {
       for (FileStatus run : running) {
-        snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+        try {
+          snapshotInProgress.addAll(fileInspector.filesUnderSnapshot(run.getPath()));
+        } catch (CorruptedSnapshotException e) {
+          // See HBASE-16464
+          if (e.getCause() instanceof FileNotFoundException) {
+            // If the snapshot is not in progress, we will delete it
+            if (!fs.exists(new Path(run.getPath(),
+              SnapshotDescriptionUtils.SNAPSHOT_IN_PROGRESS))) {
+              fs.delete(run.getPath(), true);
+              LOG.warn("delete the " + run.getPath() + " due to exception:", e.getCause());
+            }
+          } else {
+            throw e;
+          }
+        }
       }
     }
     return snapshotInProgress;

http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java
index 9172e06..8967a70 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/TakeSnapshotHandler.java
@@ -164,7 +164,7 @@ public abstract class TakeSnapshotHandler extends EventHandler implements Snapsh
     try {
       // If regions move after this meta scan, the region specific snapshot should fail, triggering
       // an external exception that gets captured here.
-
+      SnapshotDescriptionUtils.createInProgressTag(workingDir, fs);
       // write down the snapshot info in the working directory
       SnapshotDescriptionUtils.writeSnapshotInfo(snapshot, workingDir, fs);
       snapshotManifest.addTableDescriptor(this.htd);

http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
index 79e7312..7677e0a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotDescriptionUtils.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.snapshot;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Collections;
 
@@ -101,6 +102,9 @@ public class SnapshotDescriptionUtils {
 
   /** Temporary directory under the snapshot directory to store in-progress snapshots */
   public static final String SNAPSHOT_TMP_DIR_NAME = ".tmp";
+
+  /** This tag will be created in in-progress snapshots */
+  public static final String SNAPSHOT_IN_PROGRESS = ".inprogress";
   // snapshot operation values
   /** Default value if no start time is specified */
   public static final long NO_SNAPSHOT_START_TIME_SPECIFIED = 0;
@@ -290,6 +294,16 @@ public class SnapshotDescriptionUtils {
   }
 
   /**
+   * Create the in-progress tag under the .tmp working directory of an in-progress snapshot.
+   */
+  public static void createInProgressTag(Path workingDir, FileSystem fs) throws IOException {
+    FsPermission perms = FSUtils.getFilePermissions(fs, fs.getConf(),
+      HConstants.DATA_FILE_UMASK_KEY);
+    Path snapshot_in_progress = new Path(workingDir, SnapshotDescriptionUtils.SNAPSHOT_IN_PROGRESS);
+    FSUtils.create(fs, snapshot_in_progress, perms, true);
+  }
+
+  /**
    * Read in the {@link org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription} stored for the snapshot in the passed directory
    * @param fs filesystem where the snapshot was taken
    * @param snapshotDir directory where the snapshot was stored

http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java
index 2df9bef..bb9d2f9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/SnapshotManifestV2.java
@@ -81,12 +81,18 @@ public final class SnapshotManifestV2 {
     }
 
     public void regionClose(final SnapshotRegionManifest.Builder region) throws IOException {
-      SnapshotRegionManifest manifest = region.build();
-      FSDataOutputStream stream = fs.create(getRegionManifestPath(snapshotDir, manifest));
-      try {
-        manifest.writeTo(stream);
-      } finally {
-        stream.close();
+      // make sure the snapshot dir still exists; it may have been deleted by the master,
+      // see HBASE-16464
+      if (fs.exists(snapshotDir)) {
+        SnapshotRegionManifest manifest = region.build();
+        FSDataOutputStream stream = fs.create(getRegionManifestPath(snapshotDir, manifest));
+        try {
+          manifest.writeTo(stream);
+        } finally {
+          stream.close();
+        }
+      } else {
+        LOG.warn("can't write manifest without parent dir, maybe it has been deleted by master?");
       }
     }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
index 5e5b004..90e6db7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestSnapshotHFileCleaner.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hbase.master.snapshot;
 import static org.junit.Assert.assertFalse;
 
 import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -31,9 +33,12 @@ import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -44,12 +49,20 @@ import org.junit.experimental.categories.Category;
 public class TestSnapshotHFileCleaner {
 
   private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final String TABLE_NAME_STR = "testSnapshotManifest";
+  private static final String SNAPSHOT_NAME_STR = "testSnapshotManifest-snapshot";
+  private static Path rootDir;
+  private static FileSystem fs;
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    rootDir = FSUtils.getRootDir(conf);
+    fs = FileSystem.get(conf);
+  }
 
   @AfterClass
   public static void cleanup() throws IOException {
-    Configuration conf = TEST_UTIL.getConfiguration();
-    Path rootDir = FSUtils.getRootDir(conf);
-    FileSystem fs = FileSystem.get(conf);
     // cleanup
     fs.delete(rootDir, true);
   }
@@ -87,4 +100,32 @@ public class TestSnapshotHFileCleaner {
     // make sure that the file isn't deletable
     assertFalse(cleaner.isFileDeletable(fs.getFileStatus(refFile)));
   }
+
+  class SnapshotFiles implements SnapshotFileCache.SnapshotFileInspector {
+    public Collection<String> filesUnderSnapshot(final Path snapshotDir) throws IOException {
+      Collection<String> files =  new HashSet<String>();
+      files.addAll(SnapshotReferenceUtil.getHFileNames(TEST_UTIL.getConfiguration(), fs,
+          snapshotDir));
+      return files;
+    }
+  }
+
+  /**
+   * HBASE-16464
+   */
+  @Test
+  public void testMissedTmpSnapshot() throws IOException {
+    SnapshotTestingUtils.SnapshotMock
+      snapshotMock = new SnapshotTestingUtils.SnapshotMock(TEST_UTIL.getConfiguration(), fs, rootDir);
+    SnapshotTestingUtils.SnapshotMock.SnapshotBuilder builder = snapshotMock.createSnapshotV2(
+      SNAPSHOT_NAME_STR, TABLE_NAME_STR);
+    builder.addRegionV2();
+    builder.missOneRegionSnapshotFile();
+
+    long period = Long.MAX_VALUE;
+    SnapshotFileCache cache = new SnapshotFileCache(fs, rootDir, period, 10000000,
+      "test-snapshot-file-cache-refresh", new SnapshotFiles());
+    cache.getSnapshotsInProgress();
+    assertFalse(fs.exists(builder.getSnapshotsDir()));
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/aa94a89d/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
index 96e3990..2eddfce 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/SnapshotTestingUtils.java
@@ -34,6 +34,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -528,6 +529,18 @@ public class SnapshotTestingUtils {
         return regionData.files;
       }
 
+      public void missOneRegionSnapshotFile() throws IOException {
+        FileStatus[] manifestFiles = FSUtils.listStatus(fs, snapshotDir);
+        for (FileStatus fileStatus : manifestFiles) {
+          String fileName = fileStatus.getPath().getName();
+          if (fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOTINFO_FILE)
+            || fileName.endsWith(".tabledesc")
+            || fileName.endsWith(SnapshotDescriptionUtils.SNAPSHOT_TMP_DIR_NAME)) {
+              fs.delete(fileStatus.getPath(), true);
+          }
+        }
+      }
+
       public Path commit() throws IOException {
         ForeignExceptionDispatcher monitor = new ForeignExceptionDispatcher(desc.getName());
         SnapshotManifest manifest = SnapshotManifest.create(conf, fs, snapshotDir, desc, monitor);