You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2016/08/04 17:22:05 UTC

hbase git commit: HBASE-16314 Retry on table snapshot failure (Vladimir Rodionov)

Repository: hbase
Updated Branches:
  refs/heads/HBASE-7912 e8e514181 -> 541740d48


HBASE-16314 Retry on table snapshot failure (Vladimir Rodionov)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/541740d4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/541740d4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/541740d4

Branch: refs/heads/HBASE-7912
Commit: 541740d48fe2d953f8a659d15848350af8d40177
Parents: e8e5141
Author: tedyu <yu...@gmail.com>
Authored: Thu Aug 4 10:21:46 2016 -0700
Committer: tedyu <yu...@gmail.com>
Committed: Thu Aug 4 10:21:46 2016 -0700

----------------------------------------------------------------------
 .../backup/master/FullTableBackupProcedure.java | 54 +++++++++++++++-----
 1 file changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/541740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
index 0a12b62..a7cfd8a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
@@ -35,13 +35,13 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.NotServingRegionException;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupCopyService;
 import org.apache.hadoop.hbase.backup.BackupInfo;
 import org.apache.hadoop.hbase.backup.BackupRestoreServerFactory;
 import org.apache.hadoop.hbase.backup.BackupType;
 import org.apache.hadoop.hbase.backup.HBackupFileSystem;
-import org.apache.hadoop.hbase.backup.BackupCopyService.Type;
 import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
 import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
 import org.apache.hadoop.hbase.backup.impl.BackupException;
@@ -71,7 +71,13 @@ public class FullTableBackupProcedure
     extends StateMachineProcedure<MasterProcedureEnv, FullTableBackupState>
     implements TableProcedureInterface {
   private static final Log LOG = LogFactory.getLog(FullTableBackupProcedure.class);
-
+  /** Configuration key for the maximum number of snapshot attempts made for a table. */
+  private static final String SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY = "hbase.backup.snapshot.attempts.max";
+  private static final int DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS = 10;
+  /** Configuration key for the delay, in milliseconds, slept between snapshot attempts. */
+  private static final String SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY = "hbase.backup.snapshot.attempts.delay";
+  private static final int DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY = 10000;
+  
   private final AtomicBoolean aborted = new AtomicBoolean(false);
   private Configuration conf;
   private String backupId;
@@ -560,17 +566,7 @@ public class FullTableBackupProcedure
               LOG.debug("Unable to delete " + snapshotName, e);
             }
             // Kick off snapshot for backup
-            try {
-              env.getMasterServices().getSnapshotManager().takeSnapshot(backupSnapshot);
-            } catch (IOException e) {
-              LOG.debug("Unable to take snapshot: " + snapshotName, e);
-            }
-            long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(
-                env.getMasterConfiguration(),
-                backupSnapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
-            BackupServerUtil.waitForSnapshot(backupSnapshot, waitTime,
-              env.getMasterServices().getSnapshotManager(), env.getMasterConfiguration());
-            // set the snapshot name in BackupStatus of this table, only after snapshot success.
+            snapshotTable(env, backupSnapshot);  
             backupContext.setSnapshotName(tableName, backupSnapshot.getName());
           }
           setNextState(FullTableBackupState.SNAPSHOT_COPY);
@@ -621,6 +617,38 @@ public class FullTableBackupProcedure
     return Flow.HAS_MORE_STATE;
   }
 
+  /** Takes the backup snapshot of a table, retrying while a region is not being served. */
+  private void snapshotTable(final MasterProcedureEnv env, SnapshotDescription backupSnapshot)
+      throws IOException {
+    Configuration masterConf = env.getMasterConfiguration();
+    int maxAttempts = masterConf.getInt(SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY,
+      DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS);
+    int delay = masterConf.getInt(SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY,
+      DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY);
+    long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(masterConf,
+      backupSnapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
+    for (int attempt = 1;; attempt++) {
+      try {
+        env.getMasterServices().getSnapshotManager().takeSnapshot(backupSnapshot);
+        BackupServerUtil.waitForSnapshot(backupSnapshot, waitTime,
+          env.getMasterServices().getSnapshotManager(), masterConf);
+        return;
+      } catch (NotServingRegionException nsre) {
+        if (attempt >= maxAttempts) {
+          // Out of attempts: fail so the caller never records a snapshot that was not taken.
+          throw new IOException("Snapshot of " + backupSnapshot.getTable() + " failed", nsre);
+        }
+        LOG.warn("Snapshot attempt " + attempt + " failed for table " + backupSnapshot.getTable()
+          + ", sleeping for " + delay + "ms", nsre);
+        try {
+          Thread.sleep(Math.max(0, delay));
+        } catch (InterruptedException ie) {
+          Thread.currentThread().interrupt();
+          throw new IOException("Interrupted while waiting to retry snapshot", ie);
+        }
+      }
+    }
+  }
   @Override
   protected void rollbackState(final MasterProcedureEnv env, final FullTableBackupState state)
       throws IOException {