You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2016/08/04 17:22:05 UTC
hbase git commit: HBASE-16314 Retry on table snapshot failure
(Vladimir Rodionov)
Repository: hbase
Updated Branches:
refs/heads/HBASE-7912 e8e514181 -> 541740d48
HBASE-16314 Retry on table snapshot failure (Vladimir Rodionov)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/541740d4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/541740d4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/541740d4
Branch: refs/heads/HBASE-7912
Commit: 541740d48fe2d953f8a659d15848350af8d40177
Parents: e8e5141
Author: tedyu <yu...@gmail.com>
Authored: Thu Aug 4 10:21:46 2016 -0700
Committer: tedyu <yu...@gmail.com>
Committed: Thu Aug 4 10:21:46 2016 -0700
----------------------------------------------------------------------
.../backup/master/FullTableBackupProcedure.java | 54 +++++++++++++++-----
1 file changed, 41 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/541740d4/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
index 0a12b62..a7cfd8a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/backup/master/FullTableBackupProcedure.java
@@ -35,13 +35,13 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupCopyService;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupRestoreServerFactory;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
-import org.apache.hadoop.hbase.backup.BackupCopyService.Type;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupPhase;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.impl.BackupException;
@@ -71,7 +71,13 @@ public class FullTableBackupProcedure
extends StateMachineProcedure<MasterProcedureEnv, FullTableBackupState>
implements TableProcedureInterface {
private static final Log LOG = LogFactory.getLog(FullTableBackupProcedure.class);
-
+
+ private static final String SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY = "hbase.backup.snapshot.attempts.max";
+ private static final int DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS = 10;
+
+ private static final String SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY = "hbase.backup.snapshot.attempts.delay";
+ private static final int DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY = 10000;
+
private final AtomicBoolean aborted = new AtomicBoolean(false);
private Configuration conf;
private String backupId;
@@ -560,17 +566,7 @@ public class FullTableBackupProcedure
LOG.debug("Unable to delete " + snapshotName, e);
}
// Kick off snapshot for backup
- try {
- env.getMasterServices().getSnapshotManager().takeSnapshot(backupSnapshot);
- } catch (IOException e) {
- LOG.debug("Unable to take snapshot: " + snapshotName, e);
- }
- long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(
- env.getMasterConfiguration(),
- backupSnapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
- BackupServerUtil.waitForSnapshot(backupSnapshot, waitTime,
- env.getMasterServices().getSnapshotManager(), env.getMasterConfiguration());
- // set the snapshot name in BackupStatus of this table, only after snapshot success.
+ snapshotTable(env, backupSnapshot);
backupContext.setSnapshotName(tableName, backupSnapshot.getName());
}
setNextState(FullTableBackupState.SNAPSHOT_COPY);
@@ -621,6 +617,38 @@ public class FullTableBackupProcedure
return Flow.HAS_MORE_STATE;
}
+  /**
+   * Takes the backup snapshot of a table, retrying when an attempt fails with
+   * {@link NotServingRegionException}.
+   * <p>
+   * The attempt limit is read from {@code hbase.backup.snapshot.attempts.max} and the pause
+   * between attempts, in milliseconds, from {@code hbase.backup.snapshot.attempts.delay}.
+   * @param env master procedure environment supplying the configuration and snapshot manager
+   * @param backupSnapshot description of the snapshot to take
+   * @throws IOException if every attempt failed with {@link NotServingRegionException}, if the
+   *           thread was interrupted while pausing between attempts, or if taking or waiting
+   *           for the snapshot failed with any other I/O error
+   */
+  private void snapshotTable(final MasterProcedureEnv env, SnapshotDescription backupSnapshot)
+      throws IOException {
+    Configuration masterConf = env.getMasterConfiguration();
+    // Always make at least one attempt, even if the limit is misconfigured as zero or negative;
+    // otherwise the method would return without ever taking a snapshot.
+    int maxAttempts = Math.max(1, masterConf.getInt(SNAPSHOT_BACKUP_MAX_ATTEMPTS_KEY,
+      DEFAULT_SNAPSHOT_BACKUP_MAX_ATTEMPTS));
+    // Thread.sleep rejects negative values, so clamp a misconfigured delay to zero.
+    int delay = Math.max(0, masterConf.getInt(SNAPSHOT_BACKUP_ATTEMPTS_DELAY_KEY,
+      DEFAULT_SNAPSHOT_BACKUP_ATTEMPTS_DELAY));
+    NotServingRegionException lastFailure = null;
+
+    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
+      try {
+        env.getMasterServices().getSnapshotManager().takeSnapshot(backupSnapshot);
+        long waitTime = SnapshotDescriptionUtils.getMaxMasterTimeout(masterConf,
+          backupSnapshot.getType(), SnapshotDescriptionUtils.DEFAULT_MAX_WAIT_TIME);
+        BackupServerUtil.waitForSnapshot(backupSnapshot, waitTime,
+          env.getMasterServices().getSnapshotManager(), masterConf);
+        return;
+      } catch (NotServingRegionException nsre) {
+        lastFailure = nsre;
+        if (attempt == maxAttempts) {
+          LOG.warn("Snapshot attempt " + attempt + " failed for table "
+              + backupSnapshot.getTable() + ", giving up", nsre);
+          break;
+        }
+        LOG.warn("Snapshot attempt " + attempt + " failed for table "
+            + backupSnapshot.getTable() + ", sleeping for " + delay + "ms", nsre);
+        try {
+          Thread.sleep(delay);
+        } catch (InterruptedException ie) {
+          // Restore the interrupt status and stop retrying: with the flag set, every further
+          // sleep would fail immediately and the loop would spin through its attempts.
+          Thread.currentThread().interrupt();
+          java.io.InterruptedIOException iioe = new java.io.InterruptedIOException(
+              "Interrupted while waiting to retry snapshot of table "
+              + backupSnapshot.getTable());
+          iioe.initCause(ie);
+          throw iioe;
+        }
+      }
+    }
+    // All attempts failed; report it so the caller does not treat the snapshot as taken.
+    throw new IOException("Failed to snapshot table " + backupSnapshot.getTable() + " after "
+        + maxAttempts + " attempts", lastFailure);
+  }
@Override
protected void rollbackState(final MasterProcedureEnv env, final FullTableBackupState state)
throws IOException {