You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2018/08/24 16:22:26 UTC
hbase git commit: HBASE-21072 Block out HBCK1 in hbase2 Write the
hbase-1.x hbck1 lock file to block out hbck1 instances writing state to an
hbase-2.x cluster (could do damage). Set hbase.write.hbck1.lock.file to false
to disable this writing.
Repository: hbase
Updated Branches:
refs/heads/branch-2.0 ed235c97e -> ba452da4b
HBASE-21072 Block out HBCK1 in hbase2
Write the hbase-1.x hbck1 lock file to block out hbck1 instances writing
state to an hbase-2.x cluster (could do damage).
Set hbase.write.hbck1.lock.file to false to disable this writing.
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/ba452da4
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/ba452da4
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/ba452da4
Branch: refs/heads/branch-2.0
Commit: ba452da4bab9bdbe542115ebdfecc021c4f1e848
Parents: ed235c9
Author: Michael Stack <st...@apache.org>
Authored: Wed Aug 22 22:44:00 2018 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Fri Aug 24 09:22:00 2018 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/master/HMaster.java | 9 +-
.../org/apache/hadoop/hbase/util/HBaseFsck.java | 109 +++++++++++++------
.../apache/hadoop/hbase/master/TestMaster.java | 43 +++++++-
3 files changed, 127 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/ba452da4/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 3f296c1..d5167c8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -173,6 +173,7 @@ import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CompressionTest;
import org.apache.hadoop.hbase.util.EncryptionTest;
+import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.IdLock;
@@ -828,7 +829,13 @@ public class HMaster extends HRegionServer implements MasterServices {
ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId());
this.clusterId = clusterId.toString();
-
+ // Precaution. Put in place the old hbck1 lock file to fence out old hbase1s running their
+ // hbck1s against an hbase2 cluster; it could do damage. To skip this behavior, set
+ // hbase.write.hbck1.lock.file to false.
+ if (this.conf.getBoolean("hbase.write.hbck1.lock.file", true)) {
+ HBaseFsck.checkAndMarkRunningHbck(this.conf,
+ HBaseFsck.createLockRetryCounterFactory(this.conf).create());
+ }
status.setStatus("Initialze ServerManager and schedule SCP for crash servers");
this.serverManager = createServerManager(this);
http://git-wip-us.apache.org/repos/asf/hbase/blob/ba452da4/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
index b5533d4..fec323a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
@@ -135,6 +135,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
@@ -155,7 +156,10 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic
/**
* HBaseFsck (hbck) is a tool for checking and repairing region consistency and
- * table integrity problems in a corrupted HBase.
+ * table integrity problems in a corrupted HBase. This tool was written for hbase-1.x. It does not
+ * work with hbase-2.x; it can read state but is not allowed to change state; i.e. effect 'repair'.
+ * See hbck2 (HBASE-19121) for an hbck tool for hbase2.
+ *
* <p>
* Region consistency checks verify that hbase:meta, region deployment on region
* servers and the state of data in HDFS (.regioninfo files) all are in
@@ -208,7 +212,12 @@ public class HBaseFsck extends Configured implements Closeable {
private static final int DEFAULT_OVERLAPS_TO_SIDELINE = 2;
private static final int DEFAULT_MAX_MERGE = 5;
private static final String TO_BE_LOADED = "to_be_loaded";
- private static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
+ /**
+ * Here is where hbase-1.x used to default the lock for hbck1.
+ * It puts in place a lock when it goes to write/make changes.
+ */
+ @VisibleForTesting
+ public static final String HBCK_LOCK_FILE = "hbase-hbck.lock";
private static final int DEFAULT_MAX_LOCK_FILE_ATTEMPTS = 5;
private static final int DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
private static final int DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
@@ -359,40 +368,75 @@ public class HBaseFsck extends Configured implements Closeable {
super(conf);
errors = getErrorReporter(getConf());
this.executor = exec;
- lockFileRetryCounterFactory = new RetryCounterFactory(
- getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
- getConf().getInt(
- "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
- getConf().getInt(
- "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
- createZNodeRetryCounterFactory = new RetryCounterFactory(
- getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
- getConf().getInt(
- "hbase.hbck.createznode.attempt.sleep.interval",
- DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
- getConf().getInt(
- "hbase.hbck.createznode.attempt.maxsleeptime",
- DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
+ lockFileRetryCounterFactory = createLockRetryCounterFactory(getConf());
+ createZNodeRetryCounterFactory = createZnodeRetryCounterFactory(getConf());
zkw = createZooKeeperWatcher();
}
- private class FileLockCallable implements Callable<FSDataOutputStream> {
+ /**
+ * @return A retry counter factory configured for retrying lock file creation.
+ */
+ public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) {
+ return new RetryCounterFactory(
+ conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS),
+ conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval",
+ DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
+ conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime",
+ DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
+ }
+
+ /**
+ * @return A retry counter factory configured for retrying znode creation.
+ */
+ private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) {
+ return new RetryCounterFactory(
+ conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
+ conf.getInt("hbase.hbck.createznode.attempt.sleep.interval",
+ DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
+ conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime",
+ DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
+ }
+
+ /**
+ * @return Return the tmp dir this tool writes to.
+ */
+ @VisibleForTesting
+ public static Path getTmpDir(Configuration conf) throws IOException {
+ return new Path(FSUtils.getRootDir(conf), HConstants.HBASE_TEMP_DIRECTORY);
+ }
+
+ private static class FileLockCallable implements Callable<FSDataOutputStream> {
RetryCounter retryCounter;
+ private final Configuration conf;
+ private Path hbckLockPath = null;
- public FileLockCallable(RetryCounter retryCounter) {
+ public FileLockCallable(Configuration conf, RetryCounter retryCounter) {
this.retryCounter = retryCounter;
+ this.conf = conf;
+ }
+
+ /**
+ * @return Will be <code>null</code> unless you call {@link #call()}
+ */
+ Path getHbckLockPath() {
+ return this.hbckLockPath;
}
+
@Override
public FSDataOutputStream call() throws IOException {
try {
- FileSystem fs = FSUtils.getCurrentFileSystem(getConf());
- FsPermission defaultPerms = FSUtils.getFilePermissions(fs, getConf(),
+ FileSystem fs = FSUtils.getCurrentFileSystem(this.conf);
+ FsPermission defaultPerms = FSUtils.getFilePermissions(fs, this.conf,
HConstants.DATA_FILE_UMASK_KEY);
- Path tmpDir = new Path(FSUtils.getRootDir(getConf()), HConstants.HBASE_TEMP_DIRECTORY);
+ Path tmpDir = getTmpDir(conf);
+ this.hbckLockPath = new Path(tmpDir, HBCK_LOCK_FILE);
fs.mkdirs(tmpDir);
- HBCK_LOCK_PATH = new Path(tmpDir, HBCK_LOCK_FILE);
- final FSDataOutputStream out = createFileWithRetries(fs, HBCK_LOCK_PATH, defaultPerms);
+ final FSDataOutputStream out = createFileWithRetries(fs, this.hbckLockPath, defaultPerms);
out.writeBytes(InetAddress.getLocalHost().toString());
+ // Add a note into the file we write on why hbase2 is writing out an hbck1 lock file.
+ out.writeBytes(" Written by an hbase-2.x Master to block an " +
+ "attempt by an hbase-1.x HBCK tool making modification to state. " +
+ "See 'HBCK must match HBase server version' in the hbase refguide.");
out.flush();
return out;
} catch(RemoteException e) {
@@ -407,7 +451,6 @@ public class HBaseFsck extends Configured implements Closeable {
private FSDataOutputStream createFileWithRetries(final FileSystem fs,
final Path hbckLockFilePath, final FsPermission defaultPerms)
throws IOException {
-
IOException exception = null;
do {
try {
@@ -439,13 +482,13 @@ public class HBaseFsck extends Configured implements Closeable {
* @return FSDataOutputStream object corresponding to the newly opened lock file
* @throws IOException if IO failure occurs
*/
- private FSDataOutputStream checkAndMarkRunningHbck() throws IOException {
- RetryCounter retryCounter = lockFileRetryCounterFactory.create();
- FileLockCallable callable = new FileLockCallable(retryCounter);
+ public static Pair<Path, FSDataOutputStream> checkAndMarkRunningHbck(Configuration conf,
+ RetryCounter retryCounter) throws IOException {
+ FileLockCallable callable = new FileLockCallable(conf, retryCounter);
ExecutorService executor = Executors.newFixedThreadPool(1);
FutureTask<FSDataOutputStream> futureTask = new FutureTask<>(callable);
executor.execute(futureTask);
- final int timeoutInSeconds = getConf().getInt(
+ final int timeoutInSeconds = conf.getInt(
"hbase.hbck.lockfile.maxwaittime", DEFAULT_WAIT_FOR_LOCK_TIMEOUT);
FSDataOutputStream stream = null;
try {
@@ -462,7 +505,7 @@ public class HBaseFsck extends Configured implements Closeable {
} finally {
executor.shutdownNow();
}
- return stream;
+ return new Pair<Path, FSDataOutputStream>(callable.getHbckLockPath(), stream);
}
private void unlockHbck() {
@@ -471,8 +514,7 @@ public class HBaseFsck extends Configured implements Closeable {
do {
try {
IOUtils.closeQuietly(hbckOutFd);
- FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()),
- HBCK_LOCK_PATH, true);
+ FSUtils.delete(FSUtils.getCurrentFileSystem(getConf()), HBCK_LOCK_PATH, true);
LOG.info("Finishing hbck");
return;
} catch (IOException ioe) {
@@ -501,7 +543,10 @@ public class HBaseFsck extends Configured implements Closeable {
if (isExclusive()) {
// Grab the lock
- hbckOutFd = checkAndMarkRunningHbck();
+ Pair<Path, FSDataOutputStream> pair =
+ checkAndMarkRunningHbck(getConf(), this.lockFileRetryCounterFactory.create());
+ HBCK_LOCK_PATH = pair.getFirst();
+ this.hbckOutFd = pair.getSecond();
if (hbckOutFd == null) {
setRetCode(-1);
LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " +
http://git-wip-us.apache.org/repos/asf/hbase/blob/ba452da4/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
index 11df313..50a83d8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMaster.java
@@ -19,11 +19,15 @@ package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -40,10 +44,10 @@ import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
-import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.StringUtils;
import org.junit.AfterClass;
@@ -192,5 +196,42 @@ public class TestMaster {
TEST_UTIL.deleteTable(tableName);
}
}
+
+ @Test
+ public void testBlockingHbkc1WithLockFile() throws IOException {
+ // This is how the path to the lock file is created inside HBaseFsck. Too hard to use its
+ // actual method without disturbing HBaseFsck... Mimic it below instead.
+ Path hbckLockPath = new Path(HBaseFsck.getTmpDir(TEST_UTIL.getConfiguration()),
+ HBaseFsck.HBCK_LOCK_FILE);
+ FileSystem fs = TEST_UTIL.getTestFileSystem();
+ assertTrue(fs.exists(hbckLockPath));
+ TEST_UTIL.getMiniHBaseCluster().
+ killMaster(TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName());
+ assertTrue(fs.exists(hbckLockPath));
+ TEST_UTIL.getMiniHBaseCluster().startMaster();
+ TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getMaster() != null &&
+ TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized());
+ assertTrue(fs.exists(hbckLockPath));
+ // Start a second Master. Should be fine.
+ TEST_UTIL.getMiniHBaseCluster().startMaster();
+ assertTrue(fs.exists(hbckLockPath));
+ fs.delete(hbckLockPath, true);
+ assertFalse(fs.exists(hbckLockPath));
+ // Kill all Masters.
+ TEST_UTIL.getMiniHBaseCluster().getLiveMasterThreads().stream().
+ map(sn -> sn.getMaster().getServerName()).forEach(sn -> {
+ try {
+ TEST_UTIL.getMiniHBaseCluster().killMaster(sn);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ });
+ // Start a new one.
+ TEST_UTIL.getMiniHBaseCluster().startMaster();
+ TEST_UTIL.waitFor(30000, () -> TEST_UTIL.getMiniHBaseCluster().getMaster() != null &&
+ TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized());
+ // Assert lock gets put in place again.
+ assertTrue(fs.exists(hbckLockPath));
+ }
}