You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by zh...@apache.org on 2020/05/30 08:40:14 UTC
[hbase] 03/03: HBASE-24471 The way we bootstrap meta table is confusing (#1806)
This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit 4d5efec76718032a1e55024fd5133409e4be3cb8
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Sat May 30 16:03:52 2020 +0800
HBASE-24471 The way we bootstrap meta table is confusing (#1806)
Signed-off-by: Michael Stack <st...@apache.org>
---
.../src/main/protobuf/MasterProcedure.proto | 3 +-
.../hadoop/hbase/master/MasterFileSystem.java | 108 ++++++---------------
.../hbase/master/procedure/InitMetaProcedure.java | 77 +++++++++++++--
.../hadoop/hbase/regionserver/HRegionServer.java | 9 +-
.../hadoop/hbase/util/FSTableDescriptors.java | 1 +
5 files changed, 101 insertions(+), 97 deletions(-)
diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
index de33345..7c59ccf 100644
--- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
@@ -468,7 +468,8 @@ message ReopenTableRegionsStateData {
}
enum InitMetaState {
- INIT_META_ASSIGN_META = 1;
+ INIT_META_WRITE_FS_LAYOUT = 1;
+ INIT_META_ASSIGN_META = 2;
}
message InitMetaStateData {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index 42a433e..31d365a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -18,22 +18,18 @@
*/
package org.apache.hadoop.hbase.master;
+import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.ClusterId;
import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.HFileArchiver;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.log.HBaseMarkers;
@@ -42,9 +38,7 @@ import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.ipc.RemoteException;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -247,15 +241,26 @@ public class MasterFileSystem {
* needed populating the directory with necessary bootup files).
* @throws IOException
*/
- private Path checkRootDir(final Path rd, final Configuration c, final FileSystem fs)
- throws IOException {
+ private void checkRootDir(final Path rd, final Configuration c, final FileSystem fs)
+ throws IOException {
+ int threadWakeFrequency = c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
// If FS is in safe mode wait till out of it.
- FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
+ FSUtils.waitOnSafeMode(c, threadWakeFrequency);
// Filesystem is good. Go ahead and check for hbase.rootdir.
+ FileStatus status;
+ try {
+ status = fs.getFileStatus(rd);
+ } catch (FileNotFoundException e) {
+ status = null;
+ }
+ int versionFileWriteAttempts = c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
+ HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
try {
- if (!fs.exists(rd)) {
- fs.mkdirs(rd);
+ if (status == null) {
+ if (!fs.mkdirs(rd)) {
+ throw new IOException("Can not create configured '" + HConstants.HBASE_DIR + "' " + rd);
+ }
// DFS leaves safe mode with 0 DNs when there are 0 blocks.
// We used to handle this by checking the current DN count and waiting until
// it is nonzero. With security, the check for datanode count doesn't work --
@@ -263,47 +268,29 @@ public class MasterFileSystem {
// and simply retry file creation during bootstrap indefinitely. As soon as
// there is one datanode it will succeed. Permission problems should have
// already been caught by mkdirs above.
- FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
- 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
- HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
+ FSUtils.setVersion(fs, rd, threadWakeFrequency, versionFileWriteAttempts);
} else {
- if (!fs.isDirectory(rd)) {
- throw new IllegalArgumentException(rd.toString() + " is not a directory");
+ if (!status.isDirectory()) {
+ throw new IllegalArgumentException(
+ "Configured '" + HConstants.HBASE_DIR + "' " + rd + " is not a directory.");
}
// as above
- FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
- 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
- HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
+ FSUtils.checkVersion(fs, rd, true, threadWakeFrequency, versionFileWriteAttempts);
}
} catch (DeserializationException de) {
- LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for "
- + HConstants.HBASE_DIR, de);
+ LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}",
+ HConstants.HBASE_DIR, rd, de);
throw new IOException(de);
} catch (IllegalArgumentException iae) {
- LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for "
- + HConstants.HBASE_DIR + " " + rd.toString(), iae);
+ LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}",
+ HConstants.HBASE_DIR, rd, iae);
throw iae;
}
// Make sure cluster ID exists
- if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt(
- HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) {
- FSUtils.setClusterId(fs, rd, new ClusterId(), c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
+ if (!FSUtils.checkClusterIdExists(fs, rd, threadWakeFrequency)) {
+ FSUtils.setClusterId(fs, rd, new ClusterId(), threadWakeFrequency);
}
clusterId = FSUtils.getClusterId(fs, rd);
-
- // Make sure the meta region directory exists!
- if (!FSUtils.metaRegionExists(fs, rd)) {
- bootstrap(rd, c);
- }
-
- // Create tableinfo-s for hbase:meta if not already there.
- // assume, created table descriptor is for enabling table
- // meta table is a system table, so descriptors are predefined,
- // we should get them from registry.
- FSTableDescriptors fsd = new FSTableDescriptors(fs, rd);
- fsd.createTableDescriptor(fsd.get(TableName.META_TABLE_NAME));
-
- return rd;
}
/**
@@ -398,43 +385,6 @@ public class MasterFileSystem {
}
}
- private static void bootstrap(final Path rd, final Configuration c)
- throws IOException {
- LOG.info("BOOTSTRAP: creating hbase:meta region");
- try {
- // Bootstrapping, make sure blockcache is off. Else, one will be
- // created here in bootstrap and it'll need to be cleaned up. Better to
- // not make it in first place. Turn off block caching for bootstrap.
- // Enable after.
- FSTableDescriptors.tryUpdateMetaTableDescriptor(c);
- TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
- HRegion meta = HRegion.createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rd,
- c, setInfoFamilyCachingForMeta(metaDescriptor, false), null);
- meta.close();
- } catch (IOException e) {
- e = e instanceof RemoteException ?
- ((RemoteException)e).unwrapRemoteException() : e;
- LOG.error("bootstrap", e);
- throw e;
- }
- }
-
- /**
- * Enable in memory caching for hbase:meta
- */
- public static TableDescriptor setInfoFamilyCachingForMeta(TableDescriptor metaDescriptor, final boolean b) {
- TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(metaDescriptor);
- for (ColumnFamilyDescriptor hcd: metaDescriptor.getColumnFamilies()) {
- if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
- builder.modifyColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(hcd)
- .setBlockCacheEnabled(b)
- .setInMemory(b)
- .build());
- }
- }
- return builder.build();
- }
-
public void deleteFamilyFromFS(RegionInfo region, byte[] familyName)
throws IOException {
deleteFamilyFromFS(rootdir, region, familyName);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
index c510a25..bbdaaa7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
@@ -20,16 +20,29 @@ package org.apache.hadoop.hbase.master.procedure;
import java.io.IOException;
import java.util.Arrays;
import java.util.concurrent.CountDownLatch;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
+import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
+import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.InitMetaState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.InitMetaStateData;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
/**
* This procedure is used to initialize meta table for a new hbase deploy. It will just schedule an
@@ -38,8 +51,12 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.I
@InterfaceAudience.Private
public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMetaState> {
+ private static final Logger LOG = LoggerFactory.getLogger(InitMetaProcedure.class);
+
private CountDownLatch latch = new CountDownLatch(1);
+ private RetryCounter retryCounter;
+
@Override
public TableName getTableName() {
return TableName.META_TABLE_NAME;
@@ -50,16 +67,56 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
return TableOperationType.CREATE;
}
+ private static void writeFsLayout(Path rootDir, Configuration conf) throws IOException {
+ LOG.info("BOOTSTRAP: creating hbase:meta region");
+ FileSystem fs = rootDir.getFileSystem(conf);
+ Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
+ if (fs.exists(tableDir) && !fs.delete(tableDir, true)) {
+ LOG.warn("Can not delete partial created meta table, continue...");
+ }
+ // Bootstrapping, make sure blockcache is off. Else, one will be
+ // created here in bootstrap and it'll need to be cleaned up. Better to
+ // not make it in first place. Turn off block caching for bootstrap.
+ // Enable after.
+ FSTableDescriptors.tryUpdateMetaTableDescriptor(conf, fs, rootDir,
+ builder -> builder.setRegionReplication(
+ conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)));
+ TableDescriptor metaDescriptor = new FSTableDescriptors(conf).get(TableName.META_TABLE_NAME);
+ HRegion
+ .createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rootDir, conf, metaDescriptor, null)
+ .close();
+ }
+
@Override
protected Flow executeFromState(MasterProcedureEnv env, InitMetaState state)
- throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
- switch (state) {
- case INIT_META_ASSIGN_META:
- addChildProcedure(env.getAssignmentManager()
- .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
- return Flow.NO_MORE_STATE;
- default:
- throw new UnsupportedOperationException("unhandled state=" + state);
+ throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
+ LOG.debug("Execute {}", this);
+ try {
+ switch (state) {
+ case INIT_META_WRITE_FS_LAYOUT:
+ Configuration conf = env.getMasterConfiguration();
+ Path rootDir = CommonFSUtils.getRootDir(conf);
+ writeFsLayout(rootDir, conf);
+ setNextState(InitMetaState.INIT_META_ASSIGN_META);
+ return Flow.HAS_MORE_STATE;
+ case INIT_META_ASSIGN_META:
+ LOG.info("Going to assign meta");
+ addChildProcedure(env.getAssignmentManager()
+ .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
+ return Flow.NO_MORE_STATE;
+ default:
+ throw new UnsupportedOperationException("unhandled state=" + state);
+ }
+ } catch (IOException e) {
+ if (retryCounter == null) {
+ retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+ }
+ long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+ LOG.warn("Failed to init meta, suspend {}secs", backoff, e);
+ setTimeout(Math.toIntExact(backoff));
+ setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
+ skipPersistence();
+ throw new ProcedureSuspendedException();
}
}
@@ -71,7 +128,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
@Override
protected void rollbackState(MasterProcedureEnv env, InitMetaState state)
- throws IOException, InterruptedException {
+ throws IOException, InterruptedException {
throw new UnsupportedOperationException();
}
@@ -87,7 +144,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
@Override
protected InitMetaState getInitialState() {
- return InitMetaState.INIT_META_ASSIGN_META;
+ return InitMetaState.INIT_META_WRITE_FS_LAYOUT;
}
@Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 08ef216..01b866d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -718,7 +718,7 @@ public class HRegionServer extends Thread implements
}
private void initializeFileSystem() throws IOException {
- // Get fs instance used by this RS. Do we use checksum verification in the hbase? If hbase
+ // Get fs instance used by this RS. Do we use checksum verification in the hbase? If hbase
// checksum verification enabled, then automatically switch off hdfs checksum verification.
boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
CommonFSUtils.setFsDefault(this.conf, CommonFSUtils.getWALRootDir(this.conf));
@@ -731,12 +731,7 @@ public class HRegionServer extends Thread implements
this.dataFs = new HFileSystem(this.conf, useHBaseChecksum);
this.dataRootDir = CommonFSUtils.getRootDir(this.conf);
this.tableDescriptors =
- new FSTableDescriptors(this.dataFs, this.dataRootDir, !canUpdateTableDescriptor(), false);
- if (this instanceof HMaster) {
- FSTableDescriptors.tryUpdateMetaTableDescriptor(this.conf, this.dataFs, this.dataRootDir,
- builder -> builder.setRegionReplication(
- conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)));
- }
+ new FSTableDescriptors(this.dataFs, this.dataRootDir, !canUpdateTableDescriptor(), false);
}
protected void login(UserProvider user, String host) throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 2329a0d..1f064f2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -119,6 +119,7 @@ public class FSTableDescriptors implements TableDescriptors {
this.usecache = usecache;
}
+ @VisibleForTesting
public static void tryUpdateMetaTableDescriptor(Configuration conf) throws IOException {
tryUpdateMetaTableDescriptor(conf, CommonFSUtils.getCurrentFileSystem(conf),
CommonFSUtils.getRootDir(conf), null);