Posted to commits@hbase.apache.org by zh...@apache.org on 2020/05/30 08:04:05 UTC

[hbase] branch master updated: HBASE-24471 The way we bootstrap meta table is confusing (#1806)

This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/master by this push:
     new 36a6ef9  HBASE-24471 The way we bootstrap meta table is confusing (#1806)
36a6ef9 is described below

commit 36a6ef9cf94a8cfbcfe89c7c73e158ee14b813b2
Author: Duo Zhang <zh...@apache.org>
AuthorDate: Sat May 30 16:03:52 2020 +0800

    HBASE-24471 The way we bootstrap meta table is confusing (#1806)
    
    Signed-off-by: Michael Stack <st...@apache.org>
---
 .../protobuf/server/master/MasterProcedure.proto   |   5 +-
 .../hadoop/hbase/master/MasterFileSystem.java      | 109 ++++++---------------
 .../hbase/master/procedure/InitMetaProcedure.java  |  89 ++++++++++++-----
 .../hadoop/hbase/regionserver/HRegionServer.java   |   9 +-
 .../hadoop/hbase/util/FSTableDescriptors.java      |   1 +
 5 files changed, 97 insertions(+), 116 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
index 5171e66..b51b529 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto
@@ -489,8 +489,9 @@ message ReopenTableRegionsStateData {
 }
 
 enum InitMetaState {
-  INIT_META_ASSIGN_META = 1;
-  INIT_META_CREATE_NAMESPACES = 2;
+  INIT_META_WRITE_FS_LAYOUT = 1;
+  INIT_META_ASSIGN_META = 2;
+  INIT_META_CREATE_NAMESPACES = 3;
 }
 
 message InitMetaStateData {
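
The reordered enum adds a new first state: a resumed procedure now replays
from INIT_META_WRITE_FS_LAYOUT before assigning meta. A minimal,
self-contained Java sketch of the flow this enum encodes (names mirror the
protobuf values; the real transitions live in
InitMetaProcedure.executeFromState further down, this is not the actual
procedure class):

    // Sketch only: mirrors the three-step bootstrap flow this enum encodes.
    public class InitMetaFlowSketch {
      enum State { INIT_META_WRITE_FS_LAYOUT, INIT_META_ASSIGN_META, INIT_META_CREATE_NAMESPACES }

      static State next(State state) {
        switch (state) {
          case INIT_META_WRITE_FS_LAYOUT:   return State.INIT_META_ASSIGN_META;       // write FS layout first
          case INIT_META_ASSIGN_META:       return State.INIT_META_CREATE_NAMESPACES; // then assign meta
          case INIT_META_CREATE_NAMESPACES: return null;                              // terminal state
          default: throw new UnsupportedOperationException("unhandled state=" + state);
        }
      }

      public static void main(String[] args) {
        for (State s = State.INIT_META_WRITE_FS_LAYOUT; s != null; s = next(s)) {
          System.out.println(s);
        }
      }
    }
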
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
index b9dddab..0bbfa4a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java
@@ -18,34 +18,27 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.ClusterId;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.HFileArchiver;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
-import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.client.RegionInfoBuilder;
-import org.apache.hadoop.hbase.client.TableDescriptor;
-import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.log.HBaseMarkers;
 import org.apache.hadoop.hbase.mob.MobConstants;
-import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
-import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.ipc.RemoteException;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -244,15 +237,26 @@ public class MasterFileSystem {
    * @return hbase.rootdir (after checks for existence and bootstrapping if needed populating the
    *         directory with necessary bootup files).
    */
-  private Path checkRootDir(final Path rd, final Configuration c, final FileSystem fs)
-      throws IOException {
+  private void checkRootDir(final Path rd, final Configuration c, final FileSystem fs)
+    throws IOException {
+    int threadWakeFrequency = c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
     // If FS is in safe mode wait till out of it.
-    FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
+    FSUtils.waitOnSafeMode(c, threadWakeFrequency);
 
     // Filesystem is good. Go ahead and check for hbase.rootdir.
+    FileStatus status;
+    try {
+      status = fs.getFileStatus(rd);
+    } catch (FileNotFoundException e) {
+      status = null;
+    }
+    int versionFileWriteAttempts = c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
+      HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
     try {
-      if (!fs.exists(rd)) {
-        fs.mkdirs(rd);
+      if (status == null) {
+        if (!fs.mkdirs(rd)) {
+          throw new IOException("Can not create configured '" + HConstants.HBASE_DIR + "' " + rd);
+        }
         // DFS leaves safe mode with 0 DNs when there are 0 blocks.
         // We used to handle this by checking the current DN count and waiting until
         // it is nonzero. With security, the check for datanode count doesn't work --
@@ -260,47 +264,29 @@ public class MasterFileSystem {
         // and simply retry file creation during bootstrap indefinitely. As soon as
         // there is one datanode it will succeed. Permission problems should have
         // already been caught by mkdirs above.
-        FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
-          10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
-            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
+        FSUtils.setVersion(fs, rd, threadWakeFrequency, versionFileWriteAttempts);
       } else {
-        if (!fs.isDirectory(rd)) {
-          throw new IllegalArgumentException(rd.toString() + " is not a directory");
+        if (!status.isDirectory()) {
+          throw new IllegalArgumentException(
+            "Configured '" + HConstants.HBASE_DIR + "' " + rd + " is not a directory.");
         }
         // as above
-        FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
-          10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
-            HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
+        FSUtils.checkVersion(fs, rd, true, threadWakeFrequency, versionFileWriteAttempts);
       }
     } catch (DeserializationException de) {
-      LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for "
-        + HConstants.HBASE_DIR, de);
+      LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}",
+        HConstants.HBASE_DIR, rd, de);
       throw new IOException(de);
     } catch (IllegalArgumentException iae) {
-      LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for "
-        + HConstants.HBASE_DIR + " " + rd.toString(), iae);
+      LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}",
+        HConstants.HBASE_DIR, rd, iae);
       throw iae;
     }
     // Make sure cluster ID exists
-    if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt(
-        HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) {
-      FSUtils.setClusterId(fs, rd, new ClusterId(), c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000));
+    if (!FSUtils.checkClusterIdExists(fs, rd, threadWakeFrequency)) {
+      FSUtils.setClusterId(fs, rd, new ClusterId(), threadWakeFrequency);
     }
     clusterId = FSUtils.getClusterId(fs, rd);
-
-    // Make sure the meta region directory exists!
-    if (!FSUtils.metaRegionExists(fs, rd)) {
-      bootstrap(rd, c);
-    }
-
-    // Create tableinfo-s for hbase:meta if not already there.
-    // assume, created table descriptor is for enabling table
-    // meta table is a system table, so descriptors are predefined,
-    // we should get them from registry.
-    FSTableDescriptors fsd = new FSTableDescriptors(fs, rd);
-    fsd.createTableDescriptor(fsd.get(TableName.META_TABLE_NAME));
-
-    return rd;
   }
 
   /**
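
Of note in the hunk above: the old fs.exists(rd) / fs.isDirectory(rd) pair is
replaced by a single fs.getFileStatus(rd) call, so one NameNode round trip
answers both "does it exist?" and "is it a directory?", with
FileNotFoundException signalling the missing case. A minimal sketch of the
idiom against the stock Hadoop FileSystem API (the path in main() is made up):

    import java.io.FileNotFoundException;
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class RootDirCheckSketch {
      // One getFileStatus() round trip instead of exists() + isDirectory().
      static void ensureDir(FileSystem fs, Path dir) throws IOException {
        FileStatus status;
        try {
          status = fs.getFileStatus(dir);
        } catch (FileNotFoundException e) {
          status = null; // not-found is signalled by the exception
        }
        if (status == null) {
          if (!fs.mkdirs(dir)) {
            throw new IOException("Can not create " + dir);
          }
        } else if (!status.isDirectory()) {
          throw new IllegalArgumentException(dir + " is not a directory");
        }
      }

      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        ensureDir(FileSystem.getLocal(conf), new Path("/tmp/rootdir-sketch"));
      }
    }
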
@@ -395,43 +381,6 @@ public class MasterFileSystem {
     }
   }
 
-  private static void bootstrap(final Path rd, final Configuration c)
-  throws IOException {
-    LOG.info("BOOTSTRAP: creating hbase:meta region");
-    try {
-      // Bootstrapping, make sure blockcache is off.  Else, one will be
-      // created here in bootstrap and it'll need to be cleaned up.  Better to
-      // not make it in first place.  Turn off block caching for bootstrap.
-      // Enable after.
-      FSTableDescriptors.tryUpdateMetaTableDescriptor(c);
-      TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME);
-      HRegion meta = HRegion.createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rd,
-          c, setInfoFamilyCachingForMeta(metaDescriptor, false), null);
-      meta.close();
-    } catch (IOException e) {
-        e = e instanceof RemoteException ?
-                ((RemoteException)e).unwrapRemoteException() : e;
-      LOG.error("bootstrap", e);
-      throw e;
-    }
-  }
-
-  /**
-   * Enable in memory caching for hbase:meta
-   */
-  public static TableDescriptor setInfoFamilyCachingForMeta(TableDescriptor metaDescriptor, final boolean b) {
-    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(metaDescriptor);
-    for (ColumnFamilyDescriptor hcd: metaDescriptor.getColumnFamilies()) {
-      if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) {
-        builder.modifyColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(hcd)
-                .setBlockCacheEnabled(b)
-                .setInMemory(b)
-                .build());
-      }
-    }
-    return builder.build();
-  }
-
   public void deleteFamilyFromFS(RegionInfo region, byte[] familyName)
       throws IOException {
     deleteFamilyFromFS(rootdir, region, familyName);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
index 4cd305b..cdf78cc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java
@@ -25,13 +25,21 @@ import static org.apache.hadoop.hbase.master.procedure.AbstractStateMachineNames
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.concurrent.CountDownLatch;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
 import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
 import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
 import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -64,20 +72,46 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
     return TableOperationType.CREATE;
   }
 
+  private static void writeFsLayout(Path rootDir, Configuration conf) throws IOException {
+    LOG.info("BOOTSTRAP: creating hbase:meta region");
+    FileSystem fs = rootDir.getFileSystem(conf);
+    Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
+    if (fs.exists(tableDir) && !fs.delete(tableDir, true)) {
+      LOG.warn("Can not delete partial created meta table, continue...");
+    }
+    // Bootstrapping, make sure blockcache is off. Else, one will be
+    // created here in bootstrap and it'll need to be cleaned up. Better to
+    // not make it in first place. Turn off block caching for bootstrap.
+    // Enable after.
+    FSTableDescriptors.tryUpdateMetaTableDescriptor(conf, fs, rootDir,
+      builder -> builder.setRegionReplication(
+        conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)));
+    TableDescriptor metaDescriptor = new FSTableDescriptors(conf).get(TableName.META_TABLE_NAME);
+    HRegion
+      .createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rootDir, conf, metaDescriptor, null)
+      .close();
+  }
+
   @Override
   protected Flow executeFromState(MasterProcedureEnv env, InitMetaState state)
-      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
+    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
     LOG.debug("Execute {}", this);
-    switch (state) {
-      case INIT_META_ASSIGN_META:
-        LOG.info("Going to assign meta");
-        addChildProcedure(env.getAssignmentManager()
-          .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
-        setNextState(InitMetaState.INIT_META_CREATE_NAMESPACES);
-        return Flow.HAS_MORE_STATE;
-      case INIT_META_CREATE_NAMESPACES:
-        LOG.info("Going to create {} and {} namespaces", DEFAULT_NAMESPACE, SYSTEM_NAMESPACE);
-        try {
+    try {
+      switch (state) {
+        case INIT_META_WRITE_FS_LAYOUT:
+          Configuration conf = env.getMasterConfiguration();
+          Path rootDir = CommonFSUtils.getRootDir(conf);
+          writeFsLayout(rootDir, conf);
+          setNextState(InitMetaState.INIT_META_ASSIGN_META);
+          return Flow.HAS_MORE_STATE;
+        case INIT_META_ASSIGN_META:
+          LOG.info("Going to assign meta");
+          addChildProcedure(env.getAssignmentManager()
+            .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)));
+          setNextState(InitMetaState.INIT_META_CREATE_NAMESPACES);
+          return Flow.HAS_MORE_STATE;
+        case INIT_META_CREATE_NAMESPACES:
+          LOG.info("Going to create {} and {} namespaces", DEFAULT_NAMESPACE, SYSTEM_NAMESPACE);
           createDirectory(env, DEFAULT_NAMESPACE);
           createDirectory(env, SYSTEM_NAMESPACE);
           // here the TableNamespaceManager has not been initialized yet, so we have to insert the
@@ -85,20 +119,21 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
           // namespaces when starting.
           insertNamespaceToMeta(env.getMasterServices().getConnection(), DEFAULT_NAMESPACE);
           insertNamespaceToMeta(env.getMasterServices().getConnection(), SYSTEM_NAMESPACE);
-        } catch (IOException e) {
-          if (retryCounter == null) {
-            retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
-          }
-          long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
-          LOG.warn("Failed to init default and system namespaces, suspend {}secs", backoff, e);
-          setTimeout(Math.toIntExact(backoff));
-          setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
-          skipPersistence();
-          throw new ProcedureSuspendedException();
-        }
-        return Flow.NO_MORE_STATE;
-      default:
-        throw new UnsupportedOperationException("unhandled state=" + state);
+
+          return Flow.NO_MORE_STATE;
+        default:
+          throw new UnsupportedOperationException("unhandled state=" + state);
+      }
+    } catch (IOException e) {
+      if (retryCounter == null) {
+        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
+      }
+      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
+      LOG.warn("Failed to init meta, suspend {}secs", backoff, e);
+      setTimeout(Math.toIntExact(backoff));
+      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
+      skipPersistence();
+      throw new ProcedureSuspendedException();
     }
   }
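
The reshuffled hunk above also widens the retry envelope: the IOException
handler with exponential backoff that previously wrapped only namespace
creation now covers all three states, so a transient filesystem failure while
writing the layout suspends and retries the procedure instead of killing it.
A self-contained sketch of that backoff shape (the real code suspends via
WAITING_TIMEOUT and ProcedureUtil.createRetryCounter rather than sleeping;
the numbers here are made up):

    public class RetryBackoffSketch {
      public static void main(String[] args) throws InterruptedException {
        long backoffMs = 1_000;            // hypothetical initial backoff
        final long maxBackoffMs = 60_000;  // hypothetical cap
        for (int attempt = 1;; attempt++) {
          try {
            doInitStep(attempt);           // stand-in for one procedure state
            break;
          } catch (RuntimeException e) {   // the real code catches IOException
            System.err.printf("Failed to init meta, suspend %dms (attempt %d)%n",
              backoffMs, attempt);
            Thread.sleep(backoffMs);       // real code suspends the procedure
            backoffMs = Math.min(backoffMs * 2, maxBackoffMs);
          }
        }
      }

      static void doInitStep(int attempt) {
        if (attempt < 3) {
          throw new RuntimeException("transient failure"); // simulated flakiness
        }
      }
    }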
 
@@ -117,7 +152,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
 
   @Override
   protected void rollbackState(MasterProcedureEnv env, InitMetaState state)
-      throws IOException, InterruptedException {
+    throws IOException, InterruptedException {
     throw new UnsupportedOperationException();
   }
 
@@ -133,7 +168,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
 
   @Override
   protected InitMetaState getInitialState() {
-    return InitMetaState.INIT_META_ASSIGN_META;
+    return InitMetaState.INIT_META_WRITE_FS_LAYOUT;
   }
 
   @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index f05c18e..34052bb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -713,7 +713,7 @@ public class HRegionServer extends Thread implements
   }
 
   private void initializeFileSystem() throws IOException {
-    // Get fs instance used by this RS.  Do we use checksum verification in the hbase? If hbase
+    // Get fs instance used by this RS. Do we use checksum verification in the hbase? If hbase
     // checksum verification enabled, then automatically switch off hdfs checksum verification.
     boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
     CommonFSUtils.setFsDefault(this.conf, CommonFSUtils.getWALRootDir(this.conf));
@@ -726,12 +726,7 @@ public class HRegionServer extends Thread implements
     this.dataFs = new HFileSystem(this.conf, useHBaseChecksum);
     this.dataRootDir = CommonFSUtils.getRootDir(this.conf);
     this.tableDescriptors =
-        new FSTableDescriptors(this.dataFs, this.dataRootDir, !canUpdateTableDescriptor(), false);
-    if (this instanceof HMaster) {
-      FSTableDescriptors.tryUpdateMetaTableDescriptor(this.conf, this.dataFs, this.dataRootDir,
-        builder -> builder.setRegionReplication(
-          conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM)));
-    }
+      new FSTableDescriptors(this.dataFs, this.dataRootDir, !canUpdateTableDescriptor(), false);
   }
 
   protected void login(UserProvider user, String host) throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 1dfdca5..bee3114 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -119,6 +119,7 @@ public class FSTableDescriptors implements TableDescriptors {
     this.usecache = usecache;
   }
 
+  @VisibleForTesting
   public static void tryUpdateMetaTableDescriptor(Configuration conf) throws IOException {
     tryUpdateMetaTableDescriptor(conf, CommonFSUtils.getCurrentFileSystem(conf),
       CommonFSUtils.getRootDir(conf), null);
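
With bootstrap moved into InitMetaProcedure, the single-argument
tryUpdateMetaTableDescriptor(conf) overload is only reached from tests, hence
the @VisibleForTesting tag; production code (writeFsLayout above) calls the
four-argument overload with a lambda that customizes the descriptor builder
before it is written. A minimal sketch of that customizer pattern (Builder is
a hypothetical stand-in for TableDescriptorBuilder):

    import java.util.function.Function;

    public class DescriptorCustomizerSketch {
      static class Builder {
        int regionReplication = 1;
        Builder setRegionReplication(int n) { regionReplication = n; return this; }
      }

      // A null customizer keeps the defaults, mirroring the one-argument overload.
      static Builder update(Function<Builder, Builder> customizer) {
        Builder builder = new Builder();
        return customizer == null ? builder : customizer.apply(builder);
      }

      public static void main(String[] args) {
        // Mirrors writeFsLayout(): bump region replication from configuration.
        Builder b = update(builder -> builder.setRegionReplication(3));
        System.out.println("regionReplication=" + b.regionReplication);
      }
    }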