You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by vj...@apache.org on 2021/07/01 10:06:24 UTC

[hbase] branch branch-2.4 updated: HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)

This is an automated email from the ASF dual-hosted git repository.

vjasani pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 5b5c92f  HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
5b5c92f is described below

commit 5b5c92f427ed26d5882c1d99eef3c409dedc4e12
Author: Viraj Jasani <vj...@apache.org>
AuthorDate: Thu Jul 1 15:14:23 2021 +0530

    HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
    
    Signed-off-by: Michael Stack <st...@apache.org>
---
 .../hadoop/hbase/PleaseRestartMasterException.java | 34 +++++++++
 .../org/apache/hadoop/hbase/master/HMaster.java    | 89 +++++++++++++++++++++-
 .../hadoop/hbase/util/FSTableDescriptors.java      | 45 ++++++-----
 3 files changed, 149 insertions(+), 19 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
new file mode 100644
index 0000000..62f84e9
--- /dev/null
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Signals that the master needs to be restarted.
+ */
+@InterfaceAudience.Public
+public class PleaseRestartMasterException extends HBaseIOException {
+
+  /** @param message detail message, handed as-is to the {@link HBaseIOException} constructor */
+  public PleaseRestartMasterException(final String message) {
+    super(message);
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index f92dd4c..9086ad3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -70,6 +70,7 @@ import org.apache.hadoop.hbase.InvalidFamilyOperationException;
 import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.PleaseRestartMasterException;
 import org.apache.hadoop.hbase.RegionMetrics;
 import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
 import org.apache.hadoop.hbase.ServerMetrics;
@@ -175,6 +176,7 @@ import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
 import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
@@ -191,6 +193,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.trace.TraceUtil;
 import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.IdLock;
@@ -953,9 +956,26 @@ public class HMaster extends HRegionServer implements MasterServices {
     if (!waitForMetaOnline()) {
       return;
     }
+    TableDescriptor metaDescriptor = tableDescriptors.get(
+        TableName.META_TABLE_NAME);
+    final ColumnFamilyDescriptor tableFamilyDesc = metaDescriptor
+        .getColumnFamily(HConstants.TABLE_FAMILY);
+    final ColumnFamilyDescriptor replBarrierFamilyDesc =
+        metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
+
     this.assignmentManager.joinCluster();
     // The below depends on hbase:meta being online.
-    this.tableStateManager.start();
+    try {
+      this.tableStateManager.start();
+    } catch (NoSuchColumnFamilyException e) {
+      if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+        LOG.info("TableStates manager could not be started. This is expected"
+            + " during HBase 1 to 2 upgrade.", e);
+      } else {
+        throw e;
+      }
+    }
+
     this.assignmentManager.processOfflineRegions();
     // this must be called after the above processOfflineRegions to prevent race
     this.assignmentManager.wakeMetaLoadedEvent();
@@ -1025,7 +1045,17 @@ public class HMaster extends HRegionServer implements MasterServices {
       return;
     }
     status.setStatus("Starting cluster schema service");
-    initClusterSchemaService();
+    try {
+      initClusterSchemaService();
+    } catch (IllegalStateException e) {
+      if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
+          && tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+        LOG.info("ClusterSchema service could not be initialized. This is "
+            + "expected during HBase 1 to 2 upgrade", e);
+      } else {
+        throw e;
+      }
+    }
 
     if (this.cpHost != null) {
       try {
@@ -1047,6 +1077,29 @@ public class HMaster extends HRegionServer implements MasterServices {
     // Set master as 'initialized'.
     setInitialized(true);
 
+    if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+      // create missing CFs in meta table after master is set to 'initialized'.
+      createMissingCFsInMetaDuringUpgrade(metaDescriptor);
+
+      // Throwing this Exception to abort active master is painful but this
+      // seems the only way to add missing CFs in meta while upgrading from
+      // HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
+      // So, why do we abort active master after adding missing CFs in meta?
+      // When we reach here, we would have already bypassed NoSuchColumnFamilyException
+      // in initClusterSchemaService(), meaning ClusterSchemaService is not
+      // correctly initialized but we bypassed it. Similarly, we bypassed
+      // tableStateManager.start() as well. Hence, we had better abort the
+      // current active master: our main task - adding missing CFs in the
+      // meta table - is done (possible only after master state is set as
+      // initialized), but at the expense of bypassing a few important tasks
+      // of the active master init routine. So now we abort active master so
+      // that the next active master init will not face any issues and all
+      // mandatory services will be started during master init phase.
+      throw new PleaseRestartMasterException("Aborting active master after missing"
+          + " CFs are successfully added in meta. Subsequent active master "
+          + "initialization should be uninterrupted");
+    }
+
     if (maintenanceMode) {
       LOG.info("Detected repair mode, skipping final initialization steps.");
       return;
@@ -1106,6 +1159,38 @@ public class HMaster extends HRegionServer implements MasterServices {
     }
   }
 
+  /**
+   * Adds the table and rep_barrier CFs to hbase:meta, then polls once per second (at most 30
+   * times) for the modifyTable procedure; throws IOException on interrupt, timeout or failure.
+   */
+  private void createMissingCFsInMetaDuringUpgrade(
+      TableDescriptor metaDescriptor) throws IOException {
+    TableDescriptor newMetaDesc = TableDescriptorBuilder.newBuilder(metaDescriptor)
+        .setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
+        .setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta())
+        .build();
+    long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc, 0, 0, false);
+    for (int tries = 30; tries > 0 && !getMasterProcedureExecutor().isFinished(pid)
+        && getMasterProcedureExecutor().isRunning(); tries--) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt(); // keep the interrupt visible to callers
+        throw new IOException("Wait interrupted", e);
+      }
+    }
+    // Judge by procedure state, not the retry counter: it may finish during the last sleep.
+    if (!getMasterProcedureExecutor().isFinished(pid)) {
+      throw new HBaseIOException(
+          "Failed to add table and rep_barrier CFs to meta in a given time.");
+    }
+    Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
+    if (result != null && result.isFailed()) {
+      throw new IOException("Failed to add table and rep_barrier CFs to meta. "
+          + MasterProcedureUtil.unwrapRemoteIOException(result));
+    }
+  }
+
   /**
    * Check hbase:meta is up and ready for reading. For use during Master startup only.
    * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 76a9328..f40736d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableInfoMissingException;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
 import org.apache.hadoop.hbase.client.TableDescriptor;
@@ -139,6 +140,31 @@ public class FSTableDescriptors implements TableDescriptors {
     }
   }
 
+  public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
+      final Configuration conf) {
+    return ColumnFamilyDescriptorBuilder
+        .newBuilder(HConstants.TABLE_FAMILY)
+        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
+            HConstants.DEFAULT_HBASE_META_VERSIONS))
+        .setInMemory(true)
+        .setBlocksize(8 * 1024)
+        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+        // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
+        .setBloomFilterType(BloomType.NONE)
+        .build();
+  }
+
+  /** Builds the HConstants.REPLICATION_BARRIER_FAMILY family descriptor for hbase:meta. */
+  public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
+    final ColumnFamilyDescriptorBuilder cfBuilder =
+        ColumnFamilyDescriptorBuilder.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY);
+    return cfBuilder.setMaxVersions(HConstants.ALL_VERSIONS).setInMemory(true)
+        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+        // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
+        .setBloomFilterType(BloomType.NONE)
+        .build();
+  }
+
   public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
     throws IOException {
     // TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
@@ -155,23 +181,8 @@ public class FSTableDescriptors implements TableDescriptors {
         // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
         .setBloomFilterType(BloomType.NONE)
         .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
-        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
-          HConstants.DEFAULT_HBASE_META_VERSIONS))
-        .setInMemory(true)
-        .setBlocksize(8 * 1024)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
-        .setBloomFilterType(BloomType.NONE)
-        .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder
-        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
-        .setMaxVersions(HConstants.ALL_VERSIONS)
-        .setInMemory(true)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
-        .setBloomFilterType(BloomType.NONE)
-        .build())
+      .setColumnFamily(getTableFamilyDescForMeta(conf))
+      .setColumnFamily(getReplBarrierFamilyDescForMeta())
       .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
         MultiRowMutationEndpoint.class.getName())
         .setPriority(Coprocessor.PRIORITY_SYSTEM)