You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by vj...@apache.org on 2021/07/01 10:57:44 UTC
[hbase] branch branch-2.3 updated: HBASE-25902 Add missing CFs in
meta during HBase 1 to 2 Upgrade (#3417)
This is an automated email from the ASF dual-hosted git repository.
vjasani pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.3 by this push:
new 0c39233 HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
0c39233 is described below
commit 0c392339ca1e4d34c39cc0024e1e8883b2b18c76
Author: Viraj Jasani <vj...@apache.org>
AuthorDate: Thu Jul 1 15:14:23 2021 +0530
HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
Signed-off-by: Michael Stack <st...@apache.org>
---
.../hadoop/hbase/PleaseRestartMasterException.java | 34 +++++++++
.../org/apache/hadoop/hbase/master/HMaster.java | 89 +++++++++++++++++++++-
.../hadoop/hbase/util/FSTableDescriptors.java | 45 ++++++-----
3 files changed, 149 insertions(+), 19 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
new file mode 100644
index 0000000..62f84e9
--- /dev/null
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Thrown if the master requires restart. HMaster throws it to abort the active master once the
+ * missing CFs have been added to meta during an HBase 1 to 2 upgrade.
+ */
+@InterfaceAudience.Public
+public class PleaseRestartMasterException extends HBaseIOException {
+ /** @param s message describing why the master has to be restarted; passed to the superclass */
+ public PleaseRestartMasterException(final String s) {
+ super(s);
+ }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 7e9cee4..85265ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.PleaseRestartMasterException;
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
@@ -180,6 +181,7 @@ import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
@@ -197,6 +199,7 @@ import org.apache.hadoop.hbase.trace.TraceUtil;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.IdLock;
@@ -1079,9 +1082,26 @@ public class HMaster extends HRegionServer implements MasterServices {
if (!waitForMetaOnline()) {
return;
}
+ TableDescriptor metaDescriptor = tableDescriptors.get(
+ TableName.META_TABLE_NAME);
+ final ColumnFamilyDescriptor tableFamilyDesc = metaDescriptor
+ .getColumnFamily(HConstants.TABLE_FAMILY);
+ final ColumnFamilyDescriptor replBarrierFamilyDesc =
+ metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
+
this.assignmentManager.joinCluster();
// The below depends on hbase:meta being online.
- this.tableStateManager.start();
+ try {
+ this.tableStateManager.start();
+ } catch (NoSuchColumnFamilyException e) {
+ if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+ LOG.info("TableStates manager could not be started. This is expected"
+ + " during HBase 1 to 2 upgrade.", e);
+ } else {
+ throw e;
+ }
+ }
+
this.assignmentManager.processOfflineRegions();
// this must be called after the above processOfflineRegions to prevent race
this.assignmentManager.wakeMetaLoadedEvent();
@@ -1118,7 +1138,17 @@ public class HMaster extends HRegionServer implements MasterServices {
return;
}
status.setStatus("Starting cluster schema service");
- initClusterSchemaService();
+ try {
+ initClusterSchemaService();
+ } catch (IllegalStateException e) {
+ if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
+ && tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+ LOG.info("ClusterSchema service could not be initialized. This is "
+ + "expected during HBase 1 to 2 upgrade", e);
+ } else {
+ throw e;
+ }
+ }
if (this.cpHost != null) {
try {
@@ -1140,6 +1170,29 @@ public class HMaster extends HRegionServer implements MasterServices {
// Set master as 'initialized'.
setInitialized(true);
+ if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+ // create missing CFs in meta table after master is set to 'initialized'.
+ createMissingCFsInMetaDuringUpgrade(metaDescriptor);
+
+ // Throwing this Exception to abort active master is painful but this
+ // seems the only way to add missing CFs in meta while upgrading from
+ // HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
+ // So, why do we abort active master after adding missing CFs in meta?
+ // When we reach here, we would have already bypassed NoSuchColumnFamilyException
+ // in initClusterSchemaService(), meaning ClusterSchemaService is not
+ // correctly initialized but we bypassed it. Similarly, we bypassed
+ // tableStateManager.start() as well. Hence, we should better abort
+ // current active master because our main task - adding missing CFs
+ // in meta table is done (possible only after master state is set as
+ // initialized) at the expense of bypassing few important tasks as part
+ // of active master init routine. So now we abort active master so that
+ // next active master init will not face any issues and all mandatory
+ // services will be started during master init phase.
+ throw new PleaseRestartMasterException("Aborting active master after missing"
+ + " CFs are successfully added in meta. Subsequent active master "
+ + "initialization should be uninterrupted");
+ }
+
if (maintenanceMode) {
LOG.info("Detected repair mode, skipping final initialization steps.");
return;
@@ -1206,6 +1259,38 @@ public class HMaster extends HRegionServer implements MasterServices {
}
}
+ /**
+  * Adds the table and rep_barrier CFs to meta via modifyTable and waits (30 tries, 1s apart)
+  * for the procedure; throws IOException if interrupted, unfinished in time, or failed.
+  */
+ private void createMissingCFsInMetaDuringUpgrade(
+     TableDescriptor metaDescriptor) throws IOException {
+   TableDescriptor newMetaDesc = TableDescriptorBuilder.newBuilder(metaDescriptor)
+     .setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
+     .setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta()).build();
+   long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc, 0, 0, false);
+   int tries = 30;
+   while (!(getMasterProcedureExecutor().isFinished(pid))
+     && getMasterProcedureExecutor().isRunning() && tries > 0) {
+     try {
+       Thread.sleep(1000);
+     } catch (InterruptedException e) {
+       Thread.currentThread().interrupt(); // restore the flag so callers can see the interrupt
+       throw new IOException("Wait interrupted", e);
+     }
+     tries--;
+   }
+   // Judge by the procedure's state, not the counter: it may finish during the last sleep,
+   // and a stopped executor leaves it unfinished even though tries remain.
+   if (!getMasterProcedureExecutor().isFinished(pid)) {
+     throw new HBaseIOException("Failed to add table and rep_barrier CFs to meta in a given time.");
+   }
+   Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
+   if (result != null && result.isFailed()) {
+     throw new IOException("Failed to add table and rep_barrier CFs to meta. "
+       + MasterProcedureUtil.unwrapRemoteIOException(result));
+   }
+ }
+
/**
* Check hbase:meta is up and ready for reading. For use during Master startup only.
* @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 2329a0d..7ab6ed8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
@@ -146,6 +147,31 @@ public class FSTableDescriptors implements TableDescriptors {
}
}
+ public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
+ final Configuration conf) {
+ return ColumnFamilyDescriptorBuilder
+ .newBuilder(HConstants.TABLE_FAMILY)
+ .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
+ HConstants.DEFAULT_HBASE_META_VERSIONS))
+ .setInMemory(true)
+ .setBlocksize(8 * 1024)
+ .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+ // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
+ .setBloomFilterType(BloomType.NONE)
+ .build();
+ }
+
+ /** Returns the descriptor of the {@link HConstants#REPLICATION_BARRIER_FAMILY} CF of meta. */
+ public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
+   final ColumnFamilyDescriptorBuilder barrierFamily =
+     ColumnFamilyDescriptorBuilder.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY);
+   barrierFamily.setMaxVersions(HConstants.ALL_VERSIONS).setInMemory(true);
+   barrierFamily.setScope(HConstants.REPLICATION_SCOPE_LOCAL);
+   // Blooms stay off for meta: they need work and seem to mess w/ getClosestOrBefore.
+   barrierFamily.setBloomFilterType(BloomType.NONE);
+   return barrierFamily.build();
+ }
+
@VisibleForTesting
public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
throws IOException {
@@ -163,23 +189,8 @@ public class FSTableDescriptors implements TableDescriptors {
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
.setBloomFilterType(BloomType.NONE)
.build())
- .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
- .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
- HConstants.DEFAULT_HBASE_META_VERSIONS))
- .setInMemory(true)
- .setBlocksize(8 * 1024)
- .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
- // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
- .setBloomFilterType(BloomType.NONE)
- .build())
- .setColumnFamily(ColumnFamilyDescriptorBuilder
- .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
- .setMaxVersions(HConstants.ALL_VERSIONS)
- .setInMemory(true)
- .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
- // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
- .setBloomFilterType(BloomType.NONE)
- .build())
+ .setColumnFamily(getTableFamilyDescForMeta(conf))
+ .setColumnFamily(getReplBarrierFamilyDescForMeta())
.setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
MultiRowMutationEndpoint.class.getName())
.setPriority(Coprocessor.PRIORITY_SYSTEM)