You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by el...@apache.org on 2018/12/07 22:28:52 UTC
[09/51] [abbrv] hbase git commit: HBASE-21494 NPE when loading
RecoverStandByProcedure
HBASE-21494 NPE when loading RecoverStandByProcedure
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b329e6e3
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b329e6e3
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b329e6e3
Branch: refs/heads/HBASE-20952
Commit: b329e6e3f271bc22ec4a6f4dd71a8e8b422db3d0
Parents: f555258
Author: zhangduo <zh...@apache.org>
Authored: Sun Nov 18 15:58:53 2018 +0800
Committer: Duo Zhang <zh...@apache.org>
Committed: Mon Nov 19 09:35:18 2018 +0800
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/master/HMaster.java | 4 +-
.../SyncReplicationReplayWALManager.java | 4 +-
.../TestRegisterPeerWorkerWhenRestarting.java | 127 +++++++++++++++++++
...tPeerSyncReplicationStateProcedureRetry.java | 5 +
4 files changed, 137 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/b329e6e3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 31dc208..e1d3740 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -778,7 +778,6 @@ public class HMaster extends HRegionServer implements MasterServices {
this.splitOrMergeTracker.start();
this.replicationPeerManager = ReplicationPeerManager.create(zooKeeper, conf);
- this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
this.drainingServerTracker = new DrainingServerTracker(zooKeeper, this, this.serverManager);
this.drainingServerTracker.start();
@@ -949,7 +948,10 @@ public class HMaster extends HRegionServer implements MasterServices {
}
status.setStatus("Initialize ServerManager and schedule SCP for crash servers");
+ // The below two managers must be created before loading procedures, as they will be used during
+ // loading.
this.serverManager = createServerManager(this);
+ this.syncReplicationReplayWALManager = new SyncReplicationReplayWALManager(this);
createProcedureExecutor();
@SuppressWarnings("rawtypes")
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType =
http://git-wip-us.apache.org/repos/asf/hbase/blob/b329e6e3/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALManager.java
index 89e97bb..ae624b1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/SyncReplicationReplayWALManager.java
@@ -146,12 +146,12 @@ public class SyncReplicationReplayWALManager {
this.fs = services.getMasterFileSystem().getWALFileSystem();
this.walRootDir = services.getMasterFileSystem().getWALRootDir();
this.remoteWALDir = new Path(this.walRootDir, ReplicationUtils.REMOTE_WAL_DIR_NAME);
- MasterProcedureScheduler scheduler =
- services.getMasterProcedureExecutor().getEnvironment().getProcedureScheduler();
serverManager.registerListener(new ServerListener() {
@Override
public void serverAdded(ServerName serverName) {
+ MasterProcedureScheduler scheduler =
+ services.getMasterProcedureExecutor().getEnvironment().getProcedureScheduler();
for (UsedReplayWorkersForPeer usedWorkers : usedWorkersByPeer.values()) {
synchronized (usedWorkers) {
usedWorkers.wake(scheduler);
http://git-wip-us.apache.org/repos/asf/hbase/blob/b329e6e3/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
new file mode 100644
index 0000000..72aa32d
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.replication;
+
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.DISPATCH_WALS_VALUE;
+import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RecoverStandbyState.UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
+import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
+import org.apache.hadoop.hbase.replication.SyncReplicationState;
+import org.apache.hadoop.hbase.replication.SyncReplicationTestBase;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.zookeeper.KeeperException;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Testcase for HBASE-21494.
+ */
+@Category({ MasterTests.class, LargeTests.class })
+public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBase {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestRegisterPeerWorkerWhenRestarting.class);
+
+ private static volatile boolean FAIL = false;
+
+ public static final class HMasterForTest extends HMaster {
+
+ public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+ super(conf);
+ }
+
+ @Override
+ public void remoteProcedureCompleted(long procId) {
+ if (FAIL && getMasterProcedureExecutor()
+ .getProcedure(procId) instanceof SyncReplicationReplayWALRemoteProcedure) {
+ throw new RuntimeException("Inject error");
+ }
+ super.remoteProcedureCompleted(procId);
+ }
+ }
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ UTIL2.getConfiguration().setClass(HConstants.MASTER_IMPL, HMasterForTest.class, HMaster.class);
+ SyncReplicationTestBase.setUp();
+ }
+
+ @Test
+ public void testRestart() throws Exception {
+ UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
+ SyncReplicationState.STANDBY);
+ UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
+ SyncReplicationState.ACTIVE);
+
+ UTIL1.getAdmin().disableReplicationPeer(PEER_ID);
+ write(UTIL1, 0, 100);
+ Thread.sleep(2000);
+ // peer is disabled so no data have been replicated
+ verifyNotReplicatedThroughRegion(UTIL2, 0, 100);
+
+ // transit the A to DA first to avoid too many error logs.
+ UTIL1.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
+ SyncReplicationState.DOWNGRADE_ACTIVE);
+ HMaster master = UTIL2.getHBaseCluster().getMaster();
+ // make sure the transiting can not succeed
+ FAIL = true;
+ ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
+ Thread t = new Thread() {
+
+ @Override
+ public void run() {
+ try {
+ UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
+ SyncReplicationState.DOWNGRADE_ACTIVE);
+ } catch (IOException e) {
+ throw new UncheckedIOException(e);
+ }
+ }
+ };
+ t.start();
+ // wait until we are in the states where we need to register peer worker when restarting
+ UTIL2.waitFor(60000,
+ () -> procExec.getProcedures().stream().filter(p -> p instanceof RecoverStandbyProcedure)
+ .map(p -> (RecoverStandbyProcedure) p)
+ .anyMatch(p -> p.getCurrentStateId() == DISPATCH_WALS_VALUE ||
+ p.getCurrentStateId() == UNREGISTER_PEER_FROM_WORKER_STORAGE_VALUE));
+ // failover to another master
+ MasterThread mt = UTIL2.getMiniHBaseCluster().getMasterThread();
+ mt.getMaster().abort("for testing");
+ mt.join();
+ FAIL = false;
+ t.join();
+ // make sure the new master can finish the transiting
+ assertEquals(SyncReplicationState.DOWNGRADE_ACTIVE,
+ UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID));
+ verify(UTIL2, 0, 100);
+ }
+}
http://git-wip-us.apache.org/repos/asf/hbase/blob/b329e6e3/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestTransitPeerSyncReplicationStateProcedureRetry.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestTransitPeerSyncReplicationStateProcedureRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestTransitPeerSyncReplicationStateProcedureRetry.java
index 1c4a819..9b73039 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestTransitPeerSyncReplicationStateProcedureRetry.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestTransitPeerSyncReplicationStateProcedureRetry.java
@@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hbase.master.replication;
+import static org.junit.Assert.assertEquals;
+
import java.io.IOException;
import java.io.UncheckedIOException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -90,5 +92,8 @@ public class TestTransitPeerSyncReplicationStateProcedureRetry extends SyncRepli
.mapToLong(Procedure::getProcId).min().getAsLong();
MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
+ assertEquals(SyncReplicationState.DOWNGRADE_ACTIVE,
+ UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID));
+ verify(UTIL2, 0, 100);
}
}