You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2017/08/22 00:23:15 UTC

hbase git commit: HBASE-18103 [AMv2] Changed master to throw YouAreDeadException when it receives regionServerReport() with incorrect region assignment. Added test to verify rogue region server behavior.

Repository: hbase
Updated Branches:
  refs/heads/master bf343da4a -> 5895538a3


HBASE-18103 [AMv2] Changed master to throw YouAreDeadException when it receives regionServerReport() with incorrect region assignment. Added test to verify rogue region server behavior.

Prior to these changes, the behavior was to call expireServer(), log the exception, and suppress it. With these changes, the RS receives the YouAreDeadException and treats it as a fatal error. This 'fail fast' approach will help us stabilize the code. This behavior can be reconsidered later if necessary.

Signed-off-by: Michael Stack <st...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/5895538a
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/5895538a
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/5895538a

Branch: refs/heads/master
Commit: 5895538a32d6bd2d7153e38304523caf6ea7579c
Parents: bf343da
Author: Umesh Agashe <ua...@cloudera.com>
Authored: Fri Aug 18 14:28:00 2017 -0700
Committer: Michael Stack <st...@apache.org>
Committed: Mon Aug 21 17:23:10 2017 -0700

----------------------------------------------------------------------
 .../master/assignment/AssignmentManager.java    |   7 +-
 .../master/assignment/MockMasterServices.java   |   9 +-
 .../assignment/TestRogueRSAssignment.java       | 192 +++++++++++++++++++
 3 files changed, 204 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/5895538a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 0b23f47..6a9cfc2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.RegionException;
 import org.apache.hadoop.hbase.RegionStateListener;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.client.TableState;
 import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
@@ -895,7 +896,7 @@ public class AssignmentManager implements ServerListener {
    * that something went wrong on the RS side.
    */
   public void reportOnlineRegions(final ServerName serverName,
-      final int versionNumber, final Set<byte[]> regionNames) {
+      final int versionNumber, final Set<byte[]> regionNames) throws YouAreDeadException {
     if (!isRunning()) return;
     if (LOG.isTraceEnabled()) {
       LOG.trace("ReportOnlineRegions " + serverName + " regionCount=" + regionNames.size() +
@@ -959,7 +960,8 @@ public class AssignmentManager implements ServerListener {
     }
   }
 
-  void checkOnlineRegionsReport(final ServerStateNode serverNode, final Set<byte[]> regionNames) {
+  void checkOnlineRegionsReport(final ServerStateNode serverNode, final Set<byte[]> regionNames)
+      throws YouAreDeadException {
     final ServerName serverName = serverNode.getServerName();
     try {
       for (byte[] regionName: regionNames) {
@@ -999,6 +1001,7 @@ public class AssignmentManager implements ServerListener {
     } catch (UnexpectedStateException e) {
       LOG.warn("Killing " + serverName + ": " + e.getMessage());
       killRegionServer(serverNode);
+      throw (YouAreDeadException)new YouAreDeadException(e.getMessage()).initCause(e);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/5895538a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
index d558aaf..48386a6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/MockMasterServices.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
 import org.apache.hadoop.hbase.master.LoadBalancer;
@@ -115,8 +116,12 @@ public class MockMasterServices extends MockNoopMasterServices {
       protected boolean waitServerReportEvent(ServerName serverName, Procedure proc) {
         // Make a report with current state of the server 'serverName' before we call wait..
         SortedSet<byte []> regions = regionsToRegionServers.get(serverName);
-        getAssignmentManager().reportOnlineRegions(serverName, 0,
-            regions == null? new HashSet<byte []>(): regions);
+        try {
+          getAssignmentManager().reportOnlineRegions(serverName, 0,
+              regions == null? new HashSet<byte []>(): regions);
+        } catch (YouAreDeadException e) {
+          throw new RuntimeException(e);
+        }
         return super.waitServerReportEvent(serverName, proc);
       }
     };

http://git-wip-us.apache.org/repos/asf/hbase/blob/5895538a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRogueRSAssignment.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRogueRSAssignment.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRogueRSAssignment.java
new file mode 100644
index 0000000..49eb573
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRogueRSAssignment.java
@@ -0,0 +1,192 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.YouAreDeadException;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.ServiceException;
+import org.apache.hadoop.hbase.shaded.com.google.protobuf.UnsafeByteOperations;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+import org.junit.rules.TestName;
+
+import java.io.IOException;
+import java.util.List;
+
+import static org.hamcrest.core.Is.isA;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Verifies the master fails a regionServerReport() that claims a region assigned to another RS.
+ */
+@Category({MasterTests.class, MediumTests.class})
+public class TestRogueRSAssignment {
+  private static final Log LOG = LogFactory.getLog(TestRogueRSAssignment.class);
+  /** Supplies the current test method's name, used to derive the table name. */
+  @Rule
+  public final TestName name = new TestName();
+  /** Lets a test declare the exception (and cause) it expects to be thrown. */
+  @Rule
+  public ExpectedException exception = ExpectedException.none();
+  private static final int initialRegionCount = 3;
+  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static final Configuration conf = UTIL.getConfiguration();
+  private static Admin admin;
+  private static MiniHBaseCluster cluster;
+  private static HMaster master;
+  /** Applies test settings to {@code conf}: low retry limits, MASTER_PROCEDURE_THREADS = 1. */
+  private static void setupConf(Configuration conf) {
+    // Reduce the maximum attempts to speed up the test
+    conf.setInt("hbase.assignment.maximum.attempts", 3);
+    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
+    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
+    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
+  }
+  /** Starts a mini cluster via startMiniCluster(2) and caches the cluster, admin and master. */
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    setupConf(conf);
+    UTIL.startMiniCluster(2);
+
+    cluster = UTIL.getHBaseCluster();
+    assertNotNull(cluster);
+
+    admin = UTIL.getAdmin();
+    assertNotNull(admin);
+
+    master = cluster.getMaster();
+    assertNotNull(master);
+  }
+  /** Shuts the mini cluster down; a shutdown failure is logged as a warning, not rethrown. */
+  @AfterClass
+  public static void cleanupTest() throws Exception {
+    try {
+      UTIL.shutdownMiniCluster();
+      cluster = null;
+      admin = null;
+    } catch (Exception e) {
+      LOG.warn("failure shutting down cluster", e);
+    }
+  }
+  /** Disables the balancer before each test. */
+  @Before
+  public void setup() throws IOException {
+    // Turn off balancer
+    admin.setBalancerRunning(false, true);
+  }
+  /** Deletes every table listed by the admin, then re-enables the balancer. */
+  @After
+  public void tearDown() throws Exception {
+    for (TableDescriptor td: UTIL.getAdmin().listTableDescriptors()) {
+      LOG.info("Tear down, remove table=" + td.getTableName());
+      UTIL.deleteTable(td.getTableName());
+    }
+    // Turn on balancer
+    admin.setBalancerRunning(true, false);
+  }
+  /** Sends a report from a fabricated server claiming an existing region; expects rejection. */
+  @Test(timeout = 120000)
+  public void testReportRSWithWrongRegion() throws Exception {
+    final TableName tableName = TableName.valueOf(this.name.getMethodName());
+
+    List<HRegionInfo> tableRegions = createTable(tableName);
+
+    final ServerName sn = ServerName.parseVersionedServerName(
+        ServerName.valueOf("1.example.org", 1, System.currentTimeMillis()).getVersionedBytes());
+
+    // Build a fake report from 'sn' claiming a table region that is assigned to a different RS
+    RegionServerStatusProtos.RegionServerReportRequest.Builder request =
+        makeRSReportRequestWithRegions(sn, tableRegions.get(1));
+
+    // Send the fake report to the master; expect ServiceException caused by YouAreDeadException
+    // TODO: replace YouAreDeadException with appropriate exception as and when necessary
+    exception.expect(ServiceException.class);
+    exception.expectCause(isA(YouAreDeadException.class));
+    RegionServerStatusProtos.RegionServerReportResponse response =
+        master.getMasterRpcServices().regionServerReport(null, request.build());
+  }
+  /** Builds a report request for {@code sn} whose load has one RegionLoad per given region. */
+  private RegionServerStatusProtos.RegionServerReportRequest.Builder
+      makeRSReportRequestWithRegions(final ServerName sn, HRegionInfo... regions) {
+    ClusterStatusProtos.ServerLoad.Builder sl = ClusterStatusProtos.ServerLoad.newBuilder();
+    for (int i = 0; i < regions.length; i++) {
+      HBaseProtos.RegionSpecifier.Builder rs = HBaseProtos.RegionSpecifier.newBuilder();
+      rs.setType(HBaseProtos.RegionSpecifier.RegionSpecifierType.REGION_NAME);
+      rs.setValue(UnsafeByteOperations.unsafeWrap(regions[i].getRegionName()));
+
+      ClusterStatusProtos.RegionLoad.Builder rl = ClusterStatusProtos.RegionLoad.newBuilder()
+          .setRegionSpecifier(rs.build());
+
+      sl.addRegionLoads(i, rl.build());
+    }
+
+    return RegionServerStatusProtos.RegionServerReportRequest.newBuilder()
+              .setServer(ProtobufUtil.toServerName(sn))
+              .setLoad(sl);
+  }
+  /** Creates {@code tableName} with one family, pre-split into initialRegionCount regions. */
+  private List<HRegionInfo> createTable(final TableName tableName) throws Exception {
+    TableDescriptorBuilder tdBuilder = TableDescriptorBuilder.newBuilder(tableName);
+    tdBuilder.addColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(FAMILY).build());
+
+    byte[][] rows = new byte[initialRegionCount - 1][];
+    for (int i = 0; i < rows.length; ++i) {
+      rows[i] = Bytes.toBytes(String.format("%d", i));
+    }
+    admin.createTable(tdBuilder.build(), rows);
+    return assertRegionCount(tableName, initialRegionCount);
+  }
+  /** Waits until no regions are in transition, then asserts the table's region count. */
+  private List<HRegionInfo> assertRegionCount(final TableName tableName, final int nregions)
+      throws Exception {
+    UTIL.waitUntilNoRegionsInTransition();
+    List<HRegionInfo> tableRegions = admin.getTableRegions(tableName);
+    assertEquals(nregions, tableRegions.size());
+    return tableRegions;
+  }
+}