You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2017/11/28 00:25:21 UTC

hbase git commit: HBASE-19335 Fix waitUntilAllRegionsAssigned(). Ignore assignments to killed servers and when region state != OPEN.

Repository: hbase
Updated Branches:
  refs/heads/master 7c1c370f2 -> e70b62854


HBASE-19335 Fix waitUntilAllRegionsAssigned(). Ignore assignments to killed servers and when region state != OPEN.

Update timeouts for TestRegionObserverInterface.
Reason: There are ~10 tests there, each with 5 min individual timeout. Too much. The test class is labelled MediumTests,
let's used that with our standard CategoryBasedTimeout. 3 min per test function should be enough even on slower Apache machines.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/e70b6285
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/e70b6285
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/e70b6285

Branch: refs/heads/master
Commit: e70b628544b377d4107d13c9fdbe95540f4fd9d7
Parents: 7c1c370
Author: Apekshit Sharma <ap...@apache.org>
Authored: Wed Nov 22 16:33:52 2017 -0800
Committer: Apekshit Sharma <ap...@apache.org>
Committed: Mon Nov 27 16:20:15 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/DistributedHBaseCluster.java   | 13 +++++++++
 .../master/assignment/RegionStateStore.java     |  7 +++--
 .../org/apache/hadoop/hbase/HBaseCluster.java   |  9 ++++++
 .../hadoop/hbase/HBaseTestingUtility.java       | 25 +++++++++++++++--
 .../apache/hadoop/hbase/MiniHBaseCluster.java   | 16 ++++++++++-
 .../TestRegionObserverInterface.java            | 29 ++++++++++++--------
 6 files changed, 82 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
----------------------------------------------------------------------
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
index 8c0d273..e8a0041 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/DistributedHBaseCluster.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
@@ -50,6 +51,12 @@ public class DistributedHBaseCluster extends HBaseCluster {
   private final Connection connection;
 
   private ClusterManager clusterManager;
+  /**
+   * List of RegionServers killed so far. ServerName also comprises startCode of a server,
+   * so any restarted instances of the same server will have different ServerName and will not
+   * coincide with past dead ones. So there's no need to cleanup this list.
+   */
+  private Set<ServerName> killedRegionServers = new HashSet<>();
 
   public DistributedHBaseCluster(Configuration conf, ClusterManager clusterManager)
       throws IOException {
@@ -113,11 +120,17 @@ public class DistributedHBaseCluster extends HBaseCluster {
   @Override
   public void killRegionServer(ServerName serverName) throws IOException {
     LOG.info("Aborting RS: " + serverName.getServerName());
+    killedRegionServers.add(serverName);
     clusterManager.kill(ServiceType.HBASE_REGIONSERVER,
       serverName.getHostname(), serverName.getPort());
   }
 
   @Override
+  public boolean isKilledRS(ServerName serverName) {
+    return killedRegionServers.contains(serverName);
+  }
+
+  @Override
   public void stopRegionServer(ServerName serverName) throws IOException {
     LOG.info("Stopping RS: " + serverName.getServerName());
     clusterManager.stop(ServiceType.HBASE_REGIONSERVER,

http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
index 723a776..ba70ef4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateStore.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 
+import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.hbase.shaded.com.google.common.base.Preconditions;
 
 /**
@@ -317,12 +318,14 @@ public class RegionStateStore {
    * @param r Result to pull the region state from
    * @return the region state, or null if unknown.
    */
-  protected State getRegionState(final Result r, int replicaId) {
+  @VisibleForTesting
+  public static State getRegionState(final Result r, int replicaId) {
     Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(replicaId));
     if (cell == null || cell.getValueLength() == 0) {
       return null;
     }
-    return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
+    return State.valueOf(Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
+        cell.getValueLength()));
   }
 
   private static byte[] getStateColumn(int replicaId) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
index dfff9f4..cac957b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseCluster.java
@@ -129,6 +129,15 @@ public abstract class HBaseCluster implements Closeable, Configurable {
   public abstract void killRegionServer(ServerName serverName) throws IOException;
 
   /**
+   * Keeping track of killed servers and being able to check if a particular server was killed makes
+   * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
+   * example of such case is - killing servers and waiting for all regions of a particular table
+   * to be assigned. We can check for server column in META table and that its value is not one
+   * of the killed servers.
+   */
+  public abstract boolean isKilledRS(ServerName serverName);
+
+  /**
    * Stops the given region server, by attempting a gradual stop.
    * @return whether the operation finished with success
    * @throws IOException if something goes wrong

http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index eec7892..4e65651 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -67,6 +67,8 @@ import org.apache.hadoop.hbase.Waiter.Predicate;
 import org.apache.hadoop.hbase.client.ImmutableHRegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
 import org.apache.hadoop.hbase.trace.TraceUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -3418,8 +3420,27 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
               byte[] b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
               HRegionInfo info = HRegionInfo.parseFromOrNull(b);
               if (info != null && info.getTable().equals(tableName)) {
-                b = r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
-                allRegionsAssigned &= (b != null);
+                // Get server hosting this region from catalog family. Return false if no server
+                // hosting this region, or if the server hosting this region was recently killed
+                // (for fault tolerance testing).
+                byte[] server =
+                    r.getValue(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
+                if (server == null) {
+                  return false;
+                } else {
+                  byte[] startCode =
+                      r.getValue(HConstants.CATALOG_FAMILY, HConstants.STARTCODE_QUALIFIER);
+                  ServerName serverName =
+                      ServerName.valueOf(Bytes.toString(server).replaceFirst(":", ",") + "," +
+                          Bytes.toLong(startCode));
+                  if (getHBaseCluster().isKilledRS(serverName)) {
+                    return false;
+                  }
+                }
+                if (RegionStateStore.getRegionState(r, info.getReplicaId())
+                    != RegionState.State.OPEN) {
+                  return false;
+                }
               }
             }
           } finally {

http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
index a46fa9d..e02347d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@@ -21,8 +21,10 @@ package org.apache.hadoop.hbase;
 import java.io.IOException;
 import java.security.PrivilegedAction;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
 
+import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -108,6 +110,12 @@ public class MiniHBaseCluster extends HBaseCluster {
   public static class MiniHBaseClusterRegionServer extends HRegionServer {
     private Thread shutdownThread = null;
     private User user = null;
+    /**
+     * List of RegionServers killed so far. ServerName also comprises startCode of a server,
+     * so any restarted instances of the same server will have different ServerName and will not
+     * coincide with past dead ones. So there's no need to cleanup this list.
+     */
+    static Set<ServerName> killedServers = new HashSet<>();
 
     public MiniHBaseClusterRegionServer(Configuration conf)
         throws IOException, InterruptedException {
@@ -156,7 +164,8 @@ public class MiniHBaseCluster extends HBaseCluster {
     }
 
     @Override
-    public void kill() {
+    protected void kill() {
+      killedServers.add(getServerName());
       super.kill();
     }
 
@@ -250,6 +259,11 @@ public class MiniHBaseCluster extends HBaseCluster {
   }
 
   @Override
+  public boolean isKilledRS(ServerName serverName) {
+    return MiniHBaseClusterRegionServer.killedServers.contains(serverName);
+  }
+
+  @Override
   public void stopRegionServer(ServerName serverName) throws IOException {
     stopRegionServer(getRegionServerIndex(serverName));
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/e70b6285/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java
index 49fc3fd..fe67a08 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java
@@ -35,6 +35,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.CategoryBasedTimeout;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.Coprocessor;
@@ -88,12 +89,16 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
+import org.junit.rules.TestRule;
 
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 
 @Category({ CoprocessorTests.class, MediumTests.class })
 public class TestRegionObserverInterface {
   private static final Log LOG = LogFactory.getLog(TestRegionObserverInterface.class);
+  @Rule
+  public final TestRule timeout = CategoryBasedTimeout.builder().
+      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
 
   public static final TableName TEST_TABLE = TableName.valueOf("TestTable");
   public final static byte[] A = Bytes.toBytes("a");
@@ -124,7 +129,7 @@ public class TestRegionObserverInterface {
     util.shutdownMiniCluster();
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testRegionObserver() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     // recreate table every time in order to reset the status of the
@@ -184,7 +189,7 @@ public class TestRegionObserverInterface {
       tableName, new Integer[] { 1, 1, 1, 1 });
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testRowMutation() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     Table table = util.createTable(tableName, new byte[][] { A, B, C });
@@ -216,7 +221,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testIncrementHook() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     Table table = util.createTable(tableName, new byte[][] { A, B, C });
@@ -239,7 +244,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testCheckAndPutHooks() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     try (Table table = util.createTable(tableName, new byte[][] { A, B, C })) {
@@ -260,7 +265,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testCheckAndDeleteHooks() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     Table table = util.createTable(tableName, new byte[][] { A, B, C });
@@ -285,7 +290,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testAppendHook() throws IOException {
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
     Table table = util.createTable(tableName, new byte[][] { A, B, C });
@@ -308,7 +313,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   // HBase-3583
   public void testHBase3583() throws IOException {
     final TableName tableName = TableName.valueOf(name.getMethodName());
@@ -351,7 +356,7 @@ public class TestRegionObserverInterface {
     table.close();
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testHBASE14489() throws IOException {
     final TableName tableName = TableName.valueOf(name.getMethodName());
     Table table = util.createTable(tableName, new byte[][] { A });
@@ -476,7 +481,7 @@ public class TestRegionObserverInterface {
    * Tests overriding compaction handling via coprocessor hooks
    * @throws Exception
    */
-  @Test(timeout = 300000)
+  @Test
   public void testCompactionOverride() throws Exception {
     final TableName compactTable = TableName.valueOf(name.getMethodName());
     Admin admin = util.getAdmin();
@@ -546,7 +551,7 @@ public class TestRegionObserverInterface {
     table.close();
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void bulkLoadHFileTest() throws Exception {
     final String testName = TestRegionObserverInterface.class.getName() + "." + name.getMethodName();
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
@@ -575,7 +580,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testRecovery() throws Exception {
     LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
     final TableName tableName = TableName.valueOf(TEST_TABLE.getNameAsString() + "." + name.getMethodName());
@@ -625,7 +630,7 @@ public class TestRegionObserverInterface {
     }
   }
 
-  @Test(timeout = 300000)
+  @Test
   public void testPreWALRestoreSkip() throws Exception {
     LOG.info(TestRegionObserverInterface.class.getName() + "." + name.getMethodName());
     TableName tableName = TableName.valueOf(SimpleRegionObserver.TABLE_SKIPPED);