You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2019/12/28 19:24:35 UTC

[hbase] branch HBASE-18095/client-locate-meta-no-zookeeper updated (e41b46c -> 1c41b36)

This is an automated email from the ASF dual-hosted git repository.

stack pushed a change to branch HBASE-18095/client-locate-meta-no-zookeeper
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    omit e41b46c  HBASE-23304: RPCs needed for client meta information lookup (#904)
    omit 4f9b03f  HBASE-23281: Track meta region locations in masters (#830)
    omit cd53a80  HBASE-23275: Track active master's address in ActiveMasterManager (#812)
     add f4860d8  HBASE-23594 Procedure stuck due to region happen to recorded on two servers. (#953)
     add ab40b96  HBASE-23564 RegionStates may has some expired serverinfo and make regions do not balance. (#930)
     add 4b6ce0f  HBASE-23572 In 'HBCK Report', distringush between live, dead, and unknown servers
     add fc15ea7  HBASE-23320 Upgrade surefire plugin to 3.0.0-M4
     add 00bb0f2  HBASE-23581 Creating table gets stuck when specifying an invalid split policy as METADATA (#942)
     add ee19008  HBASE-23589: FlushDescriptor contains non-matching family/output combinations (#949)
     add 9ff2507  HBASE-23613 ProcedureExecutor check StuckWorkers blocked by DeadServe… (#960)
     add 1b049a2  HBASE-23326 Implement a ProcedureStore which stores procedures in a HRegion (#941)
     add 56f9db9  HBASE-23374 ExclusiveMemHFileBlock’s allocator should not be hardcoded as ByteBuffAllocator.HEAP
     add 94346d8  HBASE-23238: Remove 'static'ness of cell counter in LimitKVsReturnFilter (addendum) (#963)
     add def9ac7  HBASE-23286 Improve MTTR: Split WAL to HFile (#820)
     add e9b9980  HBASE-23619 Used built-in formatting for logger in hbase-zookeeper
     add 684a68e  Adding developer details to pom.xml
     add 0ba84d8  HBASE-23617 Add a stress test tool for region based procedure store (#962)
     add 703ee60  HBASE-23621 Reduced the number of Checkstyle violations in tests of hbase-common
     add 2ca1e46  HBASE-23622 Reduced the number of Checkstyle violations in hbase-common
     new f878fca  HBASE-23275: Track active master's address in ActiveMasterManager (#812)
     new 420083b  HBASE-23281: Track meta region locations in masters (#830)
     new 1c41b36  HBASE-23304: RPCs needed for client meta information lookup (#904)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (e41b46c)
            \
             N -- N -- N   refs/heads/HBASE-18095/client-locate-meta-no-zookeeper (1c41b36)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../src/main/assembly/client-components.xml        |   8 +
 hbase-assembly/src/main/assembly/components.xml    |   8 +
 .../hadoop/hbase/client/RawAsyncHBaseAdmin.java    |   8 +-
 .../org/apache/hadoop/hbase/client/RegionInfo.java |   2 +-
 .../org/apache/hadoop/hbase/BBKVComparator.java    |   5 +-
 .../org/apache/hadoop/hbase/CellComparator.java    |   2 +-
 .../apache/hadoop/hbase/CellComparatorImpl.java    |  10 +-
 .../java/org/apache/hadoop/hbase/HConstants.java   |  14 +-
 .../java/org/apache/hadoop/hbase/net/Address.java  |   9 +-
 .../hadoop/hbase/trace/SpanReceiverHost.java       |  10 +-
 .../org/apache/hadoop/hbase/trace/TraceUtil.java   |   6 +-
 .../apache/hadoop/hbase/util/ByteRangeUtils.java   |  12 +-
 .../apache/hadoop/hbase/util/CommonFSUtils.java    |  70 +--
 .../hadoop/hbase/util/ConcatenatedLists.java       |   5 +-
 .../java/org/apache/hadoop/hbase/util/Order.java   |  41 +-
 .../java/org/apache/hadoop/hbase/ClassFinder.java  |  39 +-
 .../hadoop/hbase/HBaseCommonTestingUtility.java    |  59 ++-
 .../org/apache/hadoop/hbase/ResourceChecker.java   |  22 +-
 .../java/org/apache/hadoop/hbase/TestCellUtil.java |  21 +-
 .../org/apache/hadoop/hbase/TestClassFinder.java   |  55 +-
 .../hadoop/hbase/TestHBaseConfiguration.java       |  50 +-
 .../hadoop/hbase/codec/TestCellCodecWithTags.java  |  11 +-
 .../hbase/codec/TestKeyValueCodecWithTags.java     |  11 +-
 .../hadoop/hbase/io/crypto/TestEncryption.java     |  10 +-
 .../hadoop/hbase/util/RedundantKVGenerator.java    | 140 +----
 .../procedure2/CompletedProcedureCleaner.java      |   3 +
 .../hadoop/hbase/procedure2/ProcedureUtil.java     |  13 +
 .../store/InMemoryProcedureIterator.java           |  94 ++++
 .../hbase/procedure2/store/LeaseRecovery.java      |  16 +-
 .../hbase/procedure2/store/ProcedureStore.java     |  24 +-
 .../WALProcedureTree.java => ProcedureTree.java}   | 134 +----
 .../hbase/procedure2/store/ProtoAndProcedure.java  |  36 +-
 .../procedure2/store/{ => wal}/BitSetNode.java     |   7 +-
 .../wal/CorruptedWALProcedureStoreException.java   |   3 +
 .../store/{ => wal}/ProcedureStoreTracker.java     |   7 +-
 .../procedure2/store/wal/ProcedureWALFile.java     |   6 +-
 .../procedure2/store/wal/ProcedureWALFormat.java   |   6 +-
 .../store/wal/ProcedureWALFormatReader.java        |  13 +-
 .../store/wal/ProcedureWALPrettyPrinter.java       |   3 +
 .../procedure2/store/wal/WALProcedureMap.java      |   3 +
 .../procedure2/store/wal/WALProcedureStore.java    |  21 +-
 .../hbase/procedure2/ProcedureTestingUtility.java  | 127 ++---
 ...va => ProcedureStorePerformanceEvaluation.java} | 218 ++++----
 ...ALProcedureTree.java => TestProcedureTree.java} |  12 +-
 .../wal/ProcedureWALPerformanceEvaluation.java     | 217 ++------
 .../procedure2/store/{ => wal}/TestBitSetNode.java |   4 +-
 .../store/{ => wal}/TestProcedureStoreTracker.java |   2 +-
 .../store/wal/TestWALProcedureStore.java           |   6 +-
 .../hbase/io/hfile/ExclusiveMemHFileBlock.java     |   5 +-
 .../hadoop/hbase/io/hfile/HFileBlockBuilder.java   |   2 +-
 .../org/apache/hadoop/hbase/master/HMaster.java    |  41 +-
 .../hbase/master/assignment/AssignmentManager.java |  12 +-
 .../hbase/master/assignment/RegionStates.java      |  17 +-
 .../hbase/master/procedure/MasterProcedureEnv.java |   6 +-
 .../master/procedure/MasterProcedureUtil.java      |  10 +-
 .../master/procedure/ServerCrashProcedure.java     |  31 +-
 .../store/region/RegionFlusherAndCompactor.java    | 240 +++++++++
 .../store/region/RegionProcedureStore.java         | 586 +++++++++++++++++++++
 .../region/RegionProcedureStoreWALRoller.java      | 127 +++++
 .../apache/hadoop/hbase/regionserver/CellSet.java  |   2 +-
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 278 +++++-----
 .../hadoop/hbase/regionserver/LogRoller.java       | 203 +------
 .../hbase/regionserver/wal/AbstractFSWAL.java      |  19 +-
 .../hadoop/hbase/regionserver/wal/WALUtil.java     |   4 +-
 .../java/org/apache/hadoop/hbase/util/FSUtils.java |   7 +-
 .../hadoop/hbase/util/TableDescriptorChecker.java  |  12 +-
 .../LogRoller.java => wal/AbstractWALRoller.java}  |  95 ++--
 .../hbase/wal/BoundedRecoveredEditsOutputSink.java |   8 +-
 .../wal/BoundedRecoveredHFilesOutputSink.java      | 240 +++++++++
 .../org/apache/hadoop/hbase/wal/OutputSink.java    |   4 +-
 .../hadoop/hbase/wal/RecoveredEditsOutputSink.java |   6 +-
 .../org/apache/hadoop/hbase/wal/WALFactory.java    |   4 +-
 .../org/apache/hadoop/hbase/wal/WALSplitUtil.java  |  61 ++-
 .../org/apache/hadoop/hbase/wal/WALSplitter.java   |  15 +-
 .../main/resources/hbase-webapps/master/hbck.jsp   |  70 ++-
 .../resources/hbase-webapps/master/procedures.jsp  | 114 ----
 .../hbase/client/TestIllegalTableDescriptor.java   |   6 +
 .../hbase/client/TestScannersFromClientSide.java   |  17 +-
 ...cerWithDisabledTable.java => TestBalancer.java} |  19 +-
 .../hbase/master/TestLoadProcedureError.java       |   2 +-
 .../hbase/master/TestMasterMetricsWrapper.java     |   8 +-
 ...onRetry.java => TestRaceBetweenSCPAndTRSP.java} | 128 +++--
 .../procedure/TestMasterProcedureWalLease.java     | 238 ---------
 .../RegionProcedureStorePerformanceEvaluation.java | 147 ++++++
 .../region/RegionProcedureStoreTestHelper.java     |  54 ++
 .../region/RegionProcedureStoreTestProcedure.java  |  77 +++
 .../store/region/TestRegionProcedureStore.java     | 159 ++++++
 .../region/TestRegionProcedureStoreMigration.java  | 143 +++++
 .../region/TestRegionProcedureStoreWALCleaner.java | 129 +++++
 .../TestRegionServerCrashDisableWAL.java           |   3 +-
 .../regionserver/wal/AbstractTestWALReplay.java    |   4 +-
 .../org/apache/hadoop/hbase/wal/TestWALSplit.java  |   9 +-
 .../hadoop/hbase/wal/TestWALSplitToHFile.java      | 408 ++++++++++++++
 .../hadoop/hbase/zookeeper/DeletionListener.java   |   2 +-
 .../hbase/zookeeper/LoadBalancerTracker.java       |   2 +-
 .../hadoop/hbase/zookeeper/MetaTableLocator.java   |   6 +-
 .../hbase/zookeeper/MiniZooKeeperCluster.java      |  15 +-
 .../hbase/zookeeper/RecoverableZooKeeper.java      |  12 +-
 .../apache/hadoop/hbase/zookeeper/ZKAclReset.java  |   8 +-
 .../hadoop/hbase/zookeeper/ZKLeaderManager.java    |   4 +-
 .../hadoop/hbase/zookeeper/ZKNodeTracker.java      |   4 +-
 .../org/apache/hadoop/hbase/zookeeper/ZKUtil.java  |  15 +-
 .../apache/hadoop/hbase/zookeeper/ZKWatcher.java   |  10 +-
 .../apache/hadoop/hbase/HBaseZKTestingUtility.java |   2 +-
 pom.xml                                            |   8 +-
 105 files changed, 3591 insertions(+), 1870 deletions(-)
 create mode 100644 hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/InMemoryProcedureIterator.java
 copy hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/ExclusiveOperationException.java => hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/LeaseRecovery.java (80%)
 rename hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/{wal/WALProcedureTree.java => ProcedureTree.java} (69%)
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MetaQueue.java => hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/ProtoAndProcedure.java (54%)
 rename hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/{ => wal}/BitSetNode.java (97%)
 rename hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/store/{ => wal}/ProcedureStoreTracker.java (98%)
 copy hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/{wal/ProcedureWALPerformanceEvaluation.java => ProcedureStorePerformanceEvaluation.java} (52%)
 rename hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/{wal/TestWALProcedureTree.java => TestProcedureTree.java} (93%)
 rename hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/{ => wal}/TestBitSetNode.java (96%)
 rename hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/store/{ => wal}/TestProcedureStoreTracker.java (99%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/store/region/RegionFlusherAndCompactor.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStore.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStoreWALRoller.java
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/{regionserver/LogRoller.java => wal/AbstractWALRoller.java} (77%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/wal/BoundedRecoveredHFilesOutputSink.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/{TestBalancerWithDisabledTable.java => TestBalancer.java} (82%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/{TestReportRegionStateTransitionRetry.java => TestRaceBetweenSCPAndTRSP.java} (54%)
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterProcedureWalLease.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStorePerformanceEvaluation.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStoreTestHelper.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/RegionProcedureStoreTestProcedure.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStore.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreWALCleaner.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/wal/TestWALSplitToHFile.java


[hbase] 02/03: HBASE-23281: Track meta region locations in masters (#830)

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch HBASE-18095/client-locate-meta-no-zookeeper
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 420083bdc26209b3f4725ab1a2a920b1fc7bfba5
Author: Bharath Vissapragada <bh...@apache.org>
AuthorDate: Wed Dec 4 15:26:58 2019 -0800

    HBASE-23281: Track meta region locations in masters (#830)
    
    * HBASE-23281: Track meta region changes on masters
    
    This patch adds a simple cache that tracks the meta region replica
    locations. It keeps an eye on the region movements so that the
    cached locations are not stale.
    
    This information is used for servicing client RPCs for connections
    that use master based registry (HBASE-18095). The RPC end points
    will be added in a separate patch.
    
    Signed-off-by: Nick Dimiduk <nd...@apache.org>
---
 .../hadoop/hbase/shaded/protobuf/ProtobufUtil.java |  42 +++-
 .../apache/hadoop/hbase/zookeeper/ZNodePaths.java  |  19 +-
 .../org/apache/hadoop/hbase/master/HMaster.java    |  18 +-
 .../hbase/master/MetaRegionLocationCache.java      | 249 +++++++++++++++++++++
 .../hbase/client/TestMetaRegionLocationCache.java  | 186 +++++++++++++++
 .../hbase/master/TestCloseAnOpeningRegion.java     |   5 +-
 .../hbase/master/TestClusterRestartFailover.java   |   3 +-
 .../master/TestRegionsRecoveryConfigManager.java   |   5 +-
 .../hbase/master/TestShutdownBackupMaster.java     |   3 +-
 .../assignment/TestOpenRegionProcedureBackoff.java |   3 +-
 .../assignment/TestOpenRegionProcedureHang.java    |   2 +-
 .../TestRegionAssignedToMultipleRegionServers.java |   3 +-
 .../assignment/TestReportOnlineRegionsRace.java    |   3 +-
 ...tReportRegionStateTransitionFromDeadServer.java |   3 +-
 .../TestReportRegionStateTransitionRetry.java      |   3 +-
 .../master/assignment/TestSCPGetRegionsRace.java   |   3 +-
 .../assignment/TestWakeUpUnexpectedProcedure.java  |   3 +-
 .../TestRegisterPeerWorkerWhenRestarting.java      |   3 +-
 .../hadoop/hbase/protobuf/TestProtobufUtil.java    |  36 ++-
 .../TestRegionServerReportForDuty.java             |   2 +-
 .../replication/TestReplicationProcedureRetry.java |   3 +-
 .../hadoop/hbase/zookeeper/MetaTableLocator.java   |  38 +---
 .../apache/hadoop/hbase/zookeeper/ZKWatcher.java   |  37 ++-
 23 files changed, 586 insertions(+), 86 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
index 5a71917..2adcea9 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.client.PackagePrivateFieldAccessor;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.RegionLoadStats;
+import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.RegionStatesCount;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
@@ -93,6 +94,7 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.filter.ByteArrayComparable;
 import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.io.TimeRange;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
 import org.apache.hadoop.hbase.protobuf.ProtobufMessageConverter;
 import org.apache.hadoop.hbase.quotas.QuotaScope;
@@ -3068,6 +3070,44 @@ public final class ProtobufUtil {
   }
 
   /**
+   * Get the Meta region state from the passed data bytes. Can handle both old and new style
+   * server names.
+   * @param data protobuf serialized data with meta server name.
+   * @param replicaId replica ID for this region
+   * @return RegionState instance corresponding to the serialized data.
+   * @throws DeserializationException if the data is invalid; the parse failure is kept as cause.
+   */
+  public static RegionState parseMetaRegionStateFrom(final byte[] data, int replicaId)
+      throws DeserializationException {
+    RegionState.State state = RegionState.State.OPEN;
+    ServerName serverName;
+    if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
+      try {
+        int prefixLen = ProtobufUtil.lengthOfPBMagic();
+        ZooKeeperProtos.MetaRegionServer rl =
+            ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen,
+                data.length - prefixLen);
+        if (rl.hasState()) {
+          state = RegionState.State.convert(rl.getState());
+        }
+        HBaseProtos.ServerName sn = rl.getServer();
+        serverName = ServerName.valueOf(
+            sn.getHostName(), sn.getPort(), sn.getStartCode());
+      } catch (InvalidProtocolBufferException e) {
+        throw new DeserializationException("Unable to parse meta region location", e);
+      }
+    } else {
+      // old style of meta region location?
+      serverName = parseServerNameFrom(data);
+    }
+    if (serverName == null) {
+      state = RegionState.State.OFFLINE;
+    }
+    return new RegionState(RegionReplicaUtil.getRegionInfoForReplica(
+        RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId), state, serverName);
+  }
+
+  /**
    * Get a ServerName from the passed in data bytes.
    * @param data Data with a serialize server name in it; can handle the old style
    * servername where servername was host and port.  Works too with data that
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
index c8511d4..d923ebf 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZNodePaths.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -40,7 +40,8 @@ public class ZNodePaths {
   // TODO: Replace this with ZooKeeper constant when ZOOKEEPER-277 is resolved.
   public static final char ZNODE_PATH_SEPARATOR = '/';
 
-  public final static String META_ZNODE_PREFIX = "meta-region-server";
+  public static final String META_ZNODE_PREFIX_CONF_KEY = "zookeeper.znode.metaserver";
+  public static final String META_ZNODE_PREFIX = "meta-region-server";
   private static final String DEFAULT_SNAPSHOT_CLEANUP_ZNODE = "snapshot-cleanup";
 
   // base znode for this cluster
@@ -94,7 +95,7 @@ public class ZNodePaths {
   public ZNodePaths(Configuration conf) {
     baseZNode = conf.get(ZOOKEEPER_ZNODE_PARENT, DEFAULT_ZOOKEEPER_ZNODE_PARENT);
     ImmutableMap.Builder<Integer, String> builder = ImmutableMap.builder();
-    metaZNodePrefix = conf.get("zookeeper.znode.metaserver", META_ZNODE_PREFIX);
+    metaZNodePrefix = conf.get(META_ZNODE_PREFIX_CONF_KEY, META_ZNODE_PREFIX);
     String defaultMetaReplicaZNode = ZNodePaths.joinZNode(baseZNode, metaZNodePrefix);
     builder.put(DEFAULT_REPLICA_ID, defaultMetaReplicaZNode);
     int numMetaReplicas = conf.getInt(META_REPLICAS_NUM, DEFAULT_META_REPLICA_NUM);
@@ -179,6 +180,18 @@ public class ZNodePaths {
   }
 
   /**
+   * Parses the meta replicaId from the passed full znode path.
+   * @param path full znode path; assumed to start with baseZNode followed by a separator.
+   * @return replicaId as parsed by {@code getMetaReplicaIdFromZnode} from the stripped name.
+   */
+  public int getMetaReplicaIdFromPath(String path) {
+    // Strip the "baseZNode + PATH_SEPARATOR" prefix (hence the +1) to get the bare znode name.
+    // NOTE(review): no check is made here that path actually begins with baseZNode.
+    int prefixLen = baseZNode.length() + 1;
+    return getMetaReplicaIdFromZnode(path.substring(prefixLen));
+  }
+
+  /**
    * Parse the meta replicaId from the passed znode
    * @param znode the name of the znode, does not include baseZNode
    * @return replicaId
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 241ed1a..df72e71 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -360,6 +360,12 @@ public class HMaster extends HRegionServer implements MasterServices {
   // manager of assignment nodes in zookeeper
   private AssignmentManager assignmentManager;
 
+  /**
+   * Cache for the meta region replica's locations. Also tracks their changes to avoid stale
+   * cache entries.
+   */
+  private final MetaRegionLocationCache metaRegionLocationCache;
+
   // manager of replication
   private ReplicationPeerManager replicationPeerManager;
 
@@ -513,8 +519,7 @@ public class HMaster extends HRegionServer implements MasterServices {
    * #finishActiveMasterInitialization(MonitoredTask) after
    * the master becomes the active one.
    */
-  public HMaster(final Configuration conf)
-      throws IOException, KeeperException {
+  public HMaster(final Configuration conf) throws IOException {
     super(conf);
     TraceUtil.initTracer(conf);
     try {
@@ -527,7 +532,6 @@ public class HMaster extends HRegionServer implements MasterServices {
       } else {
         maintenanceMode = false;
       }
-
       this.rsFatals = new MemoryBoundedLogMessageBuffer(
           conf.getLong("hbase.master.buffer.for.rs.fatals", 1 * 1024 * 1024));
       LOG.info("hbase.rootdir={}, hbase.cluster.distributed={}", getDataRootDir(),
@@ -575,8 +579,10 @@ public class HMaster extends HRegionServer implements MasterServices {
 
       // Some unit tests don't need a cluster, so no zookeeper at all
       if (!conf.getBoolean("hbase.testing.nocluster", false)) {
+        this.metaRegionLocationCache = new MetaRegionLocationCache(this.zooKeeper);
         this.activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this);
       } else {
+        this.metaRegionLocationCache = null;
         this.activeMasterManager = null;
       }
       cachedClusterId = new CachedClusterId(conf);
@@ -3886,4 +3892,8 @@ public class HMaster extends HRegionServer implements MasterServices {
       rbc.chore();
     }
   }
+
+  public MetaRegionLocationCache getMetaRegionLocationCache() {
+    return this.metaRegionLocationCache;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java
new file mode 100644
index 0000000..f4e91b5
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaRegionLocationCache.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentNavigableMap;
+import java.util.concurrent.ThreadFactory;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.types.CopyOnWriteArrayMap;
+import org.apache.hadoop.hbase.util.RetryCounter;
+import org.apache.hadoop.hbase.util.RetryCounterFactory;
+import org.apache.hadoop.hbase.zookeeper.ZKListener;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+
+/**
+ * A cache of meta region location metadata. Registers a listener on ZK to track changes to the
+ * meta table znodes. Clients are expected to retry if the meta information is stale. This class
+ * is thread-safe (a single instance of this class can be shared by multiple threads without race
+ * conditions).
+ */
+@InterfaceAudience.Private
+public class MetaRegionLocationCache extends ZKListener {
+
+  private static final Logger LOG = LoggerFactory.getLogger(MetaRegionLocationCache.class);
+
+  /**
+   * Maximum number of times we retry when ZK operation times out.
+   */
+  private static final int MAX_ZK_META_FETCH_RETRIES = 10;
+  /**
+   * Sleep interval ms between ZK operation retries.
+   */
+  private static final int SLEEP_INTERVAL_MS_BETWEEN_RETRIES = 1000;
+  private static final int SLEEP_INTERVAL_MS_MAX = 10000;
+  private final RetryCounterFactory retryCounterFactory =
+      new RetryCounterFactory(MAX_ZK_META_FETCH_RETRIES, SLEEP_INTERVAL_MS_BETWEEN_RETRIES);
+
+  /**
+   * Cached meta region locations indexed by replica ID.
+   * CopyOnWriteArrayMap ensures synchronization during updates and a consistent snapshot during
+   * client requests. Even though CopyOnWriteArrayMap copies the data structure for every write,
+   * that should be OK since the size of the list is often small and mutations are not too often
+   * and we do not need to block client requests while mutations are in progress.
+   */
+  private final CopyOnWriteArrayMap<Integer, HRegionLocation> cachedMetaLocations;
+
+  private enum ZNodeOpType {
+    INIT,
+    CREATED,
+    CHANGED,
+    DELETED
+  }
+
+  public MetaRegionLocationCache(ZKWatcher zkWatcher) {
+    super(zkWatcher);
+    cachedMetaLocations = new CopyOnWriteArrayMap<>();
+    watcher.registerListener(this);
+    // Populate the initial snapshot of data from meta znodes.
+    // This is needed because stand-by masters can potentially start after the initial znode
+    // creation. It blocks forever until the initial meta locations are loaded from ZK and watchers
+    // are established. Subsequent updates are handled by the registered listener. Also, this runs
+    // in a separate thread in the background to not block master init.
+    ThreadFactory threadFactory = new ThreadFactoryBuilder().setDaemon(true).build();
+    RetryCounterFactory retryFactory = new RetryCounterFactory(
+        Integer.MAX_VALUE, SLEEP_INTERVAL_MS_BETWEEN_RETRIES, SLEEP_INTERVAL_MS_MAX);
+    threadFactory.newThread(
+      ()->loadMetaLocationsFromZk(retryFactory.create(), ZNodeOpType.INIT)).start();
+  }
+
+  /**
+   * Populates the current snapshot of meta locations from ZK. If no meta znodes exist, it registers
+   * a watcher on base znode to check for any CREATE/DELETE events on the children.
+   * @param retryCounter controls the number of retries and sleep between retries.
+   */
+  private void loadMetaLocationsFromZk(RetryCounter retryCounter, ZNodeOpType opType) {
+    List<String> znodes = null;
+    while (retryCounter.shouldRetry()) {
+      try {
+        znodes = watcher.getMetaReplicaNodesAndWatchChildren();
+        break;
+      } catch (KeeperException ke) {
+        LOG.debug("Error populating initial meta locations", ke);
+        if (!retryCounter.shouldRetry()) {
+          // Retries exhausted and watchers not set. This is not a desirable state since the cache
+          // could remain stale forever. Propagate the exception.
+          watcher.abort("Error populating meta locations", ke);
+          return;
+        }
+        try {
+          retryCounter.sleepUntilNextRetry();
+        } catch (InterruptedException ie) {
+          LOG.error("Interrupted while loading meta locations from ZK", ie);
+          Thread.currentThread().interrupt();
+          return;
+        }
+      }
+    }
+    if (znodes == null || znodes.isEmpty()) {
+      // No meta znodes exist at this point but we registered a watcher on the base znode to listen
+      // for updates. They will be handled via nodeChildrenChanged().
+      return;
+    }
+    if (znodes.size() == cachedMetaLocations.size()) {
+      // No new meta znodes got added.
+      return;
+    }
+    for (String znode: znodes) {
+      String path = ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, znode);
+      updateMetaLocation(path, opType);
+    }
+  }
+
+  /**
+   * Gets the HRegionLocation for a given meta replica ID. Renews the watch on the znode for
+   * future updates.
+   * @param replicaId ReplicaID of the region.
+   * @return HRegionLocation for the meta replica.
+   * @throws KeeperException if there is any issue fetching/parsing the serialized data.
+   */
+  private HRegionLocation getMetaRegionLocation(int replicaId)
+      throws KeeperException {
+    RegionState metaRegionState;
+    try {
+      byte[] data = ZKUtil.getDataAndWatch(watcher,
+          watcher.getZNodePaths().getZNodeForReplica(replicaId));
+      metaRegionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId);
+    } catch (DeserializationException e) {
+      throw ZKUtil.convert(e);
+    }
+    return new HRegionLocation(metaRegionState.getRegion(), metaRegionState.getServerName());
+  }
+
+  private void updateMetaLocation(String path, ZNodeOpType opType) {
+    if (!isValidMetaZNode(path)) {
+      return;
+    }
+    LOG.debug("Updating meta znode for path {}: {}", path, opType.name());
+    int replicaId = watcher.getZNodePaths().getMetaReplicaIdFromPath(path);
+    RetryCounter retryCounter = retryCounterFactory.create();
+    HRegionLocation location = null;
+    while (retryCounter.shouldRetry()) {
+      try {
+        if (opType == ZNodeOpType.DELETED) {
+          if (!ZKUtil.watchAndCheckExists(watcher, path)) {
+            // The path does not exist, we've set the watcher and we can break for now.
+            break;
+          }
+          // If it is a transient error and the node appears right away, we fetch the
+          // latest meta state.
+        }
+        location = getMetaRegionLocation(replicaId);
+        break;
+      } catch (KeeperException e) {
+        LOG.debug("Error getting meta location for path {}", path, e);
+        if (!retryCounter.shouldRetry()) {
+          LOG.warn("Error getting meta location for path {}. Retries exhausted.", path, e);
+          break;
+        }
+        try {
+          retryCounter.sleepUntilNextRetry();
+        } catch (InterruptedException ie) {
+          Thread.currentThread().interrupt();
+          return;
+        }
+      }
+    }
+    if (location == null) {
+      cachedMetaLocations.remove(replicaId);
+      return;
+    }
+    cachedMetaLocations.put(replicaId, location);
+  }
+
+  /**
+   * @return the cached meta replica locations, or {@link Optional#empty()} (never null) when
+   *   no location is cached.
+   */
+  public Optional<List<HRegionLocation>> getMetaRegionLocations() {
+    ConcurrentNavigableMap<Integer, HRegionLocation> snapshot =
+        cachedMetaLocations.tailMap(cachedMetaLocations.firstKey());
+    if (snapshot.isEmpty()) {
+      // This could be possible if the master has not successfully initialized yet or meta region
+      // is stuck in some weird state.
+      return Optional.empty();
+    }
+    List<HRegionLocation> result = new ArrayList<>();
+    // Explicitly iterate instead of new ArrayList<>(snapshot.values()) because the underlying
+    // ArrayValueCollection does not implement toArray().
+    snapshot.values().forEach(result::add);
+    return Optional.of(result);
+  }
+
+  /**
+   * Helper to check if the given 'path' corresponds to a meta znode. This listener is only
+   * interested in changes to meta znodes.
+   */
+  private boolean isValidMetaZNode(String path) {
+    return watcher.getZNodePaths().isAnyMetaReplicaZNode(path);
+  }
+
+  @Override
+  public void nodeCreated(String path) {
+    updateMetaLocation(path, ZNodeOpType.CREATED);
+  }
+
+  @Override
+  public void nodeDeleted(String path) {
+    updateMetaLocation(path, ZNodeOpType.DELETED);
+  }
+
+  @Override
+  public void nodeDataChanged(String path) {
+    updateMetaLocation(path, ZNodeOpType.CHANGED);
+  }
+
+  @Override
+  public void nodeChildrenChanged(String path) {
+    if (!path.equals(watcher.getZNodePaths().baseZNode)) {
+      return;
+    }
+    loadMetaLocationsFromZk(retryCounterFactory.create(), ZNodeOpType.CHANGED);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java
new file mode 100644
index 0000000..02236a6
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaRegionLocationCache.java
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.apache.commons.io.IOUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.MultithreadedTestUtil;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.MetaRegionLocationCache;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({SmallTests.class, MasterTests.class })
+public class TestMetaRegionLocationCache {
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMetaRegionLocationCache.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static AsyncRegistry REGISTRY;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3);
+    TEST_UTIL.startMiniCluster(3);
+    REGISTRY = AsyncRegistryFactory.getRegistry(TEST_UTIL.getConfiguration());
+    RegionReplicaTestHelper.waitUntilAllMetaReplicasHavingRegionLocation(
+        TEST_UTIL.getConfiguration(), REGISTRY, 3);
+    TEST_UTIL.getAdmin().balancerSwitch(false, true);
+  }
+
+  @AfterClass
+  public static void cleanUp() throws Exception {
+    IOUtils.closeQuietly(REGISTRY);
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  private List<HRegionLocation> getCurrentMetaLocations(ZKWatcher zk) throws Exception {
+    List<HRegionLocation> result = new ArrayList<>();
+    for (String znode: zk.getMetaReplicaNodes()) {
+      String path = ZNodePaths.joinZNode(zk.getZNodePaths().baseZNode, znode);
+      int replicaId = zk.getZNodePaths().getMetaReplicaIdFromPath(path);
+      RegionState state = MetaTableLocator.getMetaRegionState(zk, replicaId);
+      result.add(new HRegionLocation(state.getRegion(), state.getServerName()));
+    }
+    return result;
+  }
+
+  // Verifies that the cached meta locations in the given master are in sync with what is in ZK.
+  // Fails with an AssertionError if the cache is still empty once the polling budget is used up.
+  private void verifyCachedMetaLocations(HMaster master) throws Exception {
+    // Wait until initial meta locations are loaded, polling once a second up to 10 times.
+    int retries = 0;
+    while (retries++ < 10
+        && !master.getMetaRegionLocationCache().getMetaRegionLocations().isPresent()) {
+      Thread.sleep(1000);
+    }
+    // Fail with a descriptive message rather than a bare NoSuchElementException from get().
+    List<HRegionLocation> metaHRLs = master.getMetaRegionLocationCache().getMetaRegionLocations()
+        .orElseThrow(() -> new AssertionError("Meta location cache was never populated"));
+    assertFalse(metaHRLs.isEmpty());
+    ZKWatcher zk = master.getZooKeeper();
+    List<String> metaZnodes = zk.getMetaReplicaNodes();
+    assertEquals(metaZnodes.size(), metaHRLs.size());
+    List<HRegionLocation> actualHRLs = getCurrentMetaLocations(zk);
+    Collections.sort(metaHRLs);
+    Collections.sort(actualHRLs);
+    assertEquals(actualHRLs, metaHRLs);
+  }
+
+  @Test public void testInitialMetaLocations() throws Exception {
+    verifyCachedMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster());
+  }
+
+  @Test public void testStandByMetaLocations() throws Exception {
+    HMaster standBy = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
+    verifyCachedMetaLocations(standBy);
+  }
+
+  /*
+   * Shuffles the meta region replicas around the cluster and makes sure the cache is not stale.
+   */
+  @Test public void testMetaLocationsChange() throws Exception {
+    List<HRegionLocation> currentMetaLocs =
+        getCurrentMetaLocations(TEST_UTIL.getMiniHBaseCluster().getMaster().getZooKeeper());
+    // Move these replicas to random servers.
+    for (HRegionLocation location: currentMetaLocs) {
+      RegionReplicaTestHelper.moveRegion(TEST_UTIL, location);
+    }
+    RegionReplicaTestHelper.waitUntilAllMetaReplicasHavingRegionLocation(
+        TEST_UTIL.getConfiguration(), REGISTRY, 3);
+    for (JVMClusterUtil.MasterThread masterThread:
+        TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) {
+      verifyCachedMetaLocations(masterThread.getMaster());
+    }
+  }
+
+  /**
+   * Tests MetaRegionLocationCache's init procedure to make sure that it correctly watches the base
+   * znode for notifications.
+   */
+  @Test public void testMetaRegionLocationCache() throws Exception {
+    final String parentZnodeName = "/randomznodename";
+    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+    conf.set(HConstants.ZOOKEEPER_ZNODE_PARENT, parentZnodeName);
+    ServerName sn = ServerName.valueOf("localhost", 1234, 5678);
+    try (ZKWatcher zkWatcher = new ZKWatcher(conf, null, null, true)) {
+      // A thread that repeatedly creates and drops an unrelated child znode. This is to simulate
+      // some ZK activity in the background.
+      MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(conf);
+      ctx.addThread(new MultithreadedTestUtil.RepeatingTestThread(ctx) {
+        @Override public void doAnAction() throws Exception {
+          final String testZnode = parentZnodeName + "/child";
+          ZKUtil.createNodeIfNotExistsAndWatch(zkWatcher, testZnode, testZnode.getBytes());
+          ZKUtil.deleteNode(zkWatcher, testZnode);
+        }
+      });
+      ctx.startThreads();
+      try {
+        MetaRegionLocationCache metaCache = new MetaRegionLocationCache(zkWatcher);
+        // meta znodes do not exist at this point, cache should be empty.
+        assertFalse(metaCache.getMetaRegionLocations().isPresent());
+        // Set the meta locations for three meta replicas, simulating an active hmaster meta
+        // assignment.
+        for (int i = 0; i < 3; i++) {
+          // Updates the meta znodes.
+          MetaTableLocator.setMetaLocation(zkWatcher, sn, i, RegionState.State.OPEN);
+        }
+        // Wait until the meta cache is populated, polling once a second up to 10 times. The
+        // cache is read once per poll so presence and size come from the same result.
+        int iters = 0;
+        while (iters++ < 10
+          && metaCache.getMetaRegionLocations().map(List::size).orElse(0) != 3) {
+          Thread.sleep(1000);
+        }
+        // Fail with a descriptive message rather than a bare NoSuchElementException from get().
+        List<HRegionLocation> metaLocations = metaCache.getMetaRegionLocations()
+          .orElseThrow(() -> new AssertionError("Meta location cache was never populated"));
+        assertEquals(3, metaLocations.size());
+        for (HRegionLocation location : metaLocations) {
+          assertEquals(sn, location.getServerName());
+        }
+      } finally {
+        // clean up.
+        ctx.stop();
+        ZKUtil.deleteChildrenRecursively(zkWatcher, parentZnodeName);
+      }
+    }
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java
index ba4d535..4922224 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCloseAnOpeningRegion.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -65,7 +64,7 @@ public class TestCloseAnOpeningRegion {
 
   public static final class MockHMaster extends HMaster {
 
-    public MockHMaster(Configuration conf) throws IOException, KeeperException {
+    public MockHMaster(Configuration conf) throws IOException {
       super(conf);
     }
 
@@ -141,4 +140,4 @@ public class TestCloseAnOpeningRegion {
       table.put(new Put(Bytes.toBytes(0)).addColumn(CF, Bytes.toBytes("cq"), Bytes.toBytes(0)));
     }
   }
-}
\ No newline at end of file
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
index a6844fc..84722df 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClusterRestartFailover.java
@@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
-import org.apache.zookeeper.KeeperException;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -147,7 +146,7 @@ public class TestClusterRestartFailover extends AbstractTestRestartCluster {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
index 22554d3..d29e061 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryConfigManager.java
@@ -28,7 +28,6 @@ import org.apache.hadoop.hbase.StartMiniClusterOption;
 import org.apache.hadoop.hbase.Stoppable;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
-import org.apache.zookeeper.KeeperException;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -120,7 +119,7 @@ public class TestRegionsRecoveryConfigManager {
 
   // Make it public so that JVMClusterUtil can access it.
   public static class TestHMaster extends HMaster {
-    public TestHMaster(Configuration conf) throws IOException, KeeperException {
+    public TestHMaster(Configuration conf) throws IOException {
       super(conf);
     }
   }
@@ -144,4 +143,4 @@ public class TestRegionsRecoveryConfigManager {
 
   }
 
-}
\ No newline at end of file
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java
index d3a8520..a42a404 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestShutdownBackupMaster.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.hbase.StartMiniClusterOption;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -56,7 +55,7 @@ public class TestShutdownBackupMaster {
 
   public static final class MockHMaster extends HMaster {
 
-    public MockHMaster(Configuration conf) throws IOException, KeeperException {
+    public MockHMaster(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java
index ca0384e..4112da7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureBackoff.java
@@ -35,7 +35,6 @@ import org.apache.hadoop.hbase.master.MasterServices;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -71,7 +70,7 @@ public class TestOpenRegionProcedureBackoff {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java
index 0463721..a25368f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestOpenRegionProcedureHang.java
@@ -105,7 +105,7 @@ public class TestOpenRegionProcedureHang {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java
index 0d8202b..44af256 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionAssignedToMultipleRegionServers.java
@@ -41,7 +41,6 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -110,7 +109,7 @@ public class TestRegionAssignedToMultipleRegionServers {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
index acad88c..4dede89 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportOnlineRegionsRace.java
@@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.IdLock;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -106,7 +105,7 @@ public class TestReportOnlineRegionsRace {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
index 6c9e5eb..1de806f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionFromDeadServer.java
@@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -117,7 +116,7 @@ public class TestReportRegionStateTransitionFromDeadServer {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java
index 6c191c9..71c4693 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestReportRegionStateTransitionRetry.java
@@ -40,7 +40,6 @@ import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -84,7 +83,7 @@ public class TestReportRegionStateTransitionRetry {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java
index cbbdbdc..d676af9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java
@@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.IdLock;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -130,7 +129,7 @@ public class TestSCPGetRegionsRace {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java
index 47c70a1..62e3161 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestWakeUpUnexpectedProcedure.java
@@ -44,7 +44,6 @@ import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
@@ -198,7 +197,7 @@ public class TestWakeUpUnexpectedProcedure {
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
index f46bb41..4dff86d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/replication/TestRegisterPeerWorkerWhenRestarting.java
@@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.replication.SyncReplicationTestBase;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
-import org.apache.zookeeper.KeeperException;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -53,7 +52,7 @@ public class TestRegisterPeerWorkerWhenRestarting extends SyncReplicationTestBas
 
   public static final class HMasterForTest extends HMaster {
 
-    public HMasterForTest(Configuration conf) throws IOException, KeeperException {
+    public HMasterForTest(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java
index ff29df8..69e656f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/protobuf/TestProtobufUtil.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.protobuf;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
-
 import com.google.protobuf.ByteString;
 import java.io.IOException;
 import java.nio.ByteBuffer;
@@ -29,13 +28,17 @@ import org.apache.hadoop.hbase.CellBuilderType;
 import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.Append;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Increment;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.io.TimeRange;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.generated.CellProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Column;
@@ -51,11 +54,12 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
 
 /**
  * Class to test ProtobufUtil.
  */
-@Category({MiscTests.class, SmallTests.class})
+@Category({ MiscTests.class, SmallTests.class})
 public class TestProtobufUtil {
 
   @ClassRule
@@ -348,4 +352,32 @@ public class TestProtobufUtil {
         ProtobufUtil.toCell(ExtendedCellBuilderFactory.create(CellBuilderType.SHALLOW_COPY), cell);
     assertTrue(CellComparatorImpl.COMPARATOR.compare(offheapKV, newOffheapKV) == 0);
   }
+
+  @Test
+  public void testMetaRegionState() throws Exception {
+    ServerName serverName = ServerName.valueOf("localhost", 1234, 5678);
+    // New region state style.
+    for (RegionState.State state: RegionState.State.values()) {
+      RegionState regionState =
+          new RegionState(RegionInfoBuilder.FIRST_META_REGIONINFO, state, serverName);
+      MetaRegionServer metars = MetaRegionServer.newBuilder()
+          .setServer(org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.toServerName(serverName))
+          .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
+          .setState(state.convert()).build();
+      // Serialize. The PB magic prefix is prepended once; the returned array is the payload.
+      byte[] data = ProtobufUtil.prependPBMagic(metars.toByteArray());
+      // Deserialize. Only the server name and state are compared below.
+      RegionState regionStateNew =
+          org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.parseMetaRegionStateFrom(data, 1);
+      assertEquals(regionState.getServerName(), regionStateNew.getServerName());
+      assertEquals(regionState.getState(), regionStateNew.getState());
+    }
+    // old style: the input is only the versioned server name bytes, and the parsed state is
+    // expected to be OPEN.
+    RegionState rs =
+        org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil.parseMetaRegionStateFrom(
+            serverName.getVersionedBytes(), 1);
+    assertEquals(serverName, rs.getServerName());
+    assertEquals(RegionState.State.OPEN, rs.getState());
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
index aaf2d2e..f61a77e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerReportForDuty.java
@@ -122,7 +122,7 @@ public class TestRegionServerReportForDuty {
    * This test HMaster class will always throw ServerNotRunningYetException if checked.
    */
   public static class NeverInitializedMaster extends HMaster {
-    public NeverInitializedMaster(Configuration conf) throws IOException, KeeperException {
+    public NeverInitializedMaster(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java
index a2ae0b4..e9fcc66 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationProcedureRetry.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.replication.ReplicationPeerManager;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
-import org.apache.zookeeper.KeeperException;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -134,7 +133,7 @@ public class TestReplicationProcedureRetry {
 
     private ReplicationPeerManager manager;
 
-    public MockHMaster(Configuration conf) throws IOException, KeeperException {
+    public MockHMaster(Configuration conf) throws IOException {
       super(conf);
     }
 
diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
index 10fd165..be2e5c4 100644
--- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
+++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
@@ -34,12 +34,7 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
-import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
-
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
 
 /**
@@ -274,42 +269,17 @@ public final class MetaTableLocator {
    * @throws KeeperException if a ZooKeeper operation fails
    */
   public static RegionState getMetaRegionState(ZKWatcher zkw, int replicaId)
-          throws KeeperException {
-    RegionState.State state = RegionState.State.OPEN;
-    ServerName serverName = null;
+      throws KeeperException {
+    RegionState regionState = null;
     try {
       byte[] data = ZKUtil.getData(zkw, zkw.getZNodePaths().getZNodeForReplica(replicaId));
-      if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
-        try {
-          int prefixLen = ProtobufUtil.lengthOfPBMagic();
-          ZooKeeperProtos.MetaRegionServer rl =
-            ZooKeeperProtos.MetaRegionServer.parser().parseFrom(data, prefixLen,
-                    data.length - prefixLen);
-          if (rl.hasState()) {
-            state = RegionState.State.convert(rl.getState());
-          }
-          HBaseProtos.ServerName sn = rl.getServer();
-          serverName = ServerName.valueOf(
-            sn.getHostName(), sn.getPort(), sn.getStartCode());
-        } catch (InvalidProtocolBufferException e) {
-          throw new DeserializationException("Unable to parse meta region location");
-        }
-      } else {
-        // old style of meta region location?
-        serverName = ProtobufUtil.parseServerNameFrom(data);
-      }
+      regionState = ProtobufUtil.parseMetaRegionStateFrom(data, replicaId);
     } catch (DeserializationException e) {
       throw ZKUtil.convert(e);
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
     }
-    if (serverName == null) {
-      state = RegionState.State.OFFLINE;
-    }
-    return new RegionState(
-        RegionReplicaUtil.getRegionInfoForReplica(
-            RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId),
-        state, serverName);
+    return regionState;
   }
 
   /**
diff --git a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java
index e0b34e4..a39c413 100644
--- a/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java
+++ b/hbase-zookeeper/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKWatcher.java
@@ -23,10 +23,8 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.CopyOnWriteArrayList;
-import java.util.concurrent.CountDownLatch;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.AuthUtil;
@@ -81,10 +79,6 @@ public class ZKWatcher implements Watcher, Abortable, Closeable {
   // listeners to be notified
   private final List<ZKListener> listeners = new CopyOnWriteArrayList<>();
 
-  // Used by ZKUtil:waitForZKConnectionIfAuthenticating to wait for SASL
-  // negotiation to complete
-  private CountDownLatch saslLatch = new CountDownLatch(1);
-
   private final Configuration conf;
 
   /* A pattern that matches a Kerberos name, borrowed from Hadoop's KerberosName */
@@ -383,13 +377,32 @@ public class ZKWatcher implements Watcher, Abortable, Closeable {
    */
   public List<String> getMetaReplicaNodes() throws KeeperException {
     List<String> childrenOfBaseNode = ZKUtil.listChildrenNoWatch(this, znodePaths.baseZNode);
+    return filterMetaReplicaNodes(childrenOfBaseNode);
+  }
+
+  /**
+   * Same as {@link #getMetaReplicaNodes()} except that this also registers a watcher on base znode
+   * for subsequent CREATE/DELETE operations on child nodes.
+   */
+  public List<String> getMetaReplicaNodesAndWatchChildren() throws KeeperException {
+    List<String> childrenOfBaseNode =
+        ZKUtil.listChildrenAndWatchForNewChildren(this, znodePaths.baseZNode);
+    return filterMetaReplicaNodes(childrenOfBaseNode);
+  }
+
+  /**
+   * @param nodes Input list of znodes
+   * @return Filtered list of znodes from nodes that belong to meta replica(s).
+   */
+  private List<String> filterMetaReplicaNodes(List<String> nodes) {
+    if (nodes == null || nodes.isEmpty()) {
+      return new ArrayList<>();
+    }
     List<String> metaReplicaNodes = new ArrayList<>(2);
-    if (childrenOfBaseNode != null) {
-      String pattern = conf.get("zookeeper.znode.metaserver","meta-region-server");
-      for (String child : childrenOfBaseNode) {
-        if (child.startsWith(pattern)) {
-          metaReplicaNodes.add(child);
-        }
+    String pattern = conf.get(ZNodePaths.META_ZNODE_PREFIX_CONF_KEY, ZNodePaths.META_ZNODE_PREFIX);
+    for (String child : nodes) {
+      if (child.startsWith(pattern)) {
+        metaReplicaNodes.add(child);
       }
     }
     return metaReplicaNodes;


[hbase] 03/03: HBASE-23304: RPCs needed for client meta information lookup (#904)

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch HBASE-18095/client-locate-meta-no-zookeeper
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 1c41b3652b1054f022f87ebcb54f13dd69b67982
Author: Bharath Vissapragada <bh...@apache.org>
AuthorDate: Thu Dec 19 11:29:25 2019 -0800

    HBASE-23304: RPCs needed for client meta information lookup (#904)
    
    * HBASE-23304: RPCs needed for client meta information lookup
    
    This patch implements the RPCs needed for the meta information
    lookup during connection init. New tests added to cover the RPC
    code paths. HBASE-23305 builds on this to implement the client
    side logic.
    
    Fixed a bunch of checkstyle nits around the places the patch
    touches.
    
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../hadoop/hbase/shaded/protobuf/ProtobufUtil.java |   4 +-
 .../src/main/protobuf/Master.proto                 |  44 ++++++
 .../hadoop/hbase/master/MasterRpcServices.java     |  85 ++++++++---
 .../hbase/master/TestClientMetaServiceRPCs.java    | 164 +++++++++++++++++++++
 4 files changed, 275 insertions(+), 22 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
index 2adcea9..23f5c08 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
@@ -376,7 +376,9 @@ public final class ProtobufUtil {
    * @see #toServerName(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.ServerName)
    */
   public static HBaseProtos.ServerName toServerName(final ServerName serverName) {
-    if (serverName == null) return null;
+    if (serverName == null) {
+      return null;
+    }
     HBaseProtos.ServerName.Builder builder =
       HBaseProtos.ServerName.newBuilder();
     builder.setHostName(serverName.getHostname());
diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto
index 69377a6..e88ddc4 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto
@@ -1200,3 +1200,47 @@ service HbckService {
   rpc FixMeta(FixMetaRequest)
     returns(FixMetaResponse);
 }
+
+/** Request and response to get the clusterID for this cluster */
+message GetClusterIdRequest {
+}
+message GetClusterIdResponse {
+  /** Not set if cluster ID could not be determined. */
+  optional string cluster_id = 1;
+}
+
+/** Request and response to get the currently active master name for this cluster */
+message GetActiveMasterRequest {
+}
+message GetActiveMasterResponse {
+  /** Not set if an active master could not be determined. */
+  optional ServerName server_name = 1;
+}
+
+/** Request and response to get the current list of meta region locations */
+message GetMetaRegionLocationsRequest {
+}
+message GetMetaRegionLocationsResponse {
+  /** Empty if meta region locations could not be determined. */
+  repeated RegionLocation meta_locations = 1;
+}
+
+/**
+ * Implements all the RPCs needed by clients to look up cluster meta information needed for connection establishment.
+ */
+service ClientMetaService {
+  /**
+   * Get Cluster ID for this cluster.
+   */
+  rpc GetClusterId(GetClusterIdRequest) returns(GetClusterIdResponse);
+
+  /**
+   * Get active master server name for this cluster.
+   */
+  rpc GetActiveMaster(GetActiveMasterRequest) returns(GetActiveMasterResponse);
+
+  /**
+   * Get current meta replicas' region locations.
+   */
+  rpc GetMetaRegionLocations(GetMetaRegionLocationsRequest) returns(GetMetaRegionLocationsResponse);
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 612c731..620c3a0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hbase.master;
 
 import static org.apache.hadoop.hbase.master.MasterWalManager.META_FILTER;
-
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.BindException;
@@ -30,6 +29,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
@@ -37,6 +37,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ClusterMetricsBuilder;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.Server;
@@ -116,11 +117,9 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
 import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;
-
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AccessControlProtos;
@@ -161,6 +160,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceReq
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.BalanceResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClearDeadServersResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClientMetaService;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateNamespaceResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.CreateTableRequest;
@@ -185,12 +185,18 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ExecProced
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ExecProcedureResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.FixMetaRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.FixMetaResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetActiveMasterRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetActiveMasterResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterIdRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterIdResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterStatusRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterStatusResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetCompletedSnapshotsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetLocksRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetLocksResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetMetaRegionLocationsRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetMetaRegionLocationsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetNamespaceDescriptorRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetNamespaceDescriptorResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetProcedureResultRequest;
@@ -351,9 +357,10 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.Snapshot
  */
 @InterfaceAudience.Private
 @SuppressWarnings("deprecation")
-public class MasterRpcServices extends RSRpcServices
-      implements MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface,
-        LockService.BlockingInterface, HbckService.BlockingInterface {
+public class MasterRpcServices extends RSRpcServices implements
+    MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface,
+    LockService.BlockingInterface, HbckService.BlockingInterface,
+    ClientMetaService.BlockingInterface {
   private static final Logger LOG = LoggerFactory.getLogger(MasterRpcServices.class.getName());
   private static final Logger AUDITLOG =
       LoggerFactory.getLogger("SecurityLogger."+MasterRpcServices.class.getName());
@@ -362,7 +369,7 @@ public class MasterRpcServices extends RSRpcServices
 
   /**
    * @return Subset of configuration to pass initializing regionservers: e.g.
-   * the filesystem to use and root directory to use.
+   *     the filesystem to use and root directory to use.
    */
   private RegionServerStartupResponse.Builder createConfigurationSubset() {
     RegionServerStartupResponse.Builder resp = addConfig(
@@ -488,15 +495,17 @@ public class MasterRpcServices extends RSRpcServices
   protected List<BlockingServiceAndInterface> getServices() {
     List<BlockingServiceAndInterface> bssi = new ArrayList<>(5);
     bssi.add(new BlockingServiceAndInterface(
-      MasterService.newReflectiveBlockingService(this),
-      MasterService.BlockingInterface.class));
+        MasterService.newReflectiveBlockingService(this),
+        MasterService.BlockingInterface.class));
     bssi.add(new BlockingServiceAndInterface(
-      RegionServerStatusService.newReflectiveBlockingService(this),
-      RegionServerStatusService.BlockingInterface.class));
+        RegionServerStatusService.newReflectiveBlockingService(this),
+        RegionServerStatusService.BlockingInterface.class));
     bssi.add(new BlockingServiceAndInterface(LockService.newReflectiveBlockingService(this),
         LockService.BlockingInterface.class));
     bssi.add(new BlockingServiceAndInterface(HbckService.newReflectiveBlockingService(this),
         HbckService.BlockingInterface.class));
+    bssi.add(new BlockingServiceAndInterface(ClientMetaService.newReflectiveBlockingService(this),
+        ClientMetaService.BlockingInterface.class));
     bssi.addAll(super.getServices());
     return bssi;
   }
@@ -623,7 +632,9 @@ public class MasterRpcServices extends RSRpcServices
 
       final byte[] regionName = req.getRegion().getValue().toByteArray();
       final RegionInfo regionInfo = master.getAssignmentManager().getRegionInfo(regionName);
-      if (regionInfo == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
+      if (regionInfo == null) {
+        throw new UnknownRegionException(Bytes.toStringBinary(regionName));
+      }
 
       final AssignRegionResponse arr = AssignRegionResponse.newBuilder().build();
       if (master.cpHost != null) {
@@ -668,7 +679,7 @@ public class MasterRpcServices extends RSRpcServices
 
   @Override
   public CreateTableResponse createTable(RpcController controller, CreateTableRequest req)
-  throws ServiceException {
+      throws ServiceException {
     TableDescriptor tableDescriptor = ProtobufUtil.toTableDescriptor(req.getTableSchema());
     byte [][] splitKeys = ProtobufUtil.getSplitKeysArray(req);
     try {
@@ -1062,7 +1073,7 @@ public class MasterRpcServices extends RSRpcServices
    * Get list of TableDescriptors for requested tables.
    * @param c Unused (set to null).
    * @param req GetTableDescriptorsRequest that contains:
-   * - tableNames: requested tables, or if empty, all are requested
+   *     - tableNames: requested tables, or if empty, all are requested.
    * @return GetTableDescriptorsResponse
    * @throws ServiceException
    */
@@ -1206,9 +1217,9 @@ public class MasterRpcServices extends RSRpcServices
   /**
    * Checks if the specified snapshot is done.
    * @return true if the snapshot is in file system ready to use,
-   *   false if the snapshot is in the process of completing
+   *     false if the snapshot is in the process of completing
    * @throws ServiceException wrapping UnknownSnapshotException if invalid snapshot, or
-   *  a wrapped HBaseSnapshotException with progress failure reason.
+   *     a wrapped HBaseSnapshotException with progress failure reason.
    */
   @Override
   public IsSnapshotDoneResponse isSnapshotDone(RpcController controller,
@@ -1450,7 +1461,9 @@ public class MasterRpcServices extends RSRpcServices
 
       final byte[] regionName = request.getRegion().getValue().toByteArray();
       final RegionInfo hri = master.getAssignmentManager().getRegionInfo(regionName);
-      if (hri == null) throw new UnknownRegionException(Bytes.toStringBinary(regionName));
+      if (hri == null) {
+        throw new UnknownRegionException(Bytes.toStringBinary(regionName));
+      }
 
       if (master.cpHost != null) {
         master.cpHost.preRegionOffline(hri);
@@ -2311,8 +2324,8 @@ public class MasterRpcServices extends RSRpcServices
             report.getRegionSize(), now);
         }
       } else {
-        LOG.debug(
-          "Received region space usage report but HMaster is not ready to process it, skipping");
+        LOG.debug("Received region space usage report but HMaster is not ready to process it, "
+            + "skipping");
       }
       return RegionSpaceUseReportResponse.newBuilder().build();
     } catch (Exception e) {
@@ -2348,8 +2361,8 @@ public class MasterRpcServices extends RSRpcServices
         }
         return builder.build();
       } else {
-        LOG.debug(
-          "Received space quota region size report but HMaster is not ready to process it, skipping");
+        LOG.debug("Received space quota region size report but HMaster is not ready to process it,"
+            + " skipping");
       }
       return builder.build();
     } catch (Exception e) {
@@ -2893,4 +2906,34 @@ public class MasterRpcServices extends RSRpcServices
     return true;
   }
 
+  @Override
+  public GetClusterIdResponse getClusterId(RpcController rpcController, GetClusterIdRequest request)
+      throws ServiceException {
+    GetClusterIdResponse.Builder resp = GetClusterIdResponse.newBuilder();
+    String clusterId = master.getClusterId();
+    if (clusterId != null) {
+      resp.setClusterId(clusterId);
+    }
+    return resp.build();
+  }
+
+  @Override
+  public GetActiveMasterResponse getActiveMaster(RpcController rpcController,
+      GetActiveMasterRequest request) throws ServiceException {
+    GetActiveMasterResponse.Builder resp = GetActiveMasterResponse.newBuilder();
+    Optional<ServerName> serverName = master.getActiveMaster();
+    serverName.ifPresent(name -> resp.setServerName(ProtobufUtil.toServerName(name)));
+    return resp.build();
+  }
+
+  @Override
+  public GetMetaRegionLocationsResponse getMetaRegionLocations(RpcController rpcController,
+      GetMetaRegionLocationsRequest request) throws ServiceException {
+    GetMetaRegionLocationsResponse.Builder response = GetMetaRegionLocationsResponse.newBuilder();
+    Optional<List<HRegionLocation>> metaLocations =
+        master.getMetaRegionLocationCache().getMetaRegionLocations();
+    metaLocations.ifPresent(hRegionLocations -> hRegionLocations.forEach(
+      location -> response.addMetaLocations(ProtobufUtil.toRegionLocation(location))));
+    return response.build();
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClientMetaServiceRPCs.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClientMetaServiceRPCs.java
new file mode 100644
index 0000000..428aee2
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestClientMetaServiceRPCs.java
@@ -0,0 +1,164 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_RPC_TIMEOUT;
+import static org.apache.hadoop.hbase.HConstants.HBASE_RPC_TIMEOUT_KEY;
+import static org.junit.Assert.assertEquals;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
+import org.apache.hadoop.hbase.ipc.HBaseRpcController;
+import org.apache.hadoop.hbase.ipc.RpcClient;
+import org.apache.hadoop.hbase.ipc.RpcClientFactory;
+import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.testclassification.MasterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ClientMetaService;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetActiveMasterRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetActiveMasterResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterIdRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetClusterIdResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetMetaRegionLocationsRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.GetMetaRegionLocationsResponse;
+
+@Category({MediumTests.class, MasterTests.class})
+public class TestClientMetaServiceRPCs {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestClientMetaServiceRPCs.class);
+
+  // Total number of masters (active + stand by) for the purpose of this test.
+  private static final int MASTER_COUNT = 3;
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static Configuration conf;
+  private static int rpcTimeout;
+  private static RpcClient rpcClient;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    // Start the mini cluster with stand-by masters.
+    TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder()
+        .numMasters(MASTER_COUNT).numRegionServers(3).build());
+    conf = TEST_UTIL.getConfiguration();
+    // The RPC channel timeout is in milliseconds, the same unit as the configured value.
+    rpcTimeout = (int) Math.min(Integer.MAX_VALUE, TimeUnit.MILLISECONDS.toMillis(
+        conf.getLong(HBASE_RPC_TIMEOUT_KEY, DEFAULT_HBASE_RPC_TIMEOUT)));
+    rpcClient = RpcClientFactory.createClient(conf,
+        TEST_UTIL.getMiniHBaseCluster().getMaster().getClusterId());
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    if (rpcClient != null) {
+      rpcClient.close();
+    }
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  private static ClientMetaService.BlockingInterface getMasterStub(ServerName server)
+      throws IOException {
+    return ClientMetaService.newBlockingStub(
+        rpcClient.createBlockingRpcChannel(server, User.getCurrent(), rpcTimeout));
+  }
+
+  private static HBaseRpcController getRpcController() {
+    return RpcControllerFactory.instantiate(conf).newController();
+  }
+
+  /**
+   * Verifies the cluster ID from all running masters.
+   */
+  @Test public void TestClusterID() throws Exception {
+    HBaseRpcController rpcController = getRpcController();
+    String clusterID = TEST_UTIL.getMiniHBaseCluster().getMaster().getClusterId();
+    int rpcCount = 0;
+    for (JVMClusterUtil.MasterThread masterThread:
+        TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) {
+      ClientMetaService.BlockingInterface stub =
+          getMasterStub(masterThread.getMaster().getServerName());
+      GetClusterIdResponse resp =
+          stub.getClusterId(rpcController, GetClusterIdRequest.getDefaultInstance());
+      assertEquals(clusterID, resp.getClusterId());
+      rpcCount++;
+    }
+    assertEquals(MASTER_COUNT, rpcCount);
+  }
+
+  /**
+   * Verifies the active master ServerName as seen by all masters.
+   */
+  @Test public void TestActiveMaster() throws Exception {
+    HBaseRpcController rpcController = getRpcController();
+    ServerName activeMaster = TEST_UTIL.getMiniHBaseCluster().getMaster().getServerName();
+    int rpcCount = 0;
+    for (JVMClusterUtil.MasterThread masterThread:
+        TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) {
+      ClientMetaService.BlockingInterface stub =
+          getMasterStub(masterThread.getMaster().getServerName());
+      GetActiveMasterResponse resp =
+          stub.getActiveMaster(rpcController, GetActiveMasterRequest.getDefaultInstance());
+      assertEquals(activeMaster, ProtobufUtil.toServerName(resp.getServerName()));
+      rpcCount++;
+    }
+    assertEquals(MASTER_COUNT, rpcCount);
+  }
+
+  /**
+   * Verifies that the meta region locations RPC returns consistent results across all masters.
+   */
+  @Test public void TestMetaLocations() throws Exception {
+    HBaseRpcController rpcController = getRpcController();
+    List<HRegionLocation> metaLocations = TEST_UTIL.getMiniHBaseCluster().getMaster()
+        .getMetaRegionLocationCache().getMetaRegionLocations().get();
+    Collections.sort(metaLocations);
+    int rpcCount = 0;
+    for (JVMClusterUtil.MasterThread masterThread:
+      TEST_UTIL.getMiniHBaseCluster().getMasterThreads()) {
+      ClientMetaService.BlockingInterface stub =
+          getMasterStub(masterThread.getMaster().getServerName());
+      GetMetaRegionLocationsResponse resp = stub.getMetaRegionLocations(
+          rpcController, GetMetaRegionLocationsRequest.getDefaultInstance());
+      List<HRegionLocation> result = new ArrayList<>();
+      resp.getMetaLocationsList().forEach(
+        location -> result.add(ProtobufUtil.toRegionLocation(location)));
+      Collections.sort(result);
+      assertEquals(metaLocations, result);
+      rpcCount++;
+    }
+    assertEquals(MASTER_COUNT, rpcCount);
+  }
+}


[hbase] 01/03: HBASE-23275: Track active master's address in ActiveMasterManager (#812)

Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch HBASE-18095/client-locate-meta-no-zookeeper
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit f878fcadddccb2b6a028e664dd6068753b285b20
Author: Bharath Vissapragada <bh...@apache.org>
AuthorDate: Wed Nov 20 11:41:36 2019 -0800

    HBASE-23275: Track active master's address in ActiveMasterManager (#812)
    
    * HBASE-23275: Track active master's address in ActiveMasterManager
    
    Currently we just track whether an active master exists.
    It helps to also track the address of the active master in
    all the masters to help serve the client RPC requests to
    know which master is active.
    
    Signed-off-by: Nick Dimiduk <nd...@apache.org>
    Signed-off-by: Andrew Purtell <ap...@apache.org>
---
 .../hadoop/hbase/master/ActiveMasterManager.java   | 63 +++++++++++++++++-----
 .../org/apache/hadoop/hbase/master/HMaster.java    |  4 ++
 .../hbase/master/TestActiveMasterManager.java      | 10 ++++
 3 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
index 50798ed..99cab62 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ActiveMasterManager.java
@@ -1,4 +1,4 @@
-/**
+/*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -17,25 +17,24 @@
  * limitations under the License.
  */
 package org.apache.hadoop.hbase.master;
-
 import java.io.IOException;
+import java.util.Optional;
 import java.util.concurrent.atomic.AtomicBoolean;
-
-import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
-import org.apache.hadoop.hbase.zookeeper.ZKUtil;
-import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
-import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
-import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.ZNodeClearer;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKListener;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
+import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 
 /**
  * Handles everything on master-side related to master election.
@@ -57,12 +56,18 @@ public class ActiveMasterManager extends ZKListener {
   final AtomicBoolean clusterHasActiveMaster = new AtomicBoolean(false);
   final AtomicBoolean clusterShutDown = new AtomicBoolean(false);
 
+  // This server's information.
   private final ServerName sn;
   private int infoPort;
   private final Server master;
 
+  // Active master's server name. Invalidated anytime active master changes (based on ZK
+  // notifications) and lazily fetched on-demand.
+  // ServerName is immutable, so we don't need heavy synchronization around it.
+  private volatile ServerName activeMasterServerName;
+
   /**
-   * @param watcher
+   * @param watcher ZK watcher
    * @param sn ServerName
    * @param master In an instance of a Master.
    */
@@ -107,6 +112,30 @@ public class ActiveMasterManager extends ZKListener {
   }
 
   /**
+   * Fetches the active master's ServerName from zookeeper.
+   */
+  private void fetchAndSetActiveMasterServerName() {
+    LOG.debug("Attempting to fetch active master sn from zk");
+    try {
+      activeMasterServerName = MasterAddressTracker.getMasterAddress(watcher);
+    } catch (IOException | KeeperException e) {
+      // Log and ignore for now and re-fetch later if needed.
+      LOG.error("Error fetching active master information", e);
+    }
+  }
+
+  public Optional<ServerName> getActiveMasterServerName() {
+    if (!clusterHasActiveMaster.get()) {
+      return Optional.empty();
+    }
+    if (activeMasterServerName == null) {
+      fetchAndSetActiveMasterServerName();
+    }
+    // It could still be null, but return whatever we have.
+    return Optional.ofNullable(activeMasterServerName);
+  }
+
+  /**
    * Handle a change in the master node.  Doesn't matter whether this was called
    * from a nodeCreated or nodeDeleted event because there are no guarantees
    * that the current state of the master node matches the event at the time of
@@ -134,6 +163,9 @@ public class ActiveMasterManager extends ZKListener {
           // Notify any thread waiting to become the active master
           clusterHasActiveMaster.notifyAll();
         }
+        // Reset the active master sn. Will be re-fetched later if needed.
+        // We don't want to make a synchronous RPC under a monitor.
+        activeMasterServerName = null;
       }
     } catch (KeeperException ke) {
       master.abort("Received an unexpected KeeperException, aborting", ke);
@@ -151,8 +183,8 @@ public class ActiveMasterManager extends ZKListener {
    * @param checkInterval the interval to check if the master is stopped
    * @param startupStatus the monitor status to track the progress
    * @return True if no issue becoming active master else false if another
-   * master was running or if some other problem (zookeeper, stop flag has been
-   * set on this Master)
+   *   master was running or if some other problem (zookeeper, stop flag has been
+   *   set on this Master)
    */
   boolean blockUntilBecomingActiveMaster(
       int checkInterval, MonitoredTask startupStatus) {
@@ -178,10 +210,14 @@ public class ActiveMasterManager extends ZKListener {
           // We are the master, return
           startupStatus.setStatus("Successfully registered as active master.");
           this.clusterHasActiveMaster.set(true);
+          activeMasterServerName = sn;
           LOG.info("Registered as active master=" + this.sn);
           return true;
         }
 
+        // Invalidate the active master name so that subsequent requests do not get any stale
+        // master information. Will be re-fetched if needed.
+        activeMasterServerName = null;
         // There is another active master running elsewhere or this is a restart
         // and the master ephemeral node has not expired yet.
         this.clusterHasActiveMaster.set(true);
@@ -208,7 +244,8 @@ public class ActiveMasterManager extends ZKListener {
             ZKUtil.deleteNode(this.watcher, this.watcher.getZNodePaths().masterAddressZNode);
 
             // We may have failed to delete the znode at the previous step, but
-            //  we delete the file anyway: a second attempt to delete the znode is likely to fail again.
+            //  we delete the file anyway: a second attempt to delete the znode is likely to fail
+            //  again.
             ZNodeClearer.deleteMyEphemeralNodeOnDisk();
           } else {
             msg = "Another master is the active master, " + currentMaster +
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 9cf38f6..241ed1a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3875,6 +3875,10 @@ public class HMaster extends HRegionServer implements MasterServices {
     return cachedClusterId.getFromCacheOrFetch();
   }
 
+  public Optional<ServerName> getActiveMaster() {
+    return activeMasterManager.getActiveMasterServerName();
+  }
+
   @Override
   public void runReplicationBarrierCleaner() {
     ReplicationBarrierCleaner rbc = this.replicationBarrierCleaner;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java
index 4649eea..84837f9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestActiveMasterManager.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.master;
 
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
@@ -91,6 +92,7 @@ public class TestActiveMasterManager {
     ActiveMasterManager activeMasterManager =
       dummyMaster.getActiveMasterManager();
     assertFalse(activeMasterManager.clusterHasActiveMaster.get());
+    assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
 
     // First test becoming the active master uninterrupted
     MonitoredTask status = Mockito.mock(MonitoredTask.class);
@@ -99,6 +101,7 @@ public class TestActiveMasterManager {
     activeMasterManager.blockUntilBecomingActiveMaster(100, status);
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, master);
+    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
 
     // Now pretend master restart
     DummyMaster secondDummyMaster = new DummyMaster(zk,master);
@@ -108,6 +111,8 @@ public class TestActiveMasterManager {
     activeMasterManager.blockUntilBecomingActiveMaster(100, status);
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, master);
+    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
+    assertMaster(zk, secondActiveMasterManager.getActiveMasterServerName().get());
   }
 
   /**
@@ -135,6 +140,7 @@ public class TestActiveMasterManager {
     ActiveMasterManager activeMasterManager =
       ms1.getActiveMasterManager();
     assertFalse(activeMasterManager.clusterHasActiveMaster.get());
+    assertFalse(activeMasterManager.getActiveMasterServerName().isPresent());
 
     // First test becoming the active master uninterrupted
     ClusterStatusTracker clusterStatusTracker =
@@ -144,6 +150,7 @@ public class TestActiveMasterManager {
         Mockito.mock(MonitoredTask.class));
     assertTrue(activeMasterManager.clusterHasActiveMaster.get());
     assertMaster(zk, firstMasterAddress);
+    assertMaster(zk, activeMasterManager.getActiveMasterServerName().get());
 
     // New manager will now try to become the active master in another thread
     WaitToBeMasterThread t = new WaitToBeMasterThread(zk, secondMasterAddress);
@@ -161,6 +168,8 @@ public class TestActiveMasterManager {
     assertTrue(t.manager.clusterHasActiveMaster.get());
     // But secondary one should not be the active master
     assertFalse(t.isActiveMaster);
+    // Verify the active master ServerName is populated in standby master.
+    assertEquals(firstMasterAddress, t.manager.getActiveMasterServerName().get());
 
     // Close the first server and delete it's master node
     ms1.stop("stopping first server");
@@ -189,6 +198,7 @@ public class TestActiveMasterManager {
 
     assertTrue(t.manager.clusterHasActiveMaster.get());
     assertTrue(t.isActiveMaster);
+    assertEquals(secondMasterAddress, t.manager.getActiveMasterServerName().get());
 
     LOG.info("Deleting master node");