You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by su...@apache.org on 2021/07/09 06:55:54 UTC

[hbase] branch HBASE-24666 updated (2580c97 -> add13ab)

This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a change to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    omit 2580c97  HBASE-25807 Move method reportProcedureDone from RegionServerStatus.proto to Master.proto (#3205)
    omit 786c7d7  HBASE-24737 Find a way to resolve WALFileLengthProvider#getLogFileSizeIfBeingWritten problem (#3045)
    omit 1553b39  HBASE-25113 [testing] HBaseCluster support ReplicationServer for UTs (#2662)
    omit c8d8782  HBASE-25071 ReplicationServer support start ReplicationSource internal (#2452)
    omit bd13d14  HBASE-24999 Master manages ReplicationServers (#2579)
    omit 29adcce  HBASE-24684 Fetch ReplicationSink servers list from HMaster instead o… (#2077)
    omit b86d97c  HBASE-24998 Introduce a ReplicationSourceOverallController interface and decouple ReplicationSourceManager and ReplicationSource (#2364)
    omit 1f11ee4  HBASE-24982 Disassemble the method replicateWALEntry from AdminService to a new interface ReplicationServerService (#2360)
    omit c9a01b2  HBASE-24683 Add a basic ReplicationServer which only implement ReplicationSink Service (#2111)
    omit d4bcf8d  HBASE-24735: Refactor ReplicationSourceManager: move logPositionAndCleanOldLogs/cleanUpHFileRefs to ReplicationSource inside (#2064)
    omit b60ec36  HBASE-24681 Remove the cache walsById/walsByIdRecoveredQueues from ReplicationSourceManager (#2019)
    omit a62a4b1  HBASE-24682 Refactor ReplicationSource#addHFileRefs method: move it to ReplicationSourceManager (#2020)
     add 8f03c44  HBASE-25556 Frequent replication "Encountered a malformed edit" warnings (#2965)
     add 51a3d45  HBASE-25598 TestFromClientSide5.testScanMetrics is flaky (#2977)
     add ed2693f  HBASE-25602 Fix broken TestReplicationShell on master (#2981)
     add a7d0445  HBASE-25601 Use ASF-official mailing list archives
     add 3f1c486  HBASE-25596: Fix NPE and avoid permanent unreplicated data due to EOF (#2987)
     add 8d0de96  HBASE-25590 Bulkload replication HFileRefs cannot be cleared in some cases where set exclude-namespace/exclude-table-cfs (#2969)
     add a984358  HBASE-25586 Fix HBASE-22492 on branch-2 (SASL GapToken) (#2961)
     add 30cb419  HBASE-25615 Upgrade java version in pre commit docker file (#2997)
     add 34bd1bd  HBASE-25620 Increase timeout value for pre commit (#3000)
     add d5df999  HBASE-25604 Upgrade spotbugs to 4.x (#2986)
     add b24bd40  HBASE-25611 ExportSnapshot chmod flag uses value as decimal (#3003)
     add b522d2a  Revert "HBASE-25604 Upgrade spotbugs to 4.x (#2986)"
     add a97a40c  HBASE-25580 Release scripts should include in the vote email the git hash that the RC tag points to (#2956)
     add 157200e  HBASE-25402 Sorting order by start key or end key is not considering empty start key/end key (#2955)
     add e099ef3  HBASE-25626 Possible Resource Leak in HeterogeneousRegionCountCostFunction
     add a4eb1aa  HBASE-25421 There is no limit on the column length when creating a table (#2796)
     add 5d9a6ed  HBASE-25367 Sort broken after Change 'State time' in UI (#2964)
     add e80b901  HBASE-25603 Add switch for compaction after bulkload (#2982)
     add f93c9c6  HBASE-25385 TestCurrentHourProvider fails if the latest timezone changes are not present (#3012)
     add 830d289  HBASE-25460 : Expose drainingServers as cluster metric (#2995)
     add dd4a11e  HBASE-25637 Rename method completeCompaction to refreshStoreSizeAndTotalBytes (#3023)
     add 9b0485f  HBASE-23578 [UI] Master UI shows long stack traces when table is broken (#3014)
     add 190c253  HBASE-25609 There is a problem with the SPLITS_FILE in the HBase shell statement(#2992)
     add 53128fe  HBASE-25644 Scan#setSmall blindly sets ReadType as PREAD
     add c1dacfd  HBASE-25547 (addendum): Roll ExecutorType into ExecutorConfig (#2996)
     add 109bd24  HBASE-25630 Set switch compaction after bulkload default as false (#3022)
     add 573daed  HBASE-25646: Possible Resource Leak in CatalogJanitor #3036
     add d818eff  HBASE-25582 Support setting scan ReadType to be STREAM at cluster level (#3035)
     add 92fe609  HBASE-25604 Upgrade spotbugs to 4.x (#3029)
     add 95342a2  HBASE-25654 [Documentation] Fix format error in security.adoc
     add 373dc77  HBASE-25548 Optionally allow snapshots to preserve cluster's max file… (#2923)
     add d79019b  HBASE-25629 Reimplement TestCurrentHourProvider to not depend on unstable TZs (#3013)
     add 0e6c2c4  HBASE-25636 Expose HBCK report as metrics (#3031)
     add 0cc1ae4  HBASE-25587 [hbck2] Schedule SCP for all unknown servers (#2978)
     add cc61714  HBASE-25566 RoundRobinTableInputFormat (#2947)
     add 1a69a52  HBASE-25570 On largish cluster, "CleanerChore: Could not delete dir..." makes master log unreadable (#2949)
     add 7386fb6  HBASE-25622 Result#compareResults should compare tags. (#3026)
     add 876fec1  HBASE-25657 Fix spotbugs warnings after upgrading spotbugs to 4.x (#3041)
     add aeec8ca  HBASE-25635 CandidateGenerator may miss some region balance actions (#3024)
     add 8337fb2  HBASE-25662 Fix spotbugs warning in RoundRobinTableInputFormat (#3050)
     add f4e1ab7  HBASE-25663 Make graceful_stop localhostname compare match even if fqdn (#3048)
     add 630f47e   HBASE-25660 Print split policy in use on Region open (as well as split policy vitals) (#3044)
     add 21409bf  HBASE-25573 release script generated vote template has incorrect staging area (#2952)
     add 625bea3  HBASE-25595 TestLruBlockCache.testBackgroundEvictionThread is flaky (#2974)
     add 0ef892b  HBASE-25621 Balancer should check region plan source to avoid misplace region groups (#3002)
     add db2e6d8  HBASE-25597 Add row info in Exception when cell size exceeds maxCellSize (#2976)
     add 5457554  HBASE-25374 Make REST Client connection and socket time out configurable (#2752)
     add c36e40e  Revert "HBASE-25663 Make graceful_stop localhostname compare match even if fqdn (#3048)"
     add 59ec375  HBASE-25594 graceful_stop.sh fails to unload regions when ran at localhost
     add d74ae15  HBASE-25568 Upgrade Thrift jar to fix CVE-2020-13949 (#3043)
     add 75931b4  HBASE-25669 Fix typo of hbase.mob.compaction.chore.period in the docs (#3056)
     add ebb0adf  HBASE-25665 Option to use hostname instead of canonical hostname for secure HBase cluster connection (#3051)
     add bcf503e  HBASE-25653 Add units and round off region size to 2 digits after decimal (#3046)
     add 976629c  HBASE-25608 Support HFileOutputFormat locality sensitive even destination cluster is different from source cluster (#2988)
     add ff38218  HBASE-25627: HBase replication should have a metric to represent if the source is stuck getting initialized (#3018)
     add d200a67  Update 2.4.x download link to release 2.4.2
     add a698b1e  HBASE-25673 Wrong log regarding current active master at ZKLeaderManager#waitToBecomeLeader (#3057)
     add 7ac1c8b  HBASE-25677 Server+table counters on each scan #nextRaw invocation becomes a bottleneck when heavy load (#3061)
     add 3ebb978  HBASE-25674 - RegionInfo.parseFrom(DataInputStream) sometimes fails to read the protobuf magic marker (#3062)
     add 0cead10  HBOPS-25594 Make easier to use graceful_stop on localhost mode (#3054)
     add d93035a  HBASE-25643 The delayed FlushRegionEntry should be removed when we ne… (#3049)
     add 585aca1  HBASE-25518 Support separate child regions to different region servers (#3001)
     add 82dfa27  Revert "HBOPS-25594 Make easier to use graceful_stop on localhost mode (#3054)"
     add cc6c14a  HBASE-25594 Make easier to use graceful_stop on localhost mode (#3054)
     add f405990  HBASE-25678 Support nonce operations for Increment/Append in RowMutations and CheckAndMutate (#3064)
     add fea4bd1  HBASE-25679 Size of log queue metric is incorrect (#3071)
     add a3938c8  HBASE-25681 Add a switch for server/table queryMeter (#3070)
     add ba3610d  HBASE-19577 Use log4j2 instead of log4j for logging (#1708)
     add 326835e  HBASE-25594 Make easier to use graceful_stop on localhost mode (#3054) Addendum.
     add 3358091  HBASE-25683 Simplify UTs using DummyServer (#3069)
     add b8a03d7  HBASE-25689 Nightly client integration test is failing after upgrading to log4j2 (#3077)
     add 87d0533  HBASE-25688 Use CustomRequestLog instead of Slf4jRequestLog for jetty (#3075)
     add 202b17f  HBASE-25685 asyncprofiler2.0 no longer supports svg; wants html (#3079)
     add 1e3fe3c  HBASE-25691 Test failure: TestVerifyBucketCacheFile.testRetrieveFromFile (#3081)
     add f6bb4bb  HBASE-25693 NPE getting metrics from standby masters (MetricsMasterWrapperImpl.getMergePlanCount) (#3091)
     add 1e4639d  HBASE-25032 Wait for region server to become online before adding it to online servers in Master (#2769)
     add b062598  HBASE-25686 [hbtop] Add some javadoc (#3096)
     add 93b1163  HBASE-25702 Remove RowProcessor (#3097)
     add 6a8998b  HBASE-25695 Link to the filter on hbase:meta from user tables panel on master page (#3092)
     add 05dddaf  HBASE-25710 During the recovery process,if there is an incremental backup of data that has not been updated,warning and skip (#3103)
     add 60dde9a  HBASE-25705 Convert proto to RSGroupInfo is costly (#3102)
     add 71417ca  HBASE-25707 When restoring a table, create a namespace if it does not exist (#3100)
     add 57a49f5  HBASE-25692 Always try to close the WAL reader when we catch any exception (#3090)
     add 46f7d9d  HBASE-25703 Support conditional update in MultiRowMutationEndpoint (#3098)
     add 0242489  HBASE-25696 Need to initialize SLF4JBridgeHandler in jul-to-slf4j for redirecting jul to slf4j (#3093)
     add e14ec57  HBASE-25558:Adding audit log for execMasterService (#3101)
     add 446f22f  HBASE-25721 Add 2.3.5 to the downloads page (#3114)
     add 7a31557  HBASE-25726 MoveCostFunction is not included in the list of cost functions for StochasticLoadBalancer (#3116)
     add 2a44f3e  HBASE-25696 Addendum add missing jcl-over-slf4j and jul-to-slf4 dependencies
     add 5a63fe6  HBASE-25199 Remove deprecated HStore#getStoreHomedir methods (#2562)
     add 048ca4e  HBASE-25174 Remove deprecated fields in HConstants (#2558)
     add f4e1236  HBASE-25735 Add target Region to connection exceptions Signed-off-by: Wellington Chevreuil <wc...@apache.org>
     add d9f4f41   HBASE-25735 Add target Region to connection exceptions Addendum to fix broke compile.
     add a59ac48  HBASE-25743: Retry REQUESTTIMEOUT based KeeperExceptions in ZK client. (#3131)
     add 6444e94  HBASE-25717 RegionServer aborted with due to ClassCastException (#3108)
     add f9819f3  HBASE-25735 Add target Region to connection exceptions Restore API for Phoenix (though it shouldn't be using Private classes).
     add 8ba9034  HBASE-25750 Upgrade RpcControllerFactory and HBaseRpcController from Private to LimitedPrivate(COPROC,PHOENIX) (#3136)
     add ad06aa2  HBASE-25747 Remove unused getWriteAvailable method in OperationQuota (#3133)
     add 8e08952  HBASE-25716 The configured loggers in log4j2.xml will always be created (#3111)
     add 6aab134  Add Geoffrey Jacoby to developers list in pom.xml
     add 74e533d  HBASE-25751 - Add writable TimeToPurgeDeletes to ScanOptions (#3137)
     add 5f1f8be  HBASE-25744 Change default of `hbase.normalizer.merge.min_region_size.mb` to `0`
     add 7549410  HBASE-25755 Exclude tomcat-embed-core from libthrift (#3141)
     add f9e928e  HBASE-25184 Move RegionLocationFinder to hbase-balancer (#2543)
     add de012d7  HBASE-25759 The master services field in LocalityBasedCostFunction is never used (#3144)
     add 5910e9e2 HBASE-25767 CandidateGenerator.getRandomIterationOrder is too slow on large cluster (#3149)
     add c5b0989  HBASE-25762 Improvement for some debug-logging guards (#3145)
     add bc52bca  HBASE-25770 Http InfoServers should honor gzip encoding when requested (#3159)
     add 6cf4fdd  HBASE-25776 Use Class.asSubclass to fix the warning in StochasticLoadBalancer.loadCustomCostFunctions (#3163)
     add 533c84d  HBASE-25739 TableSkewCostFunction need to use aggregated deviation (#3067)
     add bf78246  HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)
     add 94f4479  HBASE-25780 Add 2.2.7 to download page (#3175)
     add b65890d  Revert "HBASE-25739 TableSkewCostFunction need to use aggregated deviation (#3067)"
     add e8ac1fb  HBASE-25777 Fix wrong initialization value in StressAssignmentManagerMonkeyFactory (#3164)
     add 33e886c  HBASE-25780 Add 2.2.7 to download page [addendum] (#3180)
     add 781da18  HBASE-25290 Remove table on master related code in balancer implementation (#3162)
     add 0d257ba  HBASE-25763 TestRSGroupsWithACL.setupBeforeClass is flaky (#3158)
     add 72aa741  HBASE-25798 typo in MetricsAssertHelper (#3186)
     add d5c5e48  HBASE-25793 Move BaseLoadBalancer.Cluster to a separated file (#3185)
     add 50920ee  HBASE-25774 TestSyncReplicationStandbyKillRS#testStandbyKillRegionServer is flaky (#3189)
     add 5f4e2e1  HBASE-25766 Introduce RegionSplitRestriction that restricts the pattern of the split point (#3150)
     add 996862c  HBASE-25754 StripeCompactionPolicy should support compacting cold regions (#3152)
     add 96fefce  HBASE-25802 Miscellaneous style improvements for load balancer related classes (#3192)
     add 9895b2d  HBASE-25756 Support alternate compression for major and minor compactions (#3142)
     add 302d9ea  HBASE-25373 Remove HTrace completely in code base and try to make use of OpenTelemetry
     add 2420286  HBASE-25401 Add trace support for async call in rpc client (#2790)
     add 57960fa  HBASE-25424 Find a way to config OpenTelemetry tracing without direct… (#2808)
     add 805b2ae  HBASE-23898 Add trace support for simple apis in async client (#2813)
     add dcb78bd  HBASE-25454 Add trace support for connection registry (#2828)
     add ae2c62f  HBASE-25481 Add host and port attribute when tracing rpc call at client side (#2857)
     add 03e12bf  HBASE-25455 Add trace support for HRegion read/write operation (#2861)
     add 2be2c63  HBASE-25484 Add trace support for WAL sync (#2892)
     add bb8c496  HBASE-25535 Set span kind to CLIENT in AbstractRpcClient (#2907)
     add f6ff519  HBASE-25591 Upgrade opentelemetry to 0.17.1 (#2971)
     add 8d68f8c  HBASE-25617 Revisit the span names (#2998)
     add 8399293  HBASE-25616 Upgrade opentelemetry to 1.0.0 (#3034)
     add 7f90c22  HBASE-25723 Temporarily remove the trace support for RegionScanner.next (#3119)
     add 8df9beb  HBASE-25732 Change the command line argument for tracing after upgrading opentelemtry to 1.0.0 (#3123)
     add b714889  HBASE-25733 Upgrade opentelemetry to 1.0.1 (#3122)
     add be4503d  HBASE-23762 Add documentation on how to enable and view tracing with OpenTelemetry (#3135)
     add f36e153  HBASE-25778 The tracinig implementation for AsyncConnectionImpl.getHbck is incorrect (#3165)
     add a4d954e  HBASE-25757 Move BaseLoadBalancer to hbase-balancer module (#3191)
     add 8d2a0ef  HBASE-25811 The client integration test is failing after HBASE-22120 merged (#3201)
     add 8856f61  HBASE-25757 Addendum remove CandidateGenerator classes under hbase-server module
     add 2382f68  HBASE-25792 Filter out o.a.hadoop.thirdparty building shaded jars (#3184)
     add b061b0c  HBASE-25779 HRegionServer#compactSplitThread should be private
     add 6c65314  HBASE-25819 Fix style issues for StochasticLoadBalancer (#3207)
     add 73a82bd  HBASE-25825 RSGroupBasedLoadBalancer.onConfigurationChange should chain the request to internal balancer (#3209)
     add 5d42f58  HBASE-25816: Improve the documentation of Architecture section of reference guide (#3211)
     add accfceb  HBASE-25833 fix HBase Configuration File Descriptions (#3216)
     add 00fec24  HBASE-25790 NamedQueue 'BalancerRejection' for recent history of balancer skipping (#3182)
     add 7640134  HBASE-25774 Added more detailed logs about the restarting of region servers (#3213)
     add 6ad5b9e  HBASE-25824 IntegrationTestLoadCommonCrawl (#3208)
     add 762abe3  HBASE-25838 Use double instead of Double in StochasticLoadBalancer (#3221)
     add e44592a  HBASE-25840 CatalogJanitor warns about skipping gc of regions during RIT, but does not actually skip (#3223)
     add fda324b  HBASE-25836 RegionStates#getAssignmentsForBalancer should only care about OPEN or OPENING regions (#3219)
     add 432d141  HBASE-25835 Ignore duplicate split requests from regionserver reports (#3218)
     add 17193da  HBASE-25842 move regionserver http-related code into o.a.h.h.regionserver.http
     add 90f9864  HBASE-25834 Remove balanceTable method from LoadBalancer interface (#3217)
     add eb9b543  HBASE-25843 move master http-related code into o.a.h.h.master.http
     add cc88cf0  HBASE-25847 More DEBUG and TRACE level logging in CatalogJanitor and HbckChore (#3230)
     add ba4cb91  HBASE-25851 Make LoadBalancer not extend Configurable interface (#3233)
     add 6309c09  HBASE-25854 Remove redundant AM in-memory state changes in CatalogJanitor (#3234)
     add 6cfff27  HBASE-25837 TestRollingRestart is flaky (#3220)
     add 02b018c  HBASE-25774 ServerManager.getOnlineServer may miss some region servers when refreshing state in some procedure implementations
     add c2a1d31  HBASE-25774 Addendum fix compile error
     add 8c2332d  HBASE-25860 Add metric for successful wal roll requests. (#3238)
     add 2b6a91a  HBASE-25859 Reference class incorrectly parses the protobuf magic marker (#3236)
     add 29bd3dd  HBASE-25852 Move all the intialization work of LoadBalancer implementation to initialize method (#3248)
     add 630c73f  HBASE-25867 Extra doc around ITBLL (#3242)
     add cdac8fd  HBASE-25862 update reference guide for log4j2.xml (#3252)
     add b65733c  while creating manifest, search only for ancestors insteadd of all of history (#3246)
     add 0955a7a  HBASE-25884 Return empty records for disabled balancer in-memory queue (#3263)
     add f6c3eca  Add haxiaolin to pom.xml
     add d69d5c2  HBASE-25861 Correct the usage of Configuration#addDeprecation (#3249)
     add 4115c2e  Revert "while creating manifest, search only for ancestors insteadd of all of history (#3246)"
     add 8ae4d65  HBASE-25870 Validate only direct ancestors instead of entire history for a particular backup
     add 85d8ec7  HBASE-25883 The regionFinder and rackManager fields in BaseLoadBalancer should be volatile (#3262)
     add 1c6994a  HBASE-25872 Add documentation for LoadBalancer about synchronization (#3267)
     add 2126ec9  HBASE-25875 RegionServer failed to start with IllegalThreadStateException due to race condition in AuthenticationTokenSecretManager (#3250)
     add 741b4b4  HBASE-25032 Do not assign regions to region server which has not called regionServerReport yet (#3268)
     add 5b99409  HBASE-25791 UI of master-status to show a recent history of that why balancer was rejected to run (#3275)
     add fe47557  HBASE-25841 Add basic jshell support
     add 7c24ed4  HBASE-25897 TestRetainAssignmentOnRestart is flaky after HBASE-25032 (#3281)
     add 15e8611  HBASE-25848: Add flexibility to backup replication in case replication filter throws an exception (#3283)
     add a1177b3  HBASE-25682 Add a new command to update the configuration of all RSs in a RSGroup (#3080)
     add dfa88e1  HBASE-25827 Per Cell TTL tags get duplicated with increments causing tags length overflow (#3210)
     add 8ec6fd9  HBASE-25869 WAL value compression (#3244)
     add b581b32  HBASE-25817 Memory leak from thrift server hashMap (#3257)
     add edde01c  HBASE-25892: 'False' should be 'True' in auditlog of listLabels (#3273)
     add 7f6b778  HBASE-25773 TestSnapshotScannerHDFSAclController.setupBeforeClass is flaky (#3160)
     add fe70fce  HBASE-25888 Backup tests are categorically flakey (#3279)
     add f94f4e2  HBASE-25873 Refactor and cleanup the code for CostFunction (#3274)
     add b02c810  HBASE-25899 Improve efficiency of SnapshotHFileCleaner (#3280)
     add f53ceee  HBASE-25898 RS getting aborted due to NPE in Replication WALEntryStream (#3292)
     add 21aa553  HBASE-25745 Deprecate/Rename config `hbase.normalizer.min.region.count` to `hbase.normalizer.merge.min.region.count`
     add 36affda  HBASE-25906 UI of master-status to show recent history of balancer desicion (#3296)
     add 6a77872  HBASE-25894 Improve the performance for region load and region count related cost functions (#3276)
     add 76fbb8b  HBASE-25818 Move StochasticLoadBalancer to hbase-balancer module (#3206)
     add 19fd42b  HBASE-25841 Add basic jshell support (addendum)
     add a22e418  HBASE-25924 Re-compute size of WAL file while removing from WALEntryStream (#3314)
     add 63141bf  HBASE-25926 Cleanup MetaTableAccessor references in FavoredNodeBalancer related code (#3313)
     add feb89d9  HBASE-25933 Log trace raw exception, instead of cause message in NettyRpcServerRequestDecoder (#3323)
     add c88ae3a  HBASE-25908 Exclude jakarta.activation-api (#3299)
     add ed8df5e  HBASE-25758 Move MetaTableAccessor out of hbase-balancer module (#3309)
     add 7218c83  HBASE-25931 Move FavoredNodeManager to hbase-balancer module (#3324)
     add 560297d  HBASE-25651 NORMALIZER_TARGET_REGION_SIZE needs a unit in its name (#3063)
     add 479ae88  HBASE-25928 TestHBaseConfiguration#testDeprecatedConfigurations is broken with Hadoop 3.3 (#3320)
     add 3f7d289  HBASE-25910 - Fix port assignment test (#3308)
     add c1d299f  HBASE-25938 The SnapshotOfRegionAssignmentFromMeta.initialize call in FavoredNodeLoadBalancer is just a dummy one (#3329)
     add 9a2027b  HBASE-25927: Fix the log messages by not stringifying the exceptions in log (#3338)
     add f119a86  HBASE-25940 Update Compression/TestCompressionTest: LZ4, SNAPPY, LZO (#3334)
     add f2ff816  HBASE-25939 Move more tests code for StochasticLoadBalancer to hbase-balancer module (#3331)
     add 528f543  HBASE-25942: Get rid of null regioninfo in wrapped connection exceptions (#3337)
     add d164314  HBASE-25941 TestRESTServerSSL fails because of jdk bug (#3335)
     add bd4c4ce  HBASE-25948 Remove deprecated ZK command 'rmr' in hbase-cleanup.sh (#3343)
     add 1ccba10  HBASE-25903 ReadOnlyZKClient APIs - CompletableFuture.get() calls can cause threads to hang forver when ZK client create throws Non IOException (#3293)
     add 06c6e06  HBASE-25916 Move FavoredNodeLoadBalancer to hbase-balancer module (#3327)
     add b04c3c7  HBASE-25932: Ensure replication reads the trailer bytes from WAL. (#3332)
     add 4fb0861  HBASE-25932 addendum: Add test comments. (#3344)
     add e9f5953  HBASE-25956 Add 2.4.3 to download page
     add 335305e  HBASE-25911 Replace calls to System.currentTimeMillis with EnvironmentEdgeManager.currentTime (#3302)
     add 426c3c1  HBASE-25799 add clusterReadRequests and clusterWriteRequests jmx (#3188)
     add 4671cb1  HBASE-25929 RegionServer JVM crash when compaction (#3318)
     add 1ecff8a  HBASE-25930 Thrift does not support requests in Kerberos environment (#3326)
     add de06e20  HBASE-25970 MOB data loss - incorrect concatenation of MOB_FILE_REFS (#3355)
     add eddf4cc  HBASE-25963 HBaseCluster should be marked as IA.Public (#3348)
     add 456c7f9  HBASE-25977 Remove 2.2.7 from download page (#3361)
     add 40a3d57  HBASE-22708 Remove the deprecated methods in Hbck interface (#3362)
     add be14605  HBASE-25918 Upgrade hbase-thirdparty dependency to 3.5.1 (#3317)
     add 1654dcf  HBASE-25969 Purge netty-all transitive includes (#3353) (#3365)
     add 471e815  HBASE-25981 JVM crash when displaying RegionServer UI (#3364)
     add 7f7a293  HBASE-25987 Make SSL keystore type configurable for HBase ThriftServer (#3367)
     add 329f0ba  HBASE-25967 The readRequestsCount does not calculate when the outResu… (#3351)
     add 6b81ff9  HBASE-25993 Make excluded SSL cipher suites configurable for all Web UIs (#3375)
     add ad44577  HBASE-25999 Add 1.7.0 to download page
     add ba6995e  HBASE-25989 FanOutOneBlockAsyncDFSOutput using shaded protobuf in hdfs 3.3+ (#3368)
     add ec31818  HBASE-26002 MultiRowMutationEndpoint should return the result of the conditional update (#3384)
     add a35ec99  HBASE-26002 MultiRowMutationEndpoint should return the result of the conditional update (addendum)
     add 97f90e0  HBASE-25994 Active WAL tailing fails when WAL value compression is enabled (#3377)
     add 4262887  HBASE-26002 MultiRowMutationEndpoint should return the result of the conditional update (addendum)
     add 0c0ea5e  HBASE-26003 Update downloads.xml for release 2.4.4
     add e551cd6  Amend HBASE-26003 Update downloads.xml for release 2.4.4
     add 480b6bb  HBASE-25995 Change the method name for DoubleArrayCost.setCosts (#3381)
     add 8f618a0  HBASE-25997 NettyRpcFrameDecoder decode request header wrong when han… (#3380)
     add 555f8b4  HBASE-26000 Optimize the display of ZK dump in the master web UI (#3383)
     add 7466e08  HBASE-26008 Fix typo in AsyncConnectionImpl (#3391)
     add 5a19bcf  HBASE-25984: Avoid premature reuse of sync futures in FSHLog (#3371)
     add eb242be  HBASE-25976 Implement a master based ReplicationTracker (#3390)
     add 336d846  HBASE-25998: Redo synchronization in SyncFuture (#3382)
     add 53f61ef  HBASE-26001 When turn on access control, the cell level TTL of Increment and Append operations is invalid (#3385)
     add f0a3959  HBASE-26005 Update ref guide about the EOL for 2.2.x (#3388)
     add d292375  HBASE-25992 Polish the ReplicationSourceWALReader code for 2.x after HBASE-25596 (#3376)
     add 9f4177f  HBASE-25698 Fixing IllegalReferenceCountException when using TinyLfuBlockCache (#3215)
     add c5461aa  HBASE-25992 Addendum add missing catch WALEntryFilterRetryableException back
     add f640eef  HBASE-26013 Get operations readRows metrics becomes zero after HBASE-25677 (#3404)
     add d44292a  HBASE-25937: Clarify UnknownRegionException (#3330)
     add d9bd296  HBASE-25877 Add access check for compactionSwitch (#3253)
     add 9a324bd  Remove the reflection and call Configuration.getPassword() directly. (#3408)
     add fa2d127  HBASE-25934 Add username for RegionScannerHolder (#3325)
     add 1a9ddb6  Revert "Remove the reflection and call Configuration.getPassword() directly. (#3408)"
     add cb247f9  HBASE-26019 Remove the reflection and call Configuration.getPassword() directly. (#3408)
     add 39d143f  HBASE-26020 Split TestWALEntryStream.testDifferentCounts out (#3409)
     add dcd0fb8  HBASE-25966 Fix typo in NOTICE.vm
     add d11dc81  HBASE-26012 Improve logging and dequeue logic in DelayQueue (#3397)
     add bffe895  HBASE-26025 Add a flag to mark if the IOError can be solved by retry in thrift IOError (#3414)
     add e6eb657  HBASE-26015 Should implement getRegionServers(boolean) method in Asyn… (#3406)
     add fb4af2a  HBASE-25914 Provide slow/large logs on RegionServer UI (#3319)
     add 22ec681a HBASE-25980 Master table.jsp pointed at meta throws 500 when no all r… (#3374)
     add 9eae57f  Add weichiu to the developer section.
     add 79659d8  HBASE-23817 The message "Please make sure that backup is enabled on the cluster." is shown even when the backup feature is enabled (#3427)
     add 64d4915  HBASE-26039 TestReplicationKillRS is useless after HBASE-23956 (#3440)
     add 51893b9  HBASE-26029 It is not reliable to use nodeDeleted event to track region server's death (#3430)
     add 147b030  HBASE-26028 The view as json page shows exception when using TinyLfuBlockCache (#3420)
     add 4c7da49  HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3441) (#3417)
     add 84f9900  HBASE-22923 min version of RegionServer to move system table regions (#3439) (#3438)
     add 1c28633  HBASE-26035 Redundant null check in the compareTo function (#3433)
     add 5118321  HBASE-26059 Set version as 3.0.0-alpha-1 in master in prep for first RC of 3.0.0-alpha-1 (#3453)
     add ef639ff  HBASE-26041 Replace PrintThreadInfoHelper with HBase's own ReflectionUtils.printThreadInfo() (#3442)
     add fab0505  HBASE-26051 Remove reflections used to access HDFS EC APIs (#3446)
     add 82c44b4  HBASE-26050 Remove the reflection used in FSUtils.isInSafeMode (#3445)
     add 5ef5e64  HBASE-26057 Remove reflections used to access Hadoop 2 API in FanOutOneBlockAsyncDFSOutputHelper (#3448)
     add 7aa0dfa  HBASE-26061 [create-release] The indent of checkcompatibility.py is broken after HBASE-25473 (#3454)
     add b80d70c  HBASE-26030 hbase-cleanup.sh did not clean the wal dir if hbase.wal.dir configured individually (#3432)
     add 934fe02  HBASE-26063 The current checkcompatibility.py script can not compare master and rel/2.0.0 (#3457)
     add 1883889  HBASE-22923 Consider minVersionToMoveSysTables while moving region and creating regionPlan (ADDENDUM) (#3455)
     add a3ad97f  HBASE-26065 StripeStoreFileManager does not need to throw IOException for most methods (#3459)
     add 7e5a0db  HBASE-26068 The last assertion in TestHStore.testRefreshStoreFilesNotChanged is wrong (#3461)
     new 0c061bc  HBASE-24682 Refactor ReplicationSource#addHFileRefs method: move it to ReplicationSourceManager (#2020)
     new 8481d54  HBASE-24681 Remove the cache walsById/walsByIdRecoveredQueues from ReplicationSourceManager (#2019)
     new eded309  HBASE-24735: Refactor ReplicationSourceManager: move logPositionAndCleanOldLogs/cleanUpHFileRefs to ReplicationSource inside (#2064)
     new 2fcdb7a  HBASE-24683 Add a basic ReplicationServer which only implement ReplicationSink Service (#2111)
     new d2588a1  HBASE-24982 Disassemble the method replicateWALEntry from AdminService to a new interface ReplicationServerService (#2360)
     new 4718d24  HBASE-24998 Introduce a ReplicationSourceOverallController interface and decouple ReplicationSourceManager and ReplicationSource (#2364)
     new 03a3057  HBASE-24684 Fetch ReplicationSink servers list from HMaster instead o… (#2077)
     new 6d7bd0a  HBASE-24999 Master manages ReplicationServers (#2579)
     new c8c85f4  HBASE-25071 ReplicationServer support start ReplicationSource internal (#2452)
     new 6ae6097  HBASE-25113 [testing] HBaseCluster support ReplicationServer for UTs (#2662)
     new ff16870  HBASE-24737 Find a way to resolve WALFileLengthProvider#getLogFileSizeIfBeingWritten problem (#3045)
     new add13ab  HBASE-25807 Move method reportProcedureDone from RegionServerStatus.proto to Master.proto (#3205)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (2580c97)
            \
             N -- N -- N   refs/heads/HBASE-24666 (add13ab)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 12 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 bin/graceful_stop.sh                               |    9 +-
 bin/hbase                                          |   61 +-
 bin/hbase-cleanup.sh                               |   12 +-
 bin/hbase-daemon.sh                                |   16 +-
 bin/hbase.cmd                                      |   19 +-
 .../hbase_startup.jsh                              |   15 +-
 conf/hbase-env.cmd                                 |    5 +
 conf/hbase-env.sh                                  |    8 +
 conf/log4j.properties                              |  139 --
 .../log4j2-hbtop.xml                               |   32 +-
 conf/log4j2.xml                                    |  109 ++
 dev-support/Jenkinsfile_GitHub                     |    4 +-
 dev-support/checkcompatibility.py                  |    6 +-
 dev-support/create-release/release-build.sh        |   11 +-
 dev-support/create-release/vote.tmpl               |    6 +-
 dev-support/docker/Dockerfile                      |   13 +-
 .../hbase_nightly_pseudo-distributed-test.sh       |   38 +-
 dev-support/spotbugs-exclude.xml                   |   19 +
 hbase-annotations/pom.xml                          |    2 +-
 hbase-archetypes/hbase-archetype-builder/pom.xml   |    2 +-
 hbase-archetypes/hbase-client-project/pom.xml      |   28 +-
 .../src/main/resources/log4j.properties            |  121 --
 .../src/main/resources/log4j2.xml                  |  109 ++
 .../hbase-shaded-client-project/pom.xml            |   28 +-
 .../src/main/resources/log4j.properties            |  121 --
 .../src/main/resources/log4j2.xml                  |  109 ++
 hbase-archetypes/pom.xml                           |    2 +-
 hbase-assembly/pom.xml                             |   24 +-
 hbase-assembly/src/main/assembly/client.xml        |   20 +-
 .../src/main/assembly/hadoop-three-compat.xml      |   22 +-
 hbase-asyncfs/pom.xml                              |   20 +-
 .../FanOutOneBlockAsyncDFSOutputHelper.java        |   66 +-
 .../hadoop/hbase/io/asyncfs/ProtobufDecoder.java   |    2 +-
 .../hadoop/hbase/io/asyncfs/AsyncFSTestBase.java   |    8 -
 .../TestSaslFanOutOneBlockAsyncDFSOutput.java      |    4 +-
 hbase-backup/pom.xml                               |   20 +-
 .../apache/hadoop/hbase/backup/BackupDriver.java   |    3 +-
 .../hbase/backup/BackupRestoreConstants.java       |    6 +-
 .../apache/hadoop/hbase/backup/RestoreDriver.java  |    3 +-
 .../hadoop/hbase/backup/impl/BackupManager.java    |   18 +-
 .../hbase/backup/impl/RestoreTablesClient.java     |    5 +
 .../backup/mapreduce/MapReduceBackupMergeJob.java  |   15 +-
 .../hadoop/hbase/backup/util/RestoreTool.java      |   28 +-
 .../apache/hadoop/hbase/backup/TestBackupBase.java |   48 +-
 .../hbase/backup/TestBackupDeleteWithFailures.java |   14 +-
 .../hadoop/hbase/backup/TestBackupManager.java     |    9 +-
 .../hadoop/hbase/backup/TestBackupSystemTable.java |    5 +-
 .../hadoop/hbase/backup/TestRemoteBackup.java      |   17 +-
 .../hadoop/hbase/backup/TestRemoteRestore.java     |   15 +-
 hbase-balancer/pom.xml                             |   39 +-
 .../hbase/favored/FavoredNodeAssignmentHelper.java |  105 +-
 .../hbase/favored/FavoredNodeLoadBalancer.java     |   72 +-
 .../hadoop/hbase/favored/FavoredNodesManager.java  |   89 +-
 .../hadoop/hbase/favored/FavoredNodesPlan.java     |    2 +-
 .../hadoop/hbase/favored/FavoredNodesPromoter.java |    2 +
 .../apache/hadoop/hbase/master/LoadBalancer.java   |  115 +-
 .../master/SnapshotOfRegionAssignmentFromMeta.java |  149 +-
 .../hbase/master/balancer/AssignRegionAction.java  |   43 +-
 .../hbase/master/balancer/BalanceAction.java       |   40 +-
 .../master/balancer/BalancerClusterState.java      |  865 ++++++++++
 .../hbase/master/balancer/BaseLoadBalancer.java    |  620 +++++++
 .../master/balancer/CPRequestCostFunction.java     |   30 +-
 .../hbase/master/balancer/CandidateGenerator.java  |   72 +-
 .../hbase/master/balancer/ClusterInfoProvider.java |  128 ++
 .../balancer/CostFromRegionLoadAsRateFunction.java |   33 +-
 .../balancer/CostFromRegionLoadFunction.java       |   82 +
 .../hadoop/hbase/master/balancer/CostFunction.java |  101 ++
 .../hbase/master/balancer/DoubleArrayCost.java     |  109 ++
 .../master/balancer/FavoredStochasticBalancer.java |  235 +--
 .../HeterogeneousRegionCountCostFunction.java      |   48 +-
 .../hbase/master/balancer/LoadBalancerFactory.java |    9 +-
 .../master/balancer/LoadCandidateGenerator.java    |    6 +-
 .../balancer/LocalityBasedCandidateGenerator.java  |   81 +
 .../master/balancer/LocalityBasedCostFunction.java |   91 +
 .../master/balancer/MemStoreSizeCostFunction.java  |   30 +-
 .../hbase/master/balancer/MoveCostFunction.java    |   82 +
 .../hbase/master/balancer/MoveRegionAction.java    |   49 +-
 .../PrimaryRegionCountSkewCostFunction.java        |   83 +
 .../master/balancer/RackLocalityCostFunction.java  |   22 +-
 .../master/balancer/RandomCandidateGenerator.java  |   24 +-
 .../master/balancer/ReadRequestCostFunction.java   |   30 +-
 .../balancer/RegionCountSkewCostFunction.java      |   75 +
 .../balancer/RegionHDFSBlockLocationFinder.java    |  187 +--
 .../balancer/RegionReplicaCandidateGenerator.java  |   32 +-
 .../RegionReplicaGroupingCostFunction.java         |  104 ++
 .../balancer/RegionReplicaHostCostFunction.java    |   71 +
 .../RegionReplicaRackCandidateGenerator.java       |   53 +
 .../balancer/RegionReplicaRackCostFunction.java    |   65 +
 .../balancer/ServerLocalityCostFunction.java       |   21 +-
 .../hbase/master/balancer/SimpleLoadBalancer.java  |  238 +--
 .../master/balancer/StochasticLoadBalancer.java    |  720 ++++++++
 .../master/balancer/StoreFileCostFunction.java     |   30 +-
 .../hbase/master/balancer/SwapRegionsAction.java   |   62 +
 .../master/balancer/TableSkewCostFunction.java     |   31 +-
 .../master/balancer/WriteRequestCostFunction.java  |   30 +-
 .../favored/TestFavoredNodeAssignmentHelper.java   |  116 +-
 .../favored/TestStartcodeAgnosticServerName.java   |    0
 .../hbase/master/balancer/BalancerTestBase.java    |  110 +-
 .../master/balancer/DummyClusterInfoProvider.java  |  124 ++
 .../hbase/master/balancer/DummyCostFunction.java   |    7 +-
 .../balancer/HeterogeneousCostRulesTestHelper.java |   67 +
 .../LoadBalancerPerformanceEvaluation.java         |   12 +-
 .../balancer/StochasticBalancerTestBase.java       |   96 ++
 .../balancer/StochasticBalancerTestBase2.java      |    8 +-
 .../master/balancer/TestBaseLoadBalancer.java      |  101 +-
 .../hbase/master/balancer/TestDoubleArrayCost.java |   46 +-
 .../TestRegionHDFSBlockLocationFinder.java         |  207 +++
 .../master/balancer/TestSimpleLoadBalancer.java    |   59 +-
 .../balancer/TestStochasticLoadBalancer.java       |  307 ++--
 .../TestStochasticLoadBalancerBalanceCluster.java  |    4 +-
 ...estStochasticLoadBalancerHeterogeneousCost.java |  140 +-
 ...ochasticLoadBalancerHeterogeneousCostRules.java |  132 +-
 .../TestStochasticLoadBalancerLargeCluster.java    |    2 +-
 .../TestStochasticLoadBalancerMidCluster.java      |    2 +-
 .../TestStochasticLoadBalancerRegionReplica.java   |   46 +-
 ...icLoadBalancerRegionReplicaHighReplication.java |    5 +-
 ...asticLoadBalancerRegionReplicaLargeCluster.java |    3 +-
 ...chasticLoadBalancerRegionReplicaMidCluster.java |    2 +-
 ...egionReplicaReplicationGreaterThanNumNodes.java |    4 +-
 ...ochasticLoadBalancerRegionReplicaSameHosts.java |    4 +-
 ...ochasticLoadBalancerRegionReplicaWithRacks.java |    4 +-
 .../TestStochasticLoadBalancerSmallCluster.java    |    2 +-
 hbase-build-configuration/pom.xml                  |    2 +-
 hbase-checkstyle/pom.xml                           |    4 +-
 hbase-client/pom.xml                               |   29 +-
 .../hadoop/hbase/PleaseRestartMasterException.java |   23 +-
 .../org/apache/hadoop/hbase/ServerMetrics.java     |   10 +
 .../apache/hadoop/hbase/ServerMetricsBuilder.java  |   43 +-
 .../hadoop/hbase/UnknownRegionException.java       |   18 +-
 .../java/org/apache/hadoop/hbase/client/Admin.java |    8 +
 .../hadoop/hbase/client/AdminOverAsyncAdmin.java   |    5 +
 .../org/apache/hadoop/hbase/client/AsyncAdmin.java |   34 +
 .../hbase/client/AsyncBatchRpcRetryingCaller.java  |   22 +-
 .../hadoop/hbase/client/AsyncConnection.java       |    4 +-
 .../hadoop/hbase/client/AsyncConnectionImpl.java   |  124 +-
 .../hadoop/hbase/client/AsyncHBaseAdmin.java       |    5 +
 .../hadoop/hbase/client/AsyncRegionLocator.java    |  165 +-
 .../org/apache/hadoop/hbase/client/AsyncTable.java |   41 +-
 .../hbase/client/AsyncTableRegionLocatorImpl.java  |   18 +-
 .../hadoop/hbase/client/BalancerRejection.java     |  116 ++
 .../hadoop/hbase/client/ClientIdGenerator.java     |    3 +-
 .../hbase/client/ColumnFamilyDescriptor.java       |   12 +
 .../client/ColumnFamilyDescriptorBuilder.java      |   36 +
 .../hadoop/hbase/client/ConnectionFactory.java     |   55 +-
 .../org/apache/hadoop/hbase/client/Delete.java     |    2 +-
 .../hbase/client/DoNotRetryRegionException.java    |    4 +
 .../org/apache/hadoop/hbase/client/HBaseHbck.java  |   16 +
 .../java/org/apache/hadoop/hbase/client/Hbck.java  |   17 +-
 .../apache/hadoop/hbase/client/MasterRegistry.java |   63 +-
 .../hadoop/hbase/client/RawAsyncHBaseAdmin.java    |   78 +-
 .../hadoop/hbase/client/RawAsyncTableImpl.java     |  357 ++--
 .../org/apache/hadoop/hbase/client/RegionInfo.java |    4 +-
 .../hadoop/hbase/client/RegionInfoBuilder.java     |    3 +-
 .../org/apache/hadoop/hbase/client/Result.java     |    3 +-
 .../java/org/apache/hadoop/hbase/client/Scan.java  |    4 +-
 .../hadoop/hbase/client/SnapshotDescription.java   |   14 +-
 .../hbase/client/TableDescriptorBuilder.java       |   18 +-
 .../hadoop/hbase/client/ZKConnectionRegistry.java  |   36 +-
 .../apache/hadoop/hbase/ipc/AbstractRpcClient.java |   83 +-
 .../hadoop/hbase/ipc/BlockingRpcConnection.java    |   18 +-
 .../java/org/apache/hadoop/hbase/ipc/Call.java     |   10 +-
 .../hadoop/hbase/ipc/HBaseRpcController.java       |   27 +-
 .../hadoop/hbase/ipc/HBaseRpcControllerImpl.java   |   46 +-
 .../java/org/apache/hadoop/hbase/ipc/IPCUtil.java  |   67 +-
 .../hadoop/hbase/ipc/NettyRpcDuplexHandler.java    |   12 +-
 .../org/apache/hadoop/hbase/ipc/RpcConnection.java |    6 +-
 .../hadoop/hbase/ipc/RpcControllerFactory.java     |   25 +-
 .../apache/hadoop/hbase/master/RegionState.java    |    7 +-
 .../hbase/replication/ReplicationPeerConfig.java   |   29 +-
 .../GssSaslClientAuthenticationProvider.java       |   30 +-
 .../hadoop/hbase/shaded/protobuf/ProtobufUtil.java |   98 +-
 .../hbase/shaded/protobuf/RequestConverter.java    |  172 +-
 .../hadoop/hbase/slowlog/SlowLogTableAccessor.java |    8 +-
 .../hadoop/hbase/zookeeper/ReadOnlyZKClient.java   |    6 +-
 .../hbase/client/TestAsyncConnectionTracing.java   |  115 ++
 .../client/TestAsyncRegionLocatorTracing.java      |  159 ++
 .../hadoop/hbase/client/TestAsyncTableTracing.java |  414 +++++
 .../hadoop/hbase/client/TestRegionInfoBuilder.java |    5 +-
 .../hadoop/hbase/client/TestRegionInfoDisplay.java |    3 +-
 .../org/apache/hadoop/hbase/client/TestScan.java   |   12 +
 .../hadoop/hbase/ipc/TestFailedServersLog.java     |   67 +-
 .../hbase/ipc/TestHBaseRpcControllerImpl.java      |    2 +-
 .../org/apache/hadoop/hbase/ipc/TestIPCUtil.java   |   17 +-
 .../replication/TestReplicationPeerConfig.java     |  366 +++--
 .../hbase/security/TestHBaseSaslRpcClient.java     |   14 +-
 hbase-common/pom.xml                               |   23 +-
 .../java/org/apache/hadoop/hbase/CellUtil.java     |    4 +
 .../java/org/apache/hadoop/hbase/ChoreService.java |   10 +-
 .../apache/hadoop/hbase/HBaseConfiguration.java    |   81 +-
 .../java/org/apache/hadoop/hbase/HConstants.java   |   75 +-
 .../hadoop/hbase/HDFSBlocksDistribution.java       |    0
 .../java/org/apache/hadoop/hbase/KeyValue.java     |    4 +-
 .../java/org/apache/hadoop/hbase/KeyValueUtil.java |    2 +-
 .../org/apache/hadoop/hbase/PrivateCellUtil.java   |   31 +-
 .../org/apache/hadoop/hbase/PrivateConstants.java  |   23 +-
 .../org/apache/hadoop/hbase/ScheduledChore.java    |    6 +-
 .../hbase/io/BoundedDelegatingInputStream.java     |  115 ++
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  |   21 -
 .../hadoop/hbase/io/DelegatingInputStream.java     |   54 +
 .../hadoop/hbase/io/compress/Compression.java      |    2 +
 .../io/hadoopbackport/ThrottledInputStream.java    |    4 +-
 .../hadoop/hbase/security/SecurityConstants.java   |   14 +-
 .../hbase/trace/HBaseHTraceConfiguration.java      |   80 -
 .../hadoop/hbase/trace/SpanReceiverHost.java       |  120 --
 .../org/apache/hadoop/hbase/trace/TraceUtil.java   |  231 ++-
 .../hadoop/hbase/util/AbstractHBaseTool.java       |   12 +-
 .../hadoop/hbase/util/CoprocessorClassLoader.java  |    4 +-
 .../java/org/apache/hadoop/hbase/util/DNS.java     |    1 -
 .../java/org/apache/hadoop/hbase/util/IdLock.java  |    4 +-
 .../org/apache/hadoop/hbase/util/Random64.java     |    4 +-
 .../apache/hadoop/hbase/util/ReflectionUtils.java  |    6 +-
 .../java/org/apache/hadoop/hbase/util/Sleeper.java |    6 +-
 .../java/org/apache/hadoop/hbase/util/Threads.java |   77 +-
 hbase-common/src/main/resources/hbase-default.xml  |   28 +-
 .../org/apache/hadoop/hbase/TableNameTestRule.java |    2 +-
 .../apache/hadoop/hbase/TestCellComparator.java    |    5 +-
 .../hadoop/hbase/TestHBaseConfiguration.java       |   13 +
 .../hadoop/hbase/TestHDFSBlocksDistribution.java   |    6 +-
 .../java/org/apache/hadoop/hbase/TestKeyValue.java |   15 +-
 .../test/java/org/apache/hadoop/hbase/Waiter.java  |    9 +-
 .../hadoop/hbase/io/TestByteBuffAllocator.java     |    3 +-
 .../apache/hadoop/hbase/logging/TestJul2Slf4j.java |   96 ++
 .../hadoop/hbase/logging/TestLog4jUtils.java       |   39 +-
 .../hadoop/hbase/util/TestByteBufferArray.java     |    2 +-
 .../org/apache/hadoop/hbase/util/TestBytes.java    |    2 +-
 .../org/apache/hadoop/hbase/util/TestThreads.java  |    4 +-
 .../src/test/resources/hbase-deprecated-conf.xml   |   26 +-
 hbase-endpoint/pom.xml                             |   20 +-
 .../hbase/client/TestRpcControllerFactory.java     |    9 +-
 .../coprocessor/TestRowProcessorEndpoint.java      |  679 --------
 .../hadoop/hbase/coprocessor/TestSecureExport.java |    3 +-
 hbase-examples/README.txt                          |    2 +-
 hbase-examples/pom.xml                             |   20 +-
 .../example/ExampleMasterObserverWithMetrics.java  |    5 +-
 .../example/WriteHeavyIncrementObserver.java       |    3 +-
 .../apache/hadoop/hbase/thrift2/DemoClient.java    |    5 +-
 .../example/TestZooKeeperScanPolicyObserver.java   |    3 +-
 hbase-external-blockcache/pom.xml                  |    6 +-
 .../hadoop/hbase/io/hfile/MemcachedBlockCache.java |   14 +-
 hbase-hadoop-compat/pom.xml                        |  324 ++--
 .../master/MetricsAssignmentManagerSource.java     |   70 +
 .../master/MetricsAssignmentManagerSourceImpl.java |   58 +
 .../hadoop/hbase/master/MetricsMasterSource.java   |   22 +
 .../hbase/master/MetricsMasterSourceImpl.java      |   18 +
 .../hadoop/hbase/master/MetricsMasterWrapper.java  |   14 +
 .../regionserver/MetricsRegionSourceImpl.java      |    8 +-
 .../hbase/regionserver/MetricsTableQueryMeter.java |    3 +
 .../regionserver/MetricsTableQueryMeterImpl.java   |    3 -
 .../hbase/regionserver/wal/MetricsWALSource.java   |    9 +
 .../regionserver/wal/MetricsWALSourceImpl.java     |   13 +
 .../MetricsReplicationGlobalSourceSourceImpl.java  |   26 +-
 .../MetricsReplicationSourceSource.java            |    8 +-
 .../MetricsReplicationSourceSourceImpl.java        |   26 +
 .../metrics2/lib/DynamicMetricsRegistry.java       |   36 +-
 .../hadoop/hbase/test/MetricsAssertHelper.java     |   12 +-
 .../hadoop/hbase/test/MetricsAssertHelperImpl.java |   11 +-
 hbase-hbtop/pom.xml                                |   15 +-
 .../apache/hadoop/hbase/hbtop/screen/Screen.java   |    5 +-
 .../hbase/hbtop/screen/top/TopScreenModel.java     |    3 +-
 .../hbase/hbtop/screen/top/TopScreenPresenter.java |    5 +-
 .../hbase/hbtop/terminal/impl/TerminalImpl.java    |    6 +-
 .../hbtop/terminal/impl/TerminalPrinterImpl.java   |    3 +-
 .../hbtop/terminal/impl/batch/BatchTerminal.java   |    7 +
 .../terminal/impl/batch/BatchTerminalPrinter.java  |    3 +
 hbase-http/pom.xml                                 |   20 +-
 .../apache/hadoop/hbase/http/HttpRequestLog.java   |    9 +-
 .../org/apache/hadoop/hbase/http/HttpServer.java   |   31 +-
 .../org/apache/hadoop/hbase/http/InfoServer.java   |    1 +
 .../apache/hadoop/hbase/http/NoCacheFilter.java    |    4 +-
 .../apache/hadoop/hbase/http/ProfileServlet.java   |   13 +-
 .../org/apache/hadoop/hbase/http/log/LogLevel.java |    4 +-
 .../apache/hadoop/hbase/util/LogMonitoring.java    |    8 +-
 .../hadoop/hbase/http/TestHttpRequestLog.java      |   10 +-
 .../apache/hadoop/hbase/http/TestHttpServer.java   |   69 +-
 .../apache/hadoop/hbase/http/log/TestLogLevel.java |  199 +--
 hbase-it/pom.xml                                   |   24 +-
 .../org/apache/hadoop/hbase/ChaosZKClient.java     |    5 +-
 .../hadoop/hbase/DistributedHBaseCluster.java      |   17 +-
 .../hadoop/hbase/IntegrationTestBackupRestore.java |    5 +-
 .../apache/hadoop/hbase/IntegrationTestIngest.java |    7 +-
 .../hadoop/hbase/IntegrationTestManyRegions.java   |    5 +-
 .../IntegrationTestRegionReplicaReplication.java   |    7 +-
 .../hadoop/hbase/IntegrationTestingUtility.java    |    6 +-
 .../StripeCompactionsPerformanceEvaluation.java    |   10 +-
 .../chaos/actions/MoveRegionsOfTableAction.java    |    5 +-
 .../chaos/actions/RestartRsHoldingMetaAction.java  |    2 +-
 .../hbase/chaos/actions/SnapshotTableAction.java   |    3 +-
 .../StressAssignmentManagerMonkeyFactory.java      |    4 +-
 .../hbase/chaos/policies/PeriodicPolicy.java       |    5 +-
 .../hadoop/hbase/chaos/util/ChaosMonkeyRunner.java |   32 +-
 .../IntegrationTestTableSnapshotInputFormat.java   |    3 +-
 .../hadoop/hbase/mttr/IntegrationTestMTTR.java     |   22 +-
 .../hbase/test/IntegrationTestBigLinkedList.java   |   42 +-
 .../hbase/test/IntegrationTestLoadCommonCrawl.java |  839 ++++++++++
 ...nTestTimeBoundedRequestsWithRegionReplicas.java |    5 +-
 .../org/apache/hadoop/hbase/test/util/CRC64.java   |   64 +
 .../hbase/test/util/warc/WARCFileReader.java       |  165 ++
 .../hbase/test/util/warc/WARCFileWriter.java       |  245 +++
 .../hbase/test/util/warc/WARCInputFormat.java      |  134 ++
 .../hbase/test/util/warc/WARCOutputFormat.java     |  103 ++
 .../hadoop/hbase/test/util/warc/WARCRecord.java    |  396 +++++
 .../hadoop/hbase/test/util/warc/WARCWritable.java  |   96 ++
 .../trace/IntegrationTestSendTraceRequests.java    |  127 +-
 .../test/resources/CC-MAIN-2021-10-warc.paths.gz   |  Bin 0 -> 174648 bytes
 hbase-logging/pom.xml                              |   30 +-
 .../hadoop/hbase/logging/InternalLog4jUtils.java   |   58 +-
 .../hbase/logging/JulToSlf4jInitializer.java       |   34 +-
 .../test/java/org/apache/log4j/FileAppender.java   |  288 ++++
 hbase-logging/src/test/resources/log4j.properties  |   68 -
 hbase-logging/src/test/resources/log4j2.xml        |   46 +
 hbase-mapreduce/pom.xml                            |   29 +-
 .../hadoop/hbase/mapred/TableRecordReaderImpl.java |    7 +-
 .../hadoop/hbase/mapreduce/HFileOutputFormat2.java |   78 +-
 .../apache/hadoop/hbase/mapreduce/ImportTsv.java   |    3 +-
 .../mapreduce/RoundRobinTableInputFormat.java      |  172 ++
 .../apache/hadoop/hbase/mapreduce/SyncTable.java   |    3 +-
 .../hadoop/hbase/mapreduce/TableMapReduceUtil.java |   20 +-
 .../hbase/mapreduce/TableRecordReaderImpl.java     |    7 +-
 .../apache/hadoop/hbase/mapreduce/TableSplit.java  |   14 +-
 .../apache/hadoop/hbase/mapreduce/WALPlayer.java   |   13 +-
 .../hadoop/hbase/snapshot/ExportSnapshot.java      |    7 +-
 .../apache/hadoop/hbase/PerformanceEvaluation.java |   34 +-
 .../hadoop/hbase/mapreduce/TestCellCounter.java    |    3 +-
 .../hadoop/hbase/mapreduce/TestCopyTable.java      |    3 +-
 .../hbase/mapreduce/TestHFileOutputFormat2.java    |  201 +++
 .../hadoop/hbase/mapreduce/TestImportExport.java   |    5 +-
 .../mapreduce/TestRoundRobinTableInputFormat.java  |  177 ++
 .../hadoop/hbase/mapreduce/TestRowCounter.java     |   17 +-
 .../hadoop/hbase/mapreduce/TestSyncTable.java      |    5 +-
 .../hadoop/hbase/mapreduce/TestTableSplit.java     |   24 +-
 .../hadoop/hbase/mapreduce/TestWALInputFormat.java |    3 +-
 .../hbase/mapreduce/TestWALRecordReader.java       |   32 +-
 .../hbase/replication/TestVerifyReplication.java   |   21 +-
 .../replication/TestVerifyReplicationAdjunct.java  |   15 +-
 .../TestVerifyReplicationCrossDiffHdfs.java        |    5 +-
 .../hadoop/hbase/snapshot/TestExportSnapshot.java  |    6 +-
 .../snapshot/TestExportSnapshotV1NoCluster.java    |    3 +-
 .../org/apache/hadoop/hbase/util/LoadTestTool.java |   17 +-
 hbase-metrics-api/pom.xml                          |   15 +-
 hbase-metrics/pom.xml                              |   15 +-
 hbase-procedure/pom.xml                            |   15 +-
 .../hadoop/hbase/procedure2/ProcedureExecutor.java |    2 +-
 .../procedure2/RemoteProcedureDispatcher.java      |    8 +-
 .../hbase/procedure2/TimeoutExecutorThread.java    |    4 +-
 .../procedure2/store/wal/WALProcedureStore.java    |    9 +-
 .../hadoop/hbase/procedure2/util/DelayedUtil.java  |    5 +-
 .../TestProcedureSchedulerConcurrency.java         |    5 +-
 .../store/ProcedureStorePerformanceEvaluation.java |    5 +-
 .../ProcedureWALLoaderPerformanceEvaluation.java   |   14 +-
 hbase-protocol-shaded/pom.xml                      |    6 +-
 .../src/main/protobuf/client/Client.proto          |    4 +-
 .../src/main/protobuf/rpc/Tracing.proto            |   14 +-
 .../src/main/protobuf/server/ClusterStatus.proto   |   10 +
 .../src/main/protobuf/server/Snapshot.proto        |    1 +
 .../server/coprocessor/MultiRowMutation.proto      |    2 +
 .../protobuf/server/coprocessor/RowProcessor.proto |   46 -
 .../src/main/protobuf/server/master/Master.proto   |   21 +
 .../protobuf/server/master/MasterProcedure.proto   |   25 +-
 .../main/protobuf/server/master/RecentLogs.proto   |    5 +
 .../src/main/protobuf/server/region/WAL.proto      |    2 +
 hbase-replication/pom.xml                          |   20 +-
 .../hbase/replication/ReplicationFactory.java      |   10 +-
 .../hbase/replication/ReplicationTracker.java      |   50 -
 .../replication/ReplicationTrackerZKImpl.java      |  199 ---
 .../replication/TestZKReplicationPeerStorage.java  |   30 +-
 hbase-resource-bundle/pom.xml                      |    2 +-
 .../src/main/resources/META-INF/LICENSE.vm         |    2 +-
 .../src/main/resources/META-INF/NOTICE.vm          |    2 +-
 hbase-rest/pom.xml                                 |   20 +-
 .../org/apache/hadoop/hbase/rest/Constants.java    |    8 +
 .../org/apache/hadoop/hbase/rest/RESTServer.java   |    3 +-
 .../hadoop/hbase/rest/ScannerResultGenerator.java  |    3 +-
 .../apache/hadoop/hbase/rest/client/Client.java    |   71 +-
 .../hadoop/hbase/rest/PerformanceEvaluation.java   |    3 +-
 .../hadoop/hbase/rest/TestRESTServerSSL.java       |    5 +
 .../hbase/rest/client/TestRemoteAdminRetries.java  |    5 +-
 .../hbase/rest/client/TestRemoteHTableRetries.java |    5 +-
 .../hadoop/hbase/rest/client/TestRemoteTable.java  |    3 +-
 hbase-server/pom.xml                               |   43 +-
 .../hbase/tmpl/master/MasterStatusTmpl.jamon       |   32 +-
 .../hbase/tmpl/regionserver/RSStatusTmpl.jamon     |    8 +-
 .../org/apache/hadoop/hbase/HealthCheckChore.java  |   12 +-
 .../org/apache/hadoop/hbase/MetaTableAccessor.java |   13 +-
 .../java/org/apache/hadoop/hbase/ZNodeClearer.java |   32 +-
 .../hbase/client/AsyncRegionServerAdmin.java       |    2 +-
 .../client/coprocessor/RowProcessorClient.java     |   53 -
 .../coprocessor/BaseRowProcessorEndpoint.java      |  149 --
 .../hadoop/hbase/coprocessor/MetaTableMetrics.java |    4 +-
 .../coprocessor/MultiRowMutationEndpoint.java      |  184 ++-
 .../hadoop/hbase/coprocessor/RegionObserver.java   |    2 +-
 .../apache/hadoop/hbase/executor/EventHandler.java |   16 +-
 .../apache/hadoop/hbase/executor/EventType.java    |    9 +-
 .../hadoop/hbase/executor/ExecutorService.java     |   40 +-
 .../apache/hadoop/hbase/executor/ExecutorType.java |    3 +-
 .../java/org/apache/hadoop/hbase/io/Reference.java |    6 +-
 .../hadoop/hbase/io/hfile/BlockCacheFactory.java   |   14 -
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |    9 +-
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |   11 +-
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java     |    9 +-
 .../hbase/io/hfile/LruAdaptiveBlockCache.java      |    5 +-
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |    7 +-
 .../hadoop/hbase/io/hfile/PrefetchExecutor.java    |    3 +-
 .../hadoop/hbase/io/hfile/TinyLfuBlockCache.java   |   54 +-
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |   19 +-
 .../org/apache/hadoop/hbase/ipc/CallRunner.java    |   64 +-
 .../hadoop/hbase/ipc/NettyRpcFrameDecoder.java     |    2 +
 .../apache/hadoop/hbase/ipc/NettyRpcServer.java    |    8 +-
 .../hbase/ipc/NettyRpcServerRequestDecoder.java    |    2 +-
 .../hadoop/hbase/ipc/NettyServerRpcConnection.java |    3 +-
 .../org/apache/hadoop/hbase/ipc/RpcServer.java     |    3 +-
 .../org/apache/hadoop/hbase/ipc/ServerCall.java    |   36 +-
 .../hadoop/hbase/ipc/ServerRpcConnection.java      |  221 +--
 .../apache/hadoop/hbase/ipc/SimpleRpcServer.java   |   18 +-
 .../hadoop/hbase/ipc/SimpleRpcServerResponder.java |    7 +-
 .../hbase/ipc/SimpleServerRpcConnection.java       |    5 +-
 .../org/apache/hadoop/hbase/master/HMaster.java    |  202 ++-
 .../hadoop/hbase/master/HMasterCommandLine.java    |    6 +-
 .../org/apache/hadoop/hbase/master/HbckChore.java  |   39 +-
 .../hadoop/hbase/master/MasterRpcServices.java     |   79 +-
 .../hbase/master/MetricsAssignmentManager.java     |   28 +
 .../apache/hadoop/hbase/master/MetricsMaster.java  |   14 +
 .../hbase/master/MetricsMasterWrapperImpl.java     |   24 +
 .../apache/hadoop/hbase/master/ServerManager.java  |   50 +-
 .../hbase/master/assignment/AssignmentManager.java |   97 +-
 .../master/assignment/AssignmentManagerUtil.java   |   63 +
 .../hbase/master/assignment/RegionStateStore.java  |    4 +-
 .../hbase/master/assignment/RegionStates.java      |   20 +-
 .../assignment/SplitTableRegionProcedure.java      |   27 +-
 .../hbase/master/balancer/BaseLoadBalancer.java    | 1730 --------------------
 .../hbase/master/balancer/ClusterStatusChore.java  |    2 +-
 .../balancer/LocalityBasedCandidateGenerator.java  |   93 --
 .../master/balancer/MaintenanceLoadBalancer.java   |  124 ++
 .../master/balancer/MasterClusterInfoProvider.java |  198 +++
 .../master/balancer/StochasticLoadBalancer.java    | 1533 -----------------
 .../hadoop/hbase/master/cleaner/CleanerChore.java  |   13 +-
 .../hadoop/hbase/master/cleaner/DirScanPool.java   |    5 +-
 .../hadoop/hbase/master/cleaner/HFileCleaner.java  |    5 +-
 .../hbase/master/{ => http}/MasterDumpServlet.java |    6 +-
 .../master/{ => http}/MasterRedirectServlet.java   |    6 +-
 .../master/{ => http}/MasterStatusServlet.java     |    4 +-
 .../hbase/master/{webapp => http}/MetaBrowser.java |    2 +-
 .../master/{webapp => http}/RegionReplicaInfo.java |    2 +-
 .../hbase/master/janitor/CatalogJanitor.java       |   99 +-
 .../hadoop/hbase/master/locking/LockManager.java   |    9 +-
 .../hadoop/hbase/master/locking/LockProcedure.java |    7 +-
 .../master/normalizer/SimpleRegionNormalizer.java  |   89 +-
 .../hbase/master/normalizer/package-info.java      |    6 +-
 .../master/procedure/ServerCrashProcedure.java     |    7 +-
 .../master/procedure/ServerProcedureInterface.java |   22 +-
 .../hadoop/hbase/master/procedure/ServerQueue.java |    2 +
 .../ClaimReplicationQueueRemoteProcedure.java      |  127 ++
 .../ClaimReplicationQueuesProcedure.java           |  147 ++
 .../hbase/master/snapshot/SnapshotFileCache.java   |   48 +-
 .../master/snapshot/SnapshotHFileCleaner.java      |    2 +-
 .../hbase/master/snapshot/SnapshotManager.java     |    4 +
 .../hbase/master/snapshot/TakeSnapshotHandler.java |    9 +-
 .../hadoop/hbase/mob/DefaultMobStoreCompactor.java |   20 +-
 .../hadoop/hbase/mob/MobFileCleanerChore.java      |    5 -
 .../java/org/apache/hadoop/hbase/mob/MobUtils.java |   12 +-
 .../monitoring/MemoryBoundedLogMessageBuffer.java  |    4 +-
 .../hbase/monitoring/MonitoredRPCHandlerImpl.java  |    4 +-
 .../hadoop/hbase/monitoring/MonitoredTaskImpl.java |    8 +-
 .../hbase/namequeues/BalancerRejectionDetails.java |   39 +-
 .../hadoop/hbase/namequeues/NamedQueuePayload.java |    6 +-
 .../impl/BalancerRejectionQueueService.java        |  133 ++
 .../hbase/namequeues/impl/SlowLogQueueService.java |    3 +-
 .../namequeues/request/NamedQueueGetRequest.java   |   11 +
 .../namequeues/response/NamedQueueGetResponse.java |   10 +
 .../hbase/procedure2/BaseRSProcedureCallable.java  |   42 +-
 .../hadoop/hbase/quotas/DefaultOperationQuota.java |   11 -
 .../hadoop/hbase/quotas/ExceedOperationQuota.java  |    1 -
 .../hadoop/hbase/quotas/NoopOperationQuota.java    |    5 -
 .../apache/hadoop/hbase/quotas/OperationQuota.java |    3 -
 .../hbase/regionserver/BaseRowProcessor.java       |   71 -
 .../hbase/regionserver/BusyRegionSplitPolicy.java  |    8 +-
 .../ConstantSizeRegionSplitPolicy.java             |   21 +-
 .../regionserver/CustomizedScanInfoBuilder.java    |   16 +-
 .../regionserver/DefaultStoreFileManager.java      |   11 +-
 .../DelimitedKeyPrefixRegionSplitPolicy.java       |   10 +
 ... DelimitedKeyPrefixRegionSplitRestriction.java} |   63 +-
 .../hbase/regionserver/FlushAllStoresPolicy.java   |    7 +-
 .../hbase/regionserver/FlushLargeStoresPolicy.java |    5 +
 .../apache/hadoop/hbase/regionserver/HRegion.java  |  566 +++----
 .../hbase/regionserver/HRegionFileSystem.java      |   22 +
 .../hadoop/hbase/regionserver/HRegionServer.java   |  144 +-
 .../regionserver/HRegionServerCommandLine.java     |    8 +-
 .../apache/hadoop/hbase/regionserver/HStore.java   |   42 +-
 .../IncreasingToUpperBoundRegionSplitPolicy.java   |    6 +
 .../regionserver/KeyPrefixRegionSplitPolicy.java   |   10 +
 .../KeyPrefixRegionSplitRestriction.java           |   76 +
 .../MemStoreCompactorSegmentsIterator.java         |    3 +-
 .../hadoop/hbase/regionserver/MemStoreFlusher.java |   93 +-
 .../hbase/regionserver/MetricsRegionServer.java    |   45 +-
 .../MetricsRegionServerWrapperImpl.java            |   27 +-
 ...esPolicy.java => NoRegionSplitRestriction.java} |   19 +-
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  150 +-
 .../apache/hadoop/hbase/regionserver/Region.java   |   47 +-
 .../hbase/regionserver/RegionScannerImpl.java      |   56 +-
 .../regionserver/RegionServerTableMetrics.java     |   26 +-
 .../regionserver/RegionServicesForStores.java      |    8 +-
 .../hbase/regionserver/RegionSplitRestriction.java |  129 ++
 .../hadoop/hbase/regionserver/RowProcessor.java    |  159 --
 .../apache/hadoop/hbase/regionserver/ScanInfo.java |   10 +-
 .../hadoop/hbase/regionserver/ScanOptions.java     |    4 +
 .../hadoop/hbase/regionserver/ScannerContext.java  |    9 +-
 .../hbase/regionserver/SplitWALCallable.java       |   31 +-
 .../hbase/regionserver/SteppingSplitPolicy.java    |    7 +-
 .../hbase/regionserver/StoreFileManager.java       |   24 +-
 .../hadoop/hbase/regionserver/StoreFlusher.java    |    3 +-
 .../hadoop/hbase/regionserver/StoreScanner.java    |   19 +-
 .../hbase/regionserver/StripeStoreFileManager.java |   66 +-
 .../compactions/AbstractMultiOutputCompactor.java  |    6 +-
 .../hbase/regionserver/compactions/Compactor.java  |   77 +-
 .../compactions/CurrentHourProvider.java           |   21 +-
 .../compactions/DateTieredCompactor.java           |    4 +-
 .../regionserver/compactions/DefaultCompactor.java |    4 +-
 .../compactions/StripeCompactionPolicy.java        |   29 +-
 .../regionserver/compactions/StripeCompactor.java  |    8 +-
 .../regionserver/handler/WALSplitterHandler.java   |    5 +-
 .../regionserver/{ => http}/RSDumpServlet.java     |   46 +-
 .../regionserver/{ => http}/RSStatusServlet.java   |    3 +-
 .../querymatcher/ScanQueryMatcher.java             |    7 +-
 .../hbase/regionserver/wal/AbstractFSWAL.java      |  114 +-
 .../wal/AbstractProtobufLogWriter.java             |   38 +-
 .../hadoop/hbase/regionserver/wal/AsyncFSWAL.java  |  144 +-
 .../hbase/regionserver/wal/CompressionContext.java |  169 +-
 .../hadoop/hbase/regionserver/wal/FSHLog.java      |   75 +-
 .../hadoop/hbase/regionserver/wal/MetricsWAL.java  |   10 +
 .../hbase/regionserver/wal/ProtobufLogReader.java  |   37 +-
 .../hadoop/hbase/regionserver/wal/ReaderBase.java  |   20 +-
 .../hadoop/hbase/regionserver/wal/SyncFuture.java  |  151 +-
 .../hbase/regionserver/wal/SyncFutureCache.java    |   74 +
 .../hbase/regionserver/wal/WALCellCodec.java       |   61 +-
 .../hadoop/hbase/regionserver/wal/WALUtil.java     |    3 +-
 .../hbase/replication/HReplicationServer.java      |    2 -
 .../NamespaceTableCfWALEntryFilter.java            |   84 +-
 ...ble.java => ClaimReplicationQueueCallable.java} |   47 +-
 .../regionserver/DumpReplicationQueues.java        |   31 +-
 .../HBaseInterClusterReplicationEndpoint.java      |    6 +-
 .../replication/regionserver/MetricsSink.java      |    7 +-
 .../replication/regionserver/MetricsSource.java    |   45 +-
 .../regionserver/PeerProcedureHandler.java         |    4 +
 .../regionserver/PeerProcedureHandlerImpl.java     |    7 +
 .../RecoveredReplicationSourceShipper.java         |    2 +-
 .../regionserver/RefreshPeerCallable.java          |   31 +-
 .../ReplaySyncReplicationWALCallable.java          |   44 +-
 .../replication/regionserver/Replication.java      |   10 +-
 .../regionserver/ReplicationSource.java            |   33 +-
 .../regionserver/ReplicationSourceLogQueue.java    |    4 +
 .../regionserver/ReplicationSourceManager.java     |  417 ++---
 .../regionserver/ReplicationSourceShipper.java     |    5 +-
 .../regionserver/ReplicationSourceWALReader.java   |  129 +-
 .../regionserver/ReplicationSyncUp.java            |   43 +-
 .../SerialReplicationSourceWALReader.java          |   22 +-
 .../SwitchRpcThrottleRemoteCallable.java           |   27 +-
 .../replication/regionserver/WALEntryBatch.java    |    4 +
 .../WALEntryFilterRetryableException.java          |   29 +-
 .../replication/regionserver/WALEntryStream.java   |   17 +-
 .../hbase/rsgroup/RSGroupBasedLoadBalancer.java    |  108 +-
 .../hbase/security/access/AccessController.java    |   47 +-
 .../access/SnapshotScannerHDFSAclHelper.java       |   33 +-
 .../security/visibility/VisibilityController.java  |    2 +-
 .../org/apache/hadoop/hbase/tool/CanaryTool.java   |   10 +-
 .../hadoop/hbase/util/FSTableDescriptors.java      |   45 +-
 .../java/org/apache/hadoop/hbase/util/FSUtils.java |   34 +-
 .../apache/hadoop/hbase/util/HFileArchiveUtil.java |   11 +-
 .../apache/hadoop/hbase/util/JVMClusterUtil.java   |    4 +-
 .../hadoop/hbase/util/ModifyRegionUtils.java       |    2 +-
 .../apache/hadoop/hbase/util/RegionSplitter.java   |    6 +-
 .../hadoop/hbase/util/TableDescriptorChecker.java  |    2 +
 .../util/compaction/MajorCompactionTTLRequest.java |    3 +-
 .../hbase/util/compaction/MajorCompactor.java      |    4 +-
 .../apache/hadoop/hbase/wal/AbstractWALRoller.java |    8 +-
 .../org/apache/hadoop/hbase/wal/WALFactory.java    |   58 +-
 .../org/apache/hadoop/hbase/wal/WALSplitUtil.java  |    3 +-
 .../main/resources/hbase-webapps/master/header.jsp |    2 +
 .../hbase-webapps/master/operationDetails.jsp      |  136 ++
 .../main/resources/hbase-webapps/master/table.jsp  |   58 +-
 .../hbase-webapps/regionserver/header.jsp          |    1 +
 .../regionserver/rsOperationDetails.jsp            |  176 ++
 .../static/js/parser-date-iso8601.min.js           |    4 +
 .../java/org/apache/hadoop/hbase/HBaseCluster.java |    7 +-
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |   18 +-
 .../hadoop/hbase/HFilePerformanceEvaluation.java   |    5 +-
 .../org/apache/hadoop/hbase/MiniHBaseCluster.java  |    9 +-
 .../apache/hadoop/hbase/MultithreadedTestUtil.java |    5 +-
 .../hadoop/hbase/PerformanceEvaluationCommons.java |    5 +-
 .../hadoop/hbase/TestClusterPortAssignment.java    |   17 +-
 .../hadoop/hbase/TestExecutorStatusChore.java      |   12 +-
 .../hadoop/hbase/TestGlobalMemStoreSize.java       |    5 +-
 .../org/apache/hadoop/hbase/TestIOFencing.java     |   17 +-
 .../apache/hadoop/hbase/TestMetaTableAccessor.java |   12 +-
 .../apache/hadoop/hbase/TestMetaTableLocator.java  |    3 +-
 .../hbase/TestMetaUpdatesGoToPriorityQueue.java    |    3 +-
 .../org/apache/hadoop/hbase/TestSerialization.java |    9 +-
 .../org/apache/hadoop/hbase/TestZooKeeper.java     |    3 +-
 .../hadoop/hbase/backup/TestHFileArchiving.java    |    9 +-
 ...shotFromClientAfterSplittingRegionTestBase.java |    5 +-
 ...hotFromClientCloneLinksAfterDeleteTestBase.java |    7 +-
 .../CloneSnapshotFromClientErrorTestBase.java      |    5 +-
 .../CloneSnapshotFromClientNormalTestBase.java     |    7 +-
 .../client/CloneSnapshotFromClientTestBase.java    |    4 +-
 .../RestoreSnapshotFromClientCloneTestBase.java    |    3 +-
 .../RestoreSnapshotFromClientSimpleTestBase.java   |    3 +-
 .../client/RestoreSnapshotFromClientTestBase.java  |    4 +-
 .../org/apache/hadoop/hbase/client/TestAdmin.java  |   11 +-
 .../org/apache/hadoop/hbase/client/TestAdmin2.java |    9 +-
 .../org/apache/hadoop/hbase/client/TestAdmin3.java |    3 +-
 .../hbase/client/TestAsyncClusterAdminApi.java     |   21 +
 .../hbase/client/TestAsyncRegionAdminApi.java      |   12 +-
 .../apache/hadoop/hbase/client/TestAsyncTable.java |    5 +-
 .../hbase/client/TestAsyncTableAdminApi.java       |   39 +-
 .../hbase/client/TestAsyncTableAdminApi2.java      |    5 +-
 .../TestAsyncTableBatchRetryImmediately.java       |   11 +-
 .../client/TestAsyncTableGetMultiThreaded.java     |    4 +-
 .../hbase/client/TestAsyncTableNoncedRetry.java    |  268 ++-
 .../hbase/client/TestBlockEvictionFromClient.java  |   10 +-
 .../hbase/client/TestClientScannerRPCTimeout.java  |    5 +-
 .../apache/hadoop/hbase/client/TestConnection.java |    2 +-
 .../hadoop/hbase/client/TestFromClientSide.java    |    5 +-
 .../hadoop/hbase/client/TestFromClientSide3.java   |    3 +-
 .../hadoop/hbase/client/TestFromClientSide4.java   |    4 +-
 .../hadoop/hbase/client/TestFromClientSide5.java   |  375 ++++-
 .../org/apache/hadoop/hbase/client/TestHbck.java   |    5 +-
 .../hbase/client/TestIncrementsFromClientSide.java |   53 +
 .../hbase/client/TestMalformedCellFromClient.java  |    4 +-
 ...oneSnapshotFromClientCloneLinksAfterDelete.java |    5 +-
 .../hbase/client/TestMultiRespectsLimits.java      |   10 +-
 .../hbase/client/TestMutationGetCellBuilder.java   |    3 +-
 .../client/TestPutDeleteEtcCellIteration.java      |    3 +-
 .../hbase/client/TestRequestTooBigException.java   |   85 +
 .../org/apache/hadoop/hbase/client/TestResult.java |  141 ++
 .../hbase/client/TestScannersFromClientSide.java   |    4 +-
 .../client/TestSnapshotCloneIndependence.java      |    5 +-
 .../hadoop/hbase/client/TestSnapshotMetadata.java  |    9 +-
 .../client/TestSnapshotTemporaryDirectory.java     |    5 +-
 .../hadoop/hbase/client/TestTableFavoredNodes.java |    5 +-
 .../hbase/client/TestTableSnapshotScanner.java     |    3 +-
 .../hbase/client/locking/TestEntityLocks.java      |    5 +-
 .../hadoop/hbase/codec/CodecPerformance.java       |    9 +-
 .../hbase/coprocessor/SimpleRegionObserver.java    |    1 +
 .../hbase/coprocessor/TestCoprocessorMetrics.java  |    5 +-
 .../hbase/coprocessor/TestCoprocessorStop.java     |    8 +-
 .../TestPostIncrementAndAppendBeforeWAL.java       |  137 +-
 .../coprocessor/TestRegionCoprocessorHost.java     |    2 +
 .../coprocessor/TestRegionObserverInterface.java   |    4 +-
 .../coprocessor/TestRegionObserverStacking.java    |    7 +-
 .../hadoop/hbase/coprocessor/TestWALObserver.java  |    8 +-
 .../hadoop/hbase/executor/TestExecutorService.java |   16 +-
 .../hbase/filter/TestDependentColumnFilter.java    |    3 +-
 .../hbase/filter/TestFilterSerialization.java      |    5 +-
 .../filter/TestFuzzyRowAndColumnRangeFilter.java   |    5 +-
 .../hbase/filter/TestFuzzyRowFilterEndToEnd.java   |    9 +-
 .../apache/hadoop/hbase/fs/TestBlockReorder.java   |   10 +-
 .../hbase/fs/TestBlockReorderBlockLocation.java    |    5 +-
 .../hbase/fs/TestBlockReorderMultiBlocks.java      |    9 +-
 .../org/apache/hadoop/hbase/io/TestHeapSize.java   |    5 +-
 .../hbase/io/encoding/TestChangingEncoding.java    |    9 +-
 .../io/encoding/TestLoadAndSwitchEncodeOnDisk.java |    2 +-
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    |   74 +-
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |    5 +-
 .../hfile/TestHFileScannerImplReferenceCount.java  |    3 +-
 .../hbase/io/hfile/TestScannerFromBucketCache.java |    6 +-
 .../apache/hadoop/hbase/ipc/AbstractTestIPC.java   |   87 +-
 .../org/apache/hadoop/hbase/ipc/TestNettyIPC.java  |    1 +
 .../apache/hadoop/hbase/ipc/TestProtoBufRpc.java   |    7 +-
 .../hbase/ipc/TestProtobufRpcServiceImpl.java      |    8 +-
 .../hbase/ipc/TestRpcServerTraceLogging.java       |   24 +-
 .../hadoop/hbase/ipc/TestSimpleRpcScheduler.java   |   16 +-
 .../hadoop/hbase/master/AbstractTestDLS.java       |   10 +-
 .../hbase/master/MockNoopMasterServices.java       |    2 +-
 .../hbase/master/TestActiveMasterManager.java      |   92 +-
 .../hbase/master/TestClockSkewDetection.java       |   11 +-
 .../hadoop/hbase/master/TestMasterMetrics.java     |   57 +-
 .../hbase/master/TestMasterMetricsWrapper.java     |   12 +-
 .../hadoop/hbase/master/TestMasterNoCluster.java   |    8 +-
 .../hbase/master/TestMasterNotCarryTable.java      |    7 -
 .../hadoop/hbase/master/TestMasterQosFunction.java |    2 +-
 .../hadoop/hbase/master/TestMasterRepairMode.java  |   44 +-
 .../master/TestMetaAssignmentWithStopMaster.java   |    9 +-
 .../hadoop/hbase/master/TestRegionPlacement.java   |    3 +-
 .../hadoop/hbase/master/TestRegionPlacement2.java  |   20 +-
 .../hbase/master/TestRegionsRecoveryChore.java     |   10 +
 .../master/TestRetainAssignmentOnRestart.java      |   34 +-
 .../hadoop/hbase/master/TestRollingRestart.java    |   17 +-
 .../hadoop/hbase/master/TestSplitLogManager.java   |    3 +-
 .../hadoop/hbase/master/TestWarmupRegion.java      |    3 +-
 .../master/assignment/AssignmentTestingUtil.java   |   25 +
 .../master/assignment/MockMasterServices.java      |    7 +-
 ...estAssignRegionToUninitializedRegionServer.java |  163 ++
 .../master/assignment/TestAssignmentManager.java   |    5 +-
 .../assignment/TestAssignmentManagerBase.java      |    4 +-
 .../master/assignment/TestRegionReplicaSplit.java  |    3 +-
 .../hbase/master/assignment/TestRegionSplit.java   |   41 +-
 ...ava => TestRegionSplitAndSeparateChildren.java} |   65 +-
 .../master/assignment/TestRegionStateStore.java    |   10 +-
 .../hbase/master/assignment/TestRegionStates.java  |   13 +-
 .../master/assignment/TestRogueRSAssignment.java   |    4 +-
 .../assignment/TestSplitTableRegionProcedure.java  |   64 +-
 .../TestTransitRegionStateProcedure.java           |    4 +-
 .../LoadOnlyFavoredStochasticBalancer.java         |    9 +-
 .../balancer/RSGroupableBalancerTestBase.java      |    8 +-
 .../master/balancer/TestBalancerDecision.java      |   20 +-
 .../master/balancer/TestBalancerRejection.java     |  133 ++
 .../TestFavoredStochasticBalancerPickers.java      |   31 +-
 .../balancer/TestRSGroupBasedLoadBalancer.java     |   11 +-
 ...lancerWithStochasticLoadBalancerAsInternal.java |   13 +-
 .../master/balancer/TestRegionLocationFinder.java  |  170 --
 .../balancer/TestRegionsOnMasterOptions.java       |  220 ---
 .../TestStochasticBalancerJmxMetrics.java          |   25 +-
 ...BalancerHeterogeneousCostRulesLoadFromHDFS.java |   81 +
 .../hbase/master/cleaner/TestHFileCleaner.java     |   73 +-
 .../hbase/master/cleaner/TestHFileLinkCleaner.java |   64 +-
 .../hbase/master/cleaner/TestLogsCleaner.java      |   74 +-
 .../cleaner/TestReplicationHFileCleaner.java       |   75 +-
 .../master/{ => http}/TestMasterStatusServlet.java |    6 +-
 .../master/{webapp => http}/TestMetaBrowser.java   |    2 +-
 .../{webapp => http}/TestMetaBrowserNoCluster.java |    4 +-
 .../hbase/master/janitor/TestCatalogJanitor.java   |   19 +-
 .../master/janitor/TestCatalogJanitorCluster.java  |   11 +-
 .../janitor/TestCatalogJanitorInMemoryStates.java  |   26 +-
 .../hadoop/hbase/master/janitor/TestMetaFixer.java |   11 +-
 .../hbase/master/locking/TestLockProcedure.java    |    9 +-
 ...gionNormalizerManagerConfigurationObserver.java |    6 +-
 .../normalizer/TestSimpleRegionNormalizer.java     |   39 +-
 ...terProcedureSchedulerPerformanceEvaluation.java |    8 +-
 .../procedure/MasterProcedureTestingUtility.java   |    6 +-
 .../procedure/TestCloneSnapshotProcedure.java      |    3 +-
 .../hadoop/hbase/master/procedure/TestHBCKSCP.java |   30 +-
 .../TestHBCKSCPUnknown.java}                       |   38 +-
 .../procedure/TestRestoreSnapshotProcedure.java    |    3 +-
 .../procedure/TestSafemodeBringsDownMaster.java    |    7 +-
 .../hbase/master/region/MasterRegionTestBase.java  |    3 +-
 .../master/region/TestMasterRegionCompaction.java  |    3 +-
 .../region/TestMasterRegionOnTwoFileSystems.java   |    6 +-
 .../master/snapshot/TestTakeSnapshotHandler.java   |  111 ++
 .../hadoop/hbase/mob/FaultyMobStoreCompactor.java  |    3 +-
 .../org/apache/hadoop/hbase/mob/MobTestUtil.java   |    3 +-
 .../hbase/mob/TestExpiredMobFileCleaner.java       |    5 +-
 .../hadoop/hbase/mob/TestMobDataBlockEncoding.java |    3 +-
 .../hadoop/hbase/mob/TestMobStoreCompaction.java   |    5 +-
 .../hadoop/hbase/mob/TestMobStoreScanner.java      |   11 +-
 .../org/apache/hadoop/hbase/mob/TestMobUtils.java  |   94 ++
 .../RegionProcedureStorePerformanceEvaluation.java |   68 +-
 .../region/RegionProcedureStoreTestHelper.java     |    3 +-
 .../store/region/TestRegionProcedureStore.java     |    3 +-
 .../hadoop/hbase/quotas/TestRateLimiter.java       |    2 +-
 .../hbase/regionserver/TestAtomicOperation.java    |    2 +-
 .../regionserver/TestBulkLoadReplication.java      |    8 +-
 .../TestBulkLoadReplicationHFileRefs.java          |  310 ++++
 .../hadoop/hbase/regionserver/TestClusterId.java   |    4 +-
 .../hbase/regionserver/TestCompactSplitThread.java |   22 +-
 .../hbase/regionserver/TestCompactingMemStore.java |   10 +-
 .../TestCompactingToCellFlatMapMemStore.java       |   27 +-
 .../regionserver/TestCompactionAfterBulkLoad.java  |   46 +-
 .../hbase/regionserver/TestCompactionState.java    |    5 +-
 .../regionserver/TestCompactionWithByteBuff.java   |  155 ++
 .../regionserver/TestDataBlockEncodingTool.java    |    3 +-
 .../regionserver/TestDefaultCompactSelection.java  |    2 +-
 .../hbase/regionserver/TestDefaultMemStore.java    |   10 +-
 .../regionserver/TestEndToEndSplitTransaction.java |   21 +-
 .../hadoop/hbase/regionserver/TestHMobStore.java   |    3 +-
 .../hadoop/hbase/regionserver/TestHRegion.java     |   44 +-
 .../regionserver/TestHRegionReplayEvents.java      |    6 +-
 .../regionserver/TestHRegionServerBulkLoad.java    |    3 +-
 .../hbase/regionserver/TestHRegionTracing.java     |  183 +++
 .../hadoop/hbase/regionserver/TestHStore.java      |   63 +-
 .../hadoop/hbase/regionserver/TestHStoreFile.java  |    9 +-
 .../hbase/regionserver/TestMajorCompaction.java    |    3 +-
 .../regionserver/TestMasterAddressTracker.java     |   18 +-
 .../hbase/regionserver/TestMemStoreFlusher.java    |   98 ++
 .../regionserver/TestMemStoreSegmentsIterator.java |    5 +-
 .../regionserver/TestMetricsRegionServer.java      |   24 +
 .../regionserver/TestMetricsTableLatencies.java    |   52 +-
 .../hbase/regionserver/TestMultiLogThreshold.java  |   64 +-
 .../regionserver/TestRSKilledWhenInitializing.java |    3 +-
 .../hbase/regionserver/TestRSRpcServices.java      |   11 +-
 .../hbase/regionserver/TestRegionIncrement.java    |   11 +-
 .../hadoop/hbase/regionserver/TestRegionInfo.java  |    7 +-
 .../TestRegionMergeTransactionOnCluster.java       |   11 +-
 .../hadoop/hbase/regionserver/TestRegionOpen.java  |    3 +-
 .../hbase/regionserver/TestRegionReplicas.java     |    5 +-
 .../regionserver/TestRegionServerAbortTimeout.java |    5 +-
 .../regionserver/TestRegionServerHostname.java     |    8 +-
 .../regionserver/TestRegionServerMetrics.java      |   41 +-
 .../TestRegionServerOnlineConfigChange.java        |   18 +-
 .../TestRegionServerReadRequestMetrics.java        |   14 -
 .../TestRegionServerReportForDuty.java             |   75 +-
 .../regionserver/TestRegionSplitRestriction.java   |  184 +++
 .../hbase/regionserver/TestReversibleScanners.java |    3 +-
 .../regionserver/TestRowPrefixBloomFilter.java     |    3 +-
 .../hadoop/hbase/regionserver/TestScanner.java     |    7 +-
 .../regionserver/TestScannerWithBulkload.java      |   10 +-
 .../regionserver/TestSimpleTimeRangeTracker.java   |    3 +-
 .../hbase/regionserver/TestSplitLogWorker.java     |   68 +-
 .../TestSplitTransactionOnCluster.java             |   10 +-
 .../TestStoreFileScannerWithTagCompression.java    |    3 +-
 .../hbase/regionserver/TestStoreScanner.java       |   13 +-
 .../regionserver/TestStripeStoreFileManager.java   |   16 +-
 .../hadoop/hbase/regionserver/TestWALLockup.java   |  118 +-
 .../hadoop/hbase/regionserver/TestWideScanner.java |    3 +-
 .../compactions/PerfTestCompactionPolicies.java    |   11 +-
 .../regionserver/compactions/TestCloseChecker.java |    3 +-
 .../compactions/TestCurrentHourProvider.java       |   38 +-
 .../compactions/TestFIFOCompactionPolicy.java      |    2 +-
 .../compactions/TestStripeCompactionPolicy.java    |   31 +
 .../{ => http}/TestRSStatusServlet.java            |    6 +-
 .../TestCompactionScanQueryMatcher.java            |    5 +-
 .../querymatcher/TestUserScanQueryMatcher.java     |    5 +-
 .../TestCompactionWithThroughputController.java    |   17 +-
 .../TestFlushWithThroughputController.java         |    7 -
 .../hbase/regionserver/wal/AbstractTestFSWAL.java  |    8 +-
 .../wal/AbstractTestLogRollPeriod.java             |    5 +-
 .../regionserver/wal/AbstractTestProtobufLog.java  |    3 +-
 .../regionserver/wal/AbstractTestWALReplay.java    |    3 +-
 .../hbase/regionserver/wal/TestAsyncFSWAL.java     |    3 +-
 .../regionserver/wal/TestAsyncFSWALDurability.java |    8 +-
 .../wal/TestAsyncWALReplayValueCompression.java}   |   26 +-
 .../regionserver/wal/TestCombinedAsyncWriter.java  |    3 +-
 .../hbase/regionserver/wal/TestDurability.java     |   13 +-
 .../hadoop/hbase/regionserver/wal/TestFSHLog.java  |   91 +
 .../regionserver/wal/TestFSHLogDurability.java     |    8 +-
 .../hbase/regionserver/wal/TestHBaseWalOnEC.java   |   34 +-
 .../hbase/regionserver/wal/TestLogRollAbort.java   |    5 +-
 .../hbase/regionserver/wal/TestLogRolling.java     |    9 +-
 .../regionserver/wal/TestLogRollingNoCluster.java  |    3 +-
 .../hbase/regionserver/wal/TestMetricsWAL.java     |   29 +-
 .../hbase/regionserver/wal/TestSyncFuture.java     |    4 +-
 .../regionserver/wal/TestSyncFutureCache.java      |   69 +
 .../wal/TestWALCellCodecWithCompression.java       |  115 +-
 .../wal/TestWALReplayValueCompression.java}        |   23 +-
 .../replication/TestClaimReplicationQueue.java     |  165 ++
 .../hbase/replication/TestReplicationBase.java     |  105 +-
 .../TestReplicationEmptyWALRecovery.java           |  287 +++-
 .../hbase/replication/TestReplicationEndpoint.java |    3 +-
 .../replication/TestReplicationKillMasterRS.java   |   13 +-
 .../TestReplicationKillMasterRSCompressed.java     |    5 +-
 ...ReplicationKillMasterRSWithSeparateOldWALs.java |   13 +-
 .../hbase/replication/TestReplicationKillRS.java   |   19 +-
 .../replication/TestReplicationKillSlaveRS.java    |   13 +-
 ...tReplicationKillSlaveRSWithSeparateOldWALs.java |   12 +-
 .../replication/TestReplicationSmallTests.java     |    5 +-
 .../replication/TestReplicationTrackerZKImpl.java  |  271 ---
 .../TestReplicationWALEntryFilters.java            |    7 +-
 .../TestSyncReplicationStandbyKillRS.java          |   42 +-
 .../master/TestRecoverStandbyProcedure.java        |    3 +-
 ...tryStream.java => TestBasicWALEntryStream.java} |  410 ++---
 .../TestBasicWALEntryStreamAsyncFSWAL.java}        |   27 +-
 .../TestBasicWALEntryStreamFSHLog.java}            |   31 +-
 .../regionserver/TestDumpReplicationQueues.java    |    3 +-
 ...InterClusterReplicationEndpointFilterEdits.java |   30 +-
 .../TestRefreshPeerWhileRegionServerRestarts.java  |  115 ++
 .../regionserver/TestReplicationCompressedWAL.java |  108 ++
 .../regionserver/TestReplicationSink.java          |    5 +-
 .../regionserver/TestReplicationSource.java        |   33 +-
 .../regionserver/TestReplicationSourceManager.java |   71 +-
 .../TestReplicationValueCompressedWAL.java}        |   38 +-
 .../TestSerialReplicationEndpoint.java             |    5 +-
 .../TestWALEntryStreamDifferentCounts.java         |   89 +
 ...stWALEntryStreamDifferentCountsAsyncFSWAL.java} |   24 +-
 .../TestWALEntryStreamDifferentCountsFSHLog.java}  |   23 +-
 .../regionserver/WALEntryStreamTestBase.java       |  182 ++
 .../hbase/rsgroup/EnableRSGroupsTestBase.java      |    5 +-
 .../hadoop/hbase/rsgroup/TestRSGroupsAdmin2.java   |   11 +-
 .../hadoop/hbase/rsgroup/TestRSGroupsBase.java     |    3 +-
 .../hadoop/hbase/rsgroup/TestRSGroupsKillRS.java   |    9 +-
 .../hadoop/hbase/rsgroup/TestRSGroupsWithACL.java  |    2 +
 .../rsgroup/TestUpdateRSGroupConfiguration.java    |  122 ++
 .../hbase/rsgroup/VerifyingRSGroupAdmin.java       |    4 +
 .../hadoop/hbase/security/TestSecureIPC.java       |  124 ++
 .../security/access/TestAccessController.java      |    3 +-
 .../access/TestCellACLWithMultipleVersions.java    |    2 +-
 .../TestSnapshotScannerHDFSAclController.java      |    3 +
 .../TestSnapshotScannerHDFSAclController2.java     |    3 +
 .../security/token/TestTokenAuthentication.java    |    2 +-
 .../token/TestZKSecretWatcherRefreshKeys.java      |    7 +-
 .../hbase/snapshot/SnapshotTestingUtils.java       |    9 +-
 .../snapshot/TestFlushSnapshotFromClient.java      |    6 +-
 .../TestRestoreFlushSnapshotFromClient.java        |   14 +-
 .../apache/hadoop/hbase/tool/TestCanaryTool.java   |  171 +-
 .../apache/hadoop/hbase/trace/TestHTraceHooks.java |  134 --
 .../org/apache/hadoop/hbase/trace/TraceTree.java   |  148 --
 .../hadoop/hbase/util/ConstantDelayQueue.java      |    2 +-
 .../apache/hadoop/hbase/util/HFileTestUtil.java    |    5 +-
 .../org/apache/hadoop/hbase/util/MockServer.java   |   55 +-
 .../hadoop/hbase/util/MultiThreadedAction.java     |    6 +-
 .../hadoop/hbase/util/MultiThreadedReader.java     |    6 +-
 .../hbase/util/MultiThreadedReaderWithACL.java     |    2 +-
 .../hadoop/hbase/util/MultiThreadedUpdater.java    |   18 +-
 .../hbase/util/MultiThreadedUpdaterWithACL.java    |   10 +-
 .../hadoop/hbase/util/MultiThreadedWriter.java     |   10 +-
 .../hadoop/hbase/util/MultiThreadedWriterBase.java |    3 +-
 .../hbase/util/MultiThreadedWriterWithACL.java     |    8 +-
 .../hadoop/hbase/util/TestBloomFilterChunk.java    |   12 +-
 .../hadoop/hbase/util/TestCompressionTest.java     |   18 +-
 .../hbase/util/TestDefaultEnvironmentEdge.java     |   11 +-
 .../hadoop/hbase/util/TestFSTableDescriptors.java  |    4 +-
 .../org/apache/hadoop/hbase/util/TestFSUtils.java  |   16 +-
 .../org/apache/hadoop/hbase/util/TestIdLock.java   |    4 +-
 .../util/TestIdReadWriteLockWithObjectPool.java    |    4 +-
 .../util/TestIncrementingEnvironmentEdge.java      |    2 +-
 .../hbase/util/compaction/TestMajorCompactor.java  |    5 +-
 .../org/apache/hadoop/hbase/wal/FaultyFSLog.java   |   10 +-
 .../apache/hadoop/hbase/wal/FileSystemProxy.java   |  105 ++
 .../{TestSecureWAL.java => TestCompressedWAL.java} |   53 +-
 .../hadoop/hbase/wal/TestFSHLogProvider.java       |    3 +-
 .../org/apache/hadoop/hbase/wal/TestSecureWAL.java |    3 +-
 .../hbase/wal/TestSyncReplicationWALProvider.java  |    3 +-
 .../apache/hadoop/hbase/wal/TestWALFactory.java    |  156 +-
 .../apache/hadoop/hbase/wal/TestWALMethods.java    |    3 +-
 .../hadoop/hbase/wal/TestWALReaderOnSecureWAL.java |    6 +-
 .../apache/hadoop/hbase/wal/TestWALRootDir.java    |   12 +-
 .../org/apache/hadoop/hbase/wal/TestWALSplit.java  |    4 +-
 .../hadoop/hbase/wal/TestWALSplitToHFile.java      |   10 +-
 .../TestWALSplitValueCompression.java}             |   25 +-
 .../hadoop/hbase/wal/WALPerformanceEvaluation.java |   78 +-
 hbase-shaded/hbase-shaded-check-invariants/pom.xml |   18 +-
 .../hbase-shaded-client-byo-hadoop/pom.xml         |  230 +--
 hbase-shaded/hbase-shaded-client/pom.xml           |  151 +-
 hbase-shaded/hbase-shaded-mapreduce/pom.xml        |  462 +++---
 .../hbase-shaded-testing-util-tester/pom.xml       |  152 +-
 hbase-shaded/hbase-shaded-testing-util/pom.xml     |  429 +++--
 .../pom.xml                                        |   17 +-
 hbase-shaded/pom.xml                               | 1079 ++++++------
 hbase-shell/pom.xml                                |   20 +-
 hbase-shell/src/main/ruby/hbase/admin.rb           |   54 +-
 .../src/main/ruby/hbase/replication_admin.rb       |   23 +-
 hbase-shell/src/main/ruby/shell.rb                 |    2 +
 hbase-shell/src/main/ruby/shell/commands/alter.rb  |    2 +-
 .../ruby/shell/commands/get_balancer_rejections.rb |   49 +
 .../src/main/ruby/shell/commands/snapshot.rb       |    2 +-
 hbase-shell/src/main/ruby/shell/commands/trace.rb  |   43 +-
 .../ruby/shell/commands/update_rsgroup_config.rb   |   30 +-
 .../hadoop/hbase/client/TestReplicationShell.java  |    3 -
 hbase-shell/src/test/ruby/hbase/admin_test.rb      |    7 +
 hbase-testing-util/pom.xml                         |  330 ++--
 hbase-thrift/pom.xml                               |   24 +-
 .../org/apache/hadoop/hbase/thrift/Constants.java  |    4 +
 .../hbase/thrift/TBoundedThreadPoolServer.java     |    5 +-
 .../hbase/thrift/ThriftHBaseServiceHandler.java    |   27 +-
 .../apache/hadoop/hbase/thrift/ThriftServer.java   |   36 +-
 .../hbase/thrift/generated/AlreadyExists.java      |    8 +-
 .../hbase/thrift/generated/BatchMutation.java      |   12 +-
 .../hbase/thrift/generated/ColumnDescriptor.java   |   24 +-
 .../hadoop/hbase/thrift/generated/Hbase.java       |  748 ++++-----
 .../hadoop/hbase/thrift/generated/IOError.java     |  146 +-
 .../hbase/thrift/generated/IllegalArgument.java    |    8 +-
 .../hadoop/hbase/thrift/generated/Mutation.java    |   14 +-
 .../hadoop/hbase/thrift/generated/TAppend.java     |   18 +-
 .../hadoop/hbase/thrift/generated/TCell.java       |   10 +-
 .../hadoop/hbase/thrift/generated/TColumn.java     |   10 +-
 .../hadoop/hbase/thrift/generated/TIncrement.java  |   14 +-
 .../hadoop/hbase/thrift/generated/TRegionInfo.java |   20 +-
 .../hadoop/hbase/thrift/generated/TRowResult.java  |   16 +-
 .../hadoop/hbase/thrift/generated/TScan.java       |   28 +-
 .../hbase/thrift/generated/TThriftServerType.java  |    4 +-
 .../hbase/thrift2/ThriftHBaseServiceHandler.java   |    1 +
 .../hadoop/hbase/thrift2/client/ThriftAdmin.java   |    5 +
 .../hbase/thrift2/client/ThriftConnection.java     |   18 +-
 .../hadoop/hbase/thrift2/generated/TAppend.java    |   22 +-
 .../hbase/thrift2/generated/TAuthorization.java    |   10 +-
 .../hbase/thrift2/generated/TBloomFilterType.java  |    4 +-
 .../hbase/thrift2/generated/TCellVisibility.java   |    8 +-
 .../hadoop/hbase/thrift2/generated/TColumn.java    |   12 +-
 .../thrift2/generated/TColumnFamilyDescriptor.java |   50 +-
 .../hbase/thrift2/generated/TColumnIncrement.java  |   12 +-
 .../hbase/thrift2/generated/TColumnValue.java      |   18 +-
 .../hbase/thrift2/generated/TCompareOperator.java  |    4 +-
 .../thrift2/generated/TCompressionAlgorithm.java   |    4 +-
 .../hbase/thrift2/generated/TConsistency.java      |    4 +-
 .../thrift2/generated/TDataBlockEncoding.java      |    4 +-
 .../hadoop/hbase/thrift2/generated/TDelete.java    |   22 +-
 .../hbase/thrift2/generated/TDeleteType.java       |    4 +-
 .../hbase/thrift2/generated/TDurability.java       |    4 +-
 .../hbase/thrift2/generated/TFilterByOperator.java |    4 +-
 .../hadoop/hbase/thrift2/generated/TGet.java       |   40 +-
 .../hbase/thrift2/generated/THBaseService.java     |  594 +++----
 .../hbase/thrift2/generated/THRegionInfo.java      |   20 +-
 .../hbase/thrift2/generated/THRegionLocation.java  |   10 +-
 .../hadoop/hbase/thrift2/generated/TIOError.java   |  175 +-
 .../hbase/thrift2/generated/TIllegalArgument.java  |    8 +-
 .../hadoop/hbase/thrift2/generated/TIncrement.java |   22 +-
 .../hbase/thrift2/generated/TKeepDeletedCells.java |    4 +-
 .../hbase/thrift2/generated/TLogQueryFilter.java   |   20 +-
 .../hadoop/hbase/thrift2/generated/TLogType.java   |    4 +-
 .../hadoop/hbase/thrift2/generated/TMutation.java  |   10 +-
 .../thrift2/generated/TNamespaceDescriptor.java    |   12 +-
 .../hbase/thrift2/generated/TOnlineLogRecord.java  |   34 +-
 .../hadoop/hbase/thrift2/generated/TPut.java       |   22 +-
 .../hadoop/hbase/thrift2/generated/TReadType.java  |    4 +-
 .../hadoop/hbase/thrift2/generated/TResult.java    |   16 +-
 .../hbase/thrift2/generated/TRowMutations.java     |   12 +-
 .../hadoop/hbase/thrift2/generated/TScan.java      |   48 +-
 .../hbase/thrift2/generated/TServerName.java       |   12 +-
 .../hbase/thrift2/generated/TTableDescriptor.java  |   18 +-
 .../hadoop/hbase/thrift2/generated/TTableName.java |   10 +-
 .../hbase/thrift2/generated/TThriftServerType.java |    4 +-
 .../hadoop/hbase/thrift2/generated/TTimeRange.java |   10 +-
 .../org/apache/hadoop/hbase/thrift/Hbase.thrift    |    1 +
 .../org/apache/hadoop/hbase/thrift2/hbase.thrift   |    1 +
 .../hadoop/hbase/thrift/TestThriftServer.java      |   11 +-
 .../hbase/thrift/TestThriftServerCmdLine.java      |    2 +-
 .../hbase/thrift2/TestThrift2ServerCmdLine.java    |    2 +-
 .../hadoop/hbase/thrift2/TestThriftConnection.java |    3 +-
 .../thrift2/TestThriftHBaseServiceHandler.java     |   23 +-
 hbase-zookeeper/pom.xml                            |   19 +-
 .../hadoop/hbase/zookeeper/MetaTableLocator.java   |    6 +-
 .../hbase/zookeeper/MiniZooKeeperCluster.java      |   10 +-
 .../hbase/zookeeper/RecoverableZooKeeper.java      |   94 +-
 .../hadoop/hbase/zookeeper/ZKLeaderManager.java    |    2 +-
 .../hadoop/hbase/zookeeper/ZKNodeTracker.java      |    5 +-
 .../org/apache/hadoop/hbase/zookeeper/ZKUtil.java  |   10 +-
 pom.xml                                            |  275 +++-
 src/main/asciidoc/_chapters/architecture.adoc      |   28 +-
 src/main/asciidoc/_chapters/community.adoc         |   15 +-
 src/main/asciidoc/_chapters/compression.adoc       |    4 +-
 src/main/asciidoc/_chapters/configuration.adoc     |   48 +-
 src/main/asciidoc/_chapters/developer.adoc         |    6 +-
 src/main/asciidoc/_chapters/hbase-default.adoc     |   26 +
 src/main/asciidoc/_chapters/hbase_mob.adoc         |   14 +-
 .../asciidoc/_chapters/offheap_read_write.adoc     |    6 -
 src/main/asciidoc/_chapters/ops_mgt.adoc           |   39 +-
 src/main/asciidoc/_chapters/performance.adoc       |    3 +-
 src/main/asciidoc/_chapters/schema_design.adoc     |    4 +-
 src/main/asciidoc/_chapters/security.adoc          |    2 +-
 src/main/asciidoc/_chapters/tracing.adoc           |   57 +
 src/main/asciidoc/_chapters/troubleshooting.adoc   |   16 +-
 src/main/asciidoc/_chapters/upgrading.adoc         |    4 +-
 src/main/asciidoc/book.adoc                        |    1 +
 src/site/site.xml                                  |    8 -
 src/site/xdoc/downloads.xml                        |   69 +-
 1031 files changed, 28470 insertions(+), 18474 deletions(-)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => bin/hbase_startup.jsh (73%)
 delete mode 100644 conf/log4j.properties
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => conf/log4j2-hbtop.xml (62%)
 create mode 100644 conf/log4j2.xml
 delete mode 100644 hbase-archetypes/hbase-client-project/src/main/resources/log4j.properties
 create mode 100644 hbase-archetypes/hbase-client-project/src/main/resources/log4j2.xml
 delete mode 100644 hbase-archetypes/hbase-shaded-client-project/src/main/resources/log4j.properties
 create mode 100644 hbase-archetypes/hbase-shaded-client-project/src/main/resources/log4j2.xml
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodeLoadBalancer.java (88%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesManager.java (79%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java (55%)
 copy hbase-logging/src/main/java/org/apache/hadoop/hbase/AsyncConsoleAppender.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/AssignRegionAction.java (53%)
 rename hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationListener.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalanceAction.java (60%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
 copy hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationFactory.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CPRequestCostFunction.java (57%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/CandidateGenerator.java (60%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterInfoProvider.java
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadAsRateFunction.java (50%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFromRegionLoadFunction.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CostFunction.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/DoubleArrayCost.java
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredStochasticBalancer.java (81%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/HeterogeneousRegionCountCostFunction.java (88%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerFactory.java (92%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadCandidateGenerator.java (87%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/LocalityBasedCandidateGenerator.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/LocalityBasedCostFunction.java
 copy hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationFactory.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/MemStoreSizeCostFunction.java (57%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveCostFunction.java
 copy hbase-logging/src/main/java/org/apache/hadoop/hbase/AsyncConsoleAppender.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/MoveRegionAction.java (54%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/PrimaryRegionCountSkewCostFunction.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RackLocalityCostFunction.java (58%)
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RandomCandidateGenerator.java (70%)
 copy hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationFactory.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/ReadRequestCostFunction.java (57%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionCountSkewCostFunction.java
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionLocationFinder.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionHDFSBlockLocationFinder.java (62%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaCandidateGenerator.java (81%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaGroupingCostFunction.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaHostCostFunction.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaRackCandidateGenerator.java
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/RegionReplicaRackCostFunction.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/ServerLocalityCostFunction.java (59%)
 rename {hbase-server => hbase-balancer}/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java (79%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 copy hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationFactory.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StoreFileCostFunction.java (57%)
 create mode 100644 hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/SwapRegionsAction.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/TableSkewCostFunction.java (53%)
 copy hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationFactory.java => hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/WriteRequestCostFunction.java (57%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/favored/TestFavoredNodeAssignmentHelper.java (90%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/favored/TestStartcodeAgnosticServerName.java (100%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java (81%)
 create mode 100644 hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyClusterInfoProvider.java
 copy {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java (83%)
 create mode 100644 hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/HeterogeneousCostRulesTestHelper.java
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerPerformanceEvaluation.java (94%)
 create mode 100644 hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/StochasticBalancerTestBase.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase2.java => hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/StochasticBalancerTestBase2.java (87%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java (87%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaMidCluster.java => hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDoubleArrayCost.java (54%)
 create mode 100644 hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRegionHDFSBlockLocationFinder.java
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestSimpleLoadBalancer.java (79%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java (65%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java (95%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCost.java (67%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCostRules.java (53%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerLargeCluster.java (94%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerMidCluster.java (96%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java (82%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaHighReplication.java (95%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaLargeCluster.java (97%)
 copy {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaMidCluster.java (97%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaReplicationGreaterThanNumNodes.java (95%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaSameHosts.java (97%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplicaWithRacks.java (97%)
 rename {hbase-server => hbase-balancer}/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerSmallCluster.java (96%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java (72%)
 create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/client/BalancerRejection.java
 create mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncConnectionTracing.java
 create mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionLocatorTracing.java
 create mode 100644 hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableTracing.java
 rename {hbase-server => hbase-common}/src/main/java/org/apache/hadoop/hbase/HDFSBlocksDistribution.java (100%)
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/FlushAllStoresPolicy.java => hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateConstants.java (65%)
 create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/BoundedDelegatingInputStream.java
 create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/io/DelegatingInputStream.java
 delete mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/trace/HBaseHTraceConfiguration.java
 delete mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/trace/SpanReceiverHost.java
 rename {hbase-server => hbase-common}/src/test/java/org/apache/hadoop/hbase/TestHDFSBlocksDistribution.java (96%)
 create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/logging/TestJul2Slf4j.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/DummyCostFunction.java => hbase-common/src/test/resources/hbase-deprecated-conf.xml (72%)
 delete mode 100644 hbase-endpoint/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRowProcessorEndpoint.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/IntegrationTestLoadCommonCrawl.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/CRC64.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCFileReader.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCFileWriter.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCInputFormat.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCOutputFormat.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCRecord.java
 create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/test/util/warc/WARCWritable.java
 create mode 100644 hbase-it/src/test/resources/CC-MAIN-2021-10-warc.paths.gz
 copy hbase-balancer/src/main/java/org/apache/hadoop/hbase/favored/FavoredNodesPromoter.java => hbase-logging/src/main/java/org/apache/hadoop/hbase/logging/JulToSlf4jInitializer.java (56%)
 create mode 100644 hbase-logging/src/test/java/org/apache/log4j/FileAppender.java
 delete mode 100644 hbase-logging/src/test/resources/log4j.properties
 create mode 100644 hbase-logging/src/test/resources/log4j2.xml
 create mode 100644 hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RoundRobinTableInputFormat.java
 create mode 100644 hbase-mapreduce/src/test/java/org/apache/hadoop/hbase/mapreduce/TestRoundRobinTableInputFormat.java
 delete mode 100644 hbase-protocol-shaded/src/main/protobuf/server/coprocessor/RowProcessor.proto
 delete mode 100644 hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationTracker.java
 delete mode 100644 hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationTrackerZKImpl.java
 rename {hbase-balancer => hbase-server}/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java (99%)
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/client/coprocessor/RowProcessorClient.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/BaseRowProcessorEndpoint.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LocalityBasedCandidateGenerator.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MaintenanceLoadBalancer.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/MasterClusterInfoProvider.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/{ => http}/MasterDumpServlet.java (95%)
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/{ => http}/MasterRedirectServlet.java (97%)
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/{ => http}/MasterStatusServlet.java (95%)
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/{webapp => http}/MetaBrowser.java (99%)
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/master/{webapp => http}/RegionReplicaInfo.java (99%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ClaimReplicationQueueRemoteProcedure.java
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/master/replication/ClaimReplicationQueuesProcedure.java
 copy hbase-logging/src/main/java/org/apache/hadoop/hbase/AsyncConsoleAppender.java => hbase-server/src/main/java/org/apache/hadoop/hbase/namequeues/BalancerRejectionDetails.java (53%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/namequeues/impl/BalancerRejectionQueueService.java
 rename hbase-logging/src/main/java/org/apache/hadoop/hbase/AsyncConsoleAppender.java => hbase-server/src/main/java/org/apache/hadoop/hbase/procedure2/BaseRSProcedureCallable.java (56%)
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BaseRowProcessor.java
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/{DelimitedKeyPrefixRegionSplitPolicy.java => DelimitedKeyPrefixRegionSplitRestriction.java} (51%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitRestriction.java
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/{FlushAllStoresPolicy.java => NoRegionSplitRestriction.java} (66%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitRestriction.java
 delete mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowProcessor.java
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/{ => http}/RSDumpServlet.java (79%)
 rename hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/{ => http}/RSStatusServlet.java (95%)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SyncFutureCache.java
 copy hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/{SwitchRpcThrottleRemoteCallable.java => ClaimReplicationQueueCallable.java} (55%)
 copy hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java => hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryFilterRetryableException.java (54%)
 create mode 100644 hbase-server/src/main/resources/hbase-webapps/master/operationDetails.jsp
 create mode 100644 hbase-server/src/main/resources/hbase-webapps/regionserver/rsOperationDetails.jsp
 create mode 100644 hbase-server/src/main/resources/hbase-webapps/static/js/parser-date-iso8601.min.js
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRequestTooBigException.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestAssignRegionToUninitializedRegionServer.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/{TestRegionSplit.java => TestRegionSplitAndSeparateChildren.java} (72%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBalancerRejection.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRegionLocationFinder.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRegionsOnMasterOptions.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/{ => master/balancer}/TestStochasticBalancerJmxMetrics.java (94%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerHeterogeneousCostRulesLoadFromHDFS.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/{ => http}/TestMasterStatusServlet.java (96%)
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/{webapp => http}/TestMetaBrowser.java (99%)
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/{webapp => http}/TestMetaBrowserNoCluster.java (98%)
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/master/{balancer/TestStochasticLoadBalancerRegionReplicaMidCluster.java => procedure/TestHBCKSCPUnknown.java} (52%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/snapshot/TestTakeSnapshotHandler.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobUtils.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBulkLoadReplicationHFileRefs.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionWithByteBuff.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionTracing.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMemStoreFlusher.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitRestriction.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/{ => http}/TestRSStatusServlet.java (95%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/{replication/TestReplicationKillMasterRSCompressed.java => regionserver/wal/TestAsyncWALReplayValueCompression.java} (62%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestSyncFutureCache.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/{replication/TestReplicationKillMasterRSCompressed.java => regionserver/wal/TestWALReplayValueCompression.java} (64%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestClaimReplicationQueue.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationTrackerZKImpl.java
 rename hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/{TestWALEntryStream.java => TestBasicWALEntryStream.java} (66%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/replication/{TestReplicationKillSlaveRSWithSeparateOldWALs.java => regionserver/TestBasicWALEntryStreamAsyncFSWAL.java} (64%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/replication/{TestReplicationKillSlaveRSWithSeparateOldWALs.java => regionserver/TestBasicWALEntryStreamFSHLog.java} (63%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRefreshPeerWhileRegionServerRestarts.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationCompressedWAL.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/replication/{TestReplicationKillMasterRSCompressed.java => regionserver/TestReplicationValueCompressedWAL.java} (56%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestWALEntryStreamDifferentCounts.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/replication/{TestReplicationKillSlaveRSWithSeparateOldWALs.java => regionserver/TestWALEntryStreamDifferentCountsAsyncFSWAL.java} (69%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/replication/{TestReplicationKillSlaveRSWithSeparateOldWALs.java => regionserver/TestWALEntryStreamDifferentCountsFSHLog.java} (70%)
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryStreamTestBase.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/rsgroup/TestUpdateRSGroupConfiguration.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/trace/TestHTraceHooks.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/trace/TraceTree.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/wal/FileSystemProxy.java
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/wal/{TestSecureWAL.java => TestCompressedWAL.java} (76%)
 copy hbase-server/src/test/java/org/apache/hadoop/hbase/{replication/TestReplicationKillMasterRSCompressed.java => wal/TestWALSplitValueCompression.java} (66%)
 create mode 100644 hbase-shell/src/main/ruby/shell/commands/get_balancer_rejections.rb
 rename conf/log4j-hbtop.properties => hbase-shell/src/main/ruby/shell/commands/update_rsgroup_config.rb (63%)
 create mode 100644 src/main/asciidoc/_chapters/tracing.adoc

[hbase] 01/12: HBASE-24682 Refactor ReplicationSource#addHFileRefs method: move it to ReplicationSourceManager (#2020)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 0c061bc6164fbb49afc7146095a1cf8b80a997bc
Author: Guanghao Zhang <zg...@apache.org>
AuthorDate: Wed Jul 8 14:29:08 2020 +0800

    HBASE-24682 Refactor ReplicationSource#addHFileRefs method: move it to ReplicationSourceManager (#2020)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../regionserver/ReplicationSource.java            | 19 ++--------------
 .../regionserver/ReplicationSourceInterface.java   | 14 ------------
 .../regionserver/ReplicationSourceManager.java     | 26 +++++++++++++++++++++-
 .../hbase/replication/ReplicationSourceDummy.java  |  9 +-------
 4 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index d1268fa..a385ead 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -35,6 +35,7 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Predicate;
+
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -44,27 +45,24 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableDescriptors;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
 import org.apache.hadoop.hbase.replication.ChainWALEntryFilter;
 import org.apache.hadoop.hbase.replication.ClusterMarkingEntryFilter;
 import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
-import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
 import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
 
 /**
@@ -264,19 +262,6 @@ public class ReplicationSource implements ReplicationSourceInterface {
     return logQueue.getQueues();
   }
 
-  @Override
-  public void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> pairs)
-      throws ReplicationException {
-    String peerId = replicationPeer.getId();
-    if (replicationPeer.getPeerConfig().needToReplicate(tableName, family)) {
-      this.queueStorage.addHFileRefs(peerId, pairs);
-      metrics.incrSizeOfHFileRefsQueue(pairs.size());
-    } else {
-      LOG.debug("HFiles will not be replicated belonging to the table {} family {} to peer id {}",
-        tableName, Bytes.toString(family), peerId);
-    }
-  }
-
   private ReplicationEndpoint createReplicationEndpoint()
       throws InstantiationException, IllegalAccessException, ClassNotFoundException, IOException {
     RegionServerCoprocessorHost rsServerHost = null;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
index 27e4b79..352cdd3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
@@ -28,12 +28,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
-import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
-import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -61,17 +58,6 @@ public interface ReplicationSourceInterface {
   void enqueueLog(Path log);
 
   /**
-   * Add hfile names to the queue to be replicated.
-   * @param tableName Name of the table these files belongs to
-   * @param family Name of the family these files belong to
-   * @param pairs list of pairs of { HFile location in staging dir, HFile path in region dir which
-   *          will be added in the queue for replication}
-   * @throws ReplicationException If failed to add hfile references
-   */
-  void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> pairs)
-      throws ReplicationException;
-
-  /**
    * Start the replication
    */
   ReplicationSourceInterface startup();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index 73efcfe..ad7c033 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.SyncReplicationState;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
@@ -1050,7 +1051,30 @@ public class ReplicationSourceManager {
   public void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> pairs)
       throws IOException {
     for (ReplicationSourceInterface source : this.sources.values()) {
-      throwIOExceptionWhenFail(() -> source.addHFileRefs(tableName, family, pairs));
+      throwIOExceptionWhenFail(() -> addHFileRefs(source, tableName, family, pairs));
+    }
+  }
+
+  /**
+   * Add hfile names to the queue to be replicated.
+   * @param source the replication peer source
+   * @param tableName Name of the table these files belongs to
+   * @param family Name of the family these files belong to
+   * @param pairs list of pairs of { HFile location in staging dir, HFile path in region dir which
+   *          will be added in the queue for replication}
+   * @throws ReplicationException If failed to add hfile references
+   */
+  private void addHFileRefs(ReplicationSourceInterface source, TableName tableName, byte[] family,
+    List<Pair<Path, Path>> pairs) throws ReplicationException {
+    String peerId = source.getPeerId();
+    // Only the normal replication source update here, its peerId is equals to queueId.
+    ReplicationPeer replicationPeer = replicationPeers.getPeer(peerId);
+    if (replicationPeer.getPeerConfig().needToReplicate(tableName, family)) {
+      this.queueStorage.addHFileRefs(peerId, pairs);
+      source.getSourceMetrics().incrSizeOfHFileRefsQueue(pairs.size());
+    } else {
+      LOG.debug("HFiles will not be replicated belonging to the table {} family {} to peer id {}",
+        tableName, Bytes.toString(family), peerId);
     }
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
index cab01d6..4f656b1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
@@ -21,17 +21,16 @@ import java.io.IOException;
 import java.util.List;
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicBoolean;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
-import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.replication.regionserver.MetricsSource;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager;
 import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
-import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 
 /**
@@ -115,12 +114,6 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
   }
 
   @Override
-  public void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> files)
-      throws ReplicationException {
-    return;
-  }
-
-  @Override
   public boolean isPeerEnabled() {
     return true;
   }

[hbase] 07/12: HBASE-24684 Fetch ReplicationSink servers list from HMaster instead o… (#2077)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 03a30573d7eadd4094cacd33144e53992c899d57
Author: XinSun <dd...@gmail.com>
AuthorDate: Sun Sep 20 10:54:43 2020 +0800

    HBASE-24684 Fetch ReplicationSink servers list from HMaster instead o… (#2077)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../src/main/protobuf/server/master/Master.proto   |  12 +-
 .../hadoop/hbase/coprocessor/MasterObserver.java   |  16 +++
 .../org/apache/hadoop/hbase/master/HMaster.java    |   5 +
 .../hadoop/hbase/master/MasterCoprocessorHost.java |  18 +++
 .../hadoop/hbase/master/MasterRpcServices.java     |  21 ++++
 .../apache/hadoop/hbase/master/MasterServices.java |   6 +
 .../replication/HBaseReplicationEndpoint.java      | 140 +++++++++++++++++++--
 .../hbase/master/MockNoopMasterServices.java       |   5 +
 .../replication/TestHBaseReplicationEndpoint.java  |   5 +
 .../replication/TestReplicationFetchServers.java   | 106 ++++++++++++++++
 .../TestGlobalReplicationThrottler.java            |   4 +
 ...stRegionReplicaReplicationEndpointNoMaster.java |   2 +
 12 files changed, 327 insertions(+), 13 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
index 3d265dd..b9ed476 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
@@ -728,6 +728,13 @@ message BalancerRejectionsResponse {
   repeated BalancerRejection balancer_rejection = 1;
 }
 
+message ListReplicationSinkServersRequest {
+}
+
+message ListReplicationSinkServersResponse {
+  repeated ServerName server_name = 1;
+}
+
 service MasterService {
   /** Used by the client to get the number of regions that have received the updated schema */
   rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest)
@@ -1157,10 +1164,13 @@ service MasterService {
     returns (RenameRSGroupResponse);
 
   rpc UpdateRSGroupConfig(UpdateRSGroupConfigRequest)
-  returns (UpdateRSGroupConfigResponse);
+    returns (UpdateRSGroupConfigResponse);
 
   rpc GetLogEntries(LogRequest)
     returns(LogEntry);
+
+  rpc ListReplicationSinkServers(ListReplicationSinkServersRequest)
+    returns (ListReplicationSinkServersResponse);
 }
 
 // HBCK Service definitions.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
index ac35caa..ec009cc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
@@ -1782,4 +1782,20 @@ public interface MasterObserver {
   default void postHasUserPermissions(ObserverContext<MasterCoprocessorEnvironment> ctx,
       String userName, List<Permission> permissions) throws IOException {
   }
+
+  /**
+   * Called before getting servers for replication sink.
+   * @param ctx the coprocessor instance's environment
+   */
+  default void preListReplicationSinkServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
+    throws IOException {
+  }
+
+  /**
+   * Called after getting servers for replication sink.
+   * @param ctx the coprocessor instance's environment
+   */
+  default void postListReplicationSinkServers(ObserverContext<MasterCoprocessorEnvironment> ctx)
+    throws IOException {
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index ba38a19..903f392 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -3864,4 +3864,9 @@ public class HMaster extends HRegionServer implements MasterServices {
   public MetaLocationSyncer getMetaLocationSyncer() {
     return metaLocationSyncer;
   }
+
+  @Override
+  public List<ServerName> listReplicationSinkServers() throws IOException {
+    return this.serverManager.getOnlineServersList();
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
index 01d1a62..f775eba 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
@@ -2038,4 +2038,22 @@ public class MasterCoprocessorHost
       }
     });
   }
+
+  public void preListReplicationSinkServers() throws IOException {
+    execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
+      @Override
+      public void call(MasterObserver observer) throws IOException {
+        observer.preListReplicationSinkServers(this);
+      }
+    });
+  }
+
+  public void postListReplicationSinkServers() throws IOException {
+    execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
+      @Override
+      public void call(MasterObserver observer) throws IOException {
+        observer.postListReplicationSinkServers(this);
+      }
+    });
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index e7bf96d..0efa308 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -264,6 +264,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamesp
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespaceDescriptorsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespacesRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListNamespacesResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableDescriptorsByNamespaceResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListTableNamesByNamespaceRequest;
@@ -3460,4 +3462,23 @@ public class MasterRpcServices extends RSRpcServices implements
       .addAllBalancerRejection(balancerRejections).build();
   }
 
+  public ListReplicationSinkServersResponse listReplicationSinkServers(
+    RpcController controller, ListReplicationSinkServersRequest request)
+    throws ServiceException {
+    ListReplicationSinkServersResponse.Builder builder =
+      ListReplicationSinkServersResponse.newBuilder();
+    try {
+      if (master.getMasterCoprocessorHost() != null) {
+        master.getMasterCoprocessorHost().preListReplicationSinkServers();
+      }
+      builder.addAllServerName(master.listReplicationSinkServers().stream()
+        .map(ProtobufUtil::toServerName).collect(Collectors.toList()));
+      if (master.getMasterCoprocessorHost() != null) {
+        master.getMasterCoprocessorHost().postListReplicationSinkServers();
+      }
+    } catch (IOException e) {
+      throw new ServiceException(e);
+    }
+    return builder.build();
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index f24ecd4..3f7dc02 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -577,4 +577,10 @@ public interface MasterServices extends Server {
    * We need to get this in MTP to tell the syncer the new meta replica count.
    */
   MetaLocationSyncer getMetaLocationSyncer();
+
+  /**
+   * Get a list of servers' addresses for replication sink.
+   * @return a list of servers' address
+   */
+  List<ServerName> listReplicationSinkServers() throws IOException;
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
index f38fd08..e788d8c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
@@ -18,6 +18,9 @@
 
 package org.apache.hadoop.hbase.replication;
 
+import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT;
+import static org.apache.hadoop.hbase.HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -27,18 +30,22 @@ import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
 import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
 import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
 import org.apache.hadoop.hbase.protobuf.ReplicationProtobufUtil;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.util.FutureUtils;
 import org.apache.hadoop.hbase.wal.WAL;
-import org.apache.hadoop.hbase.zookeeper.ZKListener;
-import org.apache.hadoop.hbase.Abortable;
-import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
+import org.apache.hadoop.hbase.zookeeper.ZKListener;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -50,6 +57,12 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
 
 /**
  * A {@link BaseReplicationEndpoint} for replication endpoints whose
@@ -61,6 +74,13 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
 
   private static final Logger LOG = LoggerFactory.getLogger(HBaseReplicationEndpoint.class);
 
+  public static final String FETCH_SERVERS_USE_ZK_CONF_KEY =
+      "hbase.replication.fetch.servers.usezk";
+
+  public static final String FETCH_SERVERS_INTERVAL_CONF_KEY =
+      "hbase.replication.fetch.servers.interval";
+  public static final int DEFAULT_FETCH_SERVERS_INTERVAL = 10 * 60 * 1000; // 10 mins
+
   private ZKWatcher zkw = null;
   private final Object zkwLock = new Object();
 
@@ -92,6 +112,11 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
 
   private List<ServerName> sinkServers = new ArrayList<>(0);
 
+  private AsyncClusterConnection peerConnection;
+  private boolean fetchServersUseZk = false;
+  private FetchServersChore fetchServersChore;
+  private int shortOperationTimeout;
+
   /*
    * Some implementations of HBaseInterClusterReplicationEndpoint may require instantiate different
    * Connection implementations, or initialize it in a different way, so defining createConnection
@@ -127,6 +152,16 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
         LOG.warn("{} Failed to close the connection", ctx.getPeerId());
       }
     }
+    if (fetchServersChore != null) {
+      fetchServersChore.cancel();
+    }
+    if (peerConnection != null) {
+      try {
+        peerConnection.close();
+      } catch (IOException e) {
+        LOG.warn("Attempt to close peerConnection failed.", e);
+      }
+    }
   }
 
   /**
@@ -157,8 +192,27 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   @Override
-  protected void doStart() {
+  protected synchronized void doStart() {
+    this.shortOperationTimeout = ctx.getLocalConfiguration().getInt(
+        HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY, DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
     try {
+      if (ctx.getLocalConfiguration().getBoolean(FETCH_SERVERS_USE_ZK_CONF_KEY, false)) {
+        fetchServersUseZk = true;
+      } else {
+        try {
+          if (ReplicationUtils.isPeerClusterSupportReplicationOffload(getPeerConnection())) {
+            fetchServersChore = new FetchServersChore(ctx.getServer(), this);
+            ctx.getServer().getChoreService().scheduleChore(fetchServersChore);
+            fetchServersUseZk = false;
+          } else {
+            fetchServersUseZk = true;
+          }
+        } catch (Throwable t) {
+          fetchServersUseZk = true;
+          LOG.warn("Peer {} try to fetch servers by admin failed. Using zk impl.",
+              ctx.getPeerId(), t);
+        }
+      }
       reloadZkWatcher();
       connectPeerCluster();
       notifyStarted();
@@ -201,7 +255,9 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
       }
       zkw = new ZKWatcher(ctx.getConfiguration(),
           "connection to cluster: " + ctx.getPeerId(), this);
-      zkw.registerListener(new PeerRegionServerListener(this));
+      if (fetchServersUseZk) {
+        zkw.registerListener(new PeerRegionServerListener(this));
+      }
     }
   }
 
@@ -227,11 +283,46 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   /**
+   * Get the connection to peer cluster
+   * @return connection to peer cluster
+   * @throws IOException If anything goes wrong connecting
+   */
+  private synchronized AsyncClusterConnection getPeerConnection() throws IOException {
+    if (peerConnection == null) {
+      Configuration conf = ctx.getConfiguration();
+      peerConnection = ClusterConnectionFactory.createAsyncClusterConnection(conf, null,
+          UserProvider.instantiate(conf).getCurrent());
+    }
+    return peerConnection;
+  }
+
+  /**
+   * Get the list of all the servers that are responsible for replication sink
+   * from the specified peer master
+   * @return list of server addresses or an empty list if the slave is unavailable
+   */
+  protected List<ServerName> fetchSlavesAddresses() {
+    try {
+      AsyncClusterConnection peerConn = getPeerConnection();
+      ServerName master = FutureUtils.get(peerConn.getAdmin().getMaster());
+      MasterService.BlockingInterface masterStub = MasterService.newBlockingStub(
+        peerConn.getRpcClient()
+          .createBlockingRpcChannel(master, User.getCurrent(), shortOperationTimeout));
+      ListReplicationSinkServersResponse resp = masterStub
+        .listReplicationSinkServers(null, ListReplicationSinkServersRequest.newBuilder().build());
+      return ProtobufUtil.toServerNameList(resp.getServerNameList());
+    } catch (ServiceException | IOException e) {
+      LOG.error("Peer {} fetches servers failed", ctx.getPeerId(), e);
+    }
+    return Collections.emptyList();
+  }
+
+  /**
    * Get the list of all the region servers from the specified peer
    *
    * @return list of region server addresses or an empty list if the slave is unavailable
    */
-  protected List<ServerName> fetchSlavesAddresses() {
+  protected List<ServerName> fetchSlavesAddressesByZK() {
     List<String> children = null;
     try {
       synchronized (zkwLock) {
@@ -254,7 +345,12 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   protected synchronized void chooseSinks() {
-    List<ServerName> slaveAddresses = fetchSlavesAddresses();
+    List<ServerName> slaveAddresses = Collections.emptyList();
+    if (fetchServersUseZk) {
+      slaveAddresses = fetchSlavesAddressesByZK();
+    } else {
+      slaveAddresses = fetchSlavesAddresses();
+    }
     if (slaveAddresses.isEmpty()) {
       LOG.warn("No sinks available at peer. Will not be able to replicate");
     }
@@ -285,6 +381,14 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
     return createSinkPeer(serverName);
   }
 
+  private SinkPeer createSinkPeer(ServerName serverName) throws IOException {
+    if (ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
+      return new ReplicationServerSinkPeer(serverName, conn.getReplicationServerAdmin(serverName));
+    } else {
+      return new RegionServerSinkPeer(serverName, conn.getRegionServerAdmin(serverName));
+    }
+  }
+
   /**
    * Report a {@code SinkPeer} as being bad (i.e. an attempt to replicate to it
    * failed). If a single SinkPeer is reported as bad more than
@@ -393,11 +497,23 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
     }
   }
 
-  private SinkPeer createSinkPeer(ServerName serverName) throws IOException {
-    if (ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
-      return new ReplicationServerSinkPeer(serverName, conn.getReplicationServerAdmin(serverName));
-    } else {
-      return new RegionServerSinkPeer(serverName, conn.getRegionServerAdmin(serverName));
+  /**
+   * Chore that will fetch the list of servers from peer master.
+   */
+  public static class FetchServersChore extends ScheduledChore {
+
+    private HBaseReplicationEndpoint endpoint;
+
+    public FetchServersChore(Server server, HBaseReplicationEndpoint endpoint) {
+      super("Peer-" + endpoint.ctx.getPeerId() + "-FetchServersChore", server,
+        server.getConfiguration()
+          .getInt(FETCH_SERVERS_INTERVAL_CONF_KEY, DEFAULT_FETCH_SERVERS_INTERVAL));
+      this.endpoint = endpoint;
+    }
+
+    @Override
+    protected void chore() {
+      endpoint.chooseSinks();
     }
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
index 933addf..5aea397 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
@@ -520,4 +520,9 @@ public class MockNoopMasterServices implements MasterServices {
   public MetaLocationSyncer getMetaLocationSyncer() {
     return null;
   }
+
+  @Override
+  public List<ServerName> listReplicationSinkServers() {
+    return null;
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
index 4182eaf..6765794 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
@@ -199,6 +199,11 @@ public class TestHBaseReplicationEndpoint {
     }
 
     @Override
+    public List<ServerName> fetchSlavesAddressesByZK() {
+      return regionServers;
+    }
+
+    @Override
     public boolean replicate(ReplicateContext replicateContext) {
       return false;
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java
new file mode 100644
index 0000000..9ceacee
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import static org.apache.hadoop.hbase.coprocessor.CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.AsyncClusterConnection;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
+import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
+import org.apache.hadoop.hbase.coprocessor.MasterObserver;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
+
+@Category({ ReplicationTests.class, MediumTests.class })
+public class TestReplicationFetchServers extends TestReplicationBase {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestReplicationFetchServers.class);
+
+  private static AtomicBoolean fetchFlag = new AtomicBoolean(false);
+
+  public static class MyObserver implements MasterCoprocessor, MasterObserver {
+
+    @Override
+    public Optional<MasterObserver> getMasterObserver() {
+      return Optional.of(this);
+    }
+
+    @Override
+    public void postListReplicationSinkServers(ObserverContext<MasterCoprocessorEnvironment> ctx) {
+      fetchFlag.set(true);
+    }
+  }
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    CONF2.set(MASTER_COPROCESSOR_CONF_KEY, MyObserver.class.getName());
+    TestReplicationBase.setUpBeforeClass();
+  }
+
+  @Before
+  public void beforeMethod() {
+    fetchFlag.set(false);
+  }
+
+  @Test
+  public void testMasterListReplicationPeerServers() throws IOException, ServiceException {
+    AsyncClusterConnection conn = UTIL2.getAsyncConnection();
+    ServerName master = UTIL2.getAdmin().getMaster();
+    MasterService.BlockingInterface masterStub = MasterService.newBlockingStub(
+        conn.getRpcClient().createBlockingRpcChannel(master, User.getCurrent(), 1000));
+    ListReplicationSinkServersResponse resp = masterStub.listReplicationSinkServers(
+        null, ListReplicationSinkServersRequest.newBuilder().build());
+    List<ServerName> servers = ProtobufUtil.toServerNameList(resp.getServerNameList());
+    assertFalse(servers.isEmpty());
+    assertTrue(fetchFlag.get());
+  }
+
+  @Test
+  public void testPutData() throws IOException {
+    htable1.put(new Put(row).addColumn(famName, famName, row));
+    UTIL2.waitFor(30000L, () -> !htable2.get(new Get(row)).isEmpty());
+    assertTrue(fetchFlag.get());
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestGlobalReplicationThrottler.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestGlobalReplicationThrottler.java
index f528bda..ef6811e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestGlobalReplicationThrottler.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestGlobalReplicationThrottler.java
@@ -118,6 +118,10 @@ public class TestGlobalReplicationThrottler {
 
   @AfterClass
   public static void tearDownAfterClass() throws Exception {
+    Admin admin1 = utility1.getAdmin();
+    admin1.removeReplicationPeer("peer1");
+    admin1.removeReplicationPeer("peer2");
+    admin1.removeReplicationPeer("peer3");
     utility2.shutdownMiniCluster();
     utility1.shutdownMiniCluster();
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRegionReplicaReplicationEndpointNoMaster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRegionReplicaReplicationEndpointNoMaster.java
index ee1ae5f..c676e30 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRegionReplicaReplicationEndpointNoMaster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestRegionReplicaReplicationEndpointNoMaster.java
@@ -256,11 +256,13 @@ public class TestRegionReplicaReplicationEndpointNoMaster {
 
     ReplicationEndpoint.Context context = mock(ReplicationEndpoint.Context.class);
     when(context.getConfiguration()).thenReturn(HTU.getConfiguration());
+    when(context.getLocalConfiguration()).thenReturn(HTU.getConfiguration());
     when(context.getMetrics()).thenReturn(mock(MetricsSource.class));
     when(context.getServer()).thenReturn(rs0);
     when(context.getTableDescriptors()).thenReturn(rs0.getTableDescriptors());
     replicator.init(context);
     replicator.startAsync();
+    HTU.waitFor(30000, replicator::isRunning);
 
     //load some data to primary
     HTU.loadNumericRows(table, f, 0, 1000);

[hbase] 02/12: HBASE-24681 Remove the cache walsById/walsByIdRecoveredQueues from ReplicationSourceManager (#2019)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 8481d547fd14493570e2b32f6b95f0a5c0e15536
Author: Guanghao Zhang <zg...@apache.org>
AuthorDate: Mon Jul 13 17:35:32 2020 +0800

    HBASE-24681 Remove the cache walsById/walsByIdRecoveredQueues from ReplicationSourceManager (#2019)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../regionserver/ReplicationSourceManager.java     | 204 +++++++--------------
 1 file changed, 62 insertions(+), 142 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index ad7c033..db12c00 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -93,30 +93,6 @@ import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFacto
  * <li>No need synchronized on {@link #sources}. {@link #sources} is a ConcurrentHashMap and there
  * is a Lock for peer id in {@link PeerProcedureHandlerImpl}. So there is no race for peer
  * operations.</li>
- * <li>Need synchronized on {@link #walsById}. There are four methods which modify it,
- * {@link #addPeer(String)}, {@link #removePeer(String)},
- * {@link #cleanOldLogs(String, boolean, ReplicationSourceInterface)} and {@link #preLogRoll(Path)}.
- * {@link #walsById} is a ConcurrentHashMap and there is a Lock for peer id in
- * {@link PeerProcedureHandlerImpl}. So there is no race between {@link #addPeer(String)} and
- * {@link #removePeer(String)}. {@link #cleanOldLogs(String, boolean, ReplicationSourceInterface)}
- * is called by {@link ReplicationSourceInterface}. So no race with {@link #addPeer(String)}.
- * {@link #removePeer(String)} will terminate the {@link ReplicationSourceInterface} firstly, then
- * remove the wals from {@link #walsById}. So no race with {@link #removePeer(String)}. The only
- * case need synchronized is {@link #cleanOldLogs(String, boolean, ReplicationSourceInterface)} and
- * {@link #preLogRoll(Path)}.</li>
- * <li>No need synchronized on {@link #walsByIdRecoveredQueues}. There are three methods which
- * modify it, {@link #removePeer(String)} ,
- * {@link #cleanOldLogs(String, boolean, ReplicationSourceInterface)} and
- * {@link ReplicationSourceManager#claimQueue(ServerName, String)}.
- * {@link #cleanOldLogs(String, boolean, ReplicationSourceInterface)} is called by
- * {@link ReplicationSourceInterface}. {@link #removePeer(String)} will terminate the
- * {@link ReplicationSourceInterface} firstly, then remove the wals from
- * {@link #walsByIdRecoveredQueues}. And
- * {@link ReplicationSourceManager#claimQueue(ServerName, String)} will add the wals to
- * {@link #walsByIdRecoveredQueues} firstly, then start up a {@link ReplicationSourceInterface}. So
- * there is no race here. For {@link ReplicationSourceManager#claimQueue(ServerName, String)} and
- * {@link #removePeer(String)}, there is already synchronized on {@link #oldsources}. So no need
- * synchronized on {@link #walsByIdRecoveredQueues}.</li>
  * <li>Need synchronized on {@link #latestPaths} to avoid the new open source miss new log.</li>
  * <li>Need synchronized on {@link #oldsources} to avoid adding recovered source for the
  * to-be-removed peer.</li>
@@ -144,15 +120,6 @@ public class ReplicationSourceManager {
   // All about stopping
   private final Server server;
 
-  // All logs we are currently tracking
-  // Index structure of the map is: queue_id->logPrefix/logGroup->logs
-  // For normal replication source, the peer id is same with the queue id
-  private final ConcurrentMap<String, Map<String, NavigableSet<String>>> walsById;
-  // Logs for recovered sources we are currently tracking
-  // the map is: queue_id->logPrefix/logGroup->logs
-  // For recovered source, the queue id's format is peer_id-servername-*
-  private final ConcurrentMap<String, Map<String, NavigableSet<String>>> walsByIdRecoveredQueues;
-
   private final SyncReplicationPeerMappingManager syncReplicationPeerMappingManager;
 
   private final Configuration conf;
@@ -212,8 +179,6 @@ public class ReplicationSourceManager {
     this.queueStorage = queueStorage;
     this.replicationPeers = replicationPeers;
     this.server = server;
-    this.walsById = new ConcurrentHashMap<>();
-    this.walsByIdRecoveredQueues = new ConcurrentHashMap<>();
     this.oldsources = new ArrayList<>();
     this.conf = conf;
     this.fs = fs;
@@ -322,7 +287,6 @@ public class ReplicationSourceManager {
       // Delete queue from storage and memory and queue id is same with peer id for normal
       // source
       deleteQueue(peerId);
-      this.walsById.remove(peerId);
     }
     ReplicationPeerConfig peerConfig = peer.getPeerConfig();
     if (peerConfig.isSyncReplication()) {
@@ -364,15 +328,10 @@ public class ReplicationSourceManager {
     // synchronized on latestPaths to avoid missing the new log
     synchronized (this.latestPaths) {
       this.sources.put(peerId, src);
-      Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
-      this.walsById.put(peerId, walsByGroup);
       // Add the latest wal to that source's queue
       if (!latestPaths.isEmpty()) {
         for (Map.Entry<String, Path> walPrefixAndPath : latestPaths.entrySet()) {
           Path walPath = walPrefixAndPath.getValue();
-          NavigableSet<String> wals = new TreeSet<>();
-          wals.add(walPath.getName());
-          walsByGroup.put(walPrefixAndPath.getKey(), wals);
           // Abort RS and throw exception to make add peer failed
           abortAndThrowIOExceptionWhenFail(
             () -> this.queueStorage.addWAL(server.getServerName(), peerId, walPath.getName()));
@@ -426,7 +385,10 @@ public class ReplicationSourceManager {
       // map from walsById since later we may fail to delete them from the replication queue
       // storage, and when we retry next time, we can not know the wal files that need to be deleted
       // from the replication queue storage.
-      walsById.get(peerId).forEach((k, v) -> wals.put(k, new TreeSet<>(v)));
+      this.queueStorage.getWALsInQueue(this.server.getServerName(), peerId).forEach(wal -> {
+        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
+        wals.computeIfAbsent(walPrefix, p -> new TreeSet<>()).add(wal);
+      });
     }
     LOG.info("Startup replication source for " + src.getPeerId());
     src.startup();
@@ -435,15 +397,6 @@ public class ReplicationSourceManager {
         queueStorage.removeWAL(server.getServerName(), peerId, wal);
       }
     }
-    synchronized (walsById) {
-      Map<String, NavigableSet<String>> oldWals = walsById.get(peerId);
-      wals.forEach((k, v) -> {
-        NavigableSet<String> walsByGroup = oldWals.get(k);
-        if (walsByGroup != null) {
-          walsByGroup.removeAll(v);
-        }
-      });
-    }
     // synchronized on oldsources to avoid race with NodeFailoverWorker. Since NodeFailoverWorker is
     // a background task, we will delete the file from replication queue storage under the lock to
     // simplify the logic.
@@ -455,7 +408,6 @@ public class ReplicationSourceManager {
           oldSource.terminate(terminateMessage);
           oldSource.getSourceMetrics().clear();
           queueStorage.removeQueue(server.getServerName(), queueId);
-          walsByIdRecoveredQueues.remove(queueId);
           iter.remove();
         }
       }
@@ -468,7 +420,7 @@ public class ReplicationSourceManager {
    * replication queue storage and only to enqueue all logs to the new replication source
    * @param peerId the id of the replication peer
    */
-  public void refreshSources(String peerId) throws IOException {
+  public void refreshSources(String peerId) throws ReplicationException, IOException {
     String terminateMessage = "Peer " + peerId +
       " state or config changed. Will close the previous replication source and open a new one";
     ReplicationPeer peer = replicationPeers.getPeer(peerId);
@@ -481,9 +433,8 @@ public class ReplicationSourceManager {
         // Do not clear metrics
         toRemove.terminate(terminateMessage, null, false);
       }
-      for (NavigableSet<String> walsByGroup : walsById.get(peerId).values()) {
-        walsByGroup.forEach(wal -> src.enqueueLog(new Path(this.logDir, wal)));
-      }
+      this.queueStorage.getWALsInQueue(this.server.getServerName(), peerId)
+        .forEach(wal -> src.enqueueLog(new Path(this.logDir, wal)));
     }
     LOG.info("Startup replication source for " + src.getPeerId());
     src.startup();
@@ -504,9 +455,8 @@ public class ReplicationSourceManager {
       for (String queueId : previousQueueIds) {
         ReplicationSourceInterface recoveredReplicationSource = createSource(queueId, peer);
         this.oldsources.add(recoveredReplicationSource);
-        for (SortedSet<String> walsByGroup : walsByIdRecoveredQueues.get(queueId).values()) {
-          walsByGroup.forEach(wal -> recoveredReplicationSource.enqueueLog(new Path(wal)));
-        }
+        this.queueStorage.getWALsInQueue(this.server.getServerName(), queueId)
+          .forEach(wal -> recoveredReplicationSource.enqueueLog(new Path(wal)));
         toStartup.add(recoveredReplicationSource);
       }
     }
@@ -526,7 +476,6 @@ public class ReplicationSourceManager {
     LOG.info("Done with the recovered queue {}", src.getQueueId());
     // Delete queue from storage and memory
     deleteQueue(src.getQueueId());
-    this.walsByIdRecoveredQueues.remove(src.getQueueId());
     return true;
   }
 
@@ -549,8 +498,6 @@ public class ReplicationSourceManager {
     this.sources.remove(src.getPeerId());
     // Delete queue from storage and memory
     deleteQueue(src.getQueueId());
-    this.walsById.remove(src.getQueueId());
-
   }
 
   /**
@@ -635,42 +582,19 @@ public class ReplicationSourceManager {
    * @param inclusive whether we should also remove the given log file
    * @param source the replication source
    */
-  void cleanOldLogs(String log, boolean inclusive, ReplicationSourceInterface source) {
-    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(log);
-    if (source.isRecovered()) {
-      NavigableSet<String> wals = walsByIdRecoveredQueues.get(source.getQueueId()).get(logPrefix);
-      if (wals != null) {
-        NavigableSet<String> walsToRemove = wals.headSet(log, inclusive);
-        if (walsToRemove.isEmpty()) {
-          return;
-        }
-        cleanOldLogs(walsToRemove, source);
-        walsToRemove.clear();
-      }
-    } else {
-      NavigableSet<String> wals;
-      NavigableSet<String> walsToRemove;
-      // synchronized on walsById to avoid race with preLogRoll
-      synchronized (this.walsById) {
-        wals = walsById.get(source.getQueueId()).get(logPrefix);
-        if (wals == null) {
-          return;
-        }
-        walsToRemove = wals.headSet(log, inclusive);
-        if (walsToRemove.isEmpty()) {
-          return;
-        }
-        walsToRemove = new TreeSet<>(walsToRemove);
-      }
-      // cleanOldLogs may spend some time, especially for sync replication where we may want to
-      // remove remote wals as the remote cluster may have already been down, so we do it outside
-      // the lock to avoid block preLogRoll
-      cleanOldLogs(walsToRemove, source);
-      // now let's remove the files in the set
-      synchronized (this.walsById) {
-        wals.removeAll(walsToRemove);
-      }
+  void cleanOldLogs(String log, boolean inclusive,
+    ReplicationSourceInterface source) {
+    NavigableSet<String> walsToRemove;
+    synchronized (this.latestPaths) {
+      walsToRemove = getWalsToRemove(source.getQueueId(), log, inclusive);
+    }
+    if (walsToRemove.isEmpty()) {
+      return;
     }
+    // cleanOldLogs may spend some time, especially for sync replication where we may want to
+    // remove remote wals as the remote cluster may have already been down, so we do it outside
+    // the lock to avoid block preLogRoll
+    cleanOldLogs(walsToRemove, source);
   }
 
   private void removeRemoteWALs(String peerId, String remoteWALDir, Collection<String> wals)
@@ -750,37 +674,6 @@ public class ReplicationSourceManager {
         abortAndThrowIOExceptionWhenFail(
           () -> this.queueStorage.addWAL(server.getServerName(), source.getQueueId(), logName));
       }
-
-      // synchronized on walsById to avoid race with cleanOldLogs
-      synchronized (this.walsById) {
-        // Update walsById map
-        for (Map.Entry<String, Map<String, NavigableSet<String>>> entry : this.walsById
-          .entrySet()) {
-          String peerId = entry.getKey();
-          Map<String, NavigableSet<String>> walsByPrefix = entry.getValue();
-          boolean existingPrefix = false;
-          for (Map.Entry<String, NavigableSet<String>> walsEntry : walsByPrefix.entrySet()) {
-            SortedSet<String> wals = walsEntry.getValue();
-            if (this.sources.isEmpty()) {
-              // If there's no slaves, don't need to keep the old wals since
-              // we only consider the last one when a new slave comes in
-              wals.clear();
-            }
-            if (logPrefix.equals(walsEntry.getKey())) {
-              wals.add(logName);
-              existingPrefix = true;
-            }
-          }
-          if (!existingPrefix) {
-            // The new log belongs to a new group, add it into this peer
-            LOG.debug("Start tracking logs for wal group {} for peer {}", logPrefix, peerId);
-            NavigableSet<String> wals = new TreeSet<>();
-            wals.add(logName);
-            walsByPrefix.put(logPrefix, wals);
-          }
-        }
-      }
-
       // Add to latestPaths
       latestPaths.put(logPrefix, newLog);
     }
@@ -887,18 +780,6 @@ public class ReplicationSourceManager {
           return;
         }
       }
-      // track sources in walsByIdRecoveredQueues
-      Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
-      walsByIdRecoveredQueues.put(queueId, walsByGroup);
-      for (String wal : walsSet) {
-        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
-        NavigableSet<String> wals = walsByGroup.get(walPrefix);
-        if (wals == null) {
-          wals = new TreeSet<>();
-          walsByGroup.put(walPrefix, wals);
-        }
-        wals.add(wal);
-      }
       oldsources.add(src);
       LOG.info("Added source for recovered queue {}", src.getQueueId());
       for (String wal : walsSet) {
@@ -926,7 +807,18 @@ public class ReplicationSourceManager {
    * Get a copy of the wals of the normal sources on this rs
    * @return a sorted set of wal names
    */
-  public Map<String, Map<String, NavigableSet<String>>> getWALs() {
+  public Map<String, Map<String, NavigableSet<String>>> getWALs()
+    throws ReplicationException {
+    Map<String, Map<String, NavigableSet<String>>> walsById = new HashMap<>();
+    for (ReplicationSourceInterface source : sources.values()) {
+      String queueId = source.getQueueId();
+      Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
+      walsById.put(queueId, walsByGroup);
+      for (String wal : this.queueStorage.getWALsInQueue(this.server.getServerName(), queueId)) {
+        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
+        walsByGroup.computeIfAbsent(walPrefix, p -> new TreeSet<>()).add(wal);
+      }
+    }
     return Collections.unmodifiableMap(walsById);
   }
 
@@ -934,7 +826,18 @@ public class ReplicationSourceManager {
    * Get a copy of the wals of the recovered sources on this rs
    * @return a sorted set of wal names
    */
-  Map<String, Map<String, NavigableSet<String>>> getWalsByIdRecoveredQueues() {
+  Map<String, Map<String, NavigableSet<String>>> getWalsByIdRecoveredQueues()
+    throws ReplicationException {
+    Map<String, Map<String, NavigableSet<String>>> walsByIdRecoveredQueues = new HashMap<>();
+    for (ReplicationSourceInterface source : oldsources) {
+      String queueId = source.getQueueId();
+      Map<String, NavigableSet<String>> walsByGroup = new HashMap<>();
+      walsByIdRecoveredQueues.put(queueId, walsByGroup);
+      for (String wal : this.queueStorage.getWALsInQueue(this.server.getServerName(), queueId)) {
+        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
+        walsByGroup.computeIfAbsent(walPrefix, p -> new TreeSet<>()).add(wal);
+      }
+    }
     return Collections.unmodifiableMap(walsByIdRecoveredQueues);
   }
 
@@ -1165,4 +1068,21 @@ public class ReplicationSourceManager {
   ReplicationQueueStorage getQueueStorage() {
     return queueStorage;
   }
+
+  private NavigableSet<String> getWalsToRemove(String queueId, String log, boolean inclusive) {
+    NavigableSet<String> walsToRemove = new TreeSet<>();
+    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(log);
+    try {
+      this.queueStorage.getWALsInQueue(this.server.getServerName(), queueId).forEach(wal -> {
+        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
+        if (walPrefix.equals(logPrefix)) {
+          walsToRemove.add(wal);
+        }
+      });
+    } catch (ReplicationException e) {
+      // Just log the exception here, as the recovered replication source will try to cleanup again.
+      LOG.warn("Failed to read wals in queue {}", queueId, e);
+    }
+    return walsToRemove.headSet(log, inclusive);
+  }
 }

[hbase] 09/12: HBASE-25071 ReplicationServer support start ReplicationSource internal (#2452)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit c8c85f4f4205ebcd6c7fbecf55072386c3bb842b
Author: Guanghao Zhang <zg...@apache.org>
AuthorDate: Mon Nov 9 11:46:02 2020 +0800

    HBASE-25071 ReplicationServer support start ReplicationSource internal (#2452)
    
    Signed-off-by: XinSun <dd...@gmail.com>
---
 .../server/replication/ReplicationServer.proto     |  14 +-
 .../replication/ZKReplicationQueueStorage.java     |   4 +-
 .../replication/ZKReplicationStorageBase.java      |   4 +
 .../hadoop/hbase/master/MasterRpcServices.java     |   2 +-
 .../hadoop/hbase/regionserver/RSRpcServices.java   |   2 +-
 .../replication/HBaseReplicationEndpoint.java      |  14 +-
 .../hbase/replication/HReplicationServer.java      | 175 ++++++++++++++++++---
 .../replication/ReplicationServerRpcServices.java  |  15 ++
 .../regionserver/RecoveredReplicationSource.java   |   9 +-
 .../regionserver/ReplicationSource.java            |  54 ++++++-
 .../regionserver/ReplicationSourceFactory.java     |   2 +-
 .../regionserver/ReplicationSourceInterface.java   |   6 +-
 .../regionserver/ReplicationSourceManager.java     |   9 +-
 .../hbase/replication/ReplicationSourceDummy.java  |   5 +-
 .../replication/TestReplicationFetchServers.java   |  43 +++--
 ...nServer.java => TestReplicationServerSink.java} |  25 +--
 .../replication/TestReplicationServerSource.java   |  69 ++++++++
 .../regionserver/TestReplicationSource.java        |  20 +--
 .../regionserver/TestReplicationSourceManager.java |  18 ++-
 19 files changed, 400 insertions(+), 90 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto b/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto
index ed334c4..925aed4 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto
@@ -24,9 +24,21 @@ option java_generic_services = true;
 option java_generate_equals_and_hash = true;
 option optimize_for = SPEED;
 
+import "HBase.proto";
 import "server/region/Admin.proto";
 
+message StartReplicationSourceRequest {
+  required ServerName server_name = 1;
+  required string queue_id = 2;
+}
+
+message StartReplicationSourceResponse {
+}
+
 service ReplicationServerService {
   rpc ReplicateWALEntry(ReplicateWALEntryRequest)
     returns(ReplicateWALEntryResponse);
-}
\ No newline at end of file
+
+  rpc StartReplicationSource(StartReplicationSourceRequest)
+    returns(StartReplicationSourceResponse);
+}
diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationQueueStorage.java b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationQueueStorage.java
index 5c480ba..08ac142 100644
--- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationQueueStorage.java
+++ b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationQueueStorage.java
@@ -79,7 +79,7 @@ import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUti
  * </pre>
  */
 @InterfaceAudience.Private
-class ZKReplicationQueueStorage extends ZKReplicationStorageBase
+public class ZKReplicationQueueStorage extends ZKReplicationStorageBase
     implements ReplicationQueueStorage {
 
   private static final Logger LOG = LoggerFactory.getLogger(ZKReplicationQueueStorage.class);
@@ -121,7 +121,7 @@ class ZKReplicationQueueStorage extends ZKReplicationStorageBase
     return ZNodePaths.joinZNode(queuesZNode, serverName.getServerName());
   }
 
-  private String getQueueNode(ServerName serverName, String queueId) {
+  public String getQueueNode(ServerName serverName, String queueId) {
     return ZNodePaths.joinZNode(getRsNode(serverName), queueId);
   }
 
diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationStorageBase.java b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationStorageBase.java
index 596167f..a239bf8 100644
--- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationStorageBase.java
+++ b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ZKReplicationStorageBase.java
@@ -74,4 +74,8 @@ public abstract class ZKReplicationStorageBase {
       throw new RuntimeException(e);
     }
   }
+
+  public ZKWatcher getZookeeper() {
+    return this.zookeeper;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index c677458..c17d699 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -3475,7 +3475,7 @@ public class MasterRpcServices extends RSRpcServices implements
       if (master.getMasterCoprocessorHost() != null) {
         master.getMasterCoprocessorHost().preListReplicationSinkServers();
       }
-      builder.addAllServerName(master.listReplicationSinkServers().stream()
+      builder.addAllServerName(master.getReplicationServerManager().getOnlineServersList().stream()
         .map(ProtobufUtil::toServerName).collect(Collectors.toList()));
       if (master.getMasterCoprocessorHost() != null) {
         master.getMasterCoprocessorHost().postListReplicationSinkServers();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 72fea23..91bf9cb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -273,7 +273,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDe
 @SuppressWarnings("deprecation")
 public class RSRpcServices implements HBaseRPCErrorHandler,
     AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction,
-    ConfigurationObserver, ReplicationServerService.BlockingInterface {
+    ConfigurationObserver {
   private static final Logger LOG = LoggerFactory.getLogger(RSRpcServices.class);
 
   /** RPC scheduler to use for the region server. */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
index 115df76..d17bb7f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
@@ -315,6 +315,10 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
       if (!useZk || ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
         useZk = false;
         slaveAddresses = fetchSlavesAddresses();
+        if (slaveAddresses.isEmpty()) {
+          LOG.warn("No sinks available at peer. Try fetch sinks by using zk.");
+          useZk = true;
+        }
       } else {
         useZk = true;
       }
@@ -322,13 +326,15 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
       LOG.warn("Peer {} try to fetch servers by admin failed. Using zk impl.", ctx.getPeerId(), t);
       useZk = true;
     }
+
     if (useZk) {
       slaveAddresses = fetchSlavesAddressesByZK();
     }
 
     if (slaveAddresses.isEmpty()) {
-      LOG.warn("No sinks available at peer. Will not be able to replicate");
+      LOG.warn("No sinks available at peer. Will not be able to replicate.");
     }
+
     Collections.shuffle(slaveAddresses, ThreadLocalRandom.current());
     int numSinks = (int) Math.ceil(slaveAddresses.size() * ratio);
     synchronized (this) {
@@ -362,10 +368,10 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   private SinkPeer createSinkPeer(ServerName serverName) throws IOException {
-    if (ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
-      return new ReplicationServerSinkPeer(serverName, conn.getReplicationServerAdmin(serverName));
-    } else {
+    if (fetchServersUseZk) {
       return new RegionServerSinkPeer(serverName, conn.getRegionServerAdmin(serverName));
+    } else {
+      return new ReplicationServerSinkPeer(serverName, conn.getReplicationServerAdmin(serverName));
     }
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
index e679a98..2d0336d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -20,10 +20,19 @@ package org.apache.hadoop.hbase.replication;
 import java.io.IOException;
 import java.lang.management.MemoryUsage;
 import java.net.InetSocketAddress;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.OptionalLong;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ChoreService;
+import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
 import org.apache.hadoop.hbase.CoordinatedStateManager;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HConstants;
@@ -33,17 +42,30 @@ import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
 import org.apache.hadoop.hbase.ipc.RpcClient;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.log.HBaseMarkers;
 import org.apache.hadoop.hbase.regionserver.ReplicationService;
 import org.apache.hadoop.hbase.regionserver.ReplicationSinkService;
+import org.apache.hadoop.hbase.replication.regionserver.MetricsReplicationGlobalSourceSource;
+import org.apache.hadoop.hbase.replication.regionserver.MetricsReplicationSourceFactory;
+import org.apache.hadoop.hbase.replication.regionserver.MetricsSource;
+import org.apache.hadoop.hbase.replication.regionserver.RecoveredReplicationSource;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceFactory;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
+import org.apache.hadoop.hbase.security.SecurityConstants;
+import org.apache.hadoop.hbase.security.Superusers;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.Sleeper;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
+import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -65,7 +87,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerStatus
  */
 @InterfaceAudience.Private
 @SuppressWarnings({ "deprecation"})
-public class HReplicationServer extends Thread implements Server {
+public class HReplicationServer extends Thread implements Server, ReplicationSourceController  {
 
   private static final Logger LOG = LoggerFactory.getLogger(HReplicationServer.class);
 
@@ -75,7 +97,7 @@ public class HReplicationServer extends Thread implements Server {
   /**
    * This servers start code.
    */
-  protected final long startCode;
+  private final long startCode;
 
   private volatile boolean stopped = false;
 
@@ -84,7 +106,11 @@ public class HReplicationServer extends Thread implements Server {
   private AtomicBoolean abortRequested;
 
   // flag set after we're done setting up server threads
-  final AtomicBoolean online = new AtomicBoolean(false);
+  private final AtomicBoolean online = new AtomicBoolean(false);
+
+  private final int msgInterval;
+  // A sleeper that sleeps for msgInterval.
+  private final Sleeper sleeper;
 
   /**
    * The server name the Master sees us as.  Its made from the hostname the
@@ -93,18 +119,22 @@ public class HReplicationServer extends Thread implements Server {
    */
   private ServerName serverName;
 
-  protected final Configuration conf;
+  private final Configuration conf;
 
-  private ReplicationSinkService replicationSinkService;
+  // zookeeper connection and watcher
+  private final ZKWatcher zooKeeper;
 
-  final int msgInterval;
-  // A sleeper that sleeps for msgInterval.
-  protected final Sleeper sleeper;
+  private final UUID clusterId;
 
   private final int shortOperationTimeout;
 
-  // zookeeper connection and watcher
-  protected final ZKWatcher zooKeeper;
+  private HFileSystem walFs;
+  private Path walRootDir;
+
+  /**
+   * ChoreService used to schedule tasks that we want to run periodically
+   */
+  private ChoreService choreService;
 
   // master address tracker
   private final MasterAddressTracker masterAddressTracker;
@@ -112,11 +142,23 @@ public class HReplicationServer extends Thread implements Server {
   /**
    * The asynchronous cluster connection to be shared by services.
    */
-  protected AsyncClusterConnection asyncClusterConnection;
+  private AsyncClusterConnection asyncClusterConnection;
 
   private UserProvider userProvider;
 
-  protected final ReplicationServerRpcServices rpcServices;
+  final ReplicationServerRpcServices rpcServices;
+
+  // Total buffer size on this RegionServer for holding batched edits to be shipped.
+  private final long totalBufferLimit;
+  private AtomicLong totalBufferUsed = new AtomicLong();
+
+  private final MetricsReplicationGlobalSourceSource globalMetrics;
+  private final Map<String, MetricsSource> sourceMetrics = new HashMap<>();
+  private final ConcurrentMap<String, ReplicationSourceInterface> sources =
+    new ConcurrentHashMap<>();
+
+  private final ReplicationQueueStorage queueStorage;
+  private final ReplicationPeers replicationPeers;
 
   // Stub to do region server status calls against the master.
   private volatile ReplicationServerStatusService.BlockingInterface rssStub;
@@ -124,12 +166,9 @@ public class HReplicationServer extends Thread implements Server {
   // RPC client. Used to make the stub above that does region server status checking.
   private RpcClient rpcClient;
 
-  /**
-   * ChoreService used to schedule tasks that we want to run periodically
-   */
-  private ChoreService choreService;
+  private ReplicationSinkService replicationSinkService;
 
-  public HReplicationServer(final Configuration conf) throws IOException {
+  public HReplicationServer(final Configuration conf) throws Exception {
     try {
       this.startCode = System.currentTimeMillis();
       this.conf = conf;
@@ -142,12 +181,29 @@ public class HReplicationServer extends Thread implements Server {
       serverName = ServerName.valueOf(hostName, this.rpcServices.isa.getPort(), this.startCode);
 
       this.userProvider = UserProvider.instantiate(conf);
+      // login the zookeeper client principal (if using security)
+      ZKUtil.loginClient(this.conf, HConstants.ZK_CLIENT_KEYTAB_FILE,
+        HConstants.ZK_CLIENT_KERBEROS_PRINCIPAL, hostName);
+      // login the server principal (if using secure Hadoop)
+      this.userProvider.login(SecurityConstants.REGIONSERVER_KRB_KEYTAB_FILE,
+        SecurityConstants.REGIONSERVER_KRB_PRINCIPAL, hostName);
+      // init superusers and add the server principal (if using security)
+      // or process owner as default super user.
+      Superusers.initialize(conf);
 
       this.msgInterval = conf.getInt("hbase.replicationserver.msginterval", 3 * 1000);
       this.sleeper = new Sleeper(this.msgInterval, this);
 
       this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
           HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
+      this.totalBufferLimit = conf.getLong(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY,
+        HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
+      this.globalMetrics =
+        CompatibilitySingletonFactory.getInstance(MetricsReplicationSourceFactory.class)
+          .getGlobalSource();
+
+      initializeFileSystem();
+      this.choreService = new ChoreService(getName(), true); // NOTE(review): choreService is re-initialized again below after rpcServices.start(); drop one of the two inits so the first ChoreService is not leaked
 
       // Some unit tests don't need a cluster, so no zookeeper at all
       if (!conf.getBoolean("hbase.testing.nocluster", false)) {
@@ -160,6 +216,12 @@ public class HReplicationServer extends Thread implements Server {
         zooKeeper = null;
         masterAddressTracker = null;
       }
+
+      this.queueStorage = ReplicationStorageFactory.getReplicationQueueStorage(zooKeeper, conf); // NOTE(review): zooKeeper is null when hbase.testing.nocluster=true (see branch above) — this and the following ZK-based calls would NPE; guard or move them into the cluster branch
+      this.replicationPeers =
+        ReplicationFactory.getReplicationPeers(zooKeeper, this.conf);
+      this.replicationPeers.init();
+      this.clusterId = ZKClusterId.getUUIDForCluster(zooKeeper);
       this.rpcServices.start(zooKeeper);
       this.choreService = new ChoreService(getName(), true);
     } catch (Throwable t) {
@@ -170,6 +232,15 @@ public class HReplicationServer extends Thread implements Server {
     }
   }
 
+  private void initializeFileSystem() throws IOException {
+    // Get the fs instance used by this server. If HBase-level checksum verification is
+    // enabled, automatically switch off HDFS checksum verification.
+    boolean useHBaseChecksum = conf.getBoolean(HConstants.HBASE_CHECKSUM_VERIFICATION, true);
+    CommonFSUtils.setFsDefault(this.conf, CommonFSUtils.getWALRootDir(this.conf));
+    this.walFs = new HFileSystem(this.conf, useHBaseChecksum);
+    this.walRootDir = CommonFSUtils.getWALRootDir(this.conf);
+  }
+
   public String getProcessName() {
     return REPLICATION_SERVER;
   }
@@ -289,6 +360,9 @@ public class HReplicationServer extends Thread implements Server {
     if (this.replicationSinkService != null) {
       this.replicationSinkService.stopReplicationService();
     }
+    if (this.choreService != null) {
+      this.choreService.shutdown();
+    }
   }
 
   @Override
@@ -328,7 +402,7 @@ public class HReplicationServer extends Thread implements Server {
 
   @Override
   public ChoreService getChoreService() {
-    return this.choreService;
+    return choreService;
   }
 
   @Override
@@ -592,4 +666,69 @@ public class HReplicationServer extends Thread implements Server {
     }
     return interrupted;
   }
+
+  @Override
+  public long getTotalBufferLimit() {
+    return this.totalBufferLimit;
+  }
+
+  @Override
+  public AtomicLong getTotalBufferUsed() {
+    return this.totalBufferUsed;
+  }
+
+  @Override
+  public MetricsReplicationGlobalSourceSource getGlobalMetrics() {
+    return this.globalMetrics;
+  }
+
+  @Override
+  public void finishRecoveredSource(RecoveredReplicationSource src) {
+    this.sources.remove(src.getQueueId());
+    this.sourceMetrics.remove(src.getQueueId());
+    deleteQueue(src.getQueueId());
+    LOG.info("Finished recovering queue {} with the following stats: {}", src.getQueueId(),
+      src.getStats());
+  }
+
+  public void startReplicationSource(ServerName producer, String queueId)
+    throws IOException, ReplicationException {
+    ReplicationQueueInfo replicationQueueInfo = new ReplicationQueueInfo(queueId);
+    String peerId = replicationQueueInfo.getPeerId();
+    this.replicationPeers.addPeer(peerId);
+    Path walDir =
+      new Path(walRootDir, AbstractFSWALProvider.getWALDirectoryName(producer.toString()));
+    MetricsSource metrics = new MetricsSource(queueId);
+
+    ReplicationSourceInterface src = ReplicationSourceFactory.create(conf, queueId);
+    // init replication source
+    src.init(conf, walFs, walDir, this, queueStorage, replicationPeers.getPeer(peerId), this,
+      producer, queueId, clusterId, p -> OptionalLong.empty(), metrics);
+    queueStorage.getWALsInQueue(producer, queueId)
+      .forEach(walName -> src.enqueueLog(new Path(walDir, walName)));
+    src.startup();
+    sources.put(queueId, src);
+    sourceMetrics.put(queueId, metrics);
+  }
+
+  /**
+   * Delete a complete queue of WALs associated with a replication source.
+   * @param queueId the id of replication queue to delete
+   */
+  private void deleteQueue(String queueId) {
+    abortWhenFail(() -> this.queueStorage.removeQueue(getServerName(), queueId));
+  }
+
+  @FunctionalInterface
+  private interface ReplicationQueueOperation {
+    void exec() throws ReplicationException;
+  }
+
+  private void abortWhenFail(ReplicationQueueOperation op) {
+    try {
+      op.exec();
+    } catch (ReplicationException e) {
+      abort("Failed to operate on replication queue", e);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
index 15d4f8c..b8c3884 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
@@ -56,11 +56,14 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.ReplicationServerService;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.StartReplicationSourceRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.StartReplicationSourceResponse;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
 import org.apache.hbase.thirdparty.com.google.protobuf.Message;
@@ -321,4 +324,16 @@ public class ReplicationServerRpcServices implements HBaseRPCErrorHandler,
       throw new ServiceException(ie);
     }
   }
+
+  @Override
+  public StartReplicationSourceResponse startReplicationSource(RpcController controller,
+    StartReplicationSourceRequest request) throws ServiceException {
+    try {
+      replicationServer.startReplicationSource(ProtobufUtil.toServerName(request.getServerName()),
+        request.getQueueId());
+      return StartReplicationSourceResponse.newBuilder().build();
+    } catch (Exception e) {
+      throw new ServiceException(e);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
index 7cb159e..147556f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
@@ -51,10 +51,11 @@ public class RecoveredReplicationSource extends ReplicationSource {
   @Override
   public void init(Configuration conf, FileSystem fs, Path walDir,
     ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
-    ReplicationPeer replicationPeer, Server server, String peerClusterZnode, UUID clusterId,
-    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
-    super.init(conf, fs, walDir, overallController, queueStorage, replicationPeer, server,
-      peerClusterZnode, clusterId, walFileLengthProvider, metrics);
+    ReplicationPeer replicationPeer, Server server, ServerName producer, String queueId,
+    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+    throws IOException {
+    super.init(conf, fs, walDir, overallController, queueStorage, replicationPeer, server, producer,
+      queueId, clusterId, walFileLengthProvider, metrics);
     this.actualPeerId = this.replicationQueueInfo.getPeerId();
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index 27f2ce7..0d9ee4b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -62,10 +62,13 @@ import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
+import org.apache.hadoop.hbase.replication.ZKReplicationQueueStorage;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
+import org.apache.hadoop.hbase.zookeeper.ZKListener;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
@@ -149,6 +152,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
   private int waitOnEndpointSeconds = -1;
 
   private Thread initThread;
+  private Thread fetchWALsThread;
 
   /**
    * WALs to replicate.
@@ -186,8 +190,9 @@ public class ReplicationSource implements ReplicationSourceInterface {
   @Override
   public void init(Configuration conf, FileSystem fs, Path walDir,
     ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
-    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
-    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
+    ReplicationPeer replicationPeer, Server server, ServerName producer, String queueId,
+    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+    throws IOException {
     this.server = server;
     this.conf = HBaseConfiguration.create(conf);
     this.walDir = walDir;
@@ -219,6 +224,19 @@ public class ReplicationSource implements ReplicationSourceInterface {
     this.abortOnError = this.conf.getBoolean("replication.source.regionserver.abort",
       true);
 
+    if (conf.getBoolean(HConstants.REPLICATION_OFFLOAD_ENABLE_KEY,
+      HConstants.REPLICATION_OFFLOAD_ENABLE_DEFAULT)) {
+      if (queueStorage instanceof ZKReplicationQueueStorage) {
+        ZKReplicationQueueStorage zkQueueStorage = (ZKReplicationQueueStorage) queueStorage;
+        zkQueueStorage.getZookeeper().registerListener(
+          new ReplicationQueueListener(this, zkQueueStorage, producer, queueId, walDir));
+        LOG.info("Register a ZKListener to track the WALs from {}'s replication queue, queueId={}",
+          producer, queueId);
+      } else {
+        throw new UnsupportedOperationException(
+          "hbase.replication.offload.enabled=true only support ZKReplicationQueueStorage");
+      }
+    }
     LOG.info("queueId={}, ReplicationSource: {}, currentBandwidth={}", queueId,
       replicationPeer.getId(), this.currentBandwidth);
   }
@@ -928,4 +946,36 @@ public class ReplicationSource implements ReplicationSourceInterface {
       server.abort("Failed to operate on replication queue", e);
     }
   }
+
+  /**
+   * Tracks changes to the WALs in the replication queue.
+   */
+  public static class ReplicationQueueListener extends ZKListener {
+
+    private final ReplicationSource source;
+    private final String queueNode;
+    private final Path walDir;
+
+    public ReplicationQueueListener(ReplicationSource source,
+      ZKReplicationQueueStorage zkQueueStorage, ServerName producer, String queueId, Path walDir) {
+      super(zkQueueStorage.getZookeeper());
+      this.source = source;
+      this.queueNode = zkQueueStorage.getQueueNode(producer, queueId);
+      this.walDir = walDir;
+    }
+
+    @Override
+    public synchronized void nodeChildrenChanged(String path) {
+      if (path.equals(queueNode)) {
+        LOG.info("Detected change to the WALs in the replication queue {}", queueNode);
+        try {
+          ZKUtil.listChildrenNoWatch(watcher, queueNode).forEach(walName -> {
+            source.enqueueLog(new Path(walDir, walName));
+          });
+        } catch (KeeperException e) {
+          LOG.warn("Failed to read WALs in the replication queue {}", queueNode, e);
+        }
+      }
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceFactory.java
index 8863f14..56c8ee4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceFactory.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceFactory.java
@@ -35,7 +35,7 @@ public final class ReplicationSourceFactory {
 
   private ReplicationSourceFactory() {}
 
-  static ReplicationSourceInterface create(Configuration conf, String queueId) {
+  public static ReplicationSourceInterface create(Configuration conf, String queueId) {
     ReplicationQueueInfo replicationQueueInfo = new ReplicationQueueInfo(queueId);
     boolean isQueueRecovered = replicationQueueInfo.isQueueRecovered();
     ReplicationSourceInterface src;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
index 296bd27..461276e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
@@ -50,6 +50,7 @@ public interface ReplicationSourceInterface {
    * @param queueStorage the replication queue storage
    * @param replicationPeer the replication peer
    * @param server the server which start and run this replication source
+   * @param producer the name of the region server that produces WALs for this replication queue
    * @param queueId the id of our replication queue
    * @param clusterId unique UUID for the cluster
    * @param walFileLengthProvider used to get the WAL length
@@ -57,8 +58,9 @@ public interface ReplicationSourceInterface {
    */
   void init(Configuration conf, FileSystem fs, Path walDir,
     ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
-    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
-    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException;
+    ReplicationPeer replicationPeer, Server server, ServerName producer, String queueId,
+    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+    throws IOException;
 
   /**
    * Add a log to the list of logs to replicate
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index b6cb087..3dc2d12 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -304,8 +304,8 @@ public class ReplicationSourceManager implements ReplicationSourceController {
     WALFileLengthProvider walFileLengthProvider =
       this.walFactory.getWALProvider() != null?
         this.walFactory.getWALProvider().getWALFileLengthProvider() : p -> OptionalLong.empty();
-    src.init(conf, fs, logDir, this, queueStorage, replicationPeer, server, queueId, clusterId,
-      walFileLengthProvider, new MetricsSource(queueId));
+    src.init(conf, fs, logDir, this, queueStorage, replicationPeer, server, server.getServerName(),
+      queueId, clusterId, walFileLengthProvider, new MetricsSource(queueId));
     return src;
   }
 
@@ -925,8 +925,9 @@ public class ReplicationSourceManager implements ReplicationSourceController {
     CatalogReplicationSourcePeer peer = new CatalogReplicationSourcePeer(this.conf,
       this.clusterId.toString());
     final ReplicationSourceInterface crs = new CatalogReplicationSource();
-    crs.init(conf, fs, logDir, this, new NoopReplicationQueueStorage(), peer, server, peer.getId(),
-      clusterId, walProvider.getWALFileLengthProvider(), new MetricsSource(peer.getId()));
+    crs.init(conf, fs, logDir, this, new NoopReplicationQueueStorage(), peer, server,
+      server.getServerName(), peer.getId(), clusterId, walProvider.getWALFileLengthProvider(),
+      new MetricsSource(peer.getId()));
     // Add listener on the provider so we can pick up the WAL to replicate on roll.
     WALActionsListener listener = new WALActionsListener() {
       @Override public void postLogRoll(Path oldPath, Path newPath) throws IOException {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
index 8a32e94..8f28dee 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
@@ -48,8 +48,9 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
   @Override
   public void init(Configuration conf, FileSystem fs, Path walDir,
     ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
-    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
-    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
+    ReplicationPeer replicationPeer, Server server, ServerName producer, String queueId,
+    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+    throws IOException {
     this.queueId = queueId;
     this.metrics = metrics;
     this.walFileLengthProvider = walFileLengthProvider;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java
index 9ceacee..db4152e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationFetchServers.java
@@ -18,10 +18,10 @@
 package org.apache.hadoop.hbase.replication;
 
 import static org.apache.hadoop.hbase.coprocessor.CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -38,13 +38,14 @@ import org.apache.hadoop.hbase.coprocessor.ObserverContext;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
-
-import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ListReplicationSinkServersRequest;
@@ -53,11 +54,14 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterServ
 
 @Category({ ReplicationTests.class, MediumTests.class })
 public class TestReplicationFetchServers extends TestReplicationBase {
+  private static final Logger LOG = LoggerFactory.getLogger(TestReplicationFetchServers.class);
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestReplicationFetchServers.class);
 
+  private static HReplicationServer replicationServer;
+
   private static AtomicBoolean fetchFlag = new AtomicBoolean(false);
 
   public static class MyObserver implements MasterCoprocessor, MasterObserver {
@@ -77,6 +81,17 @@ public class TestReplicationFetchServers extends TestReplicationBase {
   public static void setUpBeforeClass() throws Exception {
     CONF2.set(MASTER_COPROCESSOR_CONF_KEY, MyObserver.class.getName());
     TestReplicationBase.setUpBeforeClass();
+    replicationServer = new HReplicationServer(CONF2);
+    replicationServer.start();
+    UTIL2.waitFor(60000, () -> replicationServer.isOnline());
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TestReplicationBase.tearDownAfterClass();
+    if (!replicationServer.isStopped()) {
+      replicationServer.stop("test");
+    }
   }
 
   @Before
@@ -85,15 +100,23 @@ public class TestReplicationFetchServers extends TestReplicationBase {
   }
 
   @Test
-  public void testMasterListReplicationPeerServers() throws IOException, ServiceException {
+  public void testMasterListReplicationPeerServers() throws IOException {
     AsyncClusterConnection conn = UTIL2.getAsyncConnection();
     ServerName master = UTIL2.getAdmin().getMaster();
-    MasterService.BlockingInterface masterStub = MasterService.newBlockingStub(
-        conn.getRpcClient().createBlockingRpcChannel(master, User.getCurrent(), 1000));
-    ListReplicationSinkServersResponse resp = masterStub.listReplicationSinkServers(
-        null, ListReplicationSinkServersRequest.newBuilder().build());
-    List<ServerName> servers = ProtobufUtil.toServerNameList(resp.getServerNameList());
-    assertFalse(servers.isEmpty());
+    // Wait for the replication server report to master
+    UTIL2.waitFor(60000, () -> {
+      List<ServerName> servers = new ArrayList<>();
+      try {
+        MasterService.BlockingInterface masterStub = MasterService.newBlockingStub(
+          conn.getRpcClient().createBlockingRpcChannel(master, User.getCurrent(), 1000));
+        ListReplicationSinkServersResponse resp = masterStub.listReplicationSinkServers(
+          null, ListReplicationSinkServersRequest.newBuilder().build());
+        servers = ProtobufUtil.toServerNameList(resp.getServerNameList());
+      } catch (Exception e) {
+        LOG.debug("Failed to list replication servers", e);
+      }
+      return servers.size() == 1;
+    });
     assertTrue(fetchFlag.get());
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
similarity index 89%
rename from hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
rename to hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
index 30660c6..d97667b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
@@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.ReplicationServerManager;
 import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.ReplicationServerSinkPeer;
-import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.SinkPeer;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -64,13 +63,13 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 @Category({ReplicationTests.class, MediumTests.class})
-public class TestReplicationServer {
+public class TestReplicationServerSink {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestReplicationServer.class);
+      HBaseClassTestRule.forClass(TestReplicationServerSink.class);
 
-  private static final Logger LOG = LoggerFactory.getLogger(TestReplicationServer.class);
+  private static final Logger LOG = LoggerFactory.getLogger(TestReplicationServerSink.class);
 
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
 
@@ -126,6 +125,7 @@ public class TestReplicationServer {
     if (!replicationServer.isStopped()) {
       replicationServer.stop("test");
     }
+    TEST_UTIL.waitFor(10000, () -> !replicationServer.isAlive());
     replicationServer = null;
     replicationServerName = null;
   }
@@ -145,22 +145,7 @@ public class TestReplicationServer {
     replicateWALEntryAndVerify(sinkPeer);
   }
 
-  /**
-   * Requests region server using {@link AsyncReplicationServerAdmin}
-   */
-  @Test
-  public void testReplicateWAL2() throws Exception {
-    AsyncClusterConnection conn =
-        TEST_UTIL.getHBaseCluster().getMaster().getAsyncClusterConnection();
-    ServerName rs = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().get(0)
-        .getRegionServer().getServerName();
-    AsyncReplicationServerAdmin replAdmin = conn.getReplicationServerAdmin(rs);
-
-    ReplicationServerSinkPeer sinkPeer = new ReplicationServerSinkPeer(rs, replAdmin);
-    replicateWALEntryAndVerify(sinkPeer);
-  }
-
-  private void replicateWALEntryAndVerify(SinkPeer sinkPeer) throws Exception {
+  private void replicateWALEntryAndVerify(ReplicationServerSinkPeer sinkPeer) throws Exception {
     Entry[] entries = new Entry[BATCH_SIZE];
     for(int i = 0; i < BATCH_SIZE; i++) {
       entries[i] = generateEdit(i, TABLENAME, Bytes.toBytes(i));
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSource.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSource.java
new file mode 100644
index 0000000..843e5b1
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSource.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ ReplicationTests.class, LargeTests.class })
+public class TestReplicationServerSource extends TestReplicationBase {
+
+  @ClassRule public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestReplicationServerSource.class);
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestReplicationServerSource.class);
+
+  private static HReplicationServer replicationServer;
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    UTIL1.getConfiguration().setBoolean(HConstants.REPLICATION_OFFLOAD_ENABLE_KEY, true);
+    TestReplicationBase.setUpBeforeClass();
+    replicationServer = new HReplicationServer(UTIL1.getConfiguration());
+    replicationServer.start();
+    UTIL1.waitFor(60000, () -> replicationServer.isOnline());
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    replicationServer.stop("Tear down after test");
+    TestReplicationBase.tearDownAfterClass();
+  }
+
+  @Test
+  public void test() throws Exception {
+    try {
+      // Only start one region server in source cluster
+      ServerName producer = UTIL1.getMiniHBaseCluster().getRegionServer(0).getServerName();
+      replicationServer.startReplicationSource(producer, PEER_ID2);
+    } catch (Throwable e) {
+      LOG.info("Failed to start replication source", e);
+    }
+    runSmallBatchTest();
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
index 697a5ec..bd673bc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
@@ -139,7 +139,7 @@ public class TestReplicationSource {
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId, null,
+    rs.init(conf, null, null, manager, null, mockPeer, rss, rss.getServerName(), queueId, null,
       p -> OptionalLong.empty(), new MetricsSource(queueId));
     try {
       rs.startup();
@@ -177,8 +177,8 @@ public class TestReplicationSource {
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId,
-      uuid, p -> OptionalLong.empty(), new MetricsSource(queueId));
+    rs.init(conf, null, null, manager, null, mockPeer, rss, rss.getServerName(), queueId, uuid,
+      p -> OptionalLong.empty(), new MetricsSource(queueId));
     try {
       rs.startup();
       TEST_UTIL.waitFor(30000, () -> rs.getWalEntryFilter() != null);
@@ -264,9 +264,9 @@ public class TestReplicationSource {
       Configuration testConf = HBaseConfiguration.create();
       testConf.setInt("replication.source.maxretriesmultiplier", 1);
       ReplicationSourceManager manager = Mockito.mock(ReplicationSourceManager.class);
-      Mockito.when(manager.getTotalBufferUsed()).thenReturn(new AtomicLong());
-      source.init(testConf, null, null, manager, null, mockPeer, null, "testPeer",
-        null, p -> OptionalLong.empty(), null);
+      Mockito.when(manager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
+      source.init(testConf, null, null, manager, null, mockPeer, null, null, "testPeer", null,
+        p -> OptionalLong.empty(), null);
       ExecutorService executor = Executors.newSingleThreadExecutor();
       Future<?> future = executor.submit(
         () -> source.terminate("testing source termination"));
@@ -289,7 +289,7 @@ public class TestReplicationSource {
     ReplicationPeer mockPeer = mock(ReplicationPeer.class);
     Mockito.when(mockPeer.getPeerBandwidth()).thenReturn(0L);
     Configuration testConf = HBaseConfiguration.create();
-    source.init(testConf, null, null, mockManager, null, mockPeer, null,
+    source.init(testConf, null, null, mockManager, null, mockPeer, null, null,
       "testPeer", null, p -> OptionalLong.empty(), mock(MetricsSource.class));
     ReplicationSourceWALReader reader = new ReplicationSourceWALReader(null,
       conf, null, 0, null, source, null);
@@ -315,7 +315,7 @@ public class TestReplicationSource {
     reader.addEntryToBatch(batch, mockEntry);
     reader.entryBatchQueue.put(batch);
     source.terminate("test");
-    assertEquals(0, source.controller.getTotalBufferUsed().get());
+    assertEquals(0, mockManager.getTotalBufferUsed().get());
   }
 
   /**
@@ -536,7 +536,7 @@ public class TestReplicationSource {
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId, null,
+    rs.init(conf, null, null, manager, null, mockPeer, rss, rss.getServerName(), queueId, null,
       p -> OptionalLong.empty(), new MetricsSource(queueId));
     return rss;
   }
@@ -655,7 +655,7 @@ public class TestReplicationSource {
         TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
 
       ReplicationSource source = new ReplicationSource();
-      source.init(conf, null, null, manager, null, mockPeer, rss, id, null,
+      source.init(conf, null, null, manager, null, mockPeer, rss, rss.getServerName(), id, null,
         p -> OptionalLong.empty(), metrics);
 
       final Path log1 = new Path(logDir, "log-walgroup-a.8");
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
index 44914a5..e6b745e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
@@ -414,7 +414,8 @@ public abstract class TestReplicationSourceManager {
     assertEquals(files, manager.getWalsByIdRecoveredQueues().get(id).get(group));
     ReplicationSourceInterface source = new ReplicationSource();
     source.init(conf, fs, null, manager, manager.getQueueStorage(), rp1.getPeer("1"),
-      manager.getServer(), id, null, p -> OptionalLong.empty(), null);
+      manager.getServer(), manager.getServer().getServerName(), id, null, p -> OptionalLong.empty(),
+      null);
     source.cleanOldWALs(file2, false);
     // log1 should be deleted
     assertEquals(Sets.newHashSet(file2), manager.getWalsByIdRecoveredQueues().get(id).get(group));
@@ -630,16 +631,16 @@ public abstract class TestReplicationSourceManager {
 
       ReplicationSourceInterface source = new ReplicationSource();
       source.init(conf, fs, null, manager, manager.getQueueStorage(),
-        mockReplicationPeerForSyncReplication(peerId2), manager.getServer(), peerId2, null,
-        p -> OptionalLong.empty(), null);
+        mockReplicationPeerForSyncReplication(peerId2), manager.getServer(),
+        manager.getServer().getServerName(), peerId2, null, p -> OptionalLong.empty(), null);
       source.cleanOldWALs(walName, true);
       // still there if peer id does not match
       assertTrue(fs.exists(remoteWAL));
 
       source = new ReplicationSource();
       source.init(conf, fs, null, manager, manager.getQueueStorage(),
-        mockReplicationPeerForSyncReplication(slaveId), manager.getServer(), slaveId, null,
-        p -> OptionalLong.empty(), null);
+        mockReplicationPeerForSyncReplication(slaveId), manager.getServer(),
+        manager.getServer().getServerName(), slaveId, null, p -> OptionalLong.empty(), null);
       source.cleanOldWALs(walName, true);
       assertFalse(fs.exists(remoteWAL));
     } finally {
@@ -819,9 +820,10 @@ public abstract class TestReplicationSourceManager {
 
     @Override
     public void init(Configuration conf, FileSystem fs, Path walDir,
-      ReplicationSourceController overallController, ReplicationQueueStorage rq, ReplicationPeer rp,
-      Server server, String peerClusterId, UUID clusterId,
-      WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
+      ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
+      ReplicationPeer replicationPeer, Server server, ServerName producer, String queueId,
+      UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+      throws IOException {
       throw new IOException("Failing deliberately");
     }
   }

[hbase] 06/12: HBASE-24998 Introduce a ReplicationSourceOverallController interface and decouple ReplicationSourceManager and ReplicationSource (#2364)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 4718d24699e8129c8b1fa4d0acf236137248d492
Author: Guanghao Zhang <zg...@apache.org>
AuthorDate: Sun Sep 20 09:02:53 2020 +0800

    HBASE-24998 Introduce a ReplicationSourceOverallController interface and decouple ReplicationSourceManager and ReplicationSource (#2364)
    
    Signed-off-by: meiyi <my...@gmail.com>
---
 .../java/org/apache/hadoop/hbase/HConstants.java   |  2 +
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  4 +-
 .../replication/ReplicationSourceController.java   | 32 +++++++++-----
 .../regionserver/RecoveredReplicationSource.java   | 18 ++++----
 .../regionserver/ReplicationSource.java            | 35 ++++++---------
 .../regionserver/ReplicationSourceInterface.java   | 25 +++++++----
 .../regionserver/ReplicationSourceManager.java     | 51 +++++++++++++---------
 .../regionserver/ReplicationSourceShipper.java     |  4 +-
 .../regionserver/ReplicationSourceWALReader.java   | 13 +++---
 .../hbase/replication/ReplicationSourceDummy.java  | 21 +++++----
 .../regionserver/TestBasicWALEntryStream.java      | 15 ++++---
 .../regionserver/TestReplicationSource.java        |  2 +-
 .../regionserver/TestReplicationSourceManager.java |  3 +-
 13 files changed, 125 insertions(+), 100 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
index 10a38f6..6cde48d 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java
@@ -994,6 +994,8 @@ public final class HConstants {
   /*
    * cluster replication constants.
    */
+  public static final String REPLICATION_OFFLOAD_ENABLE_KEY = "hbase.replication.offload.enabled";
+  public static final boolean REPLICATION_OFFLOAD_ENABLE_DEFAULT = false;
   public static final String
       REPLICATION_SOURCE_SERVICE_CLASSNAME = "hbase.replication.source.service";
   public static final String REPLICATION_SERVICE_CLASSNAME_DEFAULT =
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index c1f447c..72fea23 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -258,6 +258,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuo
 import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsResponse.TableQuotaSnapshot;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.ReplicationServerService;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.TooSlowLog.SlowLogPayload;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.CompactionDescriptor;
@@ -271,7 +273,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.RegionEventDe
 @SuppressWarnings("deprecation")
 public class RSRpcServices implements HBaseRPCErrorHandler,
     AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction,
-    ConfigurationObserver {
+    ConfigurationObserver, ReplicationServerService.BlockingInterface {
   private static final Logger LOG = LoggerFactory.getLogger(RSRpcServices.class);
 
   /** RPC scheduler to use for the region server. */
diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationListener.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationSourceController.java
similarity index 50%
rename from hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationListener.java
rename to hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationSourceController.java
index 5c21e1e..5bb9dd6 100644
--- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationListener.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationSourceController.java
@@ -1,5 +1,4 @@
-/*
- *
+/**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -18,21 +17,32 @@
  */
 package org.apache.hadoop.hbase.replication;
 
-import org.apache.hadoop.hbase.ServerName;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.hadoop.hbase.replication.regionserver.MetricsReplicationGlobalSourceSource;
+import org.apache.hadoop.hbase.replication.regionserver.RecoveredReplicationSource;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
- * The replication listener interface can be implemented if a class needs to subscribe to events
- * generated by the ReplicationTracker. These events include things like addition/deletion of peer
- * clusters or failure of a local region server. To receive events, the class also needs to register
- * itself with a Replication Tracker.
+ * Used to control all replication sources inside one RegionServer or ReplicationServer.
+ * Used by {@link org.apache.hadoop.hbase.replication.regionserver.ReplicationSource} or
+ * {@link RecoveredReplicationSource}.
  */
 @InterfaceAudience.Private
-public interface ReplicationListener {
+public interface ReplicationSourceController {
+
+  /**
+   * Returns the maximum size in bytes of edits held in memory which are pending replication
+   * across all sources inside this RegionServer or ReplicationServer.
+   */
+  long getTotalBufferLimit();
+
+  AtomicLong getTotalBufferUsed();
+
+  MetricsReplicationGlobalSourceSource getGlobalMetrics();
 
   /**
-   * A region server has been removed from the local cluster
-   * @param regionServer the removed region server
+   * Call this when the recovered replication source has replicated all of its WALs.
    */
-  public void regionServerRemoved(ServerName regionServer);
+  void finishRecoveredSource(RecoveredReplicationSource src);
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
index abbc046..7cb159e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -45,18 +46,15 @@ public class RecoveredReplicationSource extends ReplicationSource {
 
   private static final Logger LOG = LoggerFactory.getLogger(RecoveredReplicationSource.class);
 
-  private Path walDir;
-
   private String actualPeerId;
 
   @Override
-  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
-    ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
-    String peerClusterZnode, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
-    MetricsSource metrics) throws IOException {
-    super.init(conf, fs, walDir, manager, queueStorage, replicationPeer, server, peerClusterZnode,
-      clusterId, walFileLengthProvider, metrics);
-    this.walDir = walDir;
+  public void init(Configuration conf, FileSystem fs, Path walDir,
+    ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
+    ReplicationPeer replicationPeer, Server server, String peerClusterZnode, UUID clusterId,
+    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
+    super.init(conf, fs, walDir, overallController, queueStorage, replicationPeer, server,
+      peerClusterZnode, clusterId, walFileLengthProvider, metrics);
     this.actualPeerId = this.replicationQueueInfo.getPeerId();
   }
 
@@ -149,7 +147,7 @@ public class RecoveredReplicationSource extends ReplicationSource {
   void tryFinish() {
     if (workerThreads.isEmpty()) {
       this.getSourceMetrics().clear();
-      manager.finishRecoveredSource(this);
+      controller.finishRecoveredSource(this);
     }
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index cfcc837..27f2ce7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
@@ -96,8 +97,9 @@ public class ReplicationSource implements ReplicationSourceInterface {
   protected Configuration conf;
   protected ReplicationQueueInfo replicationQueueInfo;
 
-  // The manager of all sources to which we ping back our progress
-  ReplicationSourceManager manager;
+  protected Path walDir;
+
+  protected ReplicationSourceController controller;
   // Should we stop everything?
   protected Server server;
   // How long should we sleep for each retry
@@ -181,23 +183,14 @@ public class ReplicationSource implements ReplicationSourceInterface {
     this.baseFilterOutWALEntries = Collections.unmodifiableList(baseFilterOutWALEntries);
   }
 
-  /**
-   * Instantiation method used by region servers
-   * @param conf configuration to use
-   * @param fs file system to use
-   * @param manager replication manager to ping to
-   * @param server the server for this region server
-   * @param queueId the id of our replication queue
-   * @param clusterId unique UUID for the cluster
-   * @param metrics metrics for replication source
-   */
   @Override
-  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
-      ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
-      String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
-      MetricsSource metrics) throws IOException {
+  public void init(Configuration conf, FileSystem fs, Path walDir,
+    ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
+    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
+    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
     this.server = server;
     this.conf = HBaseConfiguration.create(conf);
+    this.walDir = walDir;
     this.waitOnEndpointSeconds =
       this.conf.getInt(WAIT_ON_ENDPOINT_SECONDS, DEFAULT_WAIT_ON_ENDPOINT_SECONDS);
     decorateConf();
@@ -209,7 +202,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
     this.logQueue = new ReplicationSourceLogQueue(conf, metrics, this);
     this.queueStorage = queueStorage;
     this.replicationPeer = replicationPeer;
-    this.manager = manager;
+    this.controller = overallController;
     this.fs = fs;
     this.metrics = metrics;
     this.clusterId = clusterId;
@@ -336,9 +329,9 @@ public class ReplicationSource implements ReplicationSourceInterface {
         Threads.setDaemonThreadRunning(
             walReader, Thread.currentThread().getName()
             + ".replicationSource.wal-reader." + walGroupId + "," + queueId,
-          (t,e) -> this.uncaughtException(t, e, this.manager, this.getPeerId()));
+          (t,e) -> this.uncaughtException(t, e, null, this.getPeerId()));
         worker.setWALReader(walReader);
-        worker.startup((t,e) -> this.uncaughtException(t, e, this.manager, this.getPeerId()));
+        worker.startup((t,e) -> this.uncaughtException(t, e, null, this.getPeerId()));
         return worker;
       }
     });
@@ -766,9 +759,9 @@ public class ReplicationSource implements ReplicationSourceInterface {
       throttler.addPushSize(batchSize);
     }
     totalReplicatedEdits.addAndGet(entries.size());
-    long newBufferUsed = manager.getTotalBufferUsed().addAndGet(-batchSize);
+    long newBufferUsed = controller.getTotalBufferUsed().addAndGet(-batchSize);
     // Record the new buffer usage
-    this.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
+    controller.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
   }
 
   @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
index 77bba90..296bd27 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -42,14 +43,22 @@ public interface ReplicationSourceInterface {
   /**
    * Initializer for the source
    *
-   * @param conf   the configuration to use
-   * @param fs     the file system to use
-   * @param server the server for this region server
-   */
-  void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
-    ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
-    String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
-    MetricsSource metrics) throws IOException;
+   * @param conf configuration to use
+   * @param fs file system to use
+   * @param walDir the directory where the WAL is located
+   * @param overallController the overall controller of all replication sources
+   * @param queueStorage the replication queue storage
+   * @param replicationPeer the replication peer
+   * @param server the server which starts and runs this replication source
+   * @param queueId the id of our replication queue
+   * @param clusterId unique UUID for the cluster
+   * @param walFileLengthProvider used to get the WAL length
+   * @param metrics metrics for this replication source
+   */
+  void init(Configuration conf, FileSystem fs, Path walDir,
+    ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
+    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
+    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException;
 
   /**
    * Add a log to the list of logs to replicate
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index f502a65..b6cb087 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
 import org.apache.hadoop.hbase.replication.ReplicationPeers;
 import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.replication.SyncReplicationState;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
@@ -93,7 +94,7 @@ import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFacto
  * </ul>
  */
 @InterfaceAudience.Private
-public class ReplicationSourceManager {
+public class ReplicationSourceManager implements ReplicationSourceController {
   private static final Logger LOG = LoggerFactory.getLogger(ReplicationSourceManager.class);
   // all the sources that read this RS's logs and every peer only has one replication source
   private final ConcurrentMap<String, ReplicationSourceInterface> sources;
@@ -134,12 +135,6 @@ public class ReplicationSourceManager {
 
   private AtomicLong totalBufferUsed = new AtomicLong();
 
-  // How long should we sleep for each retry when deleting remote wal files for sync replication
-  // peer.
-  private final long sleepForRetries;
-  // Maximum number of retries before taking bold actions when deleting remote wal files for sync
-  // replication peer.
-  private final int maxRetriesMultiplier;
   // Total buffer size on this RegionServer for holding batched edits to be shipped.
   private final long totalBufferLimit;
   private final MetricsReplicationGlobalSourceSource globalMetrics;
@@ -155,6 +150,12 @@ public class ReplicationSourceManager {
   AtomicReference<ReplicationSourceInterface> catalogReplicationSource = new AtomicReference<>();
 
   /**
+   * When replication offload is enabled, do not create replication sources here; only write WALs
+   * to the replication queue storage. The replication sources are started by the ReplicationServer.
+   */
+  private final boolean replicationOffload;
+
+  /**
    * Creates a replication manager and sets the watch on all the other registered region servers
    * @param queueStorage the interface for manipulating replication queues
    * @param conf the configuration to use
@@ -197,12 +198,11 @@ public class ReplicationSourceManager {
     this.latestPaths = new HashMap<>();
     this.replicationForBulkLoadDataEnabled = conf.getBoolean(
       HConstants.REPLICATION_BULKLOAD_ENABLE_KEY, HConstants.REPLICATION_BULKLOAD_ENABLE_DEFAULT);
-    this.sleepForRetries = this.conf.getLong("replication.source.sync.sleepforretries", 1000);
-    this.maxRetriesMultiplier =
-      this.conf.getInt("replication.source.sync.maxretriesmultiplier", 60);
     this.totalBufferLimit = conf.getLong(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY,
         HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
     this.globalMetrics = globalMetrics;
+    this.replicationOffload = conf.getBoolean(HConstants.REPLICATION_OFFLOAD_ENABLE_KEY,
+      HConstants.REPLICATION_OFFLOAD_ENABLE_DEFAULT);
   }
 
   /**
@@ -338,7 +338,9 @@ public class ReplicationSourceManager {
     if (peerConfig.isSyncReplication()) {
       syncReplicationPeerMappingManager.add(peer.getId(), peerConfig);
     }
-    src.startup();
+    if (!replicationOffload) {
+      src.startup();
+    }
     return src;
   }
 
@@ -431,7 +433,9 @@ public class ReplicationSourceManager {
         .forEach(wal -> src.enqueueLog(new Path(this.logDir, wal)));
     }
     LOG.info("Startup replication source for " + src.getPeerId());
-    src.startup();
+    if (!replicationOffload) {
+      src.startup();
+    }
 
     List<ReplicationSourceInterface> toStartup = new ArrayList<>();
     // synchronized on oldsources to avoid race with NodeFailoverWorker
@@ -454,8 +458,10 @@ public class ReplicationSourceManager {
         toStartup.add(recoveredReplicationSource);
       }
     }
-    for (ReplicationSourceInterface replicationSource : toStartup) {
-      replicationSource.startup();
+    if (!replicationOffload) {
+      for (ReplicationSourceInterface replicationSource : toStartup) {
+        replicationSource.startup();
+      }
     }
   }
 
@@ -473,7 +479,8 @@ public class ReplicationSourceManager {
     return true;
   }
 
-  void finishRecoveredSource(ReplicationSourceInterface src) {
+  @Override
+  public void finishRecoveredSource(RecoveredReplicationSource src) {
     synchronized (oldsources) {
       if (!removeRecoveredSource(src)) {
         return;
@@ -487,8 +494,7 @@ public class ReplicationSourceManager {
    * Clear the metrics and related replication queue of the specified old source
    * @param src source to clear
    */
-  void removeSource(ReplicationSourceInterface src) {
-    LOG.info("Done with the queue " + src.getQueueId());
+  private void removeSource(ReplicationSourceInterface src) {
     this.sources.remove(src.getPeerId());
     // Delete queue from storage and memory
     deleteQueue(src.getQueueId());
@@ -532,7 +538,7 @@ public class ReplicationSourceManager {
     }
   }
 
-  // public because of we call it in TestReplicationEmptyWALRecovery
+  @InterfaceAudience.Private
   public void preLogRoll(Path newLog) throws IOException {
     String logName = newLog.getName();
     String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(logName);
@@ -550,8 +556,8 @@ public class ReplicationSourceManager {
     }
   }
 
-  // public because of we call it in TestReplicationEmptyWALRecovery
-  public void postLogRoll(Path newLog) throws IOException {
+  @InterfaceAudience.Private
+  public void postLogRoll(Path newLog) {
     // This only updates the sources we own, not the recovered ones
     for (ReplicationSourceInterface source : this.sources.values()) {
       source.enqueueLog(newLog);
@@ -758,6 +764,7 @@ public class ReplicationSourceManager {
     }
   }
 
+  @Override
   public AtomicLong getTotalBufferUsed() {
     return totalBufferUsed;
   }
@@ -766,6 +773,7 @@ public class ReplicationSourceManager {
    * Returns the maximum size in bytes of edits held in memory which are pending replication
    * across all sources inside this RegionServer.
    */
+  @Override
   public long getTotalBufferLimit() {
     return totalBufferLimit;
   }
@@ -856,7 +864,8 @@ public class ReplicationSourceManager {
     return executor.getActiveCount();
   }
 
-  MetricsReplicationGlobalSourceSource getGlobalMetrics() {
+  @Override
+  public MetricsReplicationGlobalSourceSource getGlobalMetrics() {
     return this.globalMetrics;
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
index 35c4e54..b904af8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
@@ -368,8 +368,8 @@ public class ReplicationSourceShipper extends Thread {
       LOG.trace("Decrementing totalBufferUsed by {}B while stopping Replication WAL Readers.",
         totalToDecrement.longValue());
     }
-    long newBufferUsed = source.manager.getTotalBufferUsed()
+    long newBufferUsed = source.controller.getTotalBufferUsed()
       .addAndGet(-totalToDecrement.longValue());
-    source.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
+    source.controller.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
index d148162..698fd1e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
@@ -318,10 +318,11 @@ class ReplicationSourceWALReader extends Thread {
   //returns false if we've already exceeded the global quota
   private boolean checkQuota() {
     // try not to go over total quota
-    if (source.manager.getTotalBufferUsed().get() > source.manager.getTotalBufferLimit()) {
+    if (source.controller.getTotalBufferUsed().get() > source.controller
+      .getTotalBufferLimit()) {
       LOG.warn("peer={}, can't read more edits from WAL as buffer usage {}B exceeds limit {}B",
-        this.source.getPeerId(), source.manager.getTotalBufferUsed().get(),
-        source.manager.getTotalBufferLimit());
+        this.source.getPeerId(), source.controller.getTotalBufferUsed().get(),
+        source.controller.getTotalBufferLimit());
       Threads.sleep(sleepForRetries);
       return false;
     }
@@ -449,10 +450,10 @@ class ReplicationSourceWALReader extends Thread {
    * @return true if we should clear buffer and push all
    */
   private boolean acquireBufferQuota(long size) {
-    long newBufferUsed = source.manager.getTotalBufferUsed().addAndGet(size);
+    long newBufferUsed = source.controller.getTotalBufferUsed().addAndGet(size);
     // Record the new buffer usage
-    source.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
-    return newBufferUsed >= source.manager.getTotalBufferLimit();
+    source.controller.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
+    return newBufferUsed >= source.controller.getTotalBufferLimit();
   }
 
   /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
index 42445a6..8a32e94 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
@@ -29,7 +29,6 @@ import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.replication.regionserver.MetricsSource;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
-import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager;
 import org.apache.hadoop.hbase.replication.regionserver.WALEntryBatch;
 import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
@@ -40,21 +39,21 @@ import org.apache.hadoop.hbase.wal.WAL.Entry;
 public class ReplicationSourceDummy implements ReplicationSourceInterface {
 
   private ReplicationPeer replicationPeer;
-  private String peerClusterId;
+  private String queueId;
   private Path currentPath;
   private MetricsSource metrics;
   private WALFileLengthProvider walFileLengthProvider;
   private AtomicBoolean startup = new AtomicBoolean(false);
 
   @Override
-  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
-    ReplicationQueueStorage rq, ReplicationPeer rp, Server server, String peerClusterId,
-    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
-    throws IOException {
-    this.peerClusterId = peerClusterId;
+  public void init(Configuration conf, FileSystem fs, Path walDir,
+    ReplicationSourceController overallController, ReplicationQueueStorage queueStorage,
+    ReplicationPeer replicationPeer, Server server, String queueId, UUID clusterId,
+    WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
+    this.queueId = queueId;
     this.metrics = metrics;
     this.walFileLengthProvider = walFileLengthProvider;
-    this.replicationPeer = rp;
+    this.replicationPeer = replicationPeer;
   }
 
   @Override
@@ -97,14 +96,14 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
 
   @Override
   public String getQueueId() {
-    return peerClusterId;
+    return queueId;
   }
 
   @Override
   public String getPeerId() {
-    String[] parts = peerClusterId.split("-", 2);
+    String[] parts = queueId.split("-", 2);
     return parts.length != 1 ?
-        parts[0] : peerClusterId;
+        parts[0] : queueId;
   }
 
   @Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
index 7402d82..616defa 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
@@ -55,6 +55,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
 import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -271,19 +272,19 @@ public abstract class TestBasicWALEntryStream extends WALEntryStreamTestBase {
     when(source.getWALFileLengthProvider()).thenReturn(log);
     when(source.getServer()).thenReturn(mockServer);
     when(source.isRecovered()).thenReturn(recovered);
-    source.manager = mockReplicationSourceManager();
+    source.controller = mockReplicationSourceController();
     return source;
   }
 
-  private ReplicationSourceManager mockReplicationSourceManager() {
-    ReplicationSourceManager mockSourceManager = Mockito.mock(ReplicationSourceManager.class);
+  private ReplicationSourceController mockReplicationSourceController() {
+    ReplicationSourceController controller = Mockito.mock(ReplicationSourceController.class);
     MetricsReplicationGlobalSourceSource globalMetrics =
       Mockito.mock(MetricsReplicationGlobalSourceSource.class);
-    when(mockSourceManager.getGlobalMetrics()).thenReturn(globalMetrics);
-    when(mockSourceManager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
-    when(mockSourceManager.getTotalBufferLimit())
+    when(controller.getGlobalMetrics()).thenReturn(globalMetrics);
+    when(controller.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
+    when(controller.getTotalBufferLimit())
       .thenReturn((long) HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
-    return mockSourceManager;
+    return controller;
   }
 
   private ReplicationSourceWALReader createReader(boolean recovered, Configuration conf) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
index 0309731..697a5ec 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
@@ -315,7 +315,7 @@ public class TestReplicationSource {
     reader.addEntryToBatch(batch, mockEntry);
     reader.entryBatchQueue.put(batch);
     source.terminate("test");
-    assertEquals(0, source.manager.getTotalBufferUsed().get());
+    assertEquals(0, source.controller.getTotalBufferUsed().get());
   }
 
   /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
index b74b76e..44914a5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
@@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
 import org.apache.hadoop.hbase.replication.ReplicationPeers;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationSourceController;
 import org.apache.hadoop.hbase.replication.ReplicationSourceDummy;
 import org.apache.hadoop.hbase.replication.ReplicationStorageFactory;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
@@ -818,7 +819,7 @@ public abstract class TestReplicationSourceManager {
 
     @Override
     public void init(Configuration conf, FileSystem fs, Path walDir,
-      ReplicationSourceManager manager, ReplicationQueueStorage rq, ReplicationPeer rp,
+      ReplicationSourceController overallController, ReplicationQueueStorage rq, ReplicationPeer rp,
       Server server, String peerClusterId, UUID clusterId,
       WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
       throw new IOException("Failing deliberately");

[hbase] 08/12: HBASE-24999 Master manages ReplicationServers (#2579)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 6d7bd0a6b40a685c079f1432d11258f191bc8b2b
Author: XinSun <dd...@gmail.com>
AuthorDate: Wed Oct 28 18:59:57 2020 +0800

    HBASE-24999 Master manages ReplicationServers (#2579)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../server/master/ReplicationServerStatus.proto    |  34 ++++
 .../org/apache/hadoop/hbase/master/HMaster.java    |  10 +
 .../hadoop/hbase/master/MasterRpcServices.java     |  37 +++-
 .../apache/hadoop/hbase/master/MasterServices.java |   5 +
 .../hbase/master/ReplicationServerManager.java     | 204 ++++++++++++++++++++
 .../replication/HBaseReplicationEndpoint.java      | 148 ++++++--------
 .../hbase/replication/HReplicationServer.java      | 214 ++++++++++++++++++++-
 .../HBaseInterClusterReplicationEndpoint.java      |   1 -
 .../regionserver/ReplicationSyncUp.java            |   4 +-
 .../hbase/master/MockNoopMasterServices.java       |   5 +
 .../hbase/replication/TestReplicationBase.java     |   2 +
 .../hbase/replication/TestReplicationServer.java   |  57 +++++-
 12 files changed, 619 insertions(+), 102 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/ReplicationServerStatus.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/ReplicationServerStatus.proto
new file mode 100644
index 0000000..d39a043
--- /dev/null
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/ReplicationServerStatus.proto
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+syntax = "proto2";
+
+package hbase.pb;
+
+option java_package = "org.apache.hadoop.hbase.shaded.protobuf.generated";
+option java_outer_classname = "ReplicationServerStatusProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+option optimize_for = SPEED;
+
+import "server/master/RegionServerStatus.proto";
+
+service ReplicationServerStatusService {
+
+  rpc ReplicationServerReport(RegionServerReportRequest)
+      returns(RegionServerReportResponse);
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 903f392..8977ad5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -307,6 +307,8 @@ public class HMaster extends HRegionServer implements MasterServices {
   // manager of assignment nodes in zookeeper
   private AssignmentManager assignmentManager;
 
+  // server manager to deal with replication server info
+  private ReplicationServerManager replicationServerManager;
 
   /**
    * Cache for the meta region replica's locations. Also tracks their changes to avoid stale
@@ -873,6 +875,8 @@ public class HMaster extends HRegionServer implements MasterServices {
         .collect(Collectors.toList());
     this.assignmentManager.setupRIT(ritList);
 
+    this.replicationServerManager = new ReplicationServerManager(this);
+
     // Start RegionServerTracker with listing of servers found with exiting SCPs -- these should
     // be registered in the deadServers set -- and with the list of servernames out on the
     // filesystem that COULD BE 'alive' (we'll schedule SCPs for each and let SCP figure it out).
@@ -1037,6 +1041,7 @@ public class HMaster extends HRegionServer implements MasterServices {
     this.hbckChore = new HbckChore(this);
     getChoreService().scheduleChore(hbckChore);
     this.serverManager.startChore();
+    this.replicationServerManager.startChore();
 
     // Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
     if (!waitForNamespaceOnline()) {
@@ -1361,6 +1366,11 @@ public class HMaster extends HRegionServer implements MasterServices {
   }
 
   @Override
+  public ReplicationServerManager getReplicationServerManager() {
+    return this.replicationServerManager;
+  }
+
+  @Override
   public MasterFileSystem getMasterFileSystem() {
     return this.fileSystemManager;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index 0efa308..c677458 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -402,6 +402,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationProtos.Trans
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationProtos.TransitReplicationPeerSyncReplicationStateResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationProtos.UpdateReplicationPeerConfigRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationProtos.UpdateReplicationPeerConfigResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerStatusProtos.ReplicationServerStatusService;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.VisibilityLabelsProtos.VisibilityLabelsService;
 
@@ -413,7 +414,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.VisibilityLabelsProtos.
 public class MasterRpcServices extends RSRpcServices implements
     MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface,
     LockService.BlockingInterface, HbckService.BlockingInterface,
-    ClientMetaService.BlockingInterface {
+    ClientMetaService.BlockingInterface, ReplicationServerStatusService.BlockingInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(MasterRpcServices.class.getName());
   private static final Logger AUDITLOG =
@@ -546,7 +547,7 @@ public class MasterRpcServices extends RSRpcServices implements
    */
   @Override
   protected List<BlockingServiceAndInterface> getServices() {
-    List<BlockingServiceAndInterface> bssi = new ArrayList<>(5);
+    List<BlockingServiceAndInterface> bssi = new ArrayList<>(6);
     bssi.add(new BlockingServiceAndInterface(
         MasterService.newReflectiveBlockingService(this),
         MasterService.BlockingInterface.class));
@@ -559,6 +560,9 @@ public class MasterRpcServices extends RSRpcServices implements
         HbckService.BlockingInterface.class));
     bssi.add(new BlockingServiceAndInterface(ClientMetaService.newReflectiveBlockingService(this),
         ClientMetaService.BlockingInterface.class));
+    bssi.add(new BlockingServiceAndInterface(
+        ReplicationServerStatusService.newReflectiveBlockingService(this),
+        ReplicationServerStatusService.BlockingInterface.class));
     bssi.addAll(super.getServices());
     return bssi;
   }
@@ -3481,4 +3485,33 @@ public class MasterRpcServices extends RSRpcServices implements
     }
     return builder.build();
   }
+
+  @Override
+  public RegionServerReportResponse replicationServerReport(RpcController controller,
+      RegionServerReportRequest request) throws ServiceException {
+    try {
+      master.checkServiceStarted();
+      int versionNumber = 0;
+      String version = "0.0.0";
+      VersionInfo versionInfo = VersionInfoUtil.getCurrentClientVersionInfo();
+      if (versionInfo != null) {
+        version = versionInfo.getVersion();
+        versionNumber = VersionInfoUtil.getVersionNumber(versionInfo);
+      }
+      ClusterStatusProtos.ServerLoad sl = request.getLoad();
+      ServerName serverName = ProtobufUtil.toServerName(request.getServer());
+      ServerMetrics oldMetrics = master.getReplicationServerManager().getServerMetrics(serverName);
+      ServerMetrics newMetrics =
+          ServerMetricsBuilder.toServerMetrics(serverName, versionNumber, version, sl);
+      master.getReplicationServerManager().serverReport(serverName, newMetrics);
+      if (sl != null && master.metricsMaster != null) {
+        // Up our metrics.
+        master.metricsMaster.incrementRequests(sl.getTotalNumberOfRequests()
+            - (oldMetrics != null ? oldMetrics.getRequestCount() : 0));
+      }
+    } catch (IOException ioe) {
+      throw new ServiceException(ioe);
+    }
+    return RegionServerReportResponse.newBuilder().build();
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index 3f7dc02..bb8fdca 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -103,6 +103,11 @@ public interface MasterServices extends Server {
   ServerManager getServerManager();
 
   /**
+   * @return Master's {@link ReplicationServerManager} instance.
+   */
+  ReplicationServerManager getReplicationServerManager();
+
+  /**
    * @return Master's instance of {@link ExecutorService}
    */
   ExecutorService getExecutorService();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ReplicationServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ReplicationServerManager.java
new file mode 100644
index 0000000..273b7f2
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ReplicationServerManager.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentNavigableMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.ServerMetrics;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The ReplicationServerManager class manages info about replication servers.
+ * <p>
+ * Maintains lists of online and dead servers.
+ * <p>
+ * Servers are distinguished in two different ways.  A given server has a
+ * location, specified by hostname and port, and of which there can only be one
+ * online at any given time.  A server instance is specified by the location
+ * (hostname and port) as well as the startcode (timestamp from when the server
+ * was started).  This is used to differentiate a restarted instance of a given
+ * server from the original instance.
+ */
+@InterfaceAudience.Private
+public class ReplicationServerManager {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ReplicationServerManager.class);
+
+  public static final String ONLINE_SERVER_REFRESH_INTERVAL =
+      "hbase.master.replication.server.refresh.interval";
+  public static final int ONLINE_SERVER_REFRESH_INTERVAL_DEFAULT = 60 * 1000; // 1 min
+
+  private final MasterServices master;
+
+  /** Map of registered servers to their current load */
+  private final ConcurrentNavigableMap<ServerName, ServerMetrics> onlineServers =
+    new ConcurrentSkipListMap<>();
+
+  private OnlineServerRefresher onlineServerRefresher;
+  private int refreshPeriod;
+
+  /**
+   * Constructor.
+   */
+  public ReplicationServerManager(final MasterServices master) {
+    this.master = master;
+  }
+
+  /**
+   * Start the online-server refresher chore of this ReplicationServerManager.
+   */
+  public void startChore() {
+    Configuration conf = master.getConfiguration();
+    refreshPeriod = conf.getInt(ONLINE_SERVER_REFRESH_INTERVAL,
+        ONLINE_SERVER_REFRESH_INTERVAL_DEFAULT);
+    onlineServerRefresher = new OnlineServerRefresher("ReplicationServerRefresher", refreshPeriod);
+    master.getChoreService().scheduleChore(onlineServerRefresher);
+  }
+
+  /**
+   * Stop the ServerManager.
+   */
+  public void stop() {
+    if (onlineServerRefresher != null) {
+      onlineServerRefresher.cancel();
+    }
+  }
+
+  public void serverReport(ServerName sn, ServerMetrics sl) {
+    if (null == this.onlineServers.replace(sn, sl)) {
+      if (!checkAndRecordNewServer(sn, sl)) {
+        LOG.info("ReplicationServerReport ignored, could not record the server: {}", sn);
+      }
+    }
+  }
+
+  /**
+   * Check is a server of same host and port already exists,
+   * if not, or the existed one got a smaller start code, record it.
+   *
+   * @param serverName the server to check and record
+   * @param sl the server load on the server
+   * @return true if the server is recorded, otherwise, false
+   */
+  private boolean checkAndRecordNewServer(final ServerName serverName, final ServerMetrics sl) {
+    ServerName existingServer = null;
+    synchronized (this.onlineServers) {
+      existingServer = findServerWithSameHostnamePort(serverName);
+      if (existingServer != null && (existingServer.getStartcode() > serverName.getStartcode())) {
+        LOG.info("ReplicationServer serverName={} rejected; we already have {} registered with "
+          + "same hostname and port", serverName, existingServer);
+        return false;
+      }
+      recordNewServer(serverName, sl);
+      // Note that we assume that same ts means same server, and don't expire in that case.
+      if (existingServer != null && (existingServer.getStartcode() < serverName.getStartcode())) {
+        LOG.info("Triggering server recovery; existingServer {} looks stale, new server: {}",
+            existingServer, serverName);
+        expireServer(existingServer);
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Assumes onlineServers is locked.
+   * @return ServerName with matching hostname and port.
+   */
+  private ServerName findServerWithSameHostnamePort(final ServerName serverName) {
+    ServerName end = ServerName.valueOf(serverName.getHostname(), serverName.getPort(),
+      Long.MAX_VALUE);
+
+    ServerName r = onlineServers.lowerKey(end);
+    if (r != null && ServerName.isSameAddress(r, serverName)) {
+      return r;
+    }
+    return null;
+  }
+
+  /**
+   * Assumes onlineServers is locked.
+   */
+  private void recordNewServer(final ServerName serverName, final ServerMetrics sl) {
+    LOG.info("Registering ReplicationServer={}", serverName);
+    this.onlineServers.put(serverName, sl);
+  }
+
+  /**
+   * Assumes onlineServers is locked.
+   * Expire the passed server. Remove it from list of online servers
+   */
+  public void expireServer(final ServerName serverName) {
+    LOG.info("Expiring ReplicationServer={}", serverName);
+    onlineServers.remove(serverName);
+  }
+
+  /**
+   * @return Read-only map of servers to serverinfo
+   */
+  public Map<ServerName, ServerMetrics> getOnlineServers() {
+    // Presumption is that iterating the returned Map is OK.
+    synchronized (this.onlineServers) {
+      return Collections.unmodifiableMap(this.onlineServers);
+    }
+  }
+
+  /**
+   * @return A copy of the internal list of online servers.
+   */
+  public List<ServerName> getOnlineServersList() {
+    return new ArrayList<>(this.onlineServers.keySet());
+  }
+
+  /**
+   * @param serverName server name
+   * @return ServerMetrics if serverName is known else null
+   */
+  public ServerMetrics getServerMetrics(final ServerName serverName) {
+    return this.onlineServers.get(serverName);
+  }
+
+  private class OnlineServerRefresher extends ScheduledChore {
+
+    public OnlineServerRefresher(String name, int p) {
+      super(name, master, p, 60 * 1000); // delay one minute before first execute
+    }
+
+    @Override
+    protected void chore() {
+      synchronized (onlineServers) {
+        List<ServerName> servers = getOnlineServersList();
+        servers.forEach(s -> {
+          ServerMetrics metrics = onlineServers.get(s);
+          if (metrics.getReportTimestamp() + refreshPeriod < System.currentTimeMillis()) {
+            expireServer(s);
+          }
+        });
+      }
+    }
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
index e788d8c..115df76 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
@@ -18,8 +18,8 @@
 
 package org.apache.hadoop.hbase.replication;
 
-import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT;
-import static org.apache.hadoop.hbase.HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY;
+import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT;
+import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_OPERATION_TIMEOUT;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -32,16 +32,15 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.ScheduledChore;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
 import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
 import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
 import org.apache.hadoop.hbase.protobuf.ReplicationProtobufUtil;
-import org.apache.hadoop.hbase.ScheduledChore;
-import org.apache.hadoop.hbase.Server;
-import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.security.User;
-import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.util.FutureUtils;
 import org.apache.hadoop.hbase.wal.WAL;
 import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
@@ -67,6 +66,13 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterServ
 /**
  * A {@link BaseReplicationEndpoint} for replication endpoints whose
  * target cluster is an HBase cluster.
+ * <p>
+ * Compatible with two implementations to fetch sink servers, fetching replication servers by
+ * accessing master and fetching region servers by listening to ZK.
+ * Give priority to fetching replication servers as sink servers by accessing master. If the slave
+ * cluster isn't supported (version < 3.x) or exceptions occur, fetch region servers as sink
+ * servers via ZK. So we always register the ZK listener, but ignore the ZK event if replication servers are available.
+ * </p>
  */
 @InterfaceAudience.Private
 public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
@@ -74,9 +80,6 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
 
   private static final Logger LOG = LoggerFactory.getLogger(HBaseReplicationEndpoint.class);
 
-  public static final String FETCH_SERVERS_USE_ZK_CONF_KEY =
-      "hbase.replication.fetch.servers.usezk";
-
   public static final String FETCH_SERVERS_INTERVAL_CONF_KEY =
       "hbase.replication.fetch.servers.interval";
   public static final int DEFAULT_FETCH_SERVERS_INTERVAL = 10 * 60 * 1000; // 10 mins
@@ -112,10 +115,9 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
 
   private List<ServerName> sinkServers = new ArrayList<>(0);
 
-  private AsyncClusterConnection peerConnection;
-  private boolean fetchServersUseZk = false;
+  private volatile boolean fetchServersUseZk = false;
   private FetchServersChore fetchServersChore;
-  private int shortOperationTimeout;
+  private int operationTimeout;
 
   /*
    * Some implementations of HBaseInterClusterReplicationEndpoint may require instantiate different
@@ -136,6 +138,8 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
     this.badSinkThreshold =
       ctx.getConfiguration().getInt("replication.bad.sink.threshold", DEFAULT_BAD_SINK_THRESHOLD);
     this.badReportCounts = Maps.newHashMap();
+    this.operationTimeout = ctx.getLocalConfiguration().getInt(
+        HBASE_CLIENT_OPERATION_TIMEOUT, DEFAULT_HBASE_CLIENT_OPERATION_TIMEOUT);
   }
 
   protected void disconnect() {
@@ -144,20 +148,12 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
         zkw.close();
       }
     }
-    if (this.conn != null) {
-      try {
-        this.conn.close();
-        this.conn = null;
-      } catch (IOException e) {
-        LOG.warn("{} Failed to close the connection", ctx.getPeerId());
-      }
-    }
     if (fetchServersChore != null) {
       fetchServersChore.cancel();
     }
-    if (peerConnection != null) {
+    if (conn != null) {
       try {
-        peerConnection.close();
+        conn.close();
       } catch (IOException e) {
         LOG.warn("Attempt to close peerConnection failed.", e);
       }
@@ -192,27 +188,10 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   @Override
-  protected synchronized void doStart() {
-    this.shortOperationTimeout = ctx.getLocalConfiguration().getInt(
-        HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY, DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
+  protected void doStart() {
     try {
-      if (ctx.getLocalConfiguration().getBoolean(FETCH_SERVERS_USE_ZK_CONF_KEY, false)) {
-        fetchServersUseZk = true;
-      } else {
-        try {
-          if (ReplicationUtils.isPeerClusterSupportReplicationOffload(getPeerConnection())) {
-            fetchServersChore = new FetchServersChore(ctx.getServer(), this);
-            ctx.getServer().getChoreService().scheduleChore(fetchServersChore);
-            fetchServersUseZk = false;
-          } else {
-            fetchServersUseZk = true;
-          }
-        } catch (Throwable t) {
-          fetchServersUseZk = true;
-          LOG.warn("Peer {} try to fetch servers by admin failed. Using zk impl.",
-              ctx.getPeerId(), t);
-        }
-      }
+      fetchServersChore = new FetchServersChore(ctx.getServer(), this);
+      ctx.getServer().getChoreService().scheduleChore(fetchServersChore);
       reloadZkWatcher();
       connectPeerCluster();
       notifyStarted();
@@ -255,9 +234,7 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
       }
       zkw = new ZKWatcher(ctx.getConfiguration(),
           "connection to cluster: " + ctx.getPeerId(), this);
-      if (fetchServersUseZk) {
-        zkw.registerListener(new PeerRegionServerListener(this));
-      }
+      zkw.registerListener(new PeerRegionServerListener(this));
     }
   }
 
@@ -283,38 +260,25 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   }
 
   /**
-   * Get the connection to peer cluster
-   * @return connection to peer cluster
-   * @throws IOException If anything goes wrong connecting
-   */
-  private synchronized AsyncClusterConnection getPeerConnection() throws IOException {
-    if (peerConnection == null) {
-      Configuration conf = ctx.getConfiguration();
-      peerConnection = ClusterConnectionFactory.createAsyncClusterConnection(conf, null,
-          UserProvider.instantiate(conf).getCurrent());
-    }
-    return peerConnection;
-  }
-
-  /**
    * Get the list of all the servers that are responsible for replication sink
    * from the specified peer master
-   * @return list of server addresses or an empty list if the slave is unavailable
+   * @return list of server addresses
    */
-  protected List<ServerName> fetchSlavesAddresses() {
+  protected List<ServerName> fetchSlavesAddresses() throws IOException {
     try {
-      AsyncClusterConnection peerConn = getPeerConnection();
-      ServerName master = FutureUtils.get(peerConn.getAdmin().getMaster());
+      ServerName master = FutureUtils.get(conn.getAdmin().getMaster());
       MasterService.BlockingInterface masterStub = MasterService.newBlockingStub(
-        peerConn.getRpcClient()
-          .createBlockingRpcChannel(master, User.getCurrent(), shortOperationTimeout));
+        conn.getRpcClient().createBlockingRpcChannel(master, User.getCurrent(), operationTimeout));
       ListReplicationSinkServersResponse resp = masterStub
         .listReplicationSinkServers(null, ListReplicationSinkServersRequest.newBuilder().build());
       return ProtobufUtil.toServerNameList(resp.getServerNameList());
-    } catch (ServiceException | IOException e) {
+    } catch (ServiceException e) {
       LOG.error("Peer {} fetches servers failed", ctx.getPeerId(), e);
+      throw ProtobufUtil.getRemoteException(e);
+    } catch (IOException e) {
+      LOG.error("Peer {} fetches servers failed", ctx.getPeerId(), e);
+      throw e;
     }
-    return Collections.emptyList();
   }
 
   /**
@@ -344,20 +308,34 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
     return addresses;
   }
 
-  protected synchronized void chooseSinks() {
-    List<ServerName> slaveAddresses = Collections.emptyList();
-    if (fetchServersUseZk) {
+  protected void chooseSinks() {
+    List<ServerName> slaveAddresses = Collections.EMPTY_LIST;
+    boolean useZk = fetchServersUseZk;
+    try {
+      if (!useZk || ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
+        useZk = false;
+        slaveAddresses = fetchSlavesAddresses();
+      } else {
+        useZk = true;
+      }
+    } catch (Throwable t) {
+      LOG.warn("Peer {} try to fetch servers by admin failed. Using zk impl.", ctx.getPeerId(), t);
+      useZk = true;
+    }
+    if (useZk) {
       slaveAddresses = fetchSlavesAddressesByZK();
-    } else {
-      slaveAddresses = fetchSlavesAddresses();
     }
+
     if (slaveAddresses.isEmpty()) {
       LOG.warn("No sinks available at peer. Will not be able to replicate");
     }
     Collections.shuffle(slaveAddresses, ThreadLocalRandom.current());
     int numSinks = (int) Math.ceil(slaveAddresses.size() * ratio);
-    this.sinkServers = slaveAddresses.subList(0, numSinks);
-    badReportCounts.clear();
+    synchronized (this) {
+      this.fetchServersUseZk = useZk;
+      this.sinkServers = slaveAddresses.subList(0, numSinks);
+      badReportCounts.clear();
+    }
   }
 
   protected synchronized int getNumSinks() {
@@ -368,16 +346,18 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
    * Get a randomly-chosen replication sink to replicate to.
    * @return a replication sink to replicate to
    */
-  protected synchronized SinkPeer getReplicationSink() throws IOException {
-    if (sinkServers.isEmpty()) {
-      LOG.info("Current list of sinks is out of date or empty, updating");
-      chooseSinks();
-    }
-    if (sinkServers.isEmpty()) {
-      throw new IOException("No replication sinks are available");
+  protected SinkPeer getReplicationSink() throws IOException {
+    ServerName serverName;
+    synchronized (this) {
+      if (sinkServers.isEmpty()) {
+        LOG.info("Current list of sinks is out of date or empty, updating");
+        chooseSinks();
+      }
+      if (sinkServers.isEmpty()) {
+        throw new IOException("No replication sinks are available");
+      }
+      serverName = sinkServers.get(ThreadLocalRandom.current().nextInt(sinkServers.size()));
     }
-    ServerName serverName =
-      sinkServers.get(ThreadLocalRandom.current().nextInt(sinkServers.size()));
     return createSinkPeer(serverName);
   }
 
@@ -438,7 +418,7 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
 
     @Override
     public synchronized void nodeChildrenChanged(String path) {
-      if (path.equals(regionServerListNode)) {
+      if (replicationEndpoint.fetchServersUseZk && path.equals(regionServerListNode)) {
         LOG.info("Detected change to peer region servers, fetching updated list");
         replicationEndpoint.chooseSinks();
       }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
index 385ccb7..e679a98 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.replication;
 
 import java.io.IOException;
+import java.lang.management.MemoryUsage;
 import java.net.InetSocketAddress;
 import java.util.concurrent.atomic.AtomicBoolean;
 
@@ -28,21 +29,36 @@ import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.YouAreDeadException;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
+import org.apache.hadoop.hbase.ipc.RpcClient;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.log.HBaseMarkers;
 import org.apache.hadoop.hbase.regionserver.ReplicationService;
 import org.apache.hadoop.hbase.regionserver.ReplicationSinkService;
+import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.util.Sleeper;
+import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hbase.thirdparty.com.google.protobuf.BlockingRpcChannel;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerReportRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerStatusProtos.ReplicationServerStatusService;
+
 /**
  * HReplicationServer which is responsible to all replication stuff. It checks in with
  * the HMaster. There are many HReplicationServers in a single HBase deployment.
@@ -85,9 +101,14 @@ public class HReplicationServer extends Thread implements Server {
   // A sleeper that sleeps for msgInterval.
   protected final Sleeper sleeper;
 
+  private final int shortOperationTimeout;
+
   // zookeeper connection and watcher
   protected final ZKWatcher zooKeeper;
 
+  // master address tracker
+  private final MasterAddressTracker masterAddressTracker;
+
   /**
    * The asynchronous cluster connection to be shared by services.
    */
@@ -97,6 +118,17 @@ public class HReplicationServer extends Thread implements Server {
 
   protected final ReplicationServerRpcServices rpcServices;
 
+  // Stub to do region server status calls against the master.
+  private volatile ReplicationServerStatusService.BlockingInterface rssStub;
+
+  // RPC client. Used to make the stub above that does region server status checking.
+  private RpcClient rpcClient;
+
+  /**
+   * ChoreService used to schedule tasks that we want to run periodically
+   */
+  private ChoreService choreService;
+
   public HReplicationServer(final Configuration conf) throws IOException {
     try {
       this.startCode = System.currentTimeMillis();
@@ -114,16 +146,22 @@ public class HReplicationServer extends Thread implements Server {
       this.msgInterval = conf.getInt("hbase.replicationserver.msginterval", 3 * 1000);
       this.sleeper = new Sleeper(this.msgInterval, this);
 
+      this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
+          HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
+
       // Some unit tests don't need a cluster, so no zookeeper at all
       if (!conf.getBoolean("hbase.testing.nocluster", false)) {
         // Open connection to zookeeper and set primary watcher
         zooKeeper = new ZKWatcher(conf, getProcessName() + ":" +
             rpcServices.isa.getPort(), this, false);
+        masterAddressTracker = new MasterAddressTracker(getZooKeeper(), this);
+        masterAddressTracker.start();
       } else {
         zooKeeper = null;
+        masterAddressTracker = null;
       }
-
       this.rpcServices.start(zooKeeper);
+      this.choreService = new ChoreService(getName(), true);
     } catch (Throwable t) {
       // Make sure we log the exception. HReplicationServer is often started via reflection and the
       // cause of failed startup is lost.
@@ -148,6 +186,7 @@ public class HReplicationServer extends Thread implements Server {
     } catch (Throwable e) {
       abort("Fatal exception during initialization", e);
     }
+
     try {
       setupReplication();
       startReplicationService();
@@ -159,6 +198,7 @@ public class HReplicationServer extends Thread implements Server {
       while (!isStopped()) {
         long now = System.currentTimeMillis();
         if ((now - lastMsg) >= msgInterval) {
+          tryReplicationServerReport(lastMsg, now);
           lastMsg = System.currentTimeMillis();
         }
         if (!isStopped() && !isAborted()) {
@@ -175,6 +215,22 @@ public class HReplicationServer extends Thread implements Server {
       abort(t.getMessage(), t);
     }
 
+    if (this.asyncClusterConnection != null) {
+      try {
+        this.asyncClusterConnection.close();
+      } catch (IOException e) {
+        // Although the {@link Closeable} interface throws an {@link
+        // IOException}, in reality, the implementation would never do that.
+        LOG.warn("Attempt to close server's AsyncClusterConnection failed.", e);
+      }
+    }
+    if (rssStub != null) {
+      rssStub = null;
+    }
+    if (rpcClient != null) {
+      this.rpcClient.close();
+    }
+
     if (this.zooKeeper != null) {
       this.zooKeeper.close();
     }
@@ -202,11 +258,13 @@ public class HReplicationServer extends Thread implements Server {
   private void preRegistrationInitialization() {
     try {
       setupClusterConnection();
+      // Setup RPC client for master communication
+      this.rpcClient = asyncClusterConnection.getRpcClient();
     } catch (Throwable t) {
       // Call stop if error or process will stick around for ever since server
       // puts up non-daemon threads.
       this.rpcServices.stop();
-      abort("Initialization of RS failed.  Hence aborting RS.", t);
+      abort("Initialization of ReplicationServer failed. Hence aborting ReplicationServer.", t);
     }
   }
 
@@ -270,7 +328,7 @@ public class HReplicationServer extends Thread implements Server {
 
   @Override
   public ChoreService getChoreService() {
-    return null;
+    return this.choreService;
   }
 
   @Override
@@ -327,7 +385,7 @@ public class HReplicationServer extends Thread implements Server {
       throws IOException {
     // read in the name of the sink replication class from the config file.
     String sinkClassname = conf.get(HConstants.REPLICATION_SINK_SERVICE_CLASSNAME,
-        HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
+        HConstants.REPLICATION_SINK_SERVICE_CLASSNAME_DEFAULT);
 
     server.replicationSinkService = newReplicationInstance(sinkClassname,
         ReplicationSinkService.class, conf, server);
@@ -386,4 +444,152 @@ public class HReplicationServer extends Thread implements Server {
   protected boolean setAbortRequested() {
     return abortRequested.compareAndSet(false, true);
   }
+
+  private void tryReplicationServerReport(long reportStartTime, long reportEndTime)
+      throws IOException {
+    ReplicationServerStatusService.BlockingInterface rss = rssStub;
+    if (rss == null) {
+      ServerName masterServerName = createReplicationServerStatusStub(true);
+      rss = rssStub;
+      if (masterServerName == null || rss == null) {
+        return;
+      }
+    }
+    ClusterStatusProtos.ServerLoad sl = buildServerLoad(reportStartTime, reportEndTime);
+    try {
+      RegionServerReportRequest.Builder request = RegionServerReportRequest
+          .newBuilder();
+      request.setServer(ProtobufUtil.toServerName(this.serverName));
+      request.setLoad(sl);
+      rss.replicationServerReport(null, request.build());
+    } catch (ServiceException se) {
+      IOException ioe = ProtobufUtil.getRemoteException(se);
+      if (ioe instanceof YouAreDeadException) {
+        // This will be caught and handled as a fatal error in run()
+        throw ioe;
+      }
+      if (rssStub == rss) {
+        rssStub = null;
+      }
+      // Couldn't connect to the master, get location from zk and reconnect
+      // Method blocks until new master is found or we are stopped
+      createReplicationServerStatusStub(true);
+    }
+  }
+
+  private ClusterStatusProtos.ServerLoad buildServerLoad(long reportStartTime, long reportEndTime) {
+    long usedMemory = -1L;
+    long maxMemory = -1L;
+    final MemoryUsage usage = MemorySizeUtil.safeGetHeapMemoryUsage();
+    if (usage != null) {
+      usedMemory = usage.getUsed();
+      maxMemory = usage.getMax();
+    }
+
+    ClusterStatusProtos.ServerLoad.Builder serverLoad = ClusterStatusProtos.ServerLoad.newBuilder();
+    serverLoad.setTotalNumberOfRequests(rpcServices.requestCount.sum());
+    serverLoad.setUsedHeapMB((int) (usedMemory / 1024 / 1024));
+    serverLoad.setMaxHeapMB((int) (maxMemory / 1024 / 1024));
+
+    serverLoad.setReportStartTime(reportStartTime);
+    serverLoad.setReportEndTime(reportEndTime);
+
+    // for the replicationLoad purpose. Only need to get from one executorService
+    // either source or sink will get the same info
+    ReplicationSinkService sinks = getReplicationSinkService();
+
+    if (sinks != null) {
+      // always refresh first to get the latest value
+      ReplicationLoad rLoad = sinks.refreshAndGetReplicationLoad();
+      if (rLoad != null) {
+        serverLoad.setReplLoadSink(rLoad.getReplicationLoadSink());
+      }
+    }
+    return serverLoad.build();
+  }
+
+  /**
+   * Get the current master from ZooKeeper and open the RPC connection to it. To get a fresh
+   * connection, the current rssStub must be null. Method will block until a master is available.
+   * You can break from this block by requesting the server stop.
+   * @param refresh If true then master address will be read from ZK, otherwise use cached data
+   * @return master + port, or null if server has been stopped
+   */
+  private synchronized ServerName createReplicationServerStatusStub(boolean refresh) {
+    if (rssStub != null) {
+      return masterAddressTracker.getMasterAddress();
+    }
+    ServerName sn = null;
+    long previousLogTime = 0;
+    ReplicationServerStatusService.BlockingInterface intRssStub = null;
+    boolean interrupted = false;
+    try {
+      while (keepLooping()) {
+        sn = this.masterAddressTracker.getMasterAddress(refresh);
+        if (sn == null) {
+          if (!keepLooping()) {
+            // give up with no connection.
+            LOG.debug("No master found and cluster is stopped; bailing out");
+            return null;
+          }
+          if (System.currentTimeMillis() > (previousLogTime + 1000)) {
+            LOG.debug("No master found; retry");
+            previousLogTime = System.currentTimeMillis();
+          }
+          refresh = true; // let's try pull it from ZK directly
+          if (sleepInterrupted(200)) {
+            interrupted = true;
+          }
+          continue;
+        }
+
+        try {
+          BlockingRpcChannel channel =
+              this.rpcClient.createBlockingRpcChannel(sn, userProvider.getCurrent(),
+                  shortOperationTimeout);
+          intRssStub = ReplicationServerStatusService.newBlockingStub(channel);
+          break;
+        } catch (IOException e) {
+          if (System.currentTimeMillis() > (previousLogTime + 1000)) {
+            e = e instanceof RemoteException ?
+                ((RemoteException)e).unwrapRemoteException() : e;
+            if (e instanceof ServerNotRunningYetException) {
+              LOG.info("Master isn't available yet, retrying");
+            } else {
+              LOG.warn("Unable to connect to master. Retrying. Error was:", e);
+            }
+            previousLogTime = System.currentTimeMillis();
+          }
+          if (sleepInterrupted(200)) {
+            interrupted = true;
+          }
+        }
+      }
+    } finally {
+      if (interrupted) {
+        Thread.currentThread().interrupt();
+      }
+    }
+    this.rssStub = intRssStub;
+    return sn;
+  }
+
+  /**
+   * @return True if we should break loop because cluster is going down or
+   *   this server has been stopped or hdfs has gone bad.
+   */
+  private boolean keepLooping() {
+    return !this.stopped;
+  }
+
+  private static boolean sleepInterrupted(long millis) {
+    boolean interrupted = false;
+    try {
+      Thread.sleep(millis);
+    } catch (InterruptedException e) {
+      LOG.warn("Interrupted while sleeping");
+      interrupted = true;
+    }
+    return interrupted;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index e39ee23..c86523e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -47,7 +47,6 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.client.Admin;
-import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.ipc.RpcServer;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSyncUp.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSyncUp.java
index 0e938ec..b9ad6c9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSyncUp.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSyncUp.java
@@ -137,11 +137,13 @@ public class ReplicationSyncUp extends Configured implements Tool {
   class DummyServer implements Server {
     String hostname;
     ZKWatcher zkw;
+    ChoreService choreService;
 
     DummyServer(ZKWatcher zkw) {
       // a unique name in case the first run fails
       hostname = EnvironmentEdgeManager.currentTime() + ".SyncUpTool.replication.org";
       this.zkw = zkw;
+      this.choreService = new ChoreService("ReplicationSyncUpDummyServer", true);
     }
 
     DummyServer(String hostname) {
@@ -193,7 +195,7 @@ public class ReplicationSyncUp extends Configured implements Tool {
 
     @Override
     public ChoreService getChoreService() {
-      return null;
+      return choreService;
     }
 
     @Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
index 5aea397..ec8fd9d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java
@@ -154,6 +154,11 @@ public class MockNoopMasterServices implements MasterServices {
   }
 
   @Override
+  public ReplicationServerManager getReplicationServerManager() {
+    return null;
+  }
+
+  @Override
   public ZKWatcher getZooKeeper() {
     return null;
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
index 4c442fb..c1dc949 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationBase.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.replication;
 
+import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_OPERATION_TIMEOUT;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -207,6 +208,7 @@ public class TestReplicationBase {
     conf.setFloat("replication.source.ratio", 1.0f);
     conf.setBoolean("replication.source.eof.autorecovery", true);
     conf.setLong("hbase.serial.replication.waiting.ms", 100);
+    conf.setLong(HBASE_CLIENT_OPERATION_TIMEOUT, 5000);
   }
 
   static void configureClusters(HBaseTestingUtility util1,
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
index 0ef23f2..30660c6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
@@ -17,7 +17,10 @@
  */
 package org.apache.hadoop.hbase.replication;
 
+import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_OPERATION_TIMEOUT;
+import static org.apache.hadoop.hbase.master.ReplicationServerManager.ONLINE_SERVER_REFRESH_INTERVAL;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
@@ -38,7 +41,9 @@ import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.ReplicationServerManager;
 import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.ReplicationServerSinkPeer;
+import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.SinkPeer;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -74,6 +79,7 @@ public class TestReplicationServer {
   private static HMaster MASTER;
 
   private static HReplicationServer replicationServer;
+  private static ServerName replicationServerName;
 
   private static Path baseNamespaceDir;
   private static Path hfileArchiveDir;
@@ -86,14 +92,11 @@ public class TestReplicationServer {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
+    CONF.setLong(HBASE_CLIENT_OPERATION_TIMEOUT, 1000);
+    CONF.setLong(ONLINE_SERVER_REFRESH_INTERVAL, 10000);
     TEST_UTIL.startMiniCluster();
     MASTER = TEST_UTIL.getMiniHBaseCluster().getMaster();
-
-    replicationServer = new HReplicationServer(CONF);
-    replicationServer.start();
-
     TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster();
-    TEST_UTIL.waitFor(60000, () -> replicationServer.isOnline());
 
     Path rootDir = CommonFSUtils.getRootDir(CONF);
     baseNamespaceDir = new Path(rootDir, new Path(HConstants.BASE_NAMESPACE_DIR));
@@ -108,6 +111,11 @@ public class TestReplicationServer {
 
   @Before
   public void before() throws Exception {
+    replicationServer = new HReplicationServer(CONF);
+    replicationServer.start();
+    TEST_UTIL.waitFor(60000, () -> replicationServer.isOnline());
+    replicationServerName = replicationServer.getServerName();
+
     TEST_UTIL.createTable(TABLENAME, FAMILY);
     TEST_UTIL.waitTableAvailable(TABLENAME);
   }
@@ -115,6 +123,11 @@ public class TestReplicationServer {
   @After
   public void after() throws IOException {
     TEST_UTIL.deleteTableIfAny(TABLENAME);
+    if (!replicationServer.isStopped()) {
+      replicationServer.stop("test");
+    }
+    replicationServer = null;
+    replicationServerName = null;
   }
 
   /**
@@ -125,10 +138,10 @@ public class TestReplicationServer {
     AsyncClusterConnection conn =
         TEST_UTIL.getHBaseCluster().getMaster().getAsyncClusterConnection();
     AsyncReplicationServerAdmin replAdmin =
-        conn.getReplicationServerAdmin(replicationServer.getServerName());
+        conn.getReplicationServerAdmin(replicationServerName);
 
     ReplicationServerSinkPeer sinkPeer =
-        new ReplicationServerSinkPeer(replicationServer.getServerName(), replAdmin);
+        new ReplicationServerSinkPeer(replicationServerName, replAdmin);
     replicateWALEntryAndVerify(sinkPeer);
   }
 
@@ -143,12 +156,11 @@ public class TestReplicationServer {
         .getRegionServer().getServerName();
     AsyncReplicationServerAdmin replAdmin = conn.getReplicationServerAdmin(rs);
 
-    ReplicationServerSinkPeer
-      sinkPeer = new ReplicationServerSinkPeer(rs, replAdmin);
+    ReplicationServerSinkPeer sinkPeer = new ReplicationServerSinkPeer(rs, replAdmin);
     replicateWALEntryAndVerify(sinkPeer);
   }
 
-  private void replicateWALEntryAndVerify(ReplicationServerSinkPeer sinkPeer) throws Exception {
+  private void replicateWALEntryAndVerify(SinkPeer sinkPeer) throws Exception {
     Entry[] entries = new Entry[BATCH_SIZE];
     for(int i = 0; i < BATCH_SIZE; i++) {
       entries[i] = generateEdit(i, TABLENAME, Bytes.toBytes(i));
@@ -175,4 +187,29 @@ public class TestReplicationServer {
     edit.add(new KeyValue(row, Bytes.toBytes(FAMILY), Bytes.toBytes(FAMILY), timestamp, row));
     return new WAL.Entry(key, edit);
   }
+
+  @Test
+  public void testReplicationServerReport() throws Exception {
+    ReplicationServerManager replicationServerManager = MASTER.getReplicationServerManager();
+    assertNotNull(replicationServerManager);
+    TEST_UTIL.waitFor(60000, () -> !replicationServerManager.getOnlineServers().isEmpty()
+        && null != replicationServerManager.getServerMetrics(replicationServerName));
+    // put data via replication server
+    testReplicateWAL();
+    TEST_UTIL.waitFor(60000, () -> replicationServer.rpcServices.requestCount.sum() > 0
+        && replicationServer.rpcServices.requestCount.sum() == replicationServerManager
+        .getServerMetrics(replicationServerName).getRequestCount());
+  }
+
+  @Test
+  public void testReplicationServerExpire() throws Exception {
+    ReplicationServerManager replicationServerManager = MASTER.getReplicationServerManager();
+    TEST_UTIL.waitFor(60000, () -> !replicationServerManager.getOnlineServers().isEmpty()
+        && null != replicationServerManager.getServerMetrics(replicationServerName));
+
+    replicationServer.stop("test");
+
+    TEST_UTIL.waitFor(180000, 1000, replicationServerManager.getOnlineServers()::isEmpty);
+    assertNull(replicationServerManager.getServerMetrics(replicationServerName));
+  }
 }

[hbase] 05/12: HBASE-24982 Disassemble the method replicateWALEntry from AdminService to a new interface ReplicationServerService (#2360)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit d2588a1dc196463d5cfd11146f14d018ab1c6efd
Author: XinSun <dd...@gmail.com>
AuthorDate: Wed Sep 9 15:00:37 2020 +0800

    HBASE-24982 Disassemble the method replicateWALEntry from AdminService to a new interface ReplicationServerService (#2360)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../hadoop/hbase/client/AsyncConnectionImpl.java   |  16 ++
 .../server/replication/ReplicationServer.proto     |  32 ++++
 .../hadoop/hbase/replication/ReplicationUtils.java |  19 ++
 .../hbase/client/AsyncClusterConnection.java       |   5 +
 .../hbase/client/AsyncClusterConnectionImpl.java   |   5 +
 .../hbase/client/AsyncReplicationServerAdmin.java  |  80 +++++++++
 .../hbase/protobuf/ReplicationProtobufUtil.java    |  18 ++
 .../hadoop/hbase/regionserver/RSRpcServices.java   |   4 +-
 .../replication/HBaseReplicationEndpoint.java      |  57 +++++-
 .../hbase/replication/HReplicationServer.java      |   2 -
 .../replication/ReplicationServerRpcServices.java  | 200 +--------------------
 .../HBaseInterClusterReplicationEndpoint.java      |   7 +-
 .../hbase/client/DummyAsyncClusterConnection.java  |   5 +
 .../replication/TestHBaseReplicationEndpoint.java  |  17 +-
 .../hbase/replication/TestReplicationServer.java   |  43 ++++-
 15 files changed, 284 insertions(+), 226 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java
index 25a98ed..840da27 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/AsyncConnectionImpl.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic
 import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.ReplicationServerService;
 
 /**
  * The implementation of AsyncConnection.
@@ -105,6 +106,8 @@ class AsyncConnectionImpl implements AsyncConnection {
   private final ConcurrentMap<String, ClientService.Interface> rsStubs = new ConcurrentHashMap<>();
   private final ConcurrentMap<String, AdminService.Interface> adminStubs =
       new ConcurrentHashMap<>();
+  private final ConcurrentMap<String, ReplicationServerService.Interface> replStubs =
+      new ConcurrentHashMap<>();
 
   private final AtomicReference<MasterService.Interface> masterStub = new AtomicReference<>();
 
@@ -283,12 +286,25 @@ class AsyncConnectionImpl implements AsyncConnection {
     return AdminService.newStub(rpcClient.createRpcChannel(serverName, user, rpcTimeout));
   }
 
+  private ReplicationServerService.Interface createReplicationServerStub(ServerName serverName)
+      throws IOException {
+    return ReplicationServerService.newStub(
+        rpcClient.createRpcChannel(serverName, user, rpcTimeout));
+  }
+
   AdminService.Interface getAdminStub(ServerName serverName) throws IOException {
     return ConcurrentMapUtils.computeIfAbsentEx(adminStubs,
       getStubKey(AdminService.getDescriptor().getName(), serverName),
       () -> createAdminServerStub(serverName));
   }
 
+  ReplicationServerService.Interface getReplicationServerStub(ServerName serverName)
+      throws IOException {
+    return ConcurrentMapUtils.computeIfAbsentEx(replStubs,
+        getStubKey(ReplicationServerService.getDescriptor().getName(), serverName),
+      () -> createReplicationServerStub(serverName));
+  }
+
   CompletableFuture<MasterService.Interface> getMasterStub() {
     return ConnectionUtils.getOrFetch(masterStub, masterStubMakeFuture, false, () -> {
       CompletableFuture<MasterService.Interface> future = new CompletableFuture<>();
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto b/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto
new file mode 100644
index 0000000..ed334c4
--- /dev/null
+++ b/hbase-protocol-shaded/src/main/protobuf/server/replication/ReplicationServer.proto
@@ -0,0 +1,32 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+syntax = "proto2";
+package hbase.pb;
+
+option java_package = "org.apache.hadoop.hbase.shaded.protobuf.generated";
+option java_outer_classname = "ReplicationServerProtos";
+option java_generic_services = true;
+option java_generate_equals_and_hash = true;
+option optimize_for = SPEED;
+
+import "server/region/Admin.proto";
+
+service ReplicationServerService {
+  rpc ReplicateWALEntry(ReplicateWALEntryRequest)
+    returns(ReplicateWALEntryResponse);
+}
\ No newline at end of file
diff --git a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationUtils.java b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationUtils.java
index a786206..7bafbc2 100644
--- a/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationUtils.java
+++ b/hbase-replication/src/main/java/org/apache/hadoop/hbase/replication/ReplicationUtils.java
@@ -30,6 +30,9 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.AsyncAdmin;
+import org.apache.hadoop.hbase.client.AsyncConnection;
+import org.apache.hadoop.hbase.util.FutureUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -212,4 +215,20 @@ public final class ReplicationUtils {
     }
     return initialValue * HConstants.RETRY_BACKOFF[ntries];
   }
+
+  /**
+   * Check whether peer cluster supports replication offload.
+   * @param peerConn connection for peer cluster
+   * @return true if peer cluster version >= 3
+   * @throws IOException if the peer cluster metrics cannot be retrieved
+   */
+  public static boolean isPeerClusterSupportReplicationOffload(AsyncConnection peerConn)
+      throws IOException {
+    AsyncAdmin admin = peerConn.getAdmin();
+    String version = FutureUtils.get(admin.getClusterMetrics()).getHBaseVersion();
+    if (Integer.parseInt(version.split("\\.")[0]) >= 3) {
+      return true;
+    }
+    return false;
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnection.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnection.java
index 92118ac..b6a3b97 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnection.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnection.java
@@ -42,6 +42,11 @@ public interface AsyncClusterConnection extends AsyncConnection {
   AsyncRegionServerAdmin getRegionServerAdmin(ServerName serverName);
 
   /**
+   * Get the admin service for the given replication server.
+   */
+  AsyncReplicationServerAdmin getReplicationServerAdmin(ServerName serverName);
+
+  /**
    * Get the nonce generator for this connection.
    */
   NonceGenerator getNonceGenerator();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnectionImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnectionImpl.java
index 39fc3a2..e4c2ee3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnectionImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncClusterConnectionImpl.java
@@ -71,6 +71,11 @@ class AsyncClusterConnectionImpl extends AsyncConnectionImpl implements AsyncClu
   }
 
   @Override
+  public AsyncReplicationServerAdmin getReplicationServerAdmin(ServerName serverName) {
+    return new AsyncReplicationServerAdmin(serverName, this);
+  }
+
+  @Override
   public CompletableFuture<FlushRegionResponse> flush(byte[] regionName,
       boolean writeFlushWALMarker) {
     RawAsyncHBaseAdmin admin = (RawAsyncHBaseAdmin) getAdmin();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncReplicationServerAdmin.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncReplicationServerAdmin.java
new file mode 100644
index 0000000..7511a64
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncReplicationServerAdmin.java
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import java.io.IOException;
+import java.util.concurrent.CompletableFuture;
+
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.ipc.HBaseRpcController;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.ReplicationServerService;
+
+/**
+ * A simple wrapper of the {@link ReplicationServerService} for a replication server.
+ * <p/>
+ * Notice that there is no retry, and this is intentional.
+ */
+@InterfaceAudience.Private
+public class AsyncReplicationServerAdmin {
+
+  private final ServerName server;
+
+  private final AsyncConnectionImpl conn;
+
+  AsyncReplicationServerAdmin(ServerName server, AsyncConnectionImpl conn) {
+    this.server = server;
+    this.conn = conn;
+  }
+
+  @FunctionalInterface
+  private interface RpcCall<RESP> {
+    void call(ReplicationServerService.Interface stub, HBaseRpcController controller,
+        RpcCallback<RESP> done);
+  }
+
+  private <RESP> CompletableFuture<RESP> call(RpcCall<RESP> rpcCall, CellScanner cellScanner) {
+    CompletableFuture<RESP> future = new CompletableFuture<>();
+    HBaseRpcController controller = conn.rpcControllerFactory.newController(cellScanner);
+    try {
+      rpcCall.call(conn.getReplicationServerStub(server), controller, resp -> {
+        if (controller.failed()) {
+          future.completeExceptionally(controller.getFailed());
+        } else {
+          future.complete(resp);
+        }
+      });
+    } catch (IOException e) {
+      future.completeExceptionally(e);
+    }
+    return future;
+  }
+
+  public CompletableFuture<AdminProtos.ReplicateWALEntryResponse> replicateWALEntry(
+      AdminProtos.ReplicateWALEntryRequest request, CellScanner cellScanner, int timeout) {
+    return call((stub, controller, done) -> {
+      controller.setCallTimeout(timeout);
+      stub.replicateWALEntry(controller, request, done);
+    }, cellScanner);
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/protobuf/ReplicationProtobufUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/protobuf/ReplicationProtobufUtil.java
index e47c929..17f48a6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/protobuf/ReplicationProtobufUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/protobuf/ReplicationProtobufUtil.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellScanner;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
 import org.apache.hadoop.hbase.io.SizedCellScanner;
 import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec;
 import org.apache.hadoop.hbase.util.FutureUtils;
@@ -62,6 +63,23 @@ public class ReplicationProtobufUtil {
   }
 
   /**
+   * A helper to replicate a list of WAL entries using replication server admin
+   * @param admin the replication server admin
+   * @param entries Array of WAL entries to be replicated
+   * @param replicationClusterId Id which will uniquely identify source cluster FS client
+   *          configurations in the replication configuration directory
+   * @param sourceBaseNamespaceDir Path to source cluster base namespace directory
+   * @param sourceHFileArchiveDir Path to the source cluster hfile archive directory
+   */
+  public static void replicateWALEntry(AsyncReplicationServerAdmin admin, Entry[] entries,
+      String replicationClusterId, Path sourceBaseNamespaceDir, Path sourceHFileArchiveDir,
+      int timeout) throws IOException {
+    Pair<ReplicateWALEntryRequest, CellScanner> p = buildReplicateWALEntryRequest(entries, null,
+        replicationClusterId, sourceBaseNamespaceDir, sourceHFileArchiveDir);
+    FutureUtils.get(admin.replicateWALEntry(p.getFirst(), p.getSecond(), timeout));
+  }
+
+  /**
    * Create a new ReplicateWALEntryRequest from a list of WAL entries
    * @param entries the WAL entries to be replicated
    * @return a pair of ReplicateWALEntryRequest and a CellScanner over all the WALEdit values found.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 4dacb7f..c1f447c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -1594,8 +1594,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
     }
     if (admin) {
       bssi.add(new BlockingServiceAndInterface(
-      AdminService.newReflectiveBlockingService(this),
-      AdminService.BlockingInterface.class));
+          AdminService.newReflectiveBlockingService(this),
+          AdminService.BlockingInterface.class));
     }
     return new org.apache.hbase.thirdparty.com.google.common.collect.
         ImmutableList.Builder<BlockingServiceAndInterface>().addAll(bssi).build();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
index 8678685..f38fd08 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HBaseReplicationEndpoint.java
@@ -26,11 +26,15 @@ import java.util.Map;
 import java.util.UUID;
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
 import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
+import org.apache.hadoop.hbase.protobuf.ReplicationProtobufUtil;
 import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.wal.WAL;
 import org.apache.hadoop.hbase.zookeeper.ZKListener;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.ServerName;
@@ -278,7 +282,7 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
     }
     ServerName serverName =
       sinkServers.get(ThreadLocalRandom.current().nextInt(sinkServers.size()));
-    return new SinkPeer(serverName, conn.getRegionServerAdmin(serverName));
+    return createSinkPeer(serverName);
   }
 
   /**
@@ -340,21 +344,60 @@ public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
   /**
    * Wraps a replication region server sink to provide the ability to identify it.
    */
-  public static class SinkPeer {
+  public static abstract class SinkPeer {
     private ServerName serverName;
-    private AsyncRegionServerAdmin regionServer;
 
-    public SinkPeer(ServerName serverName, AsyncRegionServerAdmin regionServer) {
+    public SinkPeer(ServerName serverName) {
       this.serverName = serverName;
-      this.regionServer = regionServer;
     }
 
     ServerName getServerName() {
       return serverName;
     }
 
-    public AsyncRegionServerAdmin getRegionServer() {
-      return regionServer;
+    public abstract void replicateWALEntry(WAL.Entry[] entries, String replicationClusterId,
+      Path sourceBaseNamespaceDir, Path sourceHFileArchiveDir, int timeout) throws IOException;
+  }
+
+  public static class RegionServerSinkPeer extends SinkPeer {
+
+    private AsyncRegionServerAdmin regionServer;
+
+    public RegionServerSinkPeer(ServerName serverName,
+      AsyncRegionServerAdmin replicationServer) {
+      super(serverName);
+      this.regionServer = replicationServer;
+    }
+
+    public void replicateWALEntry(WAL.Entry[] entries, String replicationClusterId,
+      Path sourceBaseNamespaceDir, Path sourceHFileArchiveDir, int timeout) throws IOException {
+      ReplicationProtobufUtil.replicateWALEntry(regionServer, entries, replicationClusterId,
+        sourceBaseNamespaceDir, sourceHFileArchiveDir, timeout);
+    }
+  }
+
+  public static class ReplicationServerSinkPeer extends SinkPeer {
+
+    private AsyncReplicationServerAdmin replicationServer;
+
+    public ReplicationServerSinkPeer(ServerName serverName,
+      AsyncReplicationServerAdmin replicationServer) {
+      super(serverName);
+      this.replicationServer = replicationServer;
+    }
+
+    public void replicateWALEntry(WAL.Entry[] entries, String replicationClusterId,
+      Path sourceBaseNamespaceDir, Path sourceHFileArchiveDir, int timeout) throws IOException {
+      ReplicationProtobufUtil.replicateWALEntry(replicationServer, entries, replicationClusterId,
+        sourceBaseNamespaceDir, sourceHFileArchiveDir, timeout);
+    }
+  }
+
+  private SinkPeer createSinkPeer(ServerName serverName) throws IOException {
+    if (ReplicationUtils.isPeerClusterSupportReplicationOffload(conn)) {
+      return new ReplicationServerSinkPeer(serverName, conn.getReplicationServerAdmin(serverName));
+    } else {
+      return new RegionServerSinkPeer(serverName, conn.getRegionServerAdmin(serverName));
     }
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
index 31dec0c..385ccb7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.regionserver.ReplicationService;
 import org.apache.hadoop.hbase.regionserver.ReplicationSinkService;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.security.UserProvider;
-import org.apache.hadoop.hbase.trace.TraceUtil;
 import org.apache.hadoop.hbase.util.Sleeper;
 import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -99,7 +98,6 @@ public class HReplicationServer extends Thread implements Server {
   protected final ReplicationServerRpcServices rpcServices;
 
   public HReplicationServer(final Configuration conf) throws IOException {
-    TraceUtil.initTracer(conf);
     try {
       this.startCode = System.currentTimeMillis();
       this.conf = conf;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
index 1b9b699..15d4f8c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
@@ -27,14 +27,12 @@ import java.util.concurrent.atomic.LongAdder;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CellScanner;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.client.ConnectionUtils;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
 import org.apache.hadoop.hbase.ipc.HBaseRpcController;
 import org.apache.hadoop.hbase.ipc.PriorityFunction;
-import org.apache.hadoop.hbase.ipc.QosPriority;
 import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface;
 import org.apache.hadoop.hbase.ipc.RpcServerFactory;
 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
@@ -58,53 +56,11 @@ import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearCompactionQueuesRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearCompactionQueuesResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearRegionBlockCacheRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearRegionBlockCacheResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearSlowLogResponseRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearSlowLogResponses;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactionSwitchRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactionSwitchResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionLoadRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionLoadResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetStoreFileRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetStoreFileResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.RollWALWriterRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.RollWALWriterResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.SlowLogResponseRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.SlowLogResponses;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.StopServerRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.StopServerResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateConfigurationRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateConfigurationResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WarmupRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WarmupRegionResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.ReplicationServerProtos.ReplicationServerService;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
 import org.apache.hbase.thirdparty.com.google.protobuf.Message;
@@ -117,7 +73,7 @@ import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 @InterfaceAudience.Private
 @SuppressWarnings("deprecation")
 public class ReplicationServerRpcServices implements HBaseRPCErrorHandler,
-    AdminService.BlockingInterface, PriorityFunction {
+    ReplicationServerService.BlockingInterface, PriorityFunction {
 
   protected static final Logger LOG = LoggerFactory.getLogger(ReplicationServerRpcServices.class);
 
@@ -256,8 +212,8 @@ public class ReplicationServerRpcServices implements HBaseRPCErrorHandler,
   protected List<BlockingServiceAndInterface> getServices() {
     List<BlockingServiceAndInterface> bssi = new ArrayList<>();
     bssi.add(new BlockingServiceAndInterface(
-      AdminService.newReflectiveBlockingService(this),
-      AdminService.BlockingInterface.class));
+      ReplicationServerService.newReflectiveBlockingService(this),
+        ReplicationServerService.BlockingInterface.class));
     return new ImmutableList.Builder<BlockingServiceAndInterface>().addAll(bssi).build();
   }
 
@@ -325,154 +281,6 @@ public class ReplicationServerRpcServices implements HBaseRPCErrorHandler,
     }
   }
 
-  @Override
-  public GetRegionInfoResponse getRegionInfo(RpcController controller, GetRegionInfoRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public GetStoreFileResponse getStoreFile(RpcController controller, GetStoreFileRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public GetOnlineRegionResponse getOnlineRegion(RpcController controller,
-      GetOnlineRegionRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public OpenRegionResponse openRegion(RpcController controller, OpenRegionRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public WarmupRegionResponse warmupRegion(RpcController controller, WarmupRegionRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public CloseRegionResponse closeRegion(RpcController controller, CloseRegionRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public FlushRegionResponse flushRegion(RpcController controller, FlushRegionRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public CompactionSwitchResponse compactionSwitch(RpcController controller,
-      CompactionSwitchRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public CompactRegionResponse compactRegion(RpcController controller,
-      CompactRegionRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public ReplicateWALEntryResponse replay(RpcController controller,
-      ReplicateWALEntryRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public RollWALWriterResponse rollWALWriter(RpcController controller, RollWALWriterRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public GetServerInfoResponse getServerInfo(RpcController controller, GetServerInfoRequest request)
-      throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  /**
-   * Stop the replication server.
-   *
-   * @param controller the RPC controller
-   * @param request the request
-   */
-  @Override
-  @QosPriority(priority=HConstants.ADMIN_QOS)
-  public StopServerResponse stopServer(final RpcController controller,
-      final StopServerRequest request) {
-    requestCount.increment();
-    String reason = request.getReason();
-    replicationServer.stop(reason);
-    return StopServerResponse.newBuilder().build();
-  }
-
-  @Override
-  public UpdateFavoredNodesResponse updateFavoredNodes(RpcController controller,
-      UpdateFavoredNodesRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public UpdateConfigurationResponse updateConfiguration(RpcController controller,
-      UpdateConfigurationRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public GetRegionLoadResponse getRegionLoad(RpcController controller,
-      GetRegionLoadRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public ClearCompactionQueuesResponse clearCompactionQueues(RpcController controller,
-      ClearCompactionQueuesRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public ClearRegionBlockCacheResponse clearRegionBlockCache(RpcController controller,
-      ClearRegionBlockCacheRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public GetSpaceQuotaSnapshotsResponse getSpaceQuotaSnapshots(RpcController controller,
-      GetSpaceQuotaSnapshotsRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public ExecuteProceduresResponse executeProcedures(RpcController controller,
-      ExecuteProceduresRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public SlowLogResponses getSlowLogResponses(RpcController controller,
-      SlowLogResponseRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public SlowLogResponses getLargeLogResponses(RpcController controller,
-      SlowLogResponseRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
-  @Override
-  public ClearSlowLogResponses clearSlowLogsResponses(RpcController controller,
-      ClearSlowLogResponseRequest request) throws ServiceException {
-    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
-  }
-
   protected AccessChecker getAccessChecker() {
     return accessChecker;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
index 6dd60d1..e39ee23 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/HBaseInterClusterReplicationEndpoint.java
@@ -51,7 +51,6 @@ import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
 import org.apache.hadoop.hbase.client.Connection;
 import org.apache.hadoop.hbase.client.ConnectionFactory;
 import org.apache.hadoop.hbase.ipc.RpcServer;
-import org.apache.hadoop.hbase.protobuf.ReplicationProtobufUtil;
 import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.regionserver.wal.WALUtil;
 import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint;
@@ -563,11 +562,9 @@ public class HBaseInterClusterReplicationEndpoint extends HBaseReplicationEndpoi
           logPeerId(), entriesHashCode, entries.size(), size, replicationClusterId);
       }
       sinkPeer = getReplicationSink();
-      AsyncRegionServerAdmin rsAdmin = sinkPeer.getRegionServer();
       try {
-        ReplicationProtobufUtil.replicateWALEntry(rsAdmin,
-          entries.toArray(new Entry[entries.size()]), replicationClusterId, baseNamespaceDir,
-          hfileArchiveDir, timeout);
+        sinkPeer.replicateWALEntry(entries.toArray(new Entry[entries.size()]), replicationClusterId,
+            baseNamespaceDir, hfileArchiveDir, timeout);
         if (LOG.isTraceEnabled()) {
           LOG.trace("{} Completed replicating batch {}", logPeerId(), entriesHashCode);
         }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncClusterConnection.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncClusterConnection.java
index 8755749..5af4086 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncClusterConnection.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/DummyAsyncClusterConnection.java
@@ -109,6 +109,11 @@ public class DummyAsyncClusterConnection implements AsyncClusterConnection {
   }
 
   @Override
+  public AsyncReplicationServerAdmin getReplicationServerAdmin(ServerName serverName) {
+    return null;
+  }
+
+  @Override
   public NonceGenerator getNonceGenerator() {
     return null;
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
index 4160141..4182eaf 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestHBaseReplicationEndpoint.java
@@ -28,7 +28,8 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
-import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
+import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.ReplicationServerSinkPeer;
 import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.SinkPeer;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -99,7 +100,7 @@ public class TestHBaseReplicationEndpoint {
     // Sanity check
     assertEquals(1, endpoint.getNumSinks());
 
-    SinkPeer sinkPeer = new SinkPeer(serverNameA, mock(AsyncRegionServerAdmin.class));
+    SinkPeer sinkPeer = mockSinkPeer(serverNameA);
     endpoint.reportBadSink(sinkPeer);
     // Just reporting a bad sink once shouldn't have an effect
     assertEquals(1, endpoint.getNumSinks());
@@ -123,7 +124,7 @@ public class TestHBaseReplicationEndpoint {
     assertEquals(expected, endpoint.getNumSinks());
 
     ServerName badSinkServer0 = endpoint.getSinkServers().get(0);
-    SinkPeer sinkPeer = new SinkPeer(badSinkServer0, mock(AsyncRegionServerAdmin.class));
+    SinkPeer sinkPeer = mockSinkPeer(badSinkServer0);
     for (int i = 0; i <= HBaseReplicationEndpoint.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       endpoint.reportBadSink(sinkPeer);
     }
@@ -133,7 +134,7 @@ public class TestHBaseReplicationEndpoint {
 
     // now try a sink that has some successes
     ServerName badSinkServer1 = endpoint.getSinkServers().get(0);
-    sinkPeer = new SinkPeer(badSinkServer1, mock(AsyncRegionServerAdmin.class));
+    sinkPeer = mockSinkPeer(badSinkServer1);
     for (int i = 0; i < HBaseReplicationEndpoint.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       endpoint.reportBadSink(sinkPeer);
     }
@@ -168,8 +169,8 @@ public class TestHBaseReplicationEndpoint {
     ServerName serverNameA = endpoint.getSinkServers().get(0);
     ServerName serverNameB = endpoint.getSinkServers().get(1);
 
-    SinkPeer sinkPeerA = new SinkPeer(serverNameA, mock(AsyncRegionServerAdmin.class));
-    SinkPeer sinkPeerB = new SinkPeer(serverNameB, mock(AsyncRegionServerAdmin.class));
+    SinkPeer sinkPeerA = mockSinkPeer(serverNameA);
+    SinkPeer sinkPeerB = mockSinkPeer(serverNameB);
 
     for (int i = 0; i <= HBaseReplicationEndpoint.DEFAULT_BAD_SINK_THRESHOLD; i++) {
       endpoint.reportBadSink(sinkPeerA);
@@ -207,4 +208,8 @@ public class TestHBaseReplicationEndpoint {
       return null;
     }
   }
+
+  private SinkPeer mockSinkPeer(ServerName serverName) {
+    return new ReplicationServerSinkPeer(serverName, mock(AsyncReplicationServerAdmin.class));
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
index 6a0ef3d..0ef23f2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
@@ -30,14 +30,15 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
-import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.master.HMaster;
-import org.apache.hadoop.hbase.protobuf.ReplicationProtbufUtil;
+import org.apache.hadoop.hbase.replication.HBaseReplicationEndpoint.ReplicationServerSinkPeer;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.ReplicationTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -116,22 +117,48 @@ public class TestReplicationServer {
     TEST_UTIL.deleteTableIfAny(TABLENAME);
   }
 
+  /**
+   * Requests replication server using {@link AsyncReplicationServerAdmin}
+   */
   @Test
   public void testReplicateWAL() throws Exception {
-    AsyncClusterConnection conn = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().get(0)
-        .getRegionServer().getAsyncClusterConnection();
-    AsyncRegionServerAdmin rsAdmin = conn.getRegionServerAdmin(replicationServer.getServerName());
+    AsyncClusterConnection conn =
+        TEST_UTIL.getHBaseCluster().getMaster().getAsyncClusterConnection();
+    AsyncReplicationServerAdmin replAdmin =
+        conn.getReplicationServerAdmin(replicationServer.getServerName());
+
+    ReplicationServerSinkPeer sinkPeer =
+        new ReplicationServerSinkPeer(replicationServer.getServerName(), replAdmin);
+    replicateWALEntryAndVerify(sinkPeer);
+  }
+
+  /**
+   * Requests region server using {@link AsyncReplicationServerAdmin}
+   */
+  @Test
+  public void testReplicateWAL2() throws Exception {
+    AsyncClusterConnection conn =
+        TEST_UTIL.getHBaseCluster().getMaster().getAsyncClusterConnection();
+    ServerName rs = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().get(0)
+        .getRegionServer().getServerName();
+    AsyncReplicationServerAdmin replAdmin = conn.getReplicationServerAdmin(rs);
+
+    ReplicationServerSinkPeer
+      sinkPeer = new ReplicationServerSinkPeer(rs, replAdmin);
+    replicateWALEntryAndVerify(sinkPeer);
+  }
 
+  private void replicateWALEntryAndVerify(ReplicationServerSinkPeer sinkPeer) throws Exception {
     Entry[] entries = new Entry[BATCH_SIZE];
     for(int i = 0; i < BATCH_SIZE; i++) {
       entries[i] = generateEdit(i, TABLENAME, Bytes.toBytes(i));
     }
 
-    ReplicationProtbufUtil.replicateWALEntry(rsAdmin, entries, replicationClusterId,
-        baseNamespaceDir, hfileArchiveDir, 1000);
+    sinkPeer.replicateWALEntry(entries, replicationClusterId, baseNamespaceDir, hfileArchiveDir,
+        1000);
 
+    Table table = TEST_UTIL.getConnection().getTable(TABLENAME);
     for (int i = 0; i < BATCH_SIZE; i++) {
-      Table table = TEST_UTIL.getConnection().getTable(TABLENAME);
       Result result = table.get(new Get(Bytes.toBytes(i)));
       Cell cell = result.getColumnLatestCell(Bytes.toBytes(FAMILY), Bytes.toBytes(FAMILY));
       assertNotNull(cell);

[hbase] 10/12: HBASE-25113 [testing] HBaseCluster support ReplicationServer for UTs (#2662)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 6ae60979a8a61c79c737f7a65d9b5318230b2537
Author: XinSun <dd...@gmail.com>
AuthorDate: Mon Nov 23 11:01:55 2020 +0800

    HBASE-25113 [testing] HBaseCluster support ReplicationServer for UTs (#2662)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../org/apache/hadoop/hbase/LocalHBaseCluster.java | 63 ++++++++++++++++++-
 .../hbase/replication/HReplicationServer.java      | 13 ++++
 .../apache/hadoop/hbase/util/JVMClusterUtil.java   | 57 +++++++++++++++++-
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |  8 +--
 .../org/apache/hadoop/hbase/MiniHBaseCluster.java  | 70 ++++++++++++++++++----
 .../hadoop/hbase/StartMiniClusterOption.java       | 24 ++++++--
 .../replication/TestReplicationServerSink.java     | 45 +++++++-------
 hbase-server/src/test/resources/hbase-site.xml     |  7 +++
 8 files changed, 242 insertions(+), 45 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java
index f4847b9..24b658f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/LocalHBaseCluster.java
@@ -32,9 +32,11 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.replication.HReplicationServer;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.ReplicationServerThread;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -60,7 +62,10 @@ import org.slf4j.LoggerFactory;
 public class LocalHBaseCluster {
   private static final Logger LOG = LoggerFactory.getLogger(LocalHBaseCluster.class);
   private final List<JVMClusterUtil.MasterThread> masterThreads = new CopyOnWriteArrayList<>();
-  private final List<JVMClusterUtil.RegionServerThread> regionThreads = new CopyOnWriteArrayList<>();
+  private final List<JVMClusterUtil.RegionServerThread> regionThreads =
+      new CopyOnWriteArrayList<>();
+  private final List<JVMClusterUtil.ReplicationServerThread> replicationThreads =
+      new CopyOnWriteArrayList<>();
   private final static int DEFAULT_NO = 1;
   /** local mode */
   public static final String LOCAL = "local";
@@ -259,6 +264,26 @@ public class LocalHBaseCluster {
         });
   }
 
+  @SuppressWarnings("unchecked")
+  public JVMClusterUtil.ReplicationServerThread addReplicationServer(
+      Configuration config, final int index) throws IOException {
+    // Create each replication server with its own Configuration instance so each has
+    // its Connection instance rather than share (see HBASE_INSTANCES down in
+    // the guts of ConnectionManager).
+    JVMClusterUtil.ReplicationServerThread rst =
+        JVMClusterUtil.createReplicationServerThread(config, index);
+    this.replicationThreads.add(rst);
+    return rst;
+  }
+
+  public JVMClusterUtil.ReplicationServerThread addReplicationServer(
+      final Configuration config, final int index, User user)
+      throws IOException, InterruptedException {
+    return user.runAs(
+        (PrivilegedExceptionAction<ReplicationServerThread>) () -> addReplicationServer(config,
+            index));
+  }
+
   /**
    * @param serverNumber
    * @return region server
@@ -290,6 +315,40 @@ public class LocalHBaseCluster {
   }
 
   /**
+   * @param serverNumber replication server number
+   * @return replication server
+   */
+  public HReplicationServer getReplicationServer(int serverNumber) {
+    return replicationThreads.get(serverNumber).getReplicationServer();
+  }
+
+  /**
+   * @return Read-only list of replication server threads.
+   */
+  public List<JVMClusterUtil.ReplicationServerThread> getReplicationServers() {
+    return Collections.unmodifiableList(this.replicationThreads);
+  }
+
+  /**
+   * @return List of running servers (Some servers may have been killed or
+   *   aborted during lifetime of cluster; these servers are not included in this
+   *   list).
+   */
+  public List<JVMClusterUtil.ReplicationServerThread> getLiveReplicationServers() {
+    List<JVMClusterUtil.ReplicationServerThread> liveServers = new ArrayList<>();
+    List<ReplicationServerThread> list = getReplicationServers();
+    for (JVMClusterUtil.ReplicationServerThread rst: list) {
+      if (rst.isAlive()) {
+        liveServers.add(rst);
+      }
+      else {
+        LOG.info("Not alive {}", rst.getName());
+      }
+    }
+    return liveServers;
+  }
+
+  /**
    * @return the Configuration used by this LocalHBaseCluster
    */
   public Configuration getConfiguration() {
@@ -430,7 +489,7 @@ public class LocalHBaseCluster {
    * Start the cluster.
    */
   public void startup() throws IOException {
-    JVMClusterUtil.startup(this.masterThreads, this.regionThreads);
+    JVMClusterUtil.startup(this.masterThreads, this.regionThreads, this.replicationThreads);
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
index 2d0336d..8d85b85 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -443,6 +443,19 @@ public class HReplicationServer extends Thread implements Server, ReplicationSou
     return this.stopped;
   }
 
+  public void waitForServerOnline(){
+    while (!isStopped() && !isOnline()) {
+      synchronized (online) {
+        try {
+          online.wait(msgInterval);
+        } catch (InterruptedException ie) {
+          Thread.currentThread().interrupt();
+          break;
+        }
+      }
+    }
+  }
+
   /**
    * Setup WAL log and replication if enabled. Replication setup is done in here because it wants to
    * be hooked up to WAL.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
index 1e2ac3e..1f76864 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java
@@ -27,6 +27,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.function.Supplier;
 
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.replication.HReplicationServer;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -72,6 +73,33 @@ public class JVMClusterUtil {
   }
 
   /**
+   * Datastructure to hold ReplicationServer Thread and ReplicationServer instance
+   */
+  public static class ReplicationServerThread extends Thread {
+    private final HReplicationServer replicationServer;
+
+    public ReplicationServerThread(final HReplicationServer r, final int index) {
+      super(r, "ReplicationServer:" + index + ";" + r.getServerName().toShortString());
+      this.replicationServer = r;
+    }
+
+    /**
+     * @return the replication server
+     */
+    public HReplicationServer getReplicationServer() {
+      return this.replicationServer;
+    }
+
+    /**
+     * Block until the replication server has come online, indicating it is ready
+     * to be used.
+     */
+    public void waitForServerOnline() {
+      replicationServer.waitForServerOnline();
+    }
+  }
+
+  /**
    * Creates a {@link RegionServerThread}.
    * Call 'start' on the returned thread to make it run.
    * @param c Configuration to use.
@@ -98,6 +126,24 @@ public class JVMClusterUtil {
     return new JVMClusterUtil.RegionServerThread(server, index);
   }
 
+  /**
+   * Creates a {@link ReplicationServerThread}.
+   * Call 'start' on the returned thread to make it run.
+   * @param c Configuration to use.
+   * @param index Used distinguishing the object returned.
+   * @throws IOException
+   * @return Replication server added.
+   */
+  public static JVMClusterUtil.ReplicationServerThread createReplicationServerThread(
+      final Configuration c, final int index) throws IOException {
+    HReplicationServer server;
+    try {
+      server = new HReplicationServer(c);
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+    return new JVMClusterUtil.ReplicationServerThread(server, index);
+  }
 
   /**
    * Datastructure to hold Master Thread and Master instance
@@ -122,7 +168,7 @@ public class JVMClusterUtil {
    * @param c Configuration to use.
    * @param hmc Class to create.
    * @param index Used distinguishing the object returned.
-   * @throws IOException
+   * @throws IOException exception
    * @return Master added.
    */
   public static JVMClusterUtil.MasterThread createMasterThread(final Configuration c,
@@ -165,7 +211,8 @@ public class JVMClusterUtil {
    * @return Address to use contacting primary master.
    */
   public static String startup(final List<JVMClusterUtil.MasterThread> masters,
-      final List<JVMClusterUtil.RegionServerThread> regionservers) throws IOException {
+      final List<JVMClusterUtil.RegionServerThread> regionservers,
+      final List<JVMClusterUtil.ReplicationServerThread> replicationServers) throws IOException {
     // Implementation note: This method relies on timed sleeps in a loop. It's not great, and
     // should probably be re-written to use actual synchronization objects, but it's ok for now
 
@@ -193,6 +240,12 @@ public class JVMClusterUtil {
       }
     }
 
+    if (replicationServers != null) {
+      for (JVMClusterUtil.ReplicationServerThread t: replicationServers) {
+        t.start();
+      }
+    }
+
     // Wait for an active master to be initialized (implies being master)
     //  with this, when we return the cluster is complete
     final int initTimeout = configuration != null ? Integer.parseInt(
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index 840b9e0..fb31b3c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -1111,8 +1111,8 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
 
     Configuration c = new Configuration(this.conf);
     this.hbaseCluster = new MiniHBaseCluster(c, option.getNumMasters(),
-      option.getNumAlwaysStandByMasters(), option.getNumRegionServers(), option.getRsPorts(),
-      option.getMasterClass(), option.getRsClass());
+        option.getNumAlwaysStandByMasters(), option.getNumRegionServers(), option.getRsPorts(),
+        option.getNumReplicationServers(), option.getMasterClass(), option.getRsClass());
     // Populate the master address configuration from mini cluster configuration.
     conf.set(HConstants.MASTER_ADDRS_KEY, MasterRegistry.getMasterAddr(c));
     // Don't leave here till we've done a successful scan of the hbase:meta
@@ -1237,8 +1237,8 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
     closeConnection();
     this.hbaseCluster =
         new MiniHBaseCluster(this.conf, option.getNumMasters(), option.getNumAlwaysStandByMasters(),
-            option.getNumRegionServers(), option.getRsPorts(), option.getMasterClass(),
-            option.getRsClass());
+            option.getNumRegionServers(), option.getRsPorts(), option.getNumReplicationServers(),
+            option.getMasterClass(), option.getRsClass());
     // Don't leave here till we've done a successful scan of the hbase:meta
     Connection conn = ConnectionFactory.createConnection(this.conf);
     Table t = conn.getTable(TableName.META_TABLE_NAME);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
index 990867e..eb71623 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@@ -32,12 +32,14 @@ import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.replication.HReplicationServer;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.test.MetricsAssertHelper;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.ReplicationServerThread;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -87,10 +89,10 @@ public class MiniHBaseCluster extends HBaseCluster {
    * @param numRegionServers initial number of region servers to start.
    */
   public MiniHBaseCluster(Configuration conf, int numMasters, int numRegionServers,
-         Class<? extends HMaster> masterClass,
-         Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> regionserverClass)
+      Class<? extends HMaster> masterClass,
+      Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> regionserverClass)
       throws IOException, InterruptedException {
-    this(conf, numMasters, 0, numRegionServers, null, masterClass, regionserverClass);
+    this(conf, numMasters, 0, numRegionServers, null, 0, masterClass, regionserverClass);
   }
 
   /**
@@ -98,20 +100,22 @@ public class MiniHBaseCluster extends HBaseCluster {
    *   restart where for sure the regionservers come up on same address+port (but
    *   just with different startcode); by default mini hbase clusters choose new
    *   arbitrary ports on each cluster start.
+   * @param numReplicationServers initial number of replication servers to start.
    * @throws IOException
    * @throws InterruptedException
    */
   public MiniHBaseCluster(Configuration conf, int numMasters, int numAlwaysStandByMasters,
-         int numRegionServers, List<Integer> rsPorts, Class<? extends HMaster> masterClass,
-         Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> regionserverClass)
+      int numRegionServers, List<Integer> rsPorts, int numReplicationServers,
+      Class<? extends HMaster> masterClass,
+      Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> regionserverClass)
       throws IOException, InterruptedException {
     super(conf);
 
     // Hadoop 2
     CompatibilityFactory.getInstance(MetricsAssertHelper.class).init();
 
-    init(numMasters, numAlwaysStandByMasters, numRegionServers, rsPorts, masterClass,
-        regionserverClass);
+    init(numMasters, numAlwaysStandByMasters, numRegionServers, rsPorts, numReplicationServers,
+        masterClass, regionserverClass);
     this.initialClusterStatus = getClusterMetrics();
   }
 
@@ -228,7 +232,8 @@ public class MiniHBaseCluster extends HBaseCluster {
   }
 
   private void init(final int nMasterNodes, final int numAlwaysStandByMasters,
-      final int nRegionNodes, List<Integer> rsPorts, Class<? extends HMaster> masterClass,
+      final int nRegionNodes, List<Integer> rsPorts, int numReplicationServers,
+      Class<? extends HMaster> masterClass,
       Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> regionserverClass)
   throws IOException, InterruptedException {
     try {
@@ -249,11 +254,17 @@ public class MiniHBaseCluster extends HBaseCluster {
         if (rsPorts != null) {
           rsConf.setInt(HConstants.REGIONSERVER_PORT, rsPorts.get(i));
         }
-        User user = HBaseTestingUtility.getDifferentUser(rsConf,
-            ".hfs."+index++);
+        User user = HBaseTestingUtility.getDifferentUser(rsConf, ".hfs." + index++);
         hbaseCluster.addRegionServer(rsConf, i, user);
       }
 
+      // manually add the replication servers as other users
+      for (int i = 0; i < numReplicationServers; i++) {
+        Configuration rsConf = HBaseConfiguration.create(conf);
+        User user = HBaseTestingUtility.getDifferentUser(rsConf, ".hfs." + index++);
+        hbaseCluster.addReplicationServer(rsConf, i, user);
+      }
+
       hbaseCluster.startup();
     } catch (IOException e) {
       shutdown();
@@ -792,7 +803,7 @@ public class MiniHBaseCluster extends HBaseCluster {
 
   /**
    * Grab a numbered region server of your choice.
-   * @param serverNumber
+   * @param serverNumber region server number
    * @return region server
    */
   public HRegionServer getRegionServer(int serverNumber) {
@@ -806,6 +817,43 @@ public class MiniHBaseCluster extends HBaseCluster {
         .findFirst().orElse(null);
   }
 
+  /**
+   * @return Number of live replication servers in the cluster currently.
+   */
+  public int getNumLiveReplicationServers() {
+    return this.hbaseCluster.getLiveReplicationServers().size();
+  }
+
+  /**
+   * @return List of replication server threads.
+   */
+  public List<JVMClusterUtil.ReplicationServerThread> getReplicationServerThreads() {
+    return this.hbaseCluster.getReplicationServers();
+  }
+
+  /**
+   * @return List of live replication server threads (skips the aborted and the killed)
+   */
+  public List<JVMClusterUtil.ReplicationServerThread> getLiveReplicationServerThreads() {
+    return this.hbaseCluster.getLiveReplicationServers();
+  }
+
+  /**
+   * Grab a numbered replication server of your choice.
+   * @param serverNumber
+   * @return replication server
+   */
+  public HReplicationServer getReplicationServer(int serverNumber) {
+    return hbaseCluster.getReplicationServer(serverNumber);
+  }
+
+  public HReplicationServer getReplicationServer(ServerName serverName) {
+    return hbaseCluster.getReplicationServers().stream()
+        .map(ReplicationServerThread::getReplicationServer)
+        .filter(r -> r.getServerName().equals(serverName))
+        .findFirst().orElse(null);
+  }
+
   public List<HRegion> getRegions(byte[] tableName) {
     return getRegions(TableName.valueOf(tableName));
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/StartMiniClusterOption.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/StartMiniClusterOption.java
index 7a9bd68..0aa35ed 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/StartMiniClusterOption.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/StartMiniClusterOption.java
@@ -75,6 +75,10 @@ public final class StartMiniClusterOption {
    * The class to use as HRegionServer, or null for default.
    */
   private Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> rsClass;
+  /**
+   * Number of replication servers to start up.
+   */
+  private final int numReplicationServers;
 
   /**
    * Number of datanodes. Used to create mini DSF cluster. Surpassed by {@link #dataNodeHosts} size.
@@ -109,7 +113,8 @@ public final class StartMiniClusterOption {
    */
   private StartMiniClusterOption(int numMasters, int numAlwaysStandByMasters,
       Class<? extends HMaster> masterClass, int numRegionServers, List<Integer> rsPorts,
-      Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> rsClass, int numDataNodes,
+      Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> rsClass,
+      int numReplicationServers, int numDataNodes,
       String[] dataNodeHosts, int numZkServers, boolean createRootDir, boolean createWALDir) {
     this.numMasters = numMasters;
     this.numAlwaysStandByMasters = numAlwaysStandByMasters;
@@ -117,6 +122,7 @@ public final class StartMiniClusterOption {
     this.numRegionServers = numRegionServers;
     this.rsPorts = rsPorts;
     this.rsClass = rsClass;
+    this.numReplicationServers = numReplicationServers;
     this.numDataNodes = numDataNodes;
     this.dataNodeHosts = dataNodeHosts;
     this.numZkServers = numZkServers;
@@ -148,6 +154,10 @@ public final class StartMiniClusterOption {
     return rsClass;
   }
 
+  public int getNumReplicationServers() {
+    return numReplicationServers;
+  }
+
   public int getNumDataNodes() {
     return numDataNodes;
   }
@@ -196,6 +206,7 @@ public final class StartMiniClusterOption {
     private Class<? extends HMaster> masterClass = null;
     private int numRegionServers = 1;
     private List<Integer> rsPorts = null;
+    private int numReplicationServers;
     private Class<? extends MiniHBaseCluster.MiniHBaseClusterRegionServer> rsClass = null;
     private int numDataNodes = 1;
     private String[] dataNodeHosts = null;
@@ -210,9 +221,9 @@ public final class StartMiniClusterOption {
       if (dataNodeHosts != null && dataNodeHosts.length != 0) {
         numDataNodes = dataNodeHosts.length;
       }
-      return new StartMiniClusterOption(numMasters,numAlwaysStandByMasters, masterClass,
-          numRegionServers, rsPorts, rsClass, numDataNodes, dataNodeHosts, numZkServers,
-          createRootDir, createWALDir);
+      return new StartMiniClusterOption(numMasters, numAlwaysStandByMasters, masterClass,
+          numRegionServers, rsPorts, rsClass, numReplicationServers,
+          numDataNodes, dataNodeHosts, numZkServers, createRootDir, createWALDir);
     }
 
     public Builder numMasters(int numMasters) {
@@ -269,6 +280,11 @@ public final class StartMiniClusterOption {
       this.createWALDir = createWALDir;
       return this;
     }
+
+    public Builder numReplicationServers(int numReplicationServers) {
+      this.numReplicationServers = numReplicationServers;
+      return this;
+    }
   }
 
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
index d97667b..a1cbebb 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServerSink.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.StartMiniClusterOption;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.AsyncClusterConnection;
 import org.apache.hadoop.hbase.client.AsyncReplicationServerAdmin;
@@ -77,8 +78,8 @@ public class TestReplicationServerSink {
 
   private static HMaster MASTER;
 
-  private static HReplicationServer replicationServer;
-  private static ServerName replicationServerName;
+  private static HReplicationServer REPLICATION_SERVER;
+  private static ServerName REPLICATION_SERVER_NAME;
 
   private static Path baseNamespaceDir;
   private static Path hfileArchiveDir;
@@ -93,9 +94,13 @@ public class TestReplicationServerSink {
   public static void beforeClass() throws Exception {
     CONF.setLong(HBASE_CLIENT_OPERATION_TIMEOUT, 1000);
     CONF.setLong(ONLINE_SERVER_REFRESH_INTERVAL, 10000);
-    TEST_UTIL.startMiniCluster();
+    CONF.setBoolean(HConstants.REPLICATION_OFFLOAD_ENABLE_KEY, true);
+    TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder().numReplicationServers(1).build());
     MASTER = TEST_UTIL.getMiniHBaseCluster().getMaster();
     TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster();
+    REPLICATION_SERVER = TEST_UTIL.getMiniHBaseCluster().getReplicationServerThreads().get(0)
+        .getReplicationServer();
+    REPLICATION_SERVER_NAME = REPLICATION_SERVER.getServerName();
 
     Path rootDir = CommonFSUtils.getRootDir(CONF);
     baseNamespaceDir = new Path(rootDir, new Path(HConstants.BASE_NAMESPACE_DIR));
@@ -110,11 +115,6 @@ public class TestReplicationServerSink {
 
   @Before
   public void before() throws Exception {
-    replicationServer = new HReplicationServer(CONF);
-    replicationServer.start();
-    TEST_UTIL.waitFor(60000, () -> replicationServer.isOnline());
-    replicationServerName = replicationServer.getServerName();
-
     TEST_UTIL.createTable(TABLENAME, FAMILY);
     TEST_UTIL.waitTableAvailable(TABLENAME);
   }
@@ -122,12 +122,6 @@ public class TestReplicationServerSink {
   @After
   public void after() throws IOException {
     TEST_UTIL.deleteTableIfAny(TABLENAME);
-    if (!replicationServer.isStopped()) {
-      replicationServer.stop("test");
-    }
-    TEST_UTIL.waitFor(10000, () -> !replicationServer.isAlive());
-    replicationServer = null;
-    replicationServerName = null;
   }
 
   /**
@@ -138,10 +132,10 @@ public class TestReplicationServerSink {
     AsyncClusterConnection conn =
         TEST_UTIL.getHBaseCluster().getMaster().getAsyncClusterConnection();
     AsyncReplicationServerAdmin replAdmin =
-        conn.getReplicationServerAdmin(replicationServerName);
+        conn.getReplicationServerAdmin(REPLICATION_SERVER_NAME);
 
     ReplicationServerSinkPeer sinkPeer =
-        new ReplicationServerSinkPeer(replicationServerName, replAdmin);
+        new ReplicationServerSinkPeer(REPLICATION_SERVER_NAME, replAdmin);
     replicateWALEntryAndVerify(sinkPeer);
   }
 
@@ -178,23 +172,30 @@ public class TestReplicationServerSink {
     ReplicationServerManager replicationServerManager = MASTER.getReplicationServerManager();
     assertNotNull(replicationServerManager);
     TEST_UTIL.waitFor(60000, () -> !replicationServerManager.getOnlineServers().isEmpty()
-        && null != replicationServerManager.getServerMetrics(replicationServerName));
+        && null != replicationServerManager.getServerMetrics(REPLICATION_SERVER_NAME));
     // put data via replication server
     testReplicateWAL();
-    TEST_UTIL.waitFor(60000, () -> replicationServer.rpcServices.requestCount.sum() > 0
-        && replicationServer.rpcServices.requestCount.sum() == replicationServerManager
-        .getServerMetrics(replicationServerName).getRequestCount());
+    TEST_UTIL.waitFor(60000, () -> REPLICATION_SERVER.rpcServices.requestCount.sum() > 0
+        && REPLICATION_SERVER.rpcServices.requestCount.sum() == replicationServerManager
+        .getServerMetrics(REPLICATION_SERVER_NAME).getRequestCount());
   }
 
   @Test
   public void testReplicationServerExpire() throws Exception {
+    int initialNum = TEST_UTIL.getMiniHBaseCluster().getNumLiveReplicationServers();
+    HReplicationServer replicationServer = new HReplicationServer(CONF);
+    replicationServer.start();
+    ServerName replicationServerName = replicationServer.getServerName();
+
     ReplicationServerManager replicationServerManager = MASTER.getReplicationServerManager();
-    TEST_UTIL.waitFor(60000, () -> !replicationServerManager.getOnlineServers().isEmpty()
+    TEST_UTIL.waitFor(60000, () ->
+        initialNum + 1 == replicationServerManager.getOnlineServers().size()
         && null != replicationServerManager.getServerMetrics(replicationServerName));
 
     replicationServer.stop("test");
 
-    TEST_UTIL.waitFor(180000, 1000, replicationServerManager.getOnlineServers()::isEmpty);
+    TEST_UTIL.waitFor(180000, 1000, () ->
+        initialNum == replicationServerManager.getOnlineServers().size());
     assertNull(replicationServerManager.getServerMetrics(replicationServerName));
   }
 }
diff --git a/hbase-server/src/test/resources/hbase-site.xml b/hbase-server/src/test/resources/hbase-site.xml
index 5e64bfc..36187a3 100644
--- a/hbase-server/src/test/resources/hbase-site.xml
+++ b/hbase-server/src/test/resources/hbase-site.xml
@@ -90,6 +90,13 @@
     </description>
   </property>
   <property>
+    <name>hbase.replicationserver.port</name>
+    <value>0</value>
+    <description>Always have replicationservers come up on port '0' so we don't clash over
+      default ports.
+    </description>
+  </property>
+  <property>
     <name>hbase.ipc.client.fallback-to-simple-auth-allowed</name>
     <value>true</value>
   </property>

[hbase] 03/12: HBASE-24735: Refactor ReplicationSourceManager: move logPositionAndCleanOldLogs/cleanUpHFileRefs to ReplicationSource inside (#2064)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit eded3096db2365033be6148ca4cd8e94d5759512
Author: Guanghao Zhang <zg...@apache.org>
AuthorDate: Tue Aug 11 20:07:09 2020 +0800

    HBASE-24735: Refactor ReplicationSourceManager: move logPositionAndCleanOldLogs/cleanUpHFileRefs to ReplicationSource inside (#2064)
    
    Signed-off-by: Wellington Chevreuil <wc...@apache.org>
---
 .../regionserver/CatalogReplicationSource.java     |  13 +-
 .../regionserver/RecoveredReplicationSource.java   |  18 ++-
 .../regionserver/ReplicationSource.java            | 166 ++++++++++++++++++---
 .../regionserver/ReplicationSourceInterface.java   |  39 +++--
 .../regionserver/ReplicationSourceManager.java     | 142 +-----------------
 .../regionserver/ReplicationSourceShipper.java     |  21 +--
 .../regionserver/ReplicationSourceWALReader.java   |  16 +-
 .../replication/regionserver/WALEntryBatch.java    |   2 +-
 .../hbase/replication/ReplicationSourceDummy.java  |  24 +--
 .../regionserver/TestBasicWALEntryStream.java      |  16 +-
 .../regionserver/TestReplicationSource.java        |  16 +-
 .../regionserver/TestReplicationSourceManager.java |  49 +++---
 12 files changed, 269 insertions(+), 253 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/CatalogReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/CatalogReplicationSource.java
index 8cb7860..15370e0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/CatalogReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/CatalogReplicationSource.java
@@ -35,7 +35,18 @@ class CatalogReplicationSource extends ReplicationSource {
   }
 
   @Override
-  public void logPositionAndCleanOldLogs(WALEntryBatch entryBatch) {
+  public void setWALPosition(WALEntryBatch entryBatch) {
+    // Noop. This CatalogReplicationSource implementation does not persist state to backing storage
+    // nor does it keep its WALs in a general map up in ReplicationSourceManager --
+    // CatalogReplicationSource is used by the Catalog Read Replica feature which resets everytime
+    // the WAL source process crashes. Skip calling through to the default implementation.
+    // See "4.1 Skip maintaining zookeeper replication queue (offsets/WALs)" in the
+    // design doc attached to HBASE-18070 'Enable memstore replication for meta replica for detail'
+    // for background on why no need to keep WAL state.
+  }
+
+  @Override
+  public void cleanOldWALs(String log, boolean inclusive) {
     // Noop. This CatalogReplicationSource implementation does not persist state to backing storage
     // nor does it keep its WALs in a general map up in ReplicationSourceManager --
+    // CatalogReplicationSource is used by the Catalog Read Replica feature which resets every time
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
index 526c3e3..abbc046 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/RecoveredReplicationSource.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.List;
 import java.util.UUID;
 import java.util.concurrent.PriorityBlockingQueue;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -44,15 +45,18 @@ public class RecoveredReplicationSource extends ReplicationSource {
 
   private static final Logger LOG = LoggerFactory.getLogger(RecoveredReplicationSource.class);
 
+  private Path walDir;
+
   private String actualPeerId;
 
   @Override
-  public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager,
-      ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
-      String peerClusterZnode, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
-      MetricsSource metrics) throws IOException {
-    super.init(conf, fs, manager, queueStorage, replicationPeer, server, peerClusterZnode,
+  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
+    ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
+    String peerClusterZnode, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
+    MetricsSource metrics) throws IOException {
+    super.init(conf, fs, walDir, manager, queueStorage, replicationPeer, server, peerClusterZnode,
       clusterId, walFileLengthProvider, metrics);
+    this.walDir = walDir;
     this.actualPeerId = this.replicationQueueInfo.getPeerId();
   }
 
@@ -93,7 +97,7 @@ public class RecoveredReplicationSource extends ReplicationSource {
               deadRsDirectory.suffix(AbstractFSWALProvider.SPLITTING_EXT), path.getName()) };
           for (Path possibleLogLocation : locs) {
             LOG.info("Possible location " + possibleLogLocation.toUri().toString());
-            if (manager.getFs().exists(possibleLogLocation)) {
+            if (this.fs.exists(possibleLogLocation)) {
               // We found the right new location
               LOG.info("Log " + path + " still exists at " + possibleLogLocation);
               newPaths.add(possibleLogLocation);
@@ -126,7 +130,7 @@ public class RecoveredReplicationSource extends ReplicationSource {
   // N.B. the ReplicationSyncUp tool sets the manager.getWALDir to the root of the wal
   // area rather than to the wal area for a particular region server.
   private Path getReplSyncUpPath(Path path) throws IOException {
-    FileStatus[] rss = fs.listStatus(manager.getLogDir());
+    FileStatus[] rss = fs.listStatus(walDir);
     for (FileStatus rs : rss) {
       Path p = rs.getPath();
       FileStatus[] logs = fs.listStatus(p);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
index a385ead..cfcc837 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSource.java
@@ -26,7 +26,9 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.NavigableSet;
 import java.util.TreeMap;
+import java.util.TreeSet;
 import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.PriorityBlockingQueue;
@@ -35,6 +37,7 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Predicate;
+import java.util.stream.Collectors;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -51,15 +54,19 @@ import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
 import org.apache.hadoop.hbase.replication.ChainWALEntryFilter;
 import org.apache.hadoop.hbase.replication.ClusterMarkingEntryFilter;
 import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
+import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationPeer;
 import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
+import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
 import org.apache.hadoop.hbase.replication.WALEntryFilter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -90,7 +97,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
   protected ReplicationQueueInfo replicationQueueInfo;
 
   // The manager of all sources to which we ping back our progress
-  protected ReplicationSourceManager manager;
+  ReplicationSourceManager manager;
   // Should we stop everything?
   protected Server server;
   // How long should we sleep for each retry
@@ -134,8 +141,6 @@ public class ReplicationSource implements ReplicationSourceInterface {
   protected final ConcurrentHashMap<String, ReplicationSourceShipper> workerThreads =
       new ConcurrentHashMap<>();
 
-  private AtomicLong totalBufferUsed;
-
   public static final String WAIT_ON_ENDPOINT_SECONDS =
     "hbase.replication.wait.on.endpoint.seconds";
   public static final int DEFAULT_WAIT_ON_ENDPOINT_SECONDS = 30;
@@ -187,7 +192,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
    * @param metrics metrics for replication source
    */
   @Override
-  public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager,
+  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
       ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
       String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
       MetricsSource metrics) throws IOException {
@@ -216,7 +221,6 @@ public class ReplicationSource implements ReplicationSourceInterface {
     defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0);
     currentBandwidth = getCurrentBandwidth();
     this.throttler = new ReplicationThrottler((double) currentBandwidth / 10.0);
-    this.totalBufferUsed = manager.getTotalBufferUsed();
     this.walFileLengthProvider = walFileLengthProvider;
 
     this.abortOnError = this.conf.getBoolean("replication.source.regionserver.abort",
@@ -392,11 +396,11 @@ public class ReplicationSource implements ReplicationSourceInterface {
   }
 
   private ReplicationSourceWALReader createNewWALReader(String walGroupId, long startPosition) {
-    return replicationPeer.getPeerConfig().isSerial()
-      ? new SerialReplicationSourceWALReader(fs, conf, logQueue, startPosition, walEntryFilter,
-      this, walGroupId)
-      : new ReplicationSourceWALReader(fs, conf, logQueue, startPosition, walEntryFilter,
-      this, walGroupId);
+    return replicationPeer.getPeerConfig().isSerial() ?
+      new SerialReplicationSourceWALReader(fs, conf, logQueue, startPosition, walEntryFilter,
+        this, walGroupId) :
+      new ReplicationSourceWALReader(fs, conf, logQueue, startPosition, walEntryFilter,
+        this, walGroupId);
   }
 
   /**
@@ -422,7 +426,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
             t.getName());
           manager.refreshSources(peerId);
           break;
-        } catch (IOException e1) {
+        } catch (ReplicationException | IOException e1) {
           LOG.error("Replication sources refresh failed.", e1);
           sleepForRetries("Sleeping before try refreshing sources again",
             maxRetriesMultiplier);
@@ -437,11 +441,6 @@ public class ReplicationSource implements ReplicationSourceInterface {
   }
 
   @Override
-  public ReplicationSourceManager getSourceManager() {
-    return this.manager;
-  }
-
-  @Override
   public void tryThrottle(int batchSize) throws InterruptedException {
     checkBandwidthChangeAndResetThrottler();
     if (throttler.isEnabled()) {
@@ -767,7 +766,7 @@ public class ReplicationSource implements ReplicationSourceInterface {
       throttler.addPushSize(batchSize);
     }
     totalReplicatedEdits.addAndGet(entries.size());
-    long newBufferUsed = totalBufferUsed.addAndGet(-batchSize);
+    long newBufferUsed = manager.getTotalBufferUsed().addAndGet(-batchSize);
     // Record the new buffer usage
     this.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
   }
@@ -803,4 +802,137 @@ public class ReplicationSource implements ReplicationSourceInterface {
   public String logPeerId(){
     return "peerId=" + this.getPeerId() + ",";
   }
+
+  @Override
+  public void setWALPosition(WALEntryBatch entryBatch) {
+    String fileName = entryBatch.getLastWalPath().getName();
+    interruptOrAbortWhenFail(() -> this.queueStorage
+      .setWALPosition(server.getServerName(), getQueueId(), fileName,
+        entryBatch.getLastWalPosition(), entryBatch.getLastSeqIds()));
+  }
+
+  @Override
+  public void cleanOldWALs(String log, boolean inclusive) {
+    NavigableSet<String> walsToRemove = getWalsToRemove(log, inclusive);
+    if (walsToRemove.isEmpty()) {
+      return;
+    }
+    // cleanOldWALs may spend some time, especially for sync replication where we may want to
+    // remove remote wals as the remote cluster may have already been down, so we do it outside
+    // the lock to avoid block preLogRoll
+    cleanOldWALs(walsToRemove);
+  }
+
+  private NavigableSet<String> getWalsToRemove(String log, boolean inclusive) {
+    NavigableSet<String> walsToRemove = new TreeSet<>();
+    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(log);
+    try {
+      this.queueStorage.getWALsInQueue(this.server.getServerName(), getQueueId()).forEach(wal -> {
+        LOG.debug("getWalsToRemove wal {}", wal);
+        String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
+        if (walPrefix.equals(logPrefix)) {
+          walsToRemove.add(wal);
+        }
+      });
+    } catch (ReplicationException e) {
+      // Just log the exception here, as the recovered replication source will try to cleanup again.
+      LOG.warn("Failed to read wals in queue {}", getQueueId(), e);
+    }
+    return walsToRemove.headSet(log, inclusive);
+  }
+
+  private void removeRemoteWALs(String peerId, String remoteWALDir, Collection<String> wals)
+    throws IOException {
+    Path remoteWALDirForPeer = ReplicationUtils.getPeerRemoteWALDir(remoteWALDir, peerId);
+    FileSystem fs = ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir);
+    for (String wal : wals) {
+      Path walFile = new Path(remoteWALDirForPeer, wal);
+      try {
+        if (!fs.delete(walFile, false) && fs.exists(walFile)) {
+          throw new IOException("Can not delete " + walFile);
+        }
+      } catch (FileNotFoundException e) {
+        // Just ignore since this means the file has already been deleted.
+        // The javadoc of the FileSystem.delete methods does not specify the behavior of deleting an
+        // nonexistent file, so here we deal with both, i.e., check the return value of the
+        // FileSystem.delete, and also catch FNFE.
+        LOG.debug("The remote wal {} has already been deleted?", walFile, e);
+      }
+    }
+  }
+
+  private void cleanOldWALs(NavigableSet<String> wals) {
+    LOG.debug("Removing {} logs in the list: {}", wals.size(), wals);
+    // The intention here is that we want to delete the remote wal files ASAP as it may affect the
+    // failover time if you want to transit the remote cluster from S to A. And the infinite retry
+    // is not a problem, as if we can not contact with the remote HDFS cluster, then usually we can
+    // not contact with the HBase cluster either, so the replication will be blocked anyway.
+    if (isSyncReplication()) {
+      String peerId = getPeerId();
+      String remoteWALDir = replicationPeer.getPeerConfig().getRemoteWALDir();
+      // Filter out the wals need to be removed from the remote directory. Its name should be the
+      // special format, and also, the peer id in its name should match the peer id for the
+      // replication source.
+      List<String> remoteWals = wals.stream().filter(w -> SyncReplicationWALProvider
+        .getSyncReplicationPeerIdFromWALName(w).map(peerId::equals).orElse(false))
+        .collect(Collectors.toList());
+      LOG.debug("Removing {} logs from remote dir {} in the list: {}", remoteWals.size(),
+        remoteWALDir, remoteWals);
+      if (!remoteWals.isEmpty()) {
+        for (int sleepMultiplier = 0;;) {
+          try {
+            removeRemoteWALs(peerId, remoteWALDir, remoteWals);
+            break;
+          } catch (IOException e) {
+            LOG.warn("Failed to delete remote wals from remote dir {} for peer {}", remoteWALDir,
+              peerId);
+          }
+          if (!isSourceActive()) {
+            // skip the following operations
+            return;
+          }
+          if (ReplicationUtils.sleepForRetries("Failed to delete remote wals", sleepForRetries,
+            sleepMultiplier, maxRetriesMultiplier)) {
+            sleepMultiplier++;
+          }
+        }
+      }
+    }
+    for (String wal : wals) {
+      interruptOrAbortWhenFail(
+        () -> this.queueStorage.removeWAL(server.getServerName(), getQueueId(), wal));
+    }
+  }
+
+  public void cleanUpHFileRefs(List<String> files) {
+    interruptOrAbortWhenFail(() -> this.queueStorage.removeHFileRefs(getPeerId(), files));
+  }
+
+  @FunctionalInterface
+  private interface ReplicationQueueOperation {
+    void exec() throws ReplicationException;
+  }
+
+  /**
+   * Refresh replication source will terminate the old source first, then the source thread will be
+   * interrupted. Need to handle it instead of abort the region server.
+   */
+  private void interruptOrAbortWhenFail(ReplicationQueueOperation op) {
+    try {
+      op.exec();
+    } catch (ReplicationException e) {
+      if (e.getCause() != null && e.getCause() instanceof KeeperException.SystemErrorException
+        && e.getCause().getCause() != null && e.getCause()
+        .getCause() instanceof InterruptedException) {
+        // ReplicationRuntimeException(a RuntimeException) is thrown out here. The reason is
+        // that thread is interrupted deep down in the stack, it should pass the following
+        // processing logic and propagate to the most top layer which can handle this exception
+        // properly. In this specific case, the top layer is ReplicationSourceShipper#run().
+        throw new ReplicationRuntimeException(
+          "Thread is interrupted, the replication source may be terminated",
+          e.getCause().getCause());
+      }
+      server.abort("Failed to operate on replication queue", e);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
index 352cdd3..77bba90 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceInterface.java
@@ -41,15 +41,15 @@ import org.apache.yetus.audience.InterfaceAudience;
 public interface ReplicationSourceInterface {
   /**
    * Initializer for the source
-   * @param conf the configuration to use
-   * @param fs the file system to use
-   * @param manager the manager to use
+   *
+   * @param conf   the configuration to use
+   * @param fs     the file system to use
    * @param server the server for this region server
    */
-  void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager,
-      ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
-      String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
-      MetricsSource metrics) throws IOException;
+  void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
+    ReplicationQueueStorage queueStorage, ReplicationPeer replicationPeer, Server server,
+    String queueId, UUID clusterId, WALFileLengthProvider walFileLengthProvider,
+    MetricsSource metrics) throws IOException;
 
   /**
    * Add a log to the list of logs to replicate
@@ -146,11 +146,6 @@ public interface ReplicationSourceInterface {
   ReplicationEndpoint getReplicationEndpoint();
 
   /**
-   * @return the replication source manager
-   */
-  ReplicationSourceManager getSourceManager();
-
-  /**
    * @return the wal file length provider
    */
   WALFileLengthProvider getWALFileLengthProvider();
@@ -196,14 +191,16 @@ public interface ReplicationSourceInterface {
   ReplicationQueueStorage getReplicationQueueStorage();
 
   /**
-   * Log the current position to storage. Also clean old logs from the replication queue.
-   * Use to bypass the default call to
-   * {@link ReplicationSourceManager#logPositionAndCleanOldLogs(ReplicationSourceInterface,
-   * WALEntryBatch)} whem implementation does not need to persist state to backing storage.
-   * @param entryBatch the wal entry batch we just shipped
-   * @return The instance of queueStorage used by this ReplicationSource.
+   * Set the current position of WAL to {@link ReplicationQueueStorage}
+   * @param entryBatch a batch of WAL entries to replicate
    */
-  default void logPositionAndCleanOldLogs(WALEntryBatch entryBatch) {
-    getSourceManager().logPositionAndCleanOldLogs(this, entryBatch);
-  }
+  void setWALPosition(WALEntryBatch entryBatch);
+
+  /**
+   * Cleans a WAL and all older WALs from replication queue. Called when we are sure that a WAL is
+   * closed and has no more entries.
+   * @param walName the name of the WAL
+   * @param inclusive whether we should also remove the given WAL
+   */
+  void cleanOldWALs(String walName, boolean inclusive);
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
index db12c00..f502a65 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceManager.java
@@ -17,10 +17,8 @@
  */
 package org.apache.hadoop.hbase.replication.regionserver;
 
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -40,7 +38,6 @@ import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
-import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -59,17 +56,14 @@ import org.apache.hadoop.hbase.replication.ReplicationPeerImpl;
 import org.apache.hadoop.hbase.replication.ReplicationPeers;
 import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
 import org.apache.hadoop.hbase.replication.ReplicationQueueStorage;
-import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.SyncReplicationState;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.hbase.wal.SyncReplicationWALProvider;
 import org.apache.hadoop.hbase.wal.WAL;
 import org.apache.hadoop.hbase.wal.WALFactory;
 import org.apache.hadoop.hbase.wal.WALProvider;
 import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.zookeeper.KeeperException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -310,7 +304,7 @@ public class ReplicationSourceManager {
     WALFileLengthProvider walFileLengthProvider =
       this.walFactory.getWALProvider() != null?
         this.walFactory.getWALProvider().getWALFileLengthProvider() : p -> OptionalLong.empty();
-    src.init(conf, fs, this, queueStorage, replicationPeer, server, queueId, clusterId,
+    src.init(conf, fs, logDir, this, queueStorage, replicationPeer, server, queueId, clusterId,
       walFileLengthProvider, new MetricsSource(queueId));
     return src;
   }
@@ -513,29 +507,6 @@ public class ReplicationSourceManager {
     void exec() throws ReplicationException;
   }
 
-  /**
-   * Refresh replication source will terminate the old source first, then the source thread will be
-   * interrupted. Need to handle it instead of abort the region server.
-   */
-  private void interruptOrAbortWhenFail(ReplicationQueueOperation op) {
-    try {
-      op.exec();
-    } catch (ReplicationException e) {
-      if (e.getCause() != null && e.getCause() instanceof KeeperException.SystemErrorException
-          && e.getCause().getCause() != null && e.getCause()
-          .getCause() instanceof InterruptedException) {
-        // ReplicationRuntimeException(a RuntimeException) is thrown out here. The reason is
-        // that thread is interrupted deep down in the stack, it should pass the following
-        // processing logic and propagate to the most top layer which can handle this exception
-        // properly. In this specific case, the top layer is ReplicationSourceShipper#run().
-        throw new ReplicationRuntimeException(
-          "Thread is interrupted, the replication source may be terminated",
-          e.getCause().getCause());
-      }
-      server.abort("Failed to operate on replication queue", e);
-    }
-  }
-
   private void abortWhenFail(ReplicationQueueOperation op) {
     try {
       op.exec();
@@ -561,106 +532,6 @@ public class ReplicationSourceManager {
     }
   }
 
-  /**
-   * This method will log the current position to storage. And also clean old logs from the
-   * replication queue.
-   * @param source the replication source
-   * @param entryBatch the wal entry batch we just shipped
-   */
-  public void logPositionAndCleanOldLogs(ReplicationSourceInterface source,
-      WALEntryBatch entryBatch) {
-    String fileName = entryBatch.getLastWalPath().getName();
-    interruptOrAbortWhenFail(() -> this.queueStorage.setWALPosition(server.getServerName(),
-      source.getQueueId(), fileName, entryBatch.getLastWalPosition(), entryBatch.getLastSeqIds()));
-    cleanOldLogs(fileName, entryBatch.isEndOfFile(), source);
-  }
-
-  /**
-   * Cleans a log file and all older logs from replication queue. Called when we are sure that a log
-   * file is closed and has no more entries.
-   * @param log Path to the log
-   * @param inclusive whether we should also remove the given log file
-   * @param source the replication source
-   */
-  void cleanOldLogs(String log, boolean inclusive,
-    ReplicationSourceInterface source) {
-    NavigableSet<String> walsToRemove;
-    synchronized (this.latestPaths) {
-      walsToRemove = getWalsToRemove(source.getQueueId(), log, inclusive);
-    }
-    if (walsToRemove.isEmpty()) {
-      return;
-    }
-    // cleanOldLogs may spend some time, especially for sync replication where we may want to
-    // remove remote wals as the remote cluster may have already been down, so we do it outside
-    // the lock to avoid block preLogRoll
-    cleanOldLogs(walsToRemove, source);
-  }
-
-  private void removeRemoteWALs(String peerId, String remoteWALDir, Collection<String> wals)
-      throws IOException {
-    Path remoteWALDirForPeer = ReplicationUtils.getPeerRemoteWALDir(remoteWALDir, peerId);
-    FileSystem fs = ReplicationUtils.getRemoteWALFileSystem(conf, remoteWALDir);
-    for (String wal : wals) {
-      Path walFile = new Path(remoteWALDirForPeer, wal);
-      try {
-        if (!fs.delete(walFile, false) && fs.exists(walFile)) {
-          throw new IOException("Can not delete " + walFile);
-        }
-      } catch (FileNotFoundException e) {
-        // Just ignore since this means the file has already been deleted.
-        // The javadoc of the FileSystem.delete methods does not specify the behavior of deleting an
-        // inexistent file, so here we deal with both, i.e, check the return value of the
-        // FileSystem.delete, and also catch FNFE.
-        LOG.debug("The remote wal {} has already been deleted?", walFile, e);
-      }
-    }
-  }
-
-  private void cleanOldLogs(NavigableSet<String> wals, ReplicationSourceInterface source) {
-    LOG.debug("Removing {} logs in the list: {}", wals.size(), wals);
-    // The intention here is that, we want to delete the remote wal files ASAP as it may effect the
-    // failover time if you want to transit the remote cluster from S to A. And the infinite retry
-    // is not a problem, as if we can not contact with the remote HDFS cluster, then usually we can
-    // not contact with the HBase cluster either, so the replication will be blocked either.
-    if (source.isSyncReplication()) {
-      String peerId = source.getPeerId();
-      String remoteWALDir = source.getPeer().getPeerConfig().getRemoteWALDir();
-      // Filter out the wals need to be removed from the remote directory. Its name should be the
-      // special format, and also, the peer id in its name should match the peer id for the
-      // replication source.
-      List<String> remoteWals = wals.stream().filter(w -> SyncReplicationWALProvider
-        .getSyncReplicationPeerIdFromWALName(w).map(peerId::equals).orElse(false))
-        .collect(Collectors.toList());
-      LOG.debug("Removing {} logs from remote dir {} in the list: {}", remoteWals.size(),
-        remoteWALDir, remoteWals);
-      if (!remoteWals.isEmpty()) {
-        for (int sleepMultiplier = 0;;) {
-          try {
-            removeRemoteWALs(peerId, remoteWALDir, remoteWals);
-            break;
-          } catch (IOException e) {
-            LOG.warn("Failed to delete remote wals from remote dir {} for peer {}", remoteWALDir,
-              peerId);
-          }
-          if (!source.isSourceActive()) {
-            // skip the following operations
-            return;
-          }
-          if (ReplicationUtils.sleepForRetries("Failed to delete remote wals", sleepForRetries,
-            sleepMultiplier, maxRetriesMultiplier)) {
-            sleepMultiplier++;
-          }
-        }
-      }
-    }
-    String queueId = source.getQueueId();
-    for (String wal : wals) {
-      interruptOrAbortWhenFail(
-        () -> this.queueStorage.removeWAL(server.getServerName(), queueId, wal));
-    }
-  }
-
   // public because of we call it in TestReplicationEmptyWALRecovery
   public void preLogRoll(Path newLog) throws IOException {
     String logName = newLog.getName();
@@ -981,10 +852,6 @@ public class ReplicationSourceManager {
     }
   }
 
-  public void cleanUpHFileRefs(String peerId, List<String> files) {
-    interruptOrAbortWhenFail(() -> this.queueStorage.removeHFileRefs(peerId, files));
-  }
-
   int activeFailoverTaskCount() {
     return executor.getActiveCount();
   }
@@ -993,6 +860,11 @@ public class ReplicationSourceManager {
     return this.globalMetrics;
   }
 
+  @InterfaceAudience.Private
+  Server getServer() {
+    return this.server;
+  }
+
   /**
    * Add an hbase:meta Catalog replication source. Called on open of an hbase:meta Region.
    * Create it once only. If exists already, use the existing one.
@@ -1044,7 +916,7 @@ public class ReplicationSourceManager {
     CatalogReplicationSourcePeer peer = new CatalogReplicationSourcePeer(this.conf,
       this.clusterId.toString());
     final ReplicationSourceInterface crs = new CatalogReplicationSource();
-    crs.init(conf, fs, this, new NoopReplicationQueueStorage(), peer, server, peer.getId(),
+    crs.init(conf, fs, logDir, this, new NoopReplicationQueueStorage(), peer, server, peer.getId(),
       clusterId, walProvider.getWALFileLengthProvider(), new MetricsSource(peer.getId()));
     // Add listener on the provider so we can pick up the WAL to replicate on roll.
     WALActionsListener listener = new WALActionsListener() {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
index 9754c49..35c4e54 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceShipper.java
@@ -240,12 +240,6 @@ public class ReplicationSourceShipper extends Thread {
   }
 
   private void cleanUpHFileRefs(WALEdit edit) throws IOException {
-    String peerId = source.getPeerId();
-    if (peerId.contains("-")) {
-      // peerClusterZnode will be in the form peerId + "-" + rsZNode.
-      // A peerId will not have "-" in its name, see HBASE-11394
-      peerId = peerId.split("-")[0];
-    }
     List<Cell> cells = edit.getCells();
     int totalCells = cells.size();
     for (int i = 0; i < totalCells; i++) {
@@ -256,7 +250,7 @@ public class ReplicationSourceShipper extends Thread {
         int totalStores = stores.size();
         for (int j = 0; j < totalStores; j++) {
           List<String> storeFileList = stores.get(j).getStoreFileList();
-          source.getSourceManager().cleanUpHFileRefs(peerId, storeFileList);
+          source.cleanUpHFileRefs(storeFileList);
           source.getSourceMetrics().decrSizeOfHFileRefsQueue(storeFileList.size());
         }
       }
@@ -268,10 +262,11 @@ public class ReplicationSourceShipper extends Thread {
     // if end of file is true, then the logPositionAndCleanOldLogs method will remove the file
     // record on zk, so let's call it. The last wal position maybe zero if end of file is true and
     // there is no entry in the batch. It is OK because that the queue storage will ignore the zero
-    // position and the file will be removed soon in cleanOldLogs.
-    if (batch.isEndOfFile() || !batch.getLastWalPath().equals(currentPath) ||
-      batch.getLastWalPosition() != currentPosition) {
-      source.logPositionAndCleanOldLogs(batch);
+    // position and the file will be removed soon in cleanOldWALs.
+    if (batch.isEndOfFile() || !batch.getLastWalPath().equals(currentPath)
+      || batch.getLastWalPosition() != currentPosition) {
+      source.setWALPosition(batch);
+      source.cleanOldWALs(batch.getLastWalPath().getName(), batch.isEndOfFile());
       updated = true;
     }
     // if end of file is true, then we can just skip to the next file in queue.
@@ -373,8 +368,8 @@ public class ReplicationSourceShipper extends Thread {
       LOG.trace("Decrementing totalBufferUsed by {}B while stopping Replication WAL Readers.",
         totalToDecrement.longValue());
     }
-    long newBufferUsed = source.getSourceManager().getTotalBufferUsed()
+    long newBufferUsed = source.manager.getTotalBufferUsed()
       .addAndGet(-totalToDecrement.longValue());
-    source.getSourceManager().getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
+    source.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
index ca41184..d148162 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/ReplicationSourceWALReader.java
@@ -25,7 +25,6 @@ import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.PriorityBlockingQueue;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -74,8 +73,6 @@ class ReplicationSourceWALReader extends Thread {
   //Indicates whether this particular worker is running
   private boolean isReaderRunning = true;
 
-  private AtomicLong totalBufferUsed;
-  private long totalBufferQuota;
   private final String walGroupId;
 
   /**
@@ -103,8 +100,6 @@ class ReplicationSourceWALReader extends Thread {
     // memory used will be batchSizeCapacity * (nb.batches + 1)
     // the +1 is for the current thread reading before placing onto the queue
     int batchCount = conf.getInt("replication.source.nb.batches", 1);
-    this.totalBufferUsed = source.getSourceManager().getTotalBufferUsed();
-    this.totalBufferQuota = source.getSourceManager().getTotalBufferLimit();
     this.sleepForRetries =
         this.conf.getLong("replication.source.sleepforretries", 1000);    // 1 second
     this.maxRetriesMultiplier =
@@ -323,9 +318,10 @@ class ReplicationSourceWALReader extends Thread {
   //returns false if we've already exceeded the global quota
   private boolean checkQuota() {
     // try not to go over total quota
-    if (totalBufferUsed.get() > totalBufferQuota) {
+    if (source.manager.getTotalBufferUsed().get() > source.manager.getTotalBufferLimit()) {
       LOG.warn("peer={}, can't read more edits from WAL as buffer usage {}B exceeds limit {}B",
-          this.source.getPeerId(), totalBufferUsed.get(), totalBufferQuota);
+        this.source.getPeerId(), source.manager.getTotalBufferUsed().get(),
+        source.manager.getTotalBufferLimit());
       Threads.sleep(sleepForRetries);
       return false;
     }
@@ -453,10 +449,10 @@ class ReplicationSourceWALReader extends Thread {
    * @return true if we should clear buffer and push all
    */
   private boolean acquireBufferQuota(long size) {
-    long newBufferUsed = totalBufferUsed.addAndGet(size);
+    long newBufferUsed = source.manager.getTotalBufferUsed().addAndGet(size);
     // Record the new buffer usage
-    this.source.getSourceManager().getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
-    return newBufferUsed >= totalBufferQuota;
+    source.manager.getGlobalMetrics().setWALReaderEditsBufferBytes(newBufferUsed);
+    return newBufferUsed >= source.manager.getTotalBufferLimit();
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryBatch.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryBatch.java
index 8301dff..612cc53 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryBatch.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALEntryBatch.java
@@ -31,7 +31,7 @@ import org.apache.yetus.audience.InterfaceAudience;
  * Holds a batch of WAL entries to replicate, along with some statistics
  */
 @InterfaceAudience.Private
-class WALEntryBatch {
+public class WALEntryBatch {
 
   // used by recovered replication queue to indicate that all the entries have been read.
   public static final WALEntryBatch NO_MORE_DATA = new WALEntryBatch(0, null);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
index 4f656b1..42445a6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/ReplicationSourceDummy.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.replication.regionserver.MetricsSource;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceManager;
+import org.apache.hadoop.hbase.replication.regionserver.WALEntryBatch;
 import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
 import org.apache.hadoop.hbase.wal.WAL.Entry;
 
@@ -38,7 +39,6 @@ import org.apache.hadoop.hbase.wal.WAL.Entry;
  */
 public class ReplicationSourceDummy implements ReplicationSourceInterface {
 
-  private ReplicationSourceManager manager;
   private ReplicationPeer replicationPeer;
   private String peerClusterId;
   private Path currentPath;
@@ -47,11 +47,10 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
   private AtomicBoolean startup = new AtomicBoolean(false);
 
   @Override
-  public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager,
-      ReplicationQueueStorage rq, ReplicationPeer rp, Server server, String peerClusterId,
-      UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
-      throws IOException {
-    this.manager = manager;
+  public void init(Configuration conf, FileSystem fs, Path walDir, ReplicationSourceManager manager,
+    ReplicationQueueStorage rq, ReplicationPeer rp, Server server, String peerClusterId,
+    UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
+    throws IOException {
     this.peerClusterId = peerClusterId;
     this.metrics = metrics;
     this.walFileLengthProvider = walFileLengthProvider;
@@ -134,11 +133,6 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
   }
 
   @Override
-  public ReplicationSourceManager getSourceManager() {
-    return manager;
-  }
-
-  @Override
   public void tryThrottle(int batchSize) throws InterruptedException {
   }
 
@@ -162,6 +156,14 @@ public class ReplicationSourceDummy implements ReplicationSourceInterface {
   }
 
   @Override
+  public void setWALPosition(WALEntryBatch entryBatch) {
+  }
+
+  @Override
+  public void cleanOldWALs(String walName, boolean inclusive) {
+  }
+
+  @Override
   public ReplicationPeer getPeer() {
     return replicationPeer;
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
index ad77c9d..7402d82 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestBasicWALEntryStream.java
@@ -265,21 +265,25 @@ public abstract class TestBasicWALEntryStream extends WALEntryStreamTestBase {
   }
 
   private ReplicationSource mockReplicationSource(boolean recovered, Configuration conf) {
-    ReplicationSourceManager mockSourceManager = Mockito.mock(ReplicationSourceManager.class);
-    when(mockSourceManager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
-    when(mockSourceManager.getTotalBufferLimit())
-      .thenReturn((long) HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
     Server mockServer = Mockito.mock(Server.class);
     ReplicationSource source = Mockito.mock(ReplicationSource.class);
-    when(source.getSourceManager()).thenReturn(mockSourceManager);
     when(source.getSourceMetrics()).thenReturn(new MetricsSource("1"));
     when(source.getWALFileLengthProvider()).thenReturn(log);
     when(source.getServer()).thenReturn(mockServer);
     when(source.isRecovered()).thenReturn(recovered);
+    source.manager = mockReplicationSourceManager();
+    return source;
+  }
+
+  private ReplicationSourceManager mockReplicationSourceManager() {
+    ReplicationSourceManager mockSourceManager = Mockito.mock(ReplicationSourceManager.class);
     MetricsReplicationGlobalSourceSource globalMetrics =
       Mockito.mock(MetricsReplicationGlobalSourceSource.class);
     when(mockSourceManager.getGlobalMetrics()).thenReturn(globalMetrics);
-    return source;
+    when(mockSourceManager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
+    when(mockSourceManager.getTotalBufferLimit())
+      .thenReturn((long) HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
+    return mockSourceManager;
   }
 
   private ReplicationSourceWALReader createReader(boolean recovered, Configuration conf) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
index f5d4f77..0309731 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSource.java
@@ -139,7 +139,7 @@ public class TestReplicationSource {
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, manager, null, mockPeer, rss, queueId, null,
+    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId, null,
       p -> OptionalLong.empty(), new MetricsSource(queueId));
     try {
       rs.startup();
@@ -173,11 +173,11 @@ public class TestReplicationSource {
       thenReturn(DoNothingReplicationEndpoint.class.getName());
     Mockito.when(mockPeer.getPeerConfig()).thenReturn(peerConfig);
     ReplicationSourceManager manager = Mockito.mock(ReplicationSourceManager.class);
-    Mockito.when(manager.getTotalBufferUsed()).thenReturn(new AtomicLong());
+    Mockito.when(manager.getTotalBufferUsed()).thenReturn(new AtomicLong(0));
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, manager, null, mockPeer, rss, queueId,
+    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId,
       uuid, p -> OptionalLong.empty(), new MetricsSource(queueId));
     try {
       rs.startup();
@@ -265,7 +265,7 @@ public class TestReplicationSource {
       testConf.setInt("replication.source.maxretriesmultiplier", 1);
       ReplicationSourceManager manager = Mockito.mock(ReplicationSourceManager.class);
       Mockito.when(manager.getTotalBufferUsed()).thenReturn(new AtomicLong());
-      source.init(testConf, null, manager, null, mockPeer, null, "testPeer",
+      source.init(testConf, null, null, manager, null, mockPeer, null, "testPeer",
         null, p -> OptionalLong.empty(), null);
       ExecutorService executor = Executors.newSingleThreadExecutor();
       Future<?> future = executor.submit(
@@ -289,7 +289,7 @@ public class TestReplicationSource {
     ReplicationPeer mockPeer = mock(ReplicationPeer.class);
     Mockito.when(mockPeer.getPeerBandwidth()).thenReturn(0L);
     Configuration testConf = HBaseConfiguration.create();
-    source.init(testConf, null, mockManager, null, mockPeer, null,
+    source.init(testConf, null, null, mockManager, null, mockPeer, null,
       "testPeer", null, p -> OptionalLong.empty(), mock(MetricsSource.class));
     ReplicationSourceWALReader reader = new ReplicationSourceWALReader(null,
       conf, null, 0, null, source, null);
@@ -315,7 +315,7 @@ public class TestReplicationSource {
     reader.addEntryToBatch(batch, mockEntry);
     reader.entryBatchQueue.put(batch);
     source.terminate("test");
-    assertEquals(0, source.getSourceManager().getTotalBufferUsed().get());
+    assertEquals(0, source.manager.getTotalBufferUsed().get());
   }
 
   /**
@@ -536,7 +536,7 @@ public class TestReplicationSource {
     String queueId = "qid";
     RegionServerServices rss =
       TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
-    rs.init(conf, null, manager, null, mockPeer, rss, queueId, null,
+    rs.init(conf, null, null, manager, null, mockPeer, rss, queueId, null,
       p -> OptionalLong.empty(), new MetricsSource(queueId));
     return rss;
   }
@@ -655,7 +655,7 @@ public class TestReplicationSource {
         TEST_UTIL.createMockRegionServerService(ServerName.parseServerName("a.b.c,1,1"));
 
       ReplicationSource source = new ReplicationSource();
-      source.init(conf, null, manager, null, mockPeer, rss, id, null,
+      source.init(conf, null, null, manager, null, mockPeer, rss, id, null,
         p -> OptionalLong.empty(), metrics);
 
       final Path log1 = new Path(logDir, "log-walgroup-a.8");
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
index d6dbaf4..b74b76e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
@@ -35,6 +35,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NavigableMap;
 import java.util.NavigableSet;
+import java.util.OptionalLong;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeMap;
@@ -335,8 +336,9 @@ public abstract class TestReplicationSourceManager {
     when(source.getQueueId()).thenReturn("1");
     when(source.isRecovered()).thenReturn(false);
     when(source.isSyncReplication()).thenReturn(false);
-    manager.logPositionAndCleanOldLogs(source,
-      new WALEntryBatch(0, manager.getSources().get(0).getCurrentPath()));
+    WALEntryBatch batch = new WALEntryBatch(0, manager.getSources().get(0).getCurrentPath());
+    source.setWALPosition(batch);
+    source.cleanOldWALs(batch.getLastWalPath().getName(), batch.isEndOfFile());
 
     wal.appendData(hri,
       new WALKeyImpl(hri.getEncodedNameAsBytes(), test, EnvironmentEdgeManager.currentTime(),
@@ -409,11 +411,10 @@ public abstract class TestReplicationSourceManager {
     assertEquals(1, manager.getWalsByIdRecoveredQueues().size());
     String id = "1-" + server.getServerName().getServerName();
     assertEquals(files, manager.getWalsByIdRecoveredQueues().get(id).get(group));
-    ReplicationSourceInterface source = mock(ReplicationSourceInterface.class);
-    when(source.getQueueId()).thenReturn(id);
-    when(source.isRecovered()).thenReturn(true);
-    when(source.isSyncReplication()).thenReturn(false);
-    manager.cleanOldLogs(file2, false, source);
+    ReplicationSourceInterface source = new ReplicationSource();
+    source.init(conf, fs, null, manager, manager.getQueueStorage(), rp1.getPeer("1"),
+      manager.getServer(), id, null, p -> OptionalLong.empty(), null);
+    source.cleanOldWALs(file2, false);
     // log1 should be deleted
     assertEquals(Sets.newHashSet(file2), manager.getWalsByIdRecoveredQueues().get(id).get(group));
   }
@@ -589,19 +590,15 @@ public abstract class TestReplicationSourceManager {
     }
   }
 
-  private ReplicationSourceInterface mockReplicationSource(String peerId) {
-    ReplicationSourceInterface source = mock(ReplicationSourceInterface.class);
-    when(source.getPeerId()).thenReturn(peerId);
-    when(source.getQueueId()).thenReturn(peerId);
-    when(source.isRecovered()).thenReturn(false);
-    when(source.isSyncReplication()).thenReturn(true);
+  private ReplicationPeer mockReplicationPeerForSyncReplication(String peerId) {
     ReplicationPeerConfig config = mock(ReplicationPeerConfig.class);
     when(config.getRemoteWALDir())
       .thenReturn(remoteLogDir.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());
+    when(config.isSyncReplication()).thenReturn(true);
     ReplicationPeer peer = mock(ReplicationPeer.class);
     when(peer.getPeerConfig()).thenReturn(config);
-    when(source.getPeer()).thenReturn(peer);
-    return source;
+    when(peer.getId()).thenReturn(peerId);
+    return peer;
   }
 
   @Test
@@ -630,13 +627,19 @@ public abstract class TestReplicationSourceManager {
       manager.preLogRoll(wal);
       manager.postLogRoll(wal);
 
-      ReplicationSourceInterface source = mockReplicationSource(peerId2);
-      manager.cleanOldLogs(walName, true, source);
+      ReplicationSourceInterface source = new ReplicationSource();
+      source.init(conf, fs, null, manager, manager.getQueueStorage(),
+        mockReplicationPeerForSyncReplication(peerId2), manager.getServer(), peerId2, null,
+        p -> OptionalLong.empty(), null);
+      source.cleanOldWALs(walName, true);
       // still there if peer id does not match
       assertTrue(fs.exists(remoteWAL));
 
-      source = mockReplicationSource(slaveId);
-      manager.cleanOldLogs(walName, true, source);
+      source = new ReplicationSource();
+      source.init(conf, fs, null, manager, manager.getQueueStorage(),
+        mockReplicationPeerForSyncReplication(slaveId), manager.getServer(), slaveId, null,
+        p -> OptionalLong.empty(), null);
+      source.cleanOldWALs(walName, true);
       assertFalse(fs.exists(remoteWAL));
     } finally {
       removePeerAndWait(peerId2);
@@ -814,10 +817,10 @@ public abstract class TestReplicationSourceManager {
   static class FailInitializeDummyReplicationSource extends ReplicationSourceDummy {
 
     @Override
-    public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager,
-        ReplicationQueueStorage rq, ReplicationPeer rp, Server server, String peerClusterId,
-        UUID clusterId, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics)
-        throws IOException {
+    public void init(Configuration conf, FileSystem fs, Path walDir,
+      ReplicationSourceManager manager, ReplicationQueueStorage rq, ReplicationPeer rp,
+      Server server, String peerClusterId, UUID clusterId,
+      WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException {
       throw new IOException("Failing deliberately");
     }
   }

[hbase] 11/12: HBASE-24737 Find a way to resolve WALFileLengthProvider#getLogFileSizeIfBeingWritten problem (#3045)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit ff16870505d082fe5075cb31e60d6ec045cf2ab6
Author: XinSun <dd...@gmail.com>
AuthorDate: Tue Apr 27 11:13:15 2021 +0800

    HBASE-24737 Find a way to resolve WALFileLengthProvider#getLogFileSizeIfBeingWritten problem (#3045)
    
    Signed-off-by: Duo Zhang <zh...@apache.org>
---
 .../src/main/protobuf/server/region/Admin.proto    |  12 ++
 .../hbase/client/AsyncRegionServerAdmin.java       |   8 ++
 .../hadoop/hbase/regionserver/HRegionServer.java   |   2 +-
 .../hadoop/hbase/regionserver/RSRpcServices.java   |  24 ++++
 .../hbase/replication/HReplicationServer.java      |  11 +-
 .../regionserver/WALFileLengthProvider.java        |   3 +-
 .../RemoteWALFileLengthProvider.java               |  73 ++++++++++++
 .../org/apache/hadoop/hbase/wal/WALProvider.java   |  15 ++-
 .../hadoop/hbase/master/MockRegionServer.java      |   7 ++
 .../TestRemoteWALFileLengthProvider.java           | 130 +++++++++++++++++++++
 10 files changed, 280 insertions(+), 5 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/region/Admin.proto b/hbase-protocol-shaded/src/main/protobuf/server/region/Admin.proto
index 0667292..693a809 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/region/Admin.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/region/Admin.proto
@@ -328,6 +328,15 @@ message ClearSlowLogResponses {
   required bool is_cleaned = 1;
 }
 
+message GetLogFileSizeIfBeingWrittenRequest {
+  required string wal_path = 1;
+}
+
+message GetLogFileSizeIfBeingWrittenResponse {
+  required bool is_being_written = 1;
+  optional uint64 length = 2;
+}
+
 service AdminService {
   rpc GetRegionInfo(GetRegionInfoRequest)
     returns(GetRegionInfoResponse);
@@ -399,4 +408,7 @@ service AdminService {
   rpc GetLogEntries(LogRequest)
     returns(LogEntry);
 
+  rpc GetLogFileSizeIfBeingWritten(GetLogFileSizeIfBeingWrittenRequest)
+    returns(GetLogFileSizeIfBeingWrittenResponse);
+
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncRegionServerAdmin.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncRegionServerAdmin.java
index 8ff869f..f18d894 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncRegionServerAdmin.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/client/AsyncRegionServerAdmin.java
@@ -42,6 +42,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProc
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetLogFileSizeIfBeingWrittenRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetLogFileSizeIfBeingWrittenResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
@@ -216,4 +218,10 @@ public class AsyncRegionServerAdmin {
       ExecuteProceduresRequest request) {
     return call((stub, controller, done) -> stub.executeProcedures(controller, request, done));
   }
+
+  public CompletableFuture<GetLogFileSizeIfBeingWrittenResponse> getLogFileSizeIfBeingWritten(
+    GetLogFileSizeIfBeingWrittenRequest request) {
+    return call((stub, controller, done) ->
+      stub.getLogFileSizeIfBeingWritten(controller, request, done));
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index c00a8b7..a5eb4e7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -2323,7 +2323,7 @@ public class HRegionServer extends Thread implements
     return walRoller;
   }
 
-  WALFactory getWalFactory() {
+  public WALFactory getWalFactory() {
     return walFactory;
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 91bf9cb..edc33d7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -37,6 +37,7 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NavigableMap;
 import java.util.Optional;
+import java.util.OptionalLong;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.concurrent.ConcurrentHashMap;
@@ -136,6 +137,7 @@ import org.apache.hadoop.hbase.regionserver.handler.UnassignRegionHandler;
 import org.apache.hadoop.hbase.replication.ReplicationUtils;
 import org.apache.hadoop.hbase.replication.regionserver.RejectReplicationRequestStateChecker;
 import org.apache.hadoop.hbase.replication.regionserver.RejectRequestsFromClientStateChecker;
+import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
 import org.apache.hadoop.hbase.security.Superusers;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.security.access.AccessChecker;
@@ -189,6 +191,8 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProc
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetLogFileSizeIfBeingWrittenRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetLogFileSizeIfBeingWrittenResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
@@ -4055,6 +4059,26 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
     throw new ServiceException("Invalid request params");
   }
 
+  @Override
+  public GetLogFileSizeIfBeingWrittenResponse getLogFileSizeIfBeingWritten(
+    RpcController controller, GetLogFileSizeIfBeingWrittenRequest request) throws ServiceException {
+    GetLogFileSizeIfBeingWrittenResponse.Builder builder =
+      GetLogFileSizeIfBeingWrittenResponse.newBuilder();
+    try {
+      WALFileLengthProvider walLengthProvider =
+        this.regionServer.getWalFactory().getWALProvider().getWALFileLengthProvider();
+      OptionalLong lengthOptional =
+        walLengthProvider.getLogFileSizeIfBeingWritten(new Path(request.getWalPath()));
+      if (lengthOptional.isPresent()) {
+        return builder.setIsBeingWritten(true).setLength(lengthOptional.getAsLong()).build();
+      } else {
+        return builder.setIsBeingWritten(false).build();
+      }
+    } catch (Exception e) {
+      throw new ServiceException(e);
+    }
+  }
+
   public RpcScheduler getRpcScheduler() {
     return rpcServer.getScheduler();
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
index 8d85b85..2654565 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -56,6 +56,8 @@ import org.apache.hadoop.hbase.replication.regionserver.RecoveredReplicationSour
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationLoad;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceFactory;
 import org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceInterface;
+import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
+import org.apache.hadoop.hbase.replication.replicationserver.RemoteWALFileLengthProvider;
 import org.apache.hadoop.hbase.security.SecurityConstants;
 import org.apache.hadoop.hbase.security.Superusers;
 import org.apache.hadoop.hbase.security.User;
@@ -716,7 +718,7 @@ public class HReplicationServer extends Thread implements Server, ReplicationSou
     ReplicationSourceInterface src = ReplicationSourceFactory.create(conf, queueId);
     // init replication source
     src.init(conf, walFs, walDir, this, queueStorage, replicationPeers.getPeer(peerId), this,
-      producer, queueId, clusterId, p -> OptionalLong.empty(), metrics);
+      producer, queueId, clusterId, createWALFileLengthProvider(producer, queueId), metrics);
     queueStorage.getWALsInQueue(producer, queueId)
       .forEach(walName -> src.enqueueLog(new Path(walDir, walName)));
     src.startup();
@@ -744,4 +746,11 @@ public class HReplicationServer extends Thread implements Server, ReplicationSou
       abort("Failed to operate on replication queue", e);
     }
   }
+
+  private WALFileLengthProvider createWALFileLengthProvider(ServerName producer, String queueId) {
+    if (new ReplicationQueueInfo(queueId).isQueueRecovered()) {
+      return p -> OptionalLong.empty();
+    }
+    return new RemoteWALFileLengthProvider(asyncClusterConnection, producer);
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALFileLengthProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALFileLengthProvider.java
index c60faa9..f91dd2c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALFileLengthProvider.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/regionserver/WALFileLengthProvider.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.replication.regionserver;
 
+import java.io.IOException;
 import java.util.OptionalLong;
 
 import org.apache.hadoop.fs.Path;
@@ -33,5 +34,5 @@ import org.apache.yetus.audience.InterfaceAudience;
 @FunctionalInterface
 public interface WALFileLengthProvider {
 
-  OptionalLong getLogFileSizeIfBeingWritten(Path path);
+  OptionalLong getLogFileSizeIfBeingWritten(Path path) throws IOException;
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/replicationserver/RemoteWALFileLengthProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/replicationserver/RemoteWALFileLengthProvider.java
new file mode 100644
index 0000000..07d216d
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/replicationserver/RemoteWALFileLengthProvider.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication.replicationserver;
+
+import java.io.IOException;
+import java.util.OptionalLong;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.AsyncClusterConnection;
+import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
+import org.apache.hadoop.hbase.util.FutureUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetLogFileSizeIfBeingWrittenRequest;
+
+/**
+ * Used by the ReplicationServer when replication offload is enabled.
+ * On the ReplicationServer, we need to know the length of a WAL that is still being written by the
+ * RegionServer holding it, so we obtain that length through an RPC call.
+ */
+@InterfaceAudience.Private
+public class RemoteWALFileLengthProvider implements WALFileLengthProvider {
+
+  private static final Logger LOG = LoggerFactory.getLogger(RemoteWALFileLengthProvider.class);
+
+  private AsyncClusterConnection conn;
+
+  private ServerName rs;
+
+  public RemoteWALFileLengthProvider(AsyncClusterConnection conn, ServerName rs) {
+    this.conn = conn;
+    this.rs = rs;
+  }
+
+  @Override
+  public OptionalLong getLogFileSizeIfBeingWritten(Path path) throws IOException {
+    AsyncRegionServerAdmin rsAdmin = conn.getRegionServerAdmin(rs);
+    GetLogFileSizeIfBeingWrittenRequest request =
+      GetLogFileSizeIfBeingWrittenRequest.newBuilder().setWalPath(path.toString()).build();
+    try {
+      AdminProtos.GetLogFileSizeIfBeingWrittenResponse response =
+        FutureUtils.get(rsAdmin.getLogFileSizeIfBeingWritten(request));
+      if (response.getIsBeingWritten()) {
+        return OptionalLong.of(response.getLength());
+      } else {
+        return OptionalLong.empty();
+      }
+    } catch (IOException e) {
+      LOG.warn("Exceptionally get the length of wal {} from RS {}", path.getName(), rs);
+      throw e;
+    }
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALProvider.java
index 01c1d11..a9bd50e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALProvider.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALProvider.java
@@ -130,7 +130,18 @@ public interface WALProvider {
   void addWALActionsListener(WALActionsListener listener);
 
   default WALFileLengthProvider getWALFileLengthProvider() {
-    return path -> getWALs().stream().map(w -> w.getLogFileSizeIfBeingWritten(path))
-        .filter(o -> o.isPresent()).findAny().orElse(OptionalLong.empty());
+    return path -> getWALs().stream().map(w -> {
+      try {
+        return w.getLogFileSizeIfBeingWritten(path);
+      } catch (IOException e) {
+        // Should never happen. To support replication offload in HBASE-24737, we introduced
+        // RemoteWALFileLengthProvider implementing WALFileLengthProvider; it is held by the
+        // ReplicationServer and fetches the length of WALs from the RS through RPC, which may
+        // throw an IOE. So WALFileLengthProvider.getLogFileSizeIfBeingWritten must be declared as
+        // throwing IOE. But this is safe here: WALProvider is only used by the RS, and getWALs
+        // returns WALs that extend WALFileLengthProvider and do not throw IOE.
+        return OptionalLong.empty();
+      }
+    }).filter(o -> o.isPresent()).findAny().orElse(OptionalLong.empty());
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java
index 69a7a79..084b5af 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockRegionServer.java
@@ -690,6 +690,13 @@ class MockRegionServer implements AdminProtos.AdminService.BlockingInterface,
   }
 
   @Override
+  public AdminProtos.GetLogFileSizeIfBeingWrittenResponse getLogFileSizeIfBeingWritten(
+    RpcController controller, AdminProtos.GetLogFileSizeIfBeingWrittenRequest request)
+    throws ServiceException {
+    return null;
+  }
+
+  @Override
   public GetSpaceQuotaSnapshotsResponse getSpaceQuotaSnapshots(
       RpcController controller, GetSpaceQuotaSnapshotsRequest request)
       throws ServiceException {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/replicationserver/TestRemoteWALFileLengthProvider.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/replicationserver/TestRemoteWALFileLengthProvider.java
new file mode 100644
index 0000000..a9adbec
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/replicationserver/TestRemoteWALFileLengthProvider.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication.replicationserver;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.OptionalLong;
+import java.util.stream.Collectors;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ MediumTests.class})
+public class TestRemoteWALFileLengthProvider {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestRemoteWALFileLengthProvider.class);
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestRemoteWALFileLengthProvider.class);
+
+  @Rule
+  public final TestName name = new TestName();
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
+  private static final byte[] CF = Bytes.toBytes("C");
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void teardownAfterClass() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void test() throws Exception {
+    TableName tableName = TableName.valueOf(name.getMethodName());
+    Table table = UTIL.createTable(tableName, CF);
+    UTIL.waitUntilAllRegionsAssigned(tableName);
+    assertEquals(1, UTIL.getMiniHBaseCluster().getNumLiveRegionServers());
+
+    // Find the RS which holds test table regions.
+    HRegionServer rs =
+      UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().stream()
+        .map(JVMClusterUtil.RegionServerThread::getRegionServer)
+        .filter(s -> !s.getRegions(tableName).isEmpty())
+        .findFirst().get();
+    assertNotNull(rs);
+
+    // Put some data and request rolling log, make multiple wals.
+    table.put(new Put(Bytes.toBytes("r1")).addColumn(CF, CF, Bytes.toBytes("v")));
+    rs.getWalRoller().requestRollAll();
+    table.put(new Put(Bytes.toBytes("r2")).addColumn(CF, CF, Bytes.toBytes("v")));
+    UTIL.waitFor(60000, rs::walRollRequestFinished);
+
+    WALFileLengthProvider rsLengthProvider =
+      rs.getWalFactory().getWALProvider().getWALFileLengthProvider();
+    WALFileLengthProvider remoteLengthProvider =
+      new RemoteWALFileLengthProvider(UTIL.getAsyncConnection(), rs.getServerName());
+
+    // Check that the RegionServer and the ReplicationServer agree on whether each wal is being
+    // written, and on its length when it is
+    boolean foundWalIsBeingWritten = false;
+    List<Path> wals = getRsWalsOnFs(rs);
+    assertTrue(wals.size() > 1);
+    for (Path wal : wals) {
+      Path path = new Path(rs.getWALRootDir(), wal);
+      OptionalLong rsWalLength = rsLengthProvider.getLogFileSizeIfBeingWritten(path);
+      OptionalLong remoteLength = remoteLengthProvider.getLogFileSizeIfBeingWritten(path);
+      assertEquals(rsWalLength.isPresent(), remoteLength.isPresent());
+      if (rsWalLength.isPresent() && remoteLength.isPresent()) {
+        foundWalIsBeingWritten = true;
+        assertEquals(rsWalLength.getAsLong(), remoteLength.getAsLong());
+      }
+    }
+    assertTrue(foundWalIsBeingWritten);
+  }
+
+  private List<Path> getRsWalsOnFs(HRegionServer rs) throws IOException {
+    FileSystem fs = rs.getFileSystem();
+    FileStatus[] fileStatuses = fs.listStatus(new Path(rs.getWALRootDir(),
+      AbstractFSWALProvider.getWALDirectoryName(rs.getServerName().toString())));
+    return Arrays.stream(fileStatuses).map(FileStatus::getPath).collect(Collectors.toList());
+  }
+}

[hbase] 12/12: HBASE-25807 Move method reportProcedureDone from RegionServerStatus.proto to Master.proto (#3205)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit add13ab6072484fcdda248ed082a4f9cdace6d1a
Author: XinSun <dd...@gmail.com>
AuthorDate: Mon May 24 11:54:00 2021 +0800

    HBASE-25807 Move method reportProcedureDone from RegionServerStatus.proto to Master.proto (#3205)
    
    Signed-off-by: Duo Zhang <zh...@apache.org>
---
 .../src/main/protobuf/server/master/Master.proto    | 20 ++++++++++++++++++++
 .../protobuf/server/master/RegionServerStatus.proto | 21 +++++----------------
 .../hadoop/hbase/master/MasterRpcServices.java      |  6 +++---
 .../master/MasterRpcServicesVersionWrapper.java     |  5 +++--
 .../hadoop/hbase/regionserver/HRegionServer.java    |  2 +-
 .../regionserver/RemoteProcedureResultReporter.java |  7 +++----
 6 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
index b9ed476..13b3a35 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/Master.proto
@@ -735,6 +735,23 @@ message ListReplicationSinkServersResponse {
   repeated ServerName server_name = 1;
 }
 
+message RemoteProcedureResult {
+  required uint64 proc_id = 1;
+  enum Status {
+    SUCCESS = 1;
+    ERROR = 2;
+  }
+  required Status status = 2;
+  optional ForeignExceptionMessage error = 3;
+}
+
+message ReportProcedureDoneRequest {
+  repeated RemoteProcedureResult result = 1;
+}
+
+message ReportProcedureDoneResponse {
+}
+
 service MasterService {
   /** Used by the client to get the number of regions that have received the updated schema */
   rpc GetSchemaAlterStatus(GetSchemaAlterStatusRequest)
@@ -1171,6 +1188,9 @@ service MasterService {
 
   rpc ListReplicationSinkServers(ListReplicationSinkServersRequest)
     returns (ListReplicationSinkServersResponse);
+
+  rpc ReportProcedureDone(ReportProcedureDoneRequest)
+      returns(ReportProcedureDoneResponse);
 }
 
 // HBCK Service definitions.
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/RegionServerStatus.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/RegionServerStatus.proto
index c894a77..f3547da 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/master/RegionServerStatus.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/master/RegionServerStatus.proto
@@ -29,6 +29,7 @@ option optimize_for = SPEED;
 import "HBase.proto";
 import "server/ClusterStatus.proto";
 import "server/ErrorHandling.proto";
+import "server/master/Master.proto";
 
 message RegionServerStartupRequest {
   /** Port number this regionserver is up on */
@@ -147,22 +148,6 @@ message RegionSpaceUseReportRequest {
 message RegionSpaceUseReportResponse {
 }
 
-message RemoteProcedureResult {
-  required uint64 proc_id = 1;
-  enum Status {
-    SUCCESS = 1;
-    ERROR = 2;
-  }
-  required Status status = 2;
-  optional ForeignExceptionMessage error = 3;
-}
-message ReportProcedureDoneRequest {
-  repeated RemoteProcedureResult result = 1;
-}
-
-message ReportProcedureDoneResponse {
-}
-
 message FileArchiveNotificationRequest {
   message FileWithSize {
     optional TableName table_name = 1;
@@ -211,6 +196,10 @@ service RegionServerStatusService {
   rpc ReportRegionSpaceUse(RegionSpaceUseReportRequest)
     returns(RegionSpaceUseReportResponse);
 
+  /**
+   * In HBASE-25807 this method was moved to Master.proto because the replication server also
+   * needs it. To avoid problems during upgrades, this method is still kept here.
+   */
   rpc ReportProcedureDone(ReportProcedureDoneRequest)
     returns(ReportProcedureDoneResponse);
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
index c17d699..b7c0bff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java
@@ -291,6 +291,9 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.OfflineReg
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RecommissionRegionServerResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RegionSpecifierAndState;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoteProcedureResult;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReportProcedureDoneRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReportProcedureDoneResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RestoreSnapshotResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RunCatalogScanRequest;
@@ -378,9 +381,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportResponse;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RemoteProcedureResult;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorResponse;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServicesVersionWrapper.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServicesVersionWrapper.java
index a98f5ae..b35b24c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServicesVersionWrapper.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServicesVersionWrapper.java
@@ -24,6 +24,7 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
 import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos;
 
 /**
@@ -96,8 +97,8 @@ public class MasterRpcServicesVersionWrapper
   }
 
   @Override
-  public RegionServerStatusProtos.ReportProcedureDoneResponse reportProcedureDone(
-      RpcController controller, RegionServerStatusProtos.ReportProcedureDoneRequest request)
+  public MasterProtos.ReportProcedureDoneResponse reportProcedureDone(
+      RpcController controller, MasterProtos.ReportProcedureDoneRequest request)
       throws ServiceException {
     return masterRpcServices.reportProcedureDone(controller, request);
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index a5eb4e7..21fab00 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -223,6 +223,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionServe
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionSpecifier;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.LockServiceProtos.LockService;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReportProcedureDoneRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.GetLastFlushedSequenceIdResponse;
@@ -234,7 +235,6 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionSpaceUseReportRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRSFatalErrorRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
index 63e050a..e8c1766 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RemoteProcedureResultReporter.java
@@ -30,8 +30,8 @@ import org.slf4j.LoggerFactory;
 
 import org.apache.hbase.thirdparty.com.google.protobuf.TextFormat;
 
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RemoteProcedureResult;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportProcedureDoneRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.RemoteProcedureResult;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.ReportProcedureDoneRequest;
 
 /**
  * A thread which calls {@code reportProcedureDone} to tell master the result of a remote procedure.
@@ -101,8 +101,7 @@ class RemoteProcedureResultReporter extends Thread {
         }
         LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
           tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
-            : " immediately."),
-          e);
+            : " immediately."), e);
         Threads.sleep(pauseTime);
         tries++;
       }

[hbase] 04/12: HBASE-24683 Add a basic ReplicationServer which only implement ReplicationSink Service (#2111)

Posted by su...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

sunxin pushed a commit to branch HBASE-24666
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 2fcdb7af28f4682aef1e21db4ea22c2181ee04e6
Author: XinSun <dd...@gmail.com>
AuthorDate: Fri Sep 4 18:53:46 2020 +0800

    HBASE-24683 Add a basic ReplicationServer which only implement ReplicationSink Service (#2111)
    
    Signed-off-by: Guanghao Zhang <zg...@apache.org>
---
 .../java/org/apache/hadoop/hbase/util/DNS.java     |   3 +-
 .../hbase/replication/HReplicationServer.java      | 391 ++++++++++++++++
 .../replication/ReplicationServerRpcServices.java  | 516 +++++++++++++++++++++
 .../hbase/replication/TestReplicationServer.java   | 151 ++++++
 4 files changed, 1060 insertions(+), 1 deletion(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/DNS.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/DNS.java
index 098884c..a933f6c 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/DNS.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/DNS.java
@@ -63,7 +63,8 @@ public final class DNS {
 
   public enum ServerType {
     MASTER("master"),
-    REGIONSERVER("regionserver");
+    REGIONSERVER("regionserver"),
+    REPLICATIONSERVER("replicationserver");
 
     private String name;
     ServerType(String name) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
new file mode 100644
index 0000000..31dec0c
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/HReplicationServer.java
@@ -0,0 +1,391 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ChoreService;
+import org.apache.hadoop.hbase.CoordinatedStateManager;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.AsyncClusterConnection;
+import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.log.HBaseMarkers;
+import org.apache.hadoop.hbase.regionserver.ReplicationService;
+import org.apache.hadoop.hbase.regionserver.ReplicationSinkService;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.UserProvider;
+import org.apache.hadoop.hbase.trace.TraceUtil;
+import org.apache.hadoop.hbase.util.Sleeper;
+import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.hadoop.util.ReflectionUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * HReplicationServer which is responsible to all replication stuff. It checks in with
+ * the HMaster. There are many HReplicationServers in a single HBase deployment.
+ */
+@InterfaceAudience.Private
+@SuppressWarnings({ "deprecation"})
+public class HReplicationServer extends Thread implements Server {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HReplicationServer.class);
+
+  /** replication server process name */
+  public static final String REPLICATION_SERVER = "replicationserver";
+
+  /**
+   * This servers start code.
+   */
+  protected final long startCode;
+
+  private volatile boolean stopped = false;
+
+  // Go down hard. Used if file system becomes unavailable and also in
+  // debugging and unit tests.
+  private AtomicBoolean abortRequested;
+
+  // flag set after we're done setting up server threads
+  final AtomicBoolean online = new AtomicBoolean(false);
+
+  /**
+   * The server name the Master sees us as.  Its made from the hostname the
+   * master passes us, port, and server start code. Gets set after registration
+   * against Master.
+   */
+  private ServerName serverName;
+
+  protected final Configuration conf;
+
+  private ReplicationSinkService replicationSinkService;
+
+  final int msgInterval;
+  // A sleeper that sleeps for msgInterval.
+  protected final Sleeper sleeper;
+
+  // zookeeper connection and watcher
+  protected final ZKWatcher zooKeeper;
+
+  /**
+   * The asynchronous cluster connection to be shared by services.
+   */
+  protected AsyncClusterConnection asyncClusterConnection;
+
+  private UserProvider userProvider;
+
+  protected final ReplicationServerRpcServices rpcServices;
+
+  public HReplicationServer(final Configuration conf) throws IOException {
+    TraceUtil.initTracer(conf);
+    try {
+      this.startCode = System.currentTimeMillis();
+      this.conf = conf;
+
+      this.abortRequested = new AtomicBoolean(false);
+
+      this.rpcServices = createRpcServices();
+
+      String hostName = this.rpcServices.isa.getHostName();
+      serverName = ServerName.valueOf(hostName, this.rpcServices.isa.getPort(), this.startCode);
+
+      this.userProvider = UserProvider.instantiate(conf);
+
+      this.msgInterval = conf.getInt("hbase.replicationserver.msginterval", 3 * 1000);
+      this.sleeper = new Sleeper(this.msgInterval, this);
+
+      // Some unit tests don't need a cluster, so no zookeeper at all
+      if (!conf.getBoolean("hbase.testing.nocluster", false)) {
+        // Open connection to zookeeper and set primary watcher
+        zooKeeper = new ZKWatcher(conf, getProcessName() + ":" +
+            rpcServices.isa.getPort(), this, false);
+      } else {
+        zooKeeper = null;
+      }
+
+      this.rpcServices.start(zooKeeper);
+    } catch (Throwable t) {
+      // Make sure we log the exception. HReplicationServer is often started via reflection and the
+      // cause of failed startup is lost.
+      LOG.error("Failed construction ReplicationServer", t);
+      throw t;
+    }
+  }
+
+  public String getProcessName() {
+    return REPLICATION_SERVER;
+  }
+
+  @Override
+  public void run() {
+    if (isStopped()) {
+      LOG.info("Skipping run; stopped");
+      return;
+    }
+    try {
+      // Do pre-registration initializations; zookeeper, lease threads, etc.
+      preRegistrationInitialization();
+    } catch (Throwable e) {
+      abort("Fatal exception during initialization", e);
+    }
+    try {
+      setupReplication();
+      startReplicationService();
+
+      online.set(true);
+
+      long lastMsg = System.currentTimeMillis();
+      // The main run loop.
+      while (!isStopped()) {
+        long now = System.currentTimeMillis();
+        if ((now - lastMsg) >= msgInterval) {
+          lastMsg = System.currentTimeMillis();
+        }
+        if (!isStopped() && !isAborted()) {
+          this.sleeper.sleep();
+        }
+      }
+
+      stopServiceThreads();
+
+      if (this.rpcServices != null) {
+        this.rpcServices.stop();
+      }
+    } catch (Throwable t) {
+      abort(t.getMessage(), t);
+    }
+
+    if (this.zooKeeper != null) {
+      this.zooKeeper.close();
+    }
+    LOG.info("Exiting; stopping=" + this.serverName + "; zookeeper connection closed.");
+  }
+
+  private Configuration cleanupConfiguration() {
+    Configuration conf = this.conf;
+    conf.set(HConstants.CLIENT_CONNECTION_REGISTRY_IMPL_CONF_KEY,
+        HConstants.ZK_CONNECTION_REGISTRY_CLASS);
+    if (conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM) != null) {
+      // Use server ZK cluster for server-issued connections, so we clone
+      // the conf and unset the client ZK related properties
+      conf = new Configuration(this.conf);
+      conf.unset(HConstants.CLIENT_ZOOKEEPER_QUORUM);
+    }
+    return conf;
+  }
+
+  /**
+   * All initialization needed before we go register with Master.<br>
+   * Do bare minimum. Do bulk of initializations AFTER we've connected to the Master.<br>
+   * In here we just put up the RpcServer, setup Connection, and ZooKeeper.
+   */
+  private void preRegistrationInitialization() {
+    try {
+      setupClusterConnection();
+    } catch (Throwable t) {
+      // Call stop if error or process will stick around for ever since server
+      // puts up non-daemon threads.
+      this.rpcServices.stop();
+      abort("Initialization of RS failed.  Hence aborting RS.", t);
+    }
+  }
+
+  /**
+   * Setup our cluster connection if not already initialized.
+   */
+  protected final synchronized void setupClusterConnection() throws IOException {
+    if (asyncClusterConnection == null) {
+      Configuration conf = cleanupConfiguration();
+      InetSocketAddress localAddress = new InetSocketAddress(this.rpcServices.isa.getAddress(), 0);
+      User user = userProvider.getCurrent();
+      asyncClusterConnection =
+          ClusterConnectionFactory.createAsyncClusterConnection(conf, localAddress, user);
+    }
+  }
+
+  /**
+   * Wait on all threads to finish. Presumption is that all closes and stops
+   * have already been called.
+   */
+  protected void stopServiceThreads() {
+    if (this.replicationSinkService != null) {
+      this.replicationSinkService.stopReplicationService();
+    }
+  }
+
+  @Override
+  public Configuration getConfiguration() {
+    return conf;
+  }
+
+  @Override
+  public ZKWatcher getZooKeeper() {
+    return zooKeeper;
+  }
+
+  @Override
+  public Connection getConnection() {
+    return getAsyncConnection().toConnection();
+  }
+
+  @Override
+  public Connection createConnection(Configuration conf) throws IOException {
+    throw new DoNotRetryIOException(new UnsupportedOperationException("This's ReplicationServer."));
+  }
+
+  @Override
+  public AsyncClusterConnection getAsyncClusterConnection() {
+    return this.asyncClusterConnection;
+  }
+
+  @Override
+  public ServerName getServerName() {
+    return serverName;
+  }
+
+  @Override
+  public CoordinatedStateManager getCoordinatedStateManager() {
+    return null;
+  }
+
+  @Override
+  public ChoreService getChoreService() {
+    return null;
+  }
+
+  @Override
+  public void abort(String why, Throwable cause) {
+    if (!setAbortRequested()) {
+      // Abort already in progress, ignore the new request.
+      LOG.debug(
+          "Abort already in progress. Ignoring the current request with reason: {}", why);
+      return;
+    }
+    String msg = "***** ABORTING replication server " + this + ": " + why + " *****";
+    if (cause != null) {
+      LOG.error(HBaseMarkers.FATAL, msg, cause);
+    } else {
+      LOG.error(HBaseMarkers.FATAL, msg);
+    }
+    stop(why);
+  }
+
+  @Override
+  public boolean isAborted() {
+    return abortRequested.get();
+  }
+
+  @Override
+  public void stop(final String msg) {
+    if (!this.stopped) {
+      LOG.info("***** STOPPING region server '" + this + "' *****");
+      this.stopped = true;
+      LOG.info("STOPPED: " + msg);
+      // Wakes run() if it is sleeping
+      sleeper.skipSleepCycle();
+    }
+  }
+
+  @Override
+  public boolean isStopped() {
+    return this.stopped;
+  }
+
+  /**
+   * Setup WAL log and replication if enabled. Replication setup is done in here because it wants to
+   * be hooked up to WAL.
+   */
+  private void setupReplication() throws IOException {
+    // Instantiate replication if replication enabled. Pass it the log directories.
+    createNewReplicationInstance(conf, this);
+  }
+
+  /**
+   * Load the replication executorService objects, if any
+   */
+  private static void createNewReplicationInstance(Configuration conf, HReplicationServer server)
+      throws IOException {
+    // read in the name of the sink replication class from the config file.
+    String sinkClassname = conf.get(HConstants.REPLICATION_SINK_SERVICE_CLASSNAME,
+        HConstants.REPLICATION_SERVICE_CLASSNAME_DEFAULT);
+
+    server.replicationSinkService = newReplicationInstance(sinkClassname,
+        ReplicationSinkService.class, conf, server);
+  }
+
+  private static <T extends ReplicationService> T newReplicationInstance(String classname,
+      Class<T> xface, Configuration conf, HReplicationServer server) throws IOException {
+    final Class<? extends T> clazz;
+    try {
+      ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+      clazz = Class.forName(classname, true, classLoader).asSubclass(xface);
+    } catch (java.lang.ClassNotFoundException nfe) {
+      throw new IOException("Could not find class for " + classname);
+    }
+    T service = ReflectionUtils.newInstance(clazz, conf);
+    service.initialize(server, null, null, null, null);
+    return service;
+  }
+
+  /**
+   * Start up replication source and sink handlers.
+   */
+  private void startReplicationService() throws IOException {
+    if (this.replicationSinkService != null) {
+      this.replicationSinkService.startReplicationService();
+    }
+  }
+
+  /**
+   * @return Return the object that implements the replication sink executorService.
+   */
+  public ReplicationSinkService getReplicationSinkService() {
+    return replicationSinkService;
+  }
+
+  /**
+   * Report the status of the server. A server is online once all the startup is
+   * completed (setting up filesystem, starting executorService threads, etc.). This
+   * method is designed mostly to be useful in tests.
+   *
+   * @return true if online, false if not.
+   */
+  public boolean isOnline() {
+    return online.get();
+  }
+
+  protected ReplicationServerRpcServices createRpcServices() throws IOException {
+    return new ReplicationServerRpcServices(this);
+  }
+
+  /**
+   * Sets the abort state if not already set.
+   * @return True if abortRequested set to True successfully, false if an abort is already in
+   * progress.
+   */
+  protected boolean setAbortRequested() {
+    return abortRequested.compareAndSet(false, true);
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
new file mode 100644
index 0000000..1b9b699
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/replication/ReplicationServerRpcServices.java
@@ -0,0 +1,516 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.net.BindException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.LongAdder;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.CellScanner;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.Server;
+import org.apache.hadoop.hbase.client.ConnectionUtils;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.ipc.HBaseRPCErrorHandler;
+import org.apache.hadoop.hbase.ipc.HBaseRpcController;
+import org.apache.hadoop.hbase.ipc.PriorityFunction;
+import org.apache.hadoop.hbase.ipc.QosPriority;
+import org.apache.hadoop.hbase.ipc.RpcServer.BlockingServiceAndInterface;
+import org.apache.hadoop.hbase.ipc.RpcServerFactory;
+import org.apache.hadoop.hbase.ipc.RpcServerInterface;
+import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
+import org.apache.hadoop.hbase.log.HBaseMarkers;
+import org.apache.hadoop.hbase.net.Address;
+import org.apache.hadoop.hbase.regionserver.RSRpcServices;
+import org.apache.hadoop.hbase.regionserver.RegionServerAbortedException;
+import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
+import org.apache.hadoop.hbase.regionserver.RpcSchedulerFactory;
+import org.apache.hadoop.hbase.regionserver.SimpleRpcSchedulerFactory;
+import org.apache.hadoop.hbase.security.User;
+import org.apache.hadoop.hbase.security.access.AccessChecker;
+import org.apache.hadoop.hbase.security.access.NoopAccessChecker;
+import org.apache.hadoop.hbase.security.access.ZKPermissionWatcher;
+import org.apache.hadoop.hbase.util.DNS;
+import org.apache.hadoop.hbase.util.DNS.ServerType;
+import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearCompactionQueuesRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearCompactionQueuesResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearRegionBlockCacheRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearRegionBlockCacheResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearSlowLogResponseRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ClearSlowLogResponses;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CloseRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactionSwitchRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.CompactionSwitchResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ExecuteProceduresResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.FlushRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionLoadRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionLoadResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetStoreFileRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetStoreFileResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ReplicateWALEntryResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.RollWALWriterRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.RollWALWriterResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.SlowLogResponseRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.SlowLogResponses;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.StopServerRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.StopServerResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateConfigurationRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateConfigurationResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WarmupRegionRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WarmupRegionResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsRequest;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.QuotaProtos.GetSpaceQuotaSnapshotsResponse;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
+
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
+import org.apache.hbase.thirdparty.com.google.protobuf.Message;
+import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
+import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
+
+/**
+ * Implements the regionserver RPC services for {@link HReplicationServer}.
+ */
+@InterfaceAudience.Private
+@SuppressWarnings("deprecation")
+public class ReplicationServerRpcServices implements HBaseRPCErrorHandler,
+    AdminService.BlockingInterface, PriorityFunction {
+
+  protected static final Logger LOG = LoggerFactory.getLogger(ReplicationServerRpcServices.class);
+
+  /** Parameter name for port replication server listens on. */
+  public static final String REPLICATION_SERVER_PORT = "hbase.replicationserver.port";
+
+  /** Default port replication server listens on. */
+  public static final int DEFAULT_REPLICATION_SERVER_PORT = 16040;
+
+  /** default port for replication server web api */
+  public static final int DEFAULT_REPLICATION_SERVER_INFOPORT = 16050;
+
+  // Request counter.
+  final LongAdder requestCount = new LongAdder();
+
+  // Server to handle client requests.
+  final RpcServerInterface rpcServer;
+  final InetSocketAddress isa;
+
+  protected final HReplicationServer replicationServer;
+
+  // The reference to the priority extraction function
+  private final PriorityFunction priority;
+
+  private AccessChecker accessChecker;
+  private ZKPermissionWatcher zkPermissionWatcher;
+
+  public ReplicationServerRpcServices(final HReplicationServer rs) throws IOException {
+    final Configuration conf = rs.getConfiguration();
+    replicationServer = rs;
+
+    final RpcSchedulerFactory rpcSchedulerFactory;
+    try {
+      rpcSchedulerFactory = getRpcSchedulerFactoryClass().asSubclass(RpcSchedulerFactory.class)
+          .getDeclaredConstructor().newInstance();
+    } catch (NoSuchMethodException | InvocationTargetException |
+        InstantiationException | IllegalAccessException e) {
+      throw new IllegalArgumentException(e);
+    }
+    // Server to handle client requests.
+    final InetSocketAddress initialIsa;
+    final InetSocketAddress bindAddress;
+
+    String hostname = DNS.getHostname(conf, ServerType.REPLICATIONSERVER);
+    int port = conf.getInt(REPLICATION_SERVER_PORT, DEFAULT_REPLICATION_SERVER_PORT);
+    // Creation of a HSA will force a resolve.
+    initialIsa = new InetSocketAddress(hostname, port);
+    bindAddress = new InetSocketAddress(
+        conf.get("hbase.replicationserver.ipc.address", hostname), port);
+
+    if (initialIsa.getAddress() == null) {
+      throw new IllegalArgumentException("Failed resolve of " + initialIsa);
+    }
+    priority = createPriority();
+    // Using Address means we don't get the IP too. Shorten it more even to just the host name
+    // w/o the domain.
+    final String name = rs.getProcessName() + "/" +
+        Address.fromParts(initialIsa.getHostName(), initialIsa.getPort()).toStringWithoutDomain();
+    // Set how many times to retry talking to another server over Connection.
+    ConnectionUtils.setServerSideHConnectionRetriesConfig(conf, name, LOG);
+    rpcServer = createRpcServer(rs, rpcSchedulerFactory, bindAddress, name);
+
+    final InetSocketAddress address = rpcServer.getListenerAddress();
+    if (address == null) {
+      throw new IOException("Listener channel is closed");
+    }
+    // Set our address, however we need the final port that was given to rpcServer
+    isa = new InetSocketAddress(initialIsa.getHostName(), address.getPort());
+    rpcServer.setErrorHandler(this);
+    rs.setName(name);
+  }
+
+  protected RpcServerInterface createRpcServer(
+      final Server server,
+      final RpcSchedulerFactory rpcSchedulerFactory,
+      final InetSocketAddress bindAddress,
+      final String name
+  ) throws IOException {
+    final Configuration conf = server.getConfiguration();
+    boolean reservoirEnabled = conf.getBoolean(ByteBuffAllocator.ALLOCATOR_POOL_ENABLED_KEY, true);
+    try {
+      return RpcServerFactory.createRpcServer(server, name, getServices(),
+          bindAddress, // use final bindAddress for this server.
+          conf, rpcSchedulerFactory.create(conf, this, server), reservoirEnabled);
+    } catch (BindException be) {
+      throw new IOException(be.getMessage() + ". To switch ports use the '"
+          + REPLICATION_SERVER_PORT + "' configuration property.",
+          be.getCause() != null ? be.getCause() : be);
+    }
+  }
+
+  protected Class<?> getRpcSchedulerFactoryClass() {
+    final Configuration conf = replicationServer.getConfiguration();
+    return conf.getClass(RSRpcServices.REGION_SERVER_RPC_SCHEDULER_FACTORY_CLASS,
+      SimpleRpcSchedulerFactory.class);
+  }
+
+  public PriorityFunction getPriority() {
+    return priority;
+  }
+
+  public Configuration getConfiguration() {
+    return replicationServer.getConfiguration();
+  }
+
+  void start(ZKWatcher zkWatcher) {
+    if (AccessChecker.isAuthorizationSupported(getConfiguration())) {
+      accessChecker = new AccessChecker(getConfiguration());
+    } else {
+      accessChecker = new NoopAccessChecker(getConfiguration());
+    }
+    if (!getConfiguration().getBoolean("hbase.testing.nocluster", false) && zkWatcher != null) {
+      zkPermissionWatcher =
+          new ZKPermissionWatcher(zkWatcher, accessChecker.getAuthManager(), getConfiguration());
+      try {
+        zkPermissionWatcher.start();
+      } catch (KeeperException e) {
+        LOG.error("ZooKeeper permission watcher initialization failed", e);
+      }
+    }
+    rpcServer.start();
+  }
+
+  void stop() {
+    if (zkPermissionWatcher != null) {
+      zkPermissionWatcher.close();
+    }
+    rpcServer.stop();
+  }
+
+  /**
+   * By default, put up an Admin Service.
+   * @return immutable list of blocking services and the security info classes that this server
+   *   supports
+   */
+  protected List<BlockingServiceAndInterface> getServices() {
+    List<BlockingServiceAndInterface> bssi = new ArrayList<>();
+    bssi.add(new BlockingServiceAndInterface(
+      AdminService.newReflectiveBlockingService(this),
+      AdminService.BlockingInterface.class));
+    return new ImmutableList.Builder<BlockingServiceAndInterface>().addAll(bssi).build();
+  }
+
+  public InetSocketAddress getSocketAddress() {
+    return isa;
+  }
+
+  @Override
+  public int getPriority(RequestHeader header, Message param, User user) {
+    return priority.getPriority(header, param, user);
+  }
+
+  @Override
+  public long getDeadline(RequestHeader header, Message param) {
+    return priority.getDeadline(header, param);
+  }
+
+  /*
+   * Check if an OOME and, if so, abort immediately to avoid creating more objects.
+   *
+   * @param e
+   *
+   * @return True if we OOME'd and are aborting.
+   */
+  @Override
+  public boolean checkOOME(final Throwable e) {
+    return exitIfOOME(e);
+  }
+
+  public static boolean exitIfOOME(final Throwable e) {
+    boolean stop = false;
+    try {
+      if (e instanceof OutOfMemoryError
+          || (e.getCause() != null && e.getCause() instanceof OutOfMemoryError)
+          || (e.getMessage() != null && e.getMessage().contains(
+              "java.lang.OutOfMemoryError"))) {
+        stop = true;
+        LOG.error(HBaseMarkers.FATAL, "Run out of memory; "
+          + ReplicationServerRpcServices.class.getSimpleName() + " will abort itself immediately",
+          e);
+      }
+    } finally {
+      if (stop) {
+        Runtime.getRuntime().halt(1);
+      }
+    }
+    return stop;
+  }
+
+  /**
+   * Called to verify that this server is up and running.
+   */
+  protected void checkOpen() throws IOException {
+    if (replicationServer.isAborted()) {
+      throw new RegionServerAbortedException("Server " + replicationServer.getServerName()
+          + " aborting");
+    }
+    if (replicationServer.isStopped()) {
+      throw new RegionServerStoppedException("Server " + replicationServer.getServerName()
+          + " stopping");
+    }
+    if (!replicationServer.isOnline()) {
+      throw new ServerNotRunningYetException("Server " + replicationServer.getServerName()
+          + " is not running yet");
+    }
+  }
+
+  @Override
+  public GetRegionInfoResponse getRegionInfo(RpcController controller, GetRegionInfoRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public GetStoreFileResponse getStoreFile(RpcController controller, GetStoreFileRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public GetOnlineRegionResponse getOnlineRegion(RpcController controller,
+      GetOnlineRegionRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public OpenRegionResponse openRegion(RpcController controller, OpenRegionRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public WarmupRegionResponse warmupRegion(RpcController controller, WarmupRegionRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public CloseRegionResponse closeRegion(RpcController controller, CloseRegionRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public FlushRegionResponse flushRegion(RpcController controller, FlushRegionRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public CompactionSwitchResponse compactionSwitch(RpcController controller,
+      CompactionSwitchRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public CompactRegionResponse compactRegion(RpcController controller,
+      CompactRegionRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public ReplicateWALEntryResponse replay(RpcController controller,
+      ReplicateWALEntryRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public RollWALWriterResponse rollWALWriter(RpcController controller, RollWALWriterRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public GetServerInfoResponse getServerInfo(RpcController controller, GetServerInfoRequest request)
+      throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  /**
+   * Stop the replication server.
+   *
+   * @param controller the RPC controller
+   * @param request the request
+   */
+  @Override
+  @QosPriority(priority=HConstants.ADMIN_QOS)
+  public StopServerResponse stopServer(final RpcController controller,
+      final StopServerRequest request) {
+    requestCount.increment();
+    String reason = request.getReason();
+    replicationServer.stop(reason);
+    return StopServerResponse.newBuilder().build();
+  }
+
+  @Override
+  public UpdateFavoredNodesResponse updateFavoredNodes(RpcController controller,
+      UpdateFavoredNodesRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public UpdateConfigurationResponse updateConfiguration(RpcController controller,
+      UpdateConfigurationRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public GetRegionLoadResponse getRegionLoad(RpcController controller,
+      GetRegionLoadRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public ClearCompactionQueuesResponse clearCompactionQueues(RpcController controller,
+      ClearCompactionQueuesRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public ClearRegionBlockCacheResponse clearRegionBlockCache(RpcController controller,
+      ClearRegionBlockCacheRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public GetSpaceQuotaSnapshotsResponse getSpaceQuotaSnapshots(RpcController controller,
+      GetSpaceQuotaSnapshotsRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public ExecuteProceduresResponse executeProcedures(RpcController controller,
+      ExecuteProceduresRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public SlowLogResponses getSlowLogResponses(RpcController controller,
+      SlowLogResponseRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public SlowLogResponses getLargeLogResponses(RpcController controller,
+      SlowLogResponseRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
+  @Override
+  public ClearSlowLogResponses clearSlowLogsResponses(RpcController controller,
+      ClearSlowLogResponseRequest request) throws ServiceException {
+    throw new ServiceException(new UnsupportedOperationException("This's Replication Server"));
+  }
+
  /**
   * @return the {@link AccessChecker} used by this RPC service
   */
  protected AccessChecker getAccessChecker() {
    return accessChecker;
  }
+
  /**
   * Creates the {@link PriorityFunction} for this RPC service. The replication server does not
   * differentiate between requests, so every request gets priority 0 and no deadline.
   */
  protected PriorityFunction createPriority() {
    return new PriorityFunction() {
      @Override
      public int getPriority(RequestHeader header, Message param, User user) {
        return 0; // flat priority: all requests are treated the same
      }

      @Override
      public long getDeadline(RequestHeader header, Message param) {
        return 0; // no deadline-based ordering
      }
    };
  }
+
+  @Override
+  public ReplicateWALEntryResponse replicateWALEntry(RpcController controller,
+      ReplicateWALEntryRequest request) throws ServiceException {
+    try {
+      checkOpen();
+      if (replicationServer.getReplicationSinkService() != null) {
+        requestCount.increment();
+        List<WALEntry> entries = request.getEntryList();
+        CellScanner cellScanner = ((HBaseRpcController) controller).cellScanner();
+        // TODO: CP pre
+        replicationServer.getReplicationSinkService().replicateLogEntries(entries, cellScanner,
+            request.getReplicationClusterId(), request.getSourceBaseNamespaceDirPath(),
+            request.getSourceHFileArchiveDirPath());
+        // TODO: CP post
+        return ReplicateWALEntryResponse.newBuilder().build();
+      } else {
+        throw new ServiceException("Replication services are not initialized yet");
+      }
+    } catch (IOException ie) {
+      throw new ServiceException(ie);
+    }
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
new file mode 100644
index 0000000..6a0ef3d
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationServer.java
@@ -0,0 +1,151 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.replication;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.AsyncClusterConnection;
+import org.apache.hadoop.hbase.client.AsyncRegionServerAdmin;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.protobuf.ReplicationProtbufUtil;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.testclassification.ReplicationTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.CommonFSUtils;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.wal.WAL;
+import org.apache.hadoop.hbase.wal.WAL.Entry;
+import org.apache.hadoop.hbase.wal.WALEdit;
+import org.apache.hadoop.hbase.wal.WALKeyImpl;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ReplicationTests.class, MediumTests.class})
+public class TestReplicationServer {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestReplicationServer.class);
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestReplicationServer.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  private static Configuration CONF = TEST_UTIL.getConfiguration();
+
+  private static HMaster MASTER;
+
+  private static HReplicationServer replicationServer;
+
+  private static Path baseNamespaceDir;
+  private static Path hfileArchiveDir;
+  private static String replicationClusterId;
+
+  private static int BATCH_SIZE = 10;
+
+  private static TableName TABLENAME = TableName.valueOf("t");
+  private static String FAMILY = "C";
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster();
+    MASTER = TEST_UTIL.getMiniHBaseCluster().getMaster();
+
+    replicationServer = new HReplicationServer(CONF);
+    replicationServer.start();
+
+    TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster();
+    TEST_UTIL.waitFor(60000, () -> replicationServer.isOnline());
+
+    Path rootDir = CommonFSUtils.getRootDir(CONF);
+    baseNamespaceDir = new Path(rootDir, new Path(HConstants.BASE_NAMESPACE_DIR));
+    hfileArchiveDir = new Path(rootDir, new Path(HConstants.HFILE_ARCHIVE_DIRECTORY));
+    replicationClusterId = "12345";
+  }
+
+  @AfterClass
+  public static void afterClass() throws IOException {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Before
+  public void before() throws Exception {
+    TEST_UTIL.createTable(TABLENAME, FAMILY);
+    TEST_UTIL.waitTableAvailable(TABLENAME);
+  }
+
+  @After
+  public void after() throws IOException {
+    TEST_UTIL.deleteTableIfAny(TABLENAME);
+  }
+
+  @Test
+  public void testReplicateWAL() throws Exception {
+    AsyncClusterConnection conn = TEST_UTIL.getHBaseCluster().getLiveRegionServerThreads().get(0)
+        .getRegionServer().getAsyncClusterConnection();
+    AsyncRegionServerAdmin rsAdmin = conn.getRegionServerAdmin(replicationServer.getServerName());
+
+    Entry[] entries = new Entry[BATCH_SIZE];
+    for(int i = 0; i < BATCH_SIZE; i++) {
+      entries[i] = generateEdit(i, TABLENAME, Bytes.toBytes(i));
+    }
+
+    ReplicationProtbufUtil.replicateWALEntry(rsAdmin, entries, replicationClusterId,
+        baseNamespaceDir, hfileArchiveDir, 1000);
+
+    for (int i = 0; i < BATCH_SIZE; i++) {
+      Table table = TEST_UTIL.getConnection().getTable(TABLENAME);
+      Result result = table.get(new Get(Bytes.toBytes(i)));
+      Cell cell = result.getColumnLatestCell(Bytes.toBytes(FAMILY), Bytes.toBytes(FAMILY));
+      assertNotNull(cell);
+      assertTrue(Bytes.equals(CellUtil.cloneValue(cell), Bytes.toBytes(i)));
+    }
+  }
+
+  private static WAL.Entry generateEdit(int i, TableName tableName, byte[] row) {
+    Threads.sleep(1);
+    long timestamp = System.currentTimeMillis();
+    WALKeyImpl key = new WALKeyImpl(new byte[32], tableName, i, timestamp,
+        HConstants.DEFAULT_CLUSTER_ID, null);
+    WALEdit edit = new WALEdit();
+    edit.add(new KeyValue(row, Bytes.toBytes(FAMILY), Bytes.toBytes(FAMILY), timestamp, row));
+    return new WAL.Entry(key, edit);
+  }
+}