Posted to commits@hbase.apache.org by op...@apache.org on 2019/06/18 12:32:02 UTC

[hbase] branch HBASE-21879 updated (0d12281 -> afaf7a9)

This is an automated email from the ASF dual-hosted git repository.

openinx pushed a change to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git.


    omit 0d12281  HBASE-22598 Deprecated the hbase.ipc.server.reservoir.initial.buffer.size & hbase.ipc.server.reservoir.initial.max for HBase2.x compatibility (#318)
    omit 7840613b HBASE-22531 The HFileReaderImpl#shouldUseHeap return the incorrect true when disabled BlockCache (#304)
    omit e4a9147  HBASE-22491 Separate the heap HFileBlock and offheap HFileBlock because the heap block won't need refCnt and save into prevBlocks list before shipping (#268)
    omit a6e3d5b  HBASE-22504 Optimize the MultiByteBuff#get(ByteBuffer, offset, len)
    omit e2c8e65  HBASE-22483 It's better to use 65KB as the default buffer size in ByteBuffAllocator (#279)
    omit 810d287  HBASE-22463 Some paths in HFileScannerImpl did not consider block#release which will exhaust the ByteBuffAllocator (#257)
    omit 8bfe0bb  HBASE-22422 Retain an ByteBuff with refCnt=0 when getBlock from LRUCache (#242)
    omit 812042d  HBASE-22435 Add a UT to address the HFileBlock#heapSize() in TestHeapSize
    omit 081b167  HBASE-22412 Improve the metrics in ByteBuffAllocator
    omit 951c19a  HBASE-22090 The HFileBlock#CacheableDeserializer should pass ByteBuffAllocator to the newly created HFileBlock
    omit e11dc0b  HBASE-21921 Notify users if the ByteBufAllocator is always allocating ByteBuffers from heap which means the increacing GC pressure
    omit 2e82751  HBASE-22122 Change to release mob hfile's block after rpc server shipped response to client
    omit 4b06dd8  HBASE-21937 Make the Compression#decompress can accept ByteBuff as input
    omit d4e41fd  HBASE-22211 Remove the returnBlock method because we can just call HFileBlock#release directly
    omit c13deb2  HBASE-21957 Unify refCount of BucketEntry and refCount of hbase.nio.ByteBuff into one
    omit ce0edf5  HBASE-22159 ByteBufferIOEngine should support write off-heap ByteBuff to the bufferArray
    omit 7aa0e66  HBASE-22127 Ensure that the block cached in the LRUBlockCache offheap is allocated from heap
    omit 2fa3786  HBASE-22005 Use ByteBuff's refcnt to track the life cycle of data block
    omit b37fd5c  HBASE-22016 Rewrite the block reading methods by using hbase.nio.ByteBuff
    omit bb0ae84  HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input. (addendum)
    omit efcde65  HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input.
    omit e66c9b8  HBASE-21916 Abstract an ByteBuffAllocator to allocate/free ByteBuffer in ByteBufferPool
     add 6735cc1  HBASE-22502 Purge the logs when we reach the EOF for the last wal file when replication
     add 4e98bf1  HBASE-22500 Addendum set hadoop 3 version to 3.1.2 in pom
     add b2b7811  HBASE-22511 Add some more rs-status paths to RS UI links
     add 04e5bf9  HBASE-22523 Refactor RegionStates#getAssignmentsByTable to make it easy to understand
     add ab99e24  HBASE-22522 The integration test in master branch's nightly job has error "ERROR: Only found 1050 rows."
     add 073de44  HBASE-22513 Admin#getQuota does not work correctly if exceedThrottleQuota is set
     add 51c49a1  HBASE-22497 Mark HBase 1.2 line as EOM
     add b4d5a9a  HBASE-22518 yetus personality is treating branch-1.4 like earlier branches for hadoopcheck
     add 7d9f79b  Add hbaseconasia2019 to front-page news section
     add 2603785  HBASE-22524 Refactor TestReplicationSyncUpTool
     add aacacc5  HBASE-22501 Modify the hadoop support matrix in the ref guide
     add dd99ea8  HBASE-22535 TestShellRSGroups fails when run on JDK11
     add 2ae3b48  HBASE-21415 Snapshot requests can be done concurrently and the master will simply run them sequentially.
     add 0ad4b4e  HBASE-22536 TestForeignExceptionSerialization fails when run on JDK11
     add b31d755  Add 2.1.5 for 2.1.4
     add 9b23e1d  HBASE-22534 TestCellUtil fails when run on JDK11
     add 5bc996e  HBASE-22160 Add sorting functionality in regionserver web UI for user
     add addc145  Remove 2.1.4 notice addressed by 2.1.5 release
     add a826f41  HBASE-22546 TestRegionServerHostname#testRegionServerHostname fails reliably for me
     add 4d4f4ee  HBASE-22549 Document how to re-run github PR checks
     add 6ea2566  [HBASE-22525] : Return procedures to use bulk operation over iteration for Perf Improvement (#284)
     add 13c5af3  HBASE-22520 Avoid possible NPE while performing seekBefore in Hal… (#281)
     add 2e9087b  HBASE-22453 A NullPointerException could be thrown (#272)
     add 302a9ce  test commit adding my details in developer section pom.xml
     add 4157d1e  HBASE-22116 Added keytab and principal support for HttpDoAsClient.
     add 1b39140  HBASE-22477 Throwing exception when meta region is not in OPEN state in client registry may crash a master
     add 8028112  HBASE-22548 Split TestAdmin1
     add 8b989fd  HBASE-22495 Update SyncTable section, explaining from which specific minor versions doDeletes/doPuts is available
     add b32e716  HBASE-22481 Javadoc Warnings reference not found
     add aab70b5  HBASE-22554 Upgrade to surefire 2.22.2
     add 0838c35  HBASE-2284 Javadoc Warnings: Fix warnings coming due to @result tag in TestCoprocessorWhitelistMasterObserver (#256)
     add 454eddd  HBASE-22482 Fix tag Warnings in TestPartitionedMobCompactor (#255)
     add 876d2ac  HBASE-22284 optimization StringBuilder.append of AbstractMemStore.toString #182
     add e8ef8ad  HBASE-22357 Fixed remaining Checkstyle issues in hbase-replication
     add f620285  HBASE-22262 Removed deprecated methods from Filter class
     add 99ea828  HBASE-22373 Moved maven-eclipse-plugin to eclipse profile
     add 8db9c84  HBASE-22551 TestMasterOperationsForRegionReplicas is flakey
     add 03436e6  HBASE-22552 Rewrite TestEndToEndSplitTransaction.testCanSplitJustAfterASplit
     add 2f9d995  HBASE-22563 Reduce old builds by half
     add ada772a  HBASE-22408 add dead and unknown server open regions metric to AM
     add 8386571  HBASE-22560 Upgrade jackson (2.9.9) and jetty (9.3.27) minor versions
     add 934d469  HBASE-22545 TestLogLevel broken
     add 8745d5e  HBASE-21935 Replace make_rc.sh with customized spark/dev/create-release
     add ef44f30  HBASE-21953 Point make_rc.sh at ./create-release/do-release-docker.sh instead
     add 853e586  HBASE-22566 Update the 2.x upgrade chapter to include default compaction throughput limits
     add 04a91f9  HBASE-22569 Should treat null consistency as Consistency.STRONG in ConnectionUtils.timelineConsistentRead
     add 863b7b9  HBASE-22395 Document RC voting guidelines in ref guide
     add c2cf06d  HBASE-22575 Add 2.2.0 to the download page
     add 0445186  HBASE-22458: TestClassFinder fails when run on JDK11
     add 0fead7f  HBASE-22264 Separate out jars related to JDK 11 into a folder in /lib
     add 35d7088  HBASE-22562 Remove dead code: skipControl
     add 9ba7651  HBASE-22565 Javadoc Warnings: @see cannot be used in inline documentation
     add 8e15f4e  HBASE-22530 The metrics of store files count of region are returned to clients incorrectly (Eungsop Yoo)
     add ab44531  HBASE-22559 [RPC] set guard against CALL_QUEUE_HANDLER_FACTOR_CONF_KEY
     add 1712d2b  HBASE-22584 Downloads page is out of date for 1.4 and 1.3 release lines (#308)
     add 47a9644  HBASE-22275 Removed deprecated getRegionInfo in HRegionLocation
     add ed30909  HBASE-19303 Removed ReplicationAdmin and all its usages
     add 9b413cf  HBASE-22590 Remove the deprecated methods in Table interface (#309)
     add 5f2699e  HBASE-22593 Added Jenv file to gitignore (#314)
     add 214553d  [HBASE-22591] : RecoverableZooKeeper improvement for getData, getChil… (#310)
     add 5da5deb  HBASE-22344 Documented the deprecation of public and limited private APIs (#208)
     new 568d129  HBASE-21916 Abstract an ByteBuffAllocator to allocate/free ByteBuffer in ByteBufferPool
     new 53c783f  HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input.
     new 6aec709  HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input. (addendum)
     new 7698bae  HBASE-22016 Rewrite the block reading methods by using hbase.nio.ByteBuff
     new 57df2d7  HBASE-22005 Use ByteBuff's refcnt to track the life cycle of data block
     new 982cb7f  HBASE-22127 Ensure that the block cached in the LRUBlockCache offheap is allocated from heap
     new 0a105dc  HBASE-22159 ByteBufferIOEngine should support write off-heap ByteBuff to the bufferArray
     new c323fc7  HBASE-21957 Unify refCount of BucketEntry and refCount of hbase.nio.ByteBuff into one
     new 1b92063  HBASE-22211 Remove the returnBlock method because we can just call HFileBlock#release directly
     new 6b584de  HBASE-21937 Make the Compression#decompress can accept ByteBuff as input
     new c589504  HBASE-22122 Change to release mob hfile's block after rpc server shipped response to client
     new d3c3a3f  HBASE-21921 Notify users if the ByteBufAllocator is always allocating ByteBuffers from heap which means the increacing GC pressure
     new 5600873  HBASE-22090 The HFileBlock#CacheableDeserializer should pass ByteBuffAllocator to the newly created HFileBlock
     new 69eeb59  HBASE-22412 Improve the metrics in ByteBuffAllocator
     new b8e1ad5  HBASE-22435 Add a UT to address the HFileBlock#heapSize() in TestHeapSize
     new 6028c5c  HBASE-22422 Retain an ByteBuff with refCnt=0 when getBlock from LRUCache (#242)
     new 3b031c6  HBASE-22463 Some paths in HFileScannerImpl did not consider block#release which will exhaust the ByteBuffAllocator (#257)
     new e0fd6cf  HBASE-22483 It's better to use 65KB as the default buffer size in ByteBuffAllocator (#279)
     new 66c0ffd  HBASE-22504 Optimize the MultiByteBuff#get(ByteBuffer, offset, len)
     new c3c78a7  HBASE-22491 Separate the heap HFileBlock and offheap HFileBlock because the heap block won't need refCnt and save into prevBlocks list before shipping (#268)
     new 5572cf4  HBASE-22531 The HFileReaderImpl#shouldUseHeap return the incorrect true when disabled BlockCache (#304)
     new afaf7a9  HBASE-22598 Deprecated the hbase.ipc.server.reservoir.initial.buffer.size & hbase.ipc.server.reservoir.initial.max for HBase2.x compatibility (#318)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (0d12281)
            \
             N -- N -- N   refs/heads/HBASE-21879 (afaf7a9)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 22 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .gitignore                                         |    1 +
 bin/hbase                                          |   63 +
 dev-support/Jenkinsfile                            |    2 +-
 dev-support/Jenkinsfile_GitHub                     |    2 +-
 dev-support/checkcompatibility.py                  |   12 +-
 dev-support/create-release/do-release-docker.sh    |  165 +++
 dev-support/create-release/do-release.sh           |   81 ++
 dev-support/create-release/hbase-rm/Dockerfile     |   47 +
 dev-support/create-release/release-build.sh        |  349 ++++++
 dev-support/create-release/release-tag.sh          |  101 ++
 dev-support/create-release/release-util.sh         |  312 +++++
 dev-support/create-release/vote.tmpl               |   28 +
 .../flaky-tests/flaky-reporting.Jenkinsfile        |    4 +-
 .../flaky-tests/run-flaky-tests.Jenkinsfile        |    2 +-
 dev-support/hbase-personality.sh                   |   13 +-
 .../hbase_nightly_pseudo-distributed-test.sh       |    4 +-
 dev-support/make_rc.sh                             |  102 +-
 hbase-assembly/pom.xml                             |    5 +
 .../src/main/assembly/hadoop-two-compat.xml        |   65 +-
 .../hadoop/hbase/backup/util/RestoreTool.java      |    5 +-
 .../hadoop/hbase/backup/TestBackupMerge.java       |    2 +-
 .../TestIncrementalBackupMergeWithFailures.java    |    2 +-
 .../src/main/resources/hbase/checkstyle.xml        |    7 +
 .../org/apache/hadoop/hbase/HColumnDescriptor.java |    5 +-
 .../org/apache/hadoop/hbase/HRegionLocation.java   |   11 -
 .../org/apache/hadoop/hbase/HTableDescriptor.java  |   24 +-
 .../org/apache/hadoop/hbase/MetaTableAccessor.java |    4 +-
 .../apache/hadoop/hbase/RegionMetricsBuilder.java  |    2 +-
 .../java/org/apache/hadoop/hbase/client/Admin.java |   40 +-
 .../org/apache/hadoop/hbase/client/AsyncAdmin.java |   20 +-
 .../apache/hadoop/hbase/client/AsyncProcess.java   |    2 +-
 .../hbase/client/AsyncRequestFutureImpl.java       |    8 +-
 .../apache/hadoop/hbase/client/ClientScanner.java  |    7 +-
 .../hadoop/hbase/client/ConnectionUtils.java       |    2 +-
 .../hadoop/hbase/client/FlushRegionCallable.java   |    4 +-
 .../org/apache/hadoop/hbase/client/HTable.java     |  111 +-
 .../hadoop/hbase/client/MultiServerCallable.java   |    3 +-
 .../hbase/client/RegionAdminServiceCallable.java   |    2 +-
 .../hbase/client/RegionCoprocessorRpcChannel.java  |    2 +-
 .../client/RegionCoprocessorRpcChannelImpl.java    |    6 +-
 .../hadoop/hbase/client/RegionServerCallable.java  |    9 +-
 .../hadoop/hbase/client/ResultStatsUtil.java       |    2 +-
 .../hbase/client/ReversedScannerCallable.java      |    6 +-
 .../client/RpcRetryingCallerWithReadReplicas.java  |    2 +-
 .../java/org/apache/hadoop/hbase/client/Scan.java  |   51 +-
 .../hadoop/hbase/client/ScannerCallable.java       |    9 +-
 .../hbase/client/ScannerCallableWithReplicas.java  |   11 +-
 .../hadoop/hbase/client/SecureBulkLoadClient.java  |    4 +-
 .../hbase/client/SimpleRequestController.java      |   12 +-
 .../hadoop/hbase/client/SnapshotDescription.java   |   26 +-
 .../java/org/apache/hadoop/hbase/client/Table.java |  264 -----
 .../hadoop/hbase/client/TableDescriptor.java       |    4 +
 .../hbase/client/TableDescriptorBuilder.java       |   25 +-
 .../hadoop/hbase/client/ZKAsyncRegistry.java       |    4 +-
 .../hbase/client/replication/ReplicationAdmin.java |  396 -------
 .../hadoop/hbase/filter/ColumnCountGetFilter.java  |    6 -
 .../hbase/filter/ColumnPaginationFilter.java       |    6 -
 .../hadoop/hbase/filter/ColumnPrefixFilter.java    |    6 -
 .../hadoop/hbase/filter/ColumnRangeFilter.java     |    6 -
 .../hadoop/hbase/filter/DependentColumnFilter.java |   10 -
 .../apache/hadoop/hbase/filter/FamilyFilter.java   |    6 -
 .../org/apache/hadoop/hbase/filter/Filter.java     |   50 +-
 .../org/apache/hadoop/hbase/filter/FilterBase.java |   18 +-
 .../org/apache/hadoop/hbase/filter/FilterList.java |   11 -
 .../apache/hadoop/hbase/filter/FilterListBase.java |    5 -
 .../hadoop/hbase/filter/FilterListWithAND.java     |   15 -
 .../hadoop/hbase/filter/FilterListWithOR.java      |   15 -
 .../hadoop/hbase/filter/FirstKeyOnlyFilter.java    |    6 -
 .../FirstKeyValueMatchingQualifiersFilter.java     |    9 +-
 .../apache/hadoop/hbase/filter/FuzzyRowFilter.java |    6 -
 .../hadoop/hbase/filter/InclusiveStopFilter.java   |    6 -
 .../apache/hadoop/hbase/filter/KeyOnlyFilter.java  |    6 -
 .../hadoop/hbase/filter/MultiRowRangeFilter.java   |    6 -
 .../hbase/filter/MultipleColumnPrefixFilter.java   |    6 -
 .../org/apache/hadoop/hbase/filter/PageFilter.java |    6 -
 .../apache/hadoop/hbase/filter/PrefixFilter.java   |    6 -
 .../hadoop/hbase/filter/QualifierFilter.java       |    6 -
 .../hadoop/hbase/filter/RandomRowFilter.java       |    6 -
 .../org/apache/hadoop/hbase/filter/RowFilter.java  |    6 -
 .../hbase/filter/SingleColumnValueFilter.java      |    6 -
 .../org/apache/hadoop/hbase/filter/SkipFilter.java |    6 -
 .../hadoop/hbase/filter/TimestampsFilter.java      |    6 -
 .../apache/hadoop/hbase/filter/ValueFilter.java    |    6 -
 .../hadoop/hbase/filter/WhileMatchFilter.java      |   15 +-
 .../apache/hadoop/hbase/quotas/QuotaRetriever.java |    6 +
 .../hadoop/hbase/client/TestAsyncProcess.java      |    4 +-
 .../hbase/client/TestSimpleRequestController.java  |   10 +-
 .../java/org/apache/hadoop/hbase/AuthUtil.java     |   10 +-
 .../apache/hadoop/hbase/HBaseConfiguration.java    |    8 +-
 .../java/org/apache/hadoop/hbase/TableName.java    |    5 +-
 .../hadoop/hbase/io/ImmutableBytesWritable.java    |    4 +-
 .../java/org/apache/hadoop/hbase/util/Bytes.java   |   18 +-
 .../java/org/apache/hadoop/hbase/util/Counter.java |    5 +-
 .../org/apache/hadoop/hbase/util/VersionInfo.java  |    2 +-
 .../java/org/apache/hadoop/hbase/ClassFinder.java  |   22 +-
 .../hadoop/hbase/HBaseCommonTestingUtility.java    |    3 +-
 .../java/org/apache/hadoop/hbase/TestCellUtil.java |    9 +-
 .../org/apache/hadoop/hbase/TestClassFinder.java   |   53 +-
 .../hbase/coprocessor/TestCoprocessorEndpoint.java |    8 +-
 .../regionserver/TestServerCustomProtocol.java     |   14 +-
 .../apache/hadoop/hbase/thrift/HttpDoAsClient.java |  105 +-
 .../master/MetricsAssignmentManagerSource.java     |    6 +
 .../master/MetricsAssignmentManagerSourceImpl.java |   14 +
 .../apache/hadoop/hbase/http/log/TestLogLevel.java |   33 +-
 .../hbase/mapreduce/IntegrationTestBulkLoad.java   |    2 +-
 .../hbase/test/IntegrationTestReplication.java     |    3 +-
 .../hadoop/hbase/mapred/HRegionPartitioner.java    |    2 +-
 .../mapred/MultiTableSnapshotInputFormat.java      |   27 +-
 .../hadoop/hbase/mapred/TableOutputFormat.java     |    6 +-
 .../apache/hadoop/hbase/mapreduce/CellCreator.java |    2 +
 .../hadoop/hbase/mapreduce/HFileOutputFormat2.java |    5 +-
 .../hadoop/hbase/mapreduce/HRegionPartitioner.java |    2 +-
 .../hbase/mapreduce/MultiTableInputFormatBase.java |   16 +-
 .../mapreduce/SimpleTotalOrderPartitioner.java     |    9 +
 .../hbase/mapreduce/TableInputFormatBase.java      |    6 +-
 .../hadoop/hbase/mapreduce/TableMapReduceUtil.java |   10 +-
 .../hbase/mapreduce/TestHFileOutputFormat2.java    |   29 +-
 .../hbase/replication/TestVerifyReplication.java   |  128 +-
 .../hadoop/hbase/replication/ReplicationPeer.java  |    5 +-
 .../replication/ZKReplicationQueueStorage.java     |   10 +-
 .../src/main/resources/META-INF/LICENSE.vm         |  256 +++-
 .../src/main/resources/supplemental-models.xml     |  352 ++++++
 .../apache/hadoop/hbase/rest/SchemaResource.java   |   24 +-
 .../hadoop/hbase/rest/client/RemoteHTable.java     |  485 +++-----
 .../hadoop/hbase/rest/TestTableResource.java       |    4 +-
 .../apache/hadoop/hbase/rest/TestTableScan.java    |    6 +-
 .../hadoop/hbase/rest/client/TestRemoteTable.java  |   13 +-
 .../hadoop/hbase/rsgroup/RSGroupAdminClient.java   |    2 +-
 hbase-server/pom.xml                               |   51 +-
 .../hbase/tmpl/regionserver/RSStatusTmpl.jamon     |   35 +
 .../hbase/tmpl/regionserver/RegionListTmpl.jamon   |   34 +-
 .../hadoop/hbase/client/VersionInfoUtil.java       |    2 +-
 .../hadoop/hbase/coprocessor/RegionObserver.java   |    7 +-
 .../apache/hadoop/hbase/filter/FilterWrapper.java  |   13 -
 .../hadoop/hbase/io/HalfStoreFileReader.java       |    7 +-
 .../org/apache/hadoop/hbase/ipc/RpcExecutor.java   |   14 +
 .../org/apache/hadoop/hbase/master/HMaster.java    |   21 +-
 .../hbase/master/MetricsAssignmentManager.java     |    8 +
 .../apache/hadoop/hbase/master/ServerManager.java  |   17 +-
 .../master/SnapshotOfRegionAssignmentFromMeta.java |    6 +-
 .../hbase/master/assignment/AssignmentManager.java |   89 +-
 .../hbase/master/assignment/RegionStates.java      |   81 +-
 .../hbase/procedure/ProcedureManagerHost.java      |    6 +-
 .../hbase/regionserver/AbstractMemStore.java       |    2 +-
 .../hbase/regionserver/RowTooBigException.java     |    4 +-
 .../hbase/regionserver/RpcSchedulerFactory.java    |    4 +
 .../regionserver/SimpleRpcSchedulerFactory.java    |    4 +
 .../hadoop/hbase/regionserver/StoreFileReader.java |   14 +-
 ...ressureAwareCompactionThroughputController.java |    9 -
 .../PressureAwareFlushThroughputController.java    |    6 -
 .../PressureAwareThroughputController.java         |    8 -
 .../hadoop/hbase/regionserver/wal/FSHLog.java      |    3 +-
 .../hbase/regionserver/wal/ProtobufLogReader.java  |   18 +-
 .../RegionReplicaReplicationEndpoint.java          |   14 +-
 .../hbase/security/access/AccessController.java    |   25 +-
 .../hadoop/hbase/tool/LoadIncrementalHFiles.java   |    6 +-
 .../org/apache/hadoop/hbase/util/HBaseFsck.java    |   19 +-
 .../apache/hadoop/hbase/util/RegionSplitter.java   |   11 +-
 .../java/org/apache/hadoop/hbase/wal/WALEdit.java  |   10 +-
 .../main/resources/hbase-webapps/master/table.jsp  |   11 +-
 .../org/apache/hadoop/hbase/HBaseTestCase.java     |    5 +-
 .../apache/hadoop/hbase/HBaseTestingUtility.java   |  112 +-
 .../org/apache/hadoop/hbase/MetaMockingUtil.java   |   13 +-
 .../apache/hadoop/hbase/TestHRegionLocation.java   |    2 +-
 .../apache/hadoop/hbase/TestMetaTableAccessor.java |    2 +-
 .../org/apache/hadoop/hbase/TestRegionMetrics.java |   23 +
 .../org/apache/hadoop/hbase/client/TestAdmin.java  |  550 +++++++++
 .../org/apache/hadoop/hbase/client/TestAdmin1.java | 1245 +++-----------------
 .../org/apache/hadoop/hbase/client/TestAdmin2.java |   40 +-
 .../org/apache/hadoop/hbase/client/TestAdmin3.java |  409 +++++++
 .../apache/hadoop/hbase/client/TestAdminBase.java  |   70 ++
 .../hbase/client/TestBlockEvictionFromClient.java  |   22 +-
 .../hbase/client/TestConnectionImplementation.java |   67 +-
 .../hadoop/hbase/client/TestFromClientSide.java    |  101 +-
 .../hadoop/hbase/client/TestFromClientSide3.java   |   26 +-
 .../hbase/client/TestIncrementsFromClientSide.java |   40 +-
 .../hbase/client/TestReplicaWithCluster.java       |   11 +-
 .../hadoop/hbase/client/TestReplicasClient.java    |   14 +-
 .../hbase/client/TestScannersFromClientSide.java   |    3 +-
 .../hadoop/hbase/client/TestTableFavoredNodes.java |   17 +-
 .../client/replication/TestReplicationAdmin.java   | 1185 -------------------
 .../TestReplicationAdminWithClusters.java          |  336 ------
 ...ReplicationAdminWithTwoDifferentZKClusters.java |  108 --
 .../hbase/coprocessor/TestCoprocessorMetrics.java  |   10 +-
 .../hbase/coprocessor/TestMasterObserver.java      |    6 +-
 .../TestPassCustomCellViaRegionObserver.java       |    5 +-
 .../coprocessor/TestRegionObserverInterface.java   |    4 +-
 .../TestForeignExceptionSerialization.java         |   19 +-
 .../hbase/filter/TestMultiRowRangeFilter.java      |   65 +-
 .../io/encoding/TestLoadAndSwitchEncodeOnDisk.java |    2 +-
 .../master/TestCatalogJanitorInMemoryStates.java   |   16 +-
 .../TestMasterOperationsForRegionReplicas.java     |  184 +--
 .../hadoop/hbase/master/TestWarmupRegion.java      |    5 +-
 .../procedure/MasterProcedureTestingUtility.java   |    7 +-
 .../compactions/TestPartitionedMobCompactor.java   |    9 +-
 .../apache/hadoop/hbase/quotas/TestQuotaAdmin.java |    1 +
 .../hadoop/hbase/regionserver/RegionAsTable.java   |   89 --
 .../regionserver/TestEndToEndSplitTransaction.java |  134 +--
 .../hbase/regionserver/TestHRegionFileSystem.java  |    2 +-
 .../hbase/regionserver/TestHRegionOnCluster.java   |    6 +-
 .../regionserver/TestHRegionServerBulkLoad.java    |    4 +-
 .../TestNewVersionBehaviorFromClientSide.java      |   11 +-
 .../regionserver/TestPerColumnFamilyFlush.java     |    7 -
 .../hbase/regionserver/TestRegionReplicas.java     |    5 +-
 .../regionserver/TestRegionServerHostname.java     |    5 +-
 .../regionserver/TestRegionServerMetrics.java      |    4 +-
 .../regionserver/TestRegionServerNoMaster.java     |   14 +-
 .../TestRegionServerOnlineConfigChange.java        |    4 +-
 .../regionserver/TestRemoveRegionMetrics.java      |    6 +-
 .../TestSettingTimeoutOnBlockingPoint.java         |   14 +-
 .../querymatcher/TestUserScanQueryMatcher.java     |    5 +-
 .../replication/TestMultiSlaveReplication.java     |    8 +-
 .../replication/TestNamespaceReplication.java      |    4 +-
 .../replication/TestPerTableCFReplication.java     |   27 +-
 .../hbase/replication/TestReplicationBase.java     |  127 +-
 .../TestReplicationChangingPeerRegionservers.java  |   10 +-
 .../TestReplicationDisableInactivePeer.java        |    8 +-
 .../replication/TestReplicationDroppedTables.java  |   50 +-
 .../TestReplicationEmptyWALRecovery.java           |   22 +-
 .../hbase/replication/TestReplicationEndpoint.java |  126 +-
 .../replication/TestReplicationKillMasterRS.java   |    2 +-
 .../TestReplicationKillMasterRSCompressed.java     |    2 +-
 ...ReplicationKillMasterRSWithSeparateOldWALs.java |    4 +-
 .../hbase/replication/TestReplicationKillRS.java   |    6 +-
 .../replication/TestReplicationKillSlaveRS.java    |    2 +-
 ...tReplicationKillSlaveRSWithSeparateOldWALs.java |    4 +-
 .../replication/TestReplicationMetricsforUI.java   |    6 +-
 .../replication/TestReplicationSmallTests.java     |   17 +-
 .../hbase/replication/TestReplicationStatus.java   |    8 +-
 .../TestReplicationStatusAfterLagging.java         |   10 +-
 ...licationStatusBothNormalAndRecoveryLagging.java |    8 +-
 ...ationStatusSourceStartedTargetStoppedNewOp.java |    8 +-
 ...ationStatusSourceStartedTargetStoppedNoOps.java |    8 +-
 ...atusSourceStartedTargetStoppedWithRecovery.java |    8 +-
 .../replication/TestReplicationSyncUpTool.java     |  325 ++---
 .../replication/TestReplicationSyncUpToolBase.java |  141 +++
 ...estReplicationSyncUpToolWithBulkLoadedData.java |  131 +-
 .../hbase/replication/TestReplicationWithTags.java |    7 +-
 ...estReplicationEndpointWithMultipleAsyncWAL.java |    4 +-
 .../TestReplicationEndpointWithMultipleWAL.java    |    4 +-
 ...KillMasterRSCompressedWithMultipleAsyncWAL.java |    4 +-
 ...ationKillMasterRSCompressedWithMultipleWAL.java |    4 +-
 ...tReplicationSyncUpToolWithMultipleAsyncWAL.java |   12 +-
 .../TestReplicationSyncUpToolWithMultipleWAL.java  |   14 +-
 .../TestGlobalReplicationThrottler.java            |   13 +-
 .../TestRegionReplicaReplicationEndpoint.java      |   30 +-
 ...stRegionReplicaReplicationEndpointNoMaster.java |    6 +-
 .../regionserver/TestReplicationSink.java          |    4 +-
 .../replication/regionserver/TestReplicator.java   |   36 +-
 .../regionserver/TestWALEntrySinkFilter.java       |   72 --
 .../security/access/TestAccessController.java      |   11 +-
 .../TestCoprocessorWhitelistMasterObserver.java    |   71 +-
 .../hbase/snapshot/TestRegionSnapshotTask.java     |    2 +-
 .../hadoop/hbase/util/BaseTestHBaseFsck.java       |    4 +-
 .../hadoop/hbase/util/TestRegionSplitter.java      |    4 +-
 .../util/hbck/OfflineMetaRebuildTestCore.java      |    8 +-
 .../org/apache/hadoop/hbase/wal/TestWALSplit.java  |    4 +-
 hbase-shell/src/main/ruby/hbase/admin.rb           |    2 +-
 hbase-shell/src/main/ruby/hbase/table.rb           |    2 +-
 .../src/main/ruby/shell/commands/locate_region.rb  |    2 +-
 hbase-shell/src/test/ruby/hbase/admin_test.rb      |    4 +-
 .../hbase/thrift/ThriftHBaseServiceHandler.java    |   13 +-
 .../hbase/thrift2/ThriftHBaseServiceHandler.java   |    2 +-
 .../hadoop/hbase/thrift2/ThriftUtilities.java      |    4 +-
 .../hadoop/hbase/thrift2/client/ThriftTable.java   |   11 +-
 .../hadoop/hbase/thrift2/TestThriftConnection.java |    3 +-
 hbase-zookeeper/pom.xml                            |   51 +-
 .../hbase/zookeeper/RecoverableZooKeeper.java      |  145 +--
 pom.xml                                            |   32 +-
 src/main/asciidoc/_chapters/community.adoc         |    3 -
 src/main/asciidoc/_chapters/configuration.adoc     |   14 +-
 src/main/asciidoc/_chapters/developer.adoc         |  332 ++++--
 src/main/asciidoc/_chapters/ops_mgt.adoc           |   10 +
 src/main/asciidoc/_chapters/upgrading.adoc         |   33 +-
 src/site/site.xml                                  |    5 -
 src/site/xdoc/downloads.xml                        |   76 +-
 src/site/xdoc/index.xml                            |    6 +-
 src/site/xdoc/old_news.xml                         |    5 +
 278 files changed, 5820 insertions(+), 6387 deletions(-)
 create mode 100755 dev-support/create-release/do-release-docker.sh
 create mode 100755 dev-support/create-release/do-release.sh
 create mode 100644 dev-support/create-release/hbase-rm/Dockerfile
 create mode 100755 dev-support/create-release/release-build.sh
 create mode 100755 dev-support/create-release/release-tag.sh
 create mode 100755 dev-support/create-release/release-util.sh
 create mode 100644 dev-support/create-release/vote.tmpl
 delete mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/client/replication/ReplicationAdmin.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin3.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdminBase.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestReplicationAdmin.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestReplicationAdminWithClusters.java
 delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/client/replication/TestReplicationAdminWithTwoDifferentZKClusters.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSyncUpToolBase.java
 rename {hbase-endpoint => hbase-server}/src/test/java/org/apache/hadoop/hbase/replication/TestReplicationSyncUpToolWithBulkLoadedData.java (62%)


[hbase] 09/22: HBASE-22211 Remove the returnBlock method because we can just call HFileBlock#release directly

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 1b9206329444aae0335b4e7ff26a717e96ded091
Author: huzheng <op...@gmail.com>
AuthorDate: Mon Apr 22 15:57:12 2019 +0800

    HBASE-22211 Remove the returnBlock method because we can just call HFileBlock#release directly
---
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   | 24 -----------
 .../hadoop/hbase/io/hfile/BlockCacheUtil.java      |  4 +-
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |  6 ---
 .../hadoop/hbase/io/hfile/CompoundBloomFilter.java | 10 ++---
 .../org/apache/hadoop/hbase/io/hfile/HFile.java    |  6 ---
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java     | 17 ++++----
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     | 46 ++++++----------------
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |  4 +-
 .../hadoop/hbase/regionserver/StoreFileReader.java |  6 ++-
 .../hadoop/hbase/io/hfile/TestCacheOnWrite.java    |  6 +--
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    |  2 +-
 .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java |  4 --
 .../hbase/regionserver/TestHeapMemoryManager.java  |  4 --
 13 files changed, 37 insertions(+), 102 deletions(-)
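
In practice this patch replaces the old "return the block to the cache" idiom with a plain reference-count release on the block itself. A minimal sketch of the new calling pattern, assuming a hypothetical caller (reader, offset and onDiskSizeWithHeader are illustrative names, not code from this patch):

    HFileBlock block = reader.readBlock(offset, onDiskSizeWithHeader, /* cacheBlock */ true,
        /* pread */ false, /* isCompaction */ false, /* updateCacheMetrics */ true,
        BlockType.DATA, DataBlockEncoding.NONE);
    try {
      // ... consume block.getBufferReadOnly() ...
    } finally {
      if (block != null) {
        // previously: reader.returnBlock(block) or blockCache.returnBlock(cacheKey, block)
        block.release();
      }
    }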

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 570519c..6849a97 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.io.hfile;
 import java.util.Iterator;
 
 import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 
 /**
  * Block cache interface. Anything that implements the {@link Cacheable}
@@ -133,27 +132,4 @@ public interface BlockCache extends Iterable<CachedBlock> {
    * @return The list of sub blockcaches that make up this one; returns null if no sub caches.
    */
   BlockCache [] getBlockCaches();
-
-  /**
-   * Called when the scanner using the block decides to decrease refCnt of block and return the
-   * block once its usage is over. This API should be called after the block is used, failing to do
-   * so may have adverse effects by preventing the blocks from being evicted because of which it
-   * will prevent new hot blocks from getting added to the block cache. The implementation of the
-   * BlockCache will decide on what to be done with the block based on the memory type of the
-   * block's {@link MemoryType}. <br>
-   * <br>
-   * Note that if two handlers read from backingMap in off-heap BucketCache at the same time, BC
-   * will return two ByteBuff, which reference to the same memory area in buckets, but wrapped by
-   * two different ByteBuff, and each of them has its own independent refCnt(=1). so here, if
-   * returnBlock with different blocks in two handlers, it has no problem. but if both the two
-   * handlers returnBlock with the same block, then the refCnt exception will happen here. <br>
-   * TODO let's unify the ByteBuff's refCnt and BucketEntry's refCnt in HBASE-21957, after that
-   * we'll just call the Cacheable#release instead of calling release in some path and calling
-   * returnBlock in other paths in current version.
-   * @param cacheKey the cache key of the block
-   * @param block the hfileblock to be returned
-   */
-  default void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
-    block.release();
-  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
index bf3a279..46e8e24 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
@@ -247,8 +247,8 @@ public class BlockCacheUtil {
         return false;
       }
     } finally {
-      // return the block since we need to decrement the count
-      blockCache.returnBlock(cacheKey, existingBlock);
+      // Release this block to decrement the reference count.
+      existingBlock.release();
     }
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index cb01540..36916359 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -379,12 +379,6 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
     this.l1Cache.setMaxSize(size);
   }
 
-  @Override
-  public void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
-    // returnBlock is meaningful for L2 cache alone.
-    this.l2Cache.returnBlock(cacheKey, block);
-  }
-
   @VisibleForTesting
   public int getRpcRefCount(BlockCacheKey cacheKey) {
     return (this.l2Cache instanceof BucketCache)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
index 2aceed7..29f29e1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CompoundBloomFilter.java
@@ -105,8 +105,8 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
       result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf,
           bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
     } finally {
-      // After the use return back the block if it was served from a cache.
-      reader.returnBlock(bloomBlock);
+      // After the use, should release the block to deallocate byte buffers.
+      bloomBlock.release();
     }
     if (numPositivesPerChunk != null && result) {
       // Update statistics. Only used in unit tests.
@@ -144,10 +144,10 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
     try {
       ByteBuff bloomBuf = bloomBlock.getBufferReadOnly();
       result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(),
-          bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
+        bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type);
     } finally {
-      // After the use return back the block if it was served from a cache.
-      reader.returnBlock(bloomBlock);
+      // After the use, should release the block to deallocate the byte buffers.
+      bloomBlock.release();
     }
     if (numPositivesPerChunk != null && result) {
       // Update statistics. Only used in unit tests.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 78ebedc..33e815e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -407,12 +407,6 @@ public class HFile {
         final boolean updateCacheMetrics, BlockType expectedBlockType,
         DataBlockEncoding expectedDataBlockEncoding)
         throws IOException;
-
-    /**
-     * Return the given block back to the cache, if it was obtained from cache.
-     * @param block Block to be returned.
-     */
-    void returnBlock(HFileBlock block);
   }
 
   /** An interface used by clients to open and iterate an {@link HFile}. */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index 90d11ac..ad61839 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -381,10 +381,9 @@ public class HFileBlockIndex {
             nextIndexedKey = tmpNextIndexKV;
           }
         } finally {
-          if (!dataBlock) {
-            // Return the block immediately if it is not the
-            // data block
-            cachingBlockReader.returnBlock(block);
+          if (!dataBlock && block != null) {
+            // Release the block immediately if it is not the data block
+            block.release();
           }
         }
       }
@@ -394,9 +393,11 @@ public class HFileBlockIndex {
         // Though we have retrieved a data block we have found an issue
         // in the retrieved data block. Hence returned the block so that
         // the ref count can be decremented
-        cachingBlockReader.returnBlock(block);
-        throw new IOException("Reached a data block at level " + lookupLevel +
-            " but the number of levels is " + searchTreeLevel);
+        if (block != null) {
+          block.release();
+        }
+        throw new IOException("Reached a data block at level " + lookupLevel
+            + " but the number of levels is " + searchTreeLevel);
       }
 
       // set the next indexed key for the current block.
@@ -436,7 +437,7 @@ public class HFileBlockIndex {
           byte[] bytes = b.toBytes(keyOffset, keyLen);
           targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
         } finally {
-          cachingBlockReader.returnBlock(midLeafBlock);
+          midLeafBlock.release();
         }
       } else {
         // The middle of the root-level index.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 1137961..02e56e9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -291,7 +291,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
                 // Ideally here the readBlock won't find the block in cache. We call this
                 // readBlock so that block data is read from FS and cached in BC. we must call
                 // returnBlock here to decrease the reference count of block.
-                returnBlock(block);
+                block.release();
               }
             }
           } catch (IOException e) {
@@ -377,20 +377,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     return fileSize;
   }
 
-  @Override
-  public void returnBlock(HFileBlock block) {
-    if (block != null) {
-      if (this.cacheConf.getBlockCache().isPresent()) {
-        BlockCacheKey cacheKey = new BlockCacheKey(this.getFileContext().getHFileName(),
-            block.getOffset(), this.isPrimaryReplicaReader(), block.getBlockType());
-        cacheConf.getBlockCache().get().returnBlock(cacheKey, block);
-      } else {
-        // Release the block here, it means the RPC path didn't ref to this block any more.
-        block.release();
-      }
-    }
-  }
-
   /**
    * @return the first key in the file. May be null if file has no entries. Note
    *         that this is not the first row key, but rather the byte form of the
@@ -553,23 +539,15 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       this.curBlock = null;
     }
 
-    private void returnBlock(HFileBlock block) {
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Returning the block : " + block);
-      }
-      this.reader.returnBlock(block);
-    }
-
     private void returnBlocks(boolean returnAll) {
-      for (int i = 0; i < this.prevBlocks.size(); i++) {
-        returnBlock(this.prevBlocks.get(i));
-      }
+      this.prevBlocks.forEach(HFileBlock::release);
       this.prevBlocks.clear();
       if (returnAll && this.curBlock != null) {
-        returnBlock(this.curBlock);
+        this.curBlock.release();
         this.curBlock = null;
       }
     }
+
     @Override
     public boolean isSeeked(){
       return blockBuffer != null;
@@ -897,7 +875,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         // The first key in the current block 'seekToBlock' is greater than the given
         // seekBefore key. We will go ahead by reading the next block that satisfies the
         // given key. Return the current block before reading the next one.
-        reader.returnBlock(seekToBlock);
+        seekToBlock.release();
         // It is important that we compute and pass onDiskSize to the block
         // reader so that it does not have to read the header separately to
         // figure out the size.  Currently, we do not have a way to do this
@@ -948,7 +926,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         if (block != null && !block.getBlockType().isData()) { // Findbugs: NP_NULL_ON_SOME_PATH
           // Whatever block we read we will be returning it unless
           // it is a datablock. Just in case the blocks are non data blocks
-          reader.returnBlock(block);
+          block.release();
         }
       } while (!block.getBlockType().isData());
 
@@ -1325,9 +1303,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
           HFileBlock compressedBlock = cachedBlock;
           cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
-          // In case of compressed block after unpacking we can return the compressed block
+          // In case of compressed block after unpacking we can release the compressed block
           if (compressedBlock != cachedBlock) {
-            cache.returnBlock(cacheKey, compressedBlock);
+            compressedBlock.release();
           }
         }
         validateBlockType(cachedBlock, expectedBlockType);
@@ -1361,11 +1339,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
             // schema definition change.
             LOG.info("Evicting cached block with key " + cacheKey
                 + " because of a data block encoding mismatch" + "; expected: "
-                + expectedDataBlockEncoding + ", actual: " + actualDataBlockEncoding + ", path="
-                + path);
-            // This is an error scenario. so here we need to decrement the
-            // count.
-            cache.returnBlock(cacheKey, cachedBlock);
+                + expectedDataBlockEncoding + ", actual: " + actualDataBlockEncoding + ", path=" + path);
+            // This is an error scenario. so here we need to release the block.
+            cachedBlock.release();
             cache.evictBlock(cacheKey);
           }
           return null;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index b01d014..82e64e7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -530,8 +530,8 @@ public class LruBlockCache implements FirstLevelBlockCache {
             if (result instanceof HFileBlock && ((HFileBlock) result).usesSharedMemory()) {
               Cacheable original = result;
               result = ((HFileBlock) original).deepCloneOnHeap();
-              // deepClone an new one, so need to put the original one back to free it.
-              victimHandler.returnBlock(cacheKey, original);
+              // deepClone an new one, so need to release the original one to deallocate it.
+              original.release();
             }
             cacheBlock(cacheKey, result, /* inMemory = */ false);
           }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
index 702ea8d..f5e7e76 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileReader.java
@@ -456,8 +456,10 @@ public class StoreFileReader {
       LOG.error("Bad bloom filter data -- proceeding without", e);
       setGeneralBloomFilterFaulty();
     } finally {
-      // Return the bloom block so that its ref count can be decremented.
-      reader.returnBlock(bloomBlock);
+      // Release the bloom block so that its ref count can be decremented.
+      if (bloomBlock != null) {
+        bloomBlock.release();
+      }
     }
     return true;
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
index 60a4445..3a769b0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
@@ -336,15 +336,15 @@ public class TestCacheOnWrite {
         // Call return twice because for the isCache cased the counter would have got incremented
         // twice. Notice that here we need to returnBlock with different blocks. see comments in
         // BucketCache#returnBlock.
-        blockCache.returnBlock(blockCacheKey, blockPair.getSecond());
+        blockPair.getSecond().release();
         if (cacheCompressedData) {
           if (this.compress == Compression.Algorithm.NONE
               || cowType == CacheOnWriteType.INDEX_BLOCKS
               || cowType == CacheOnWriteType.BLOOM_BLOCKS) {
-            blockCache.returnBlock(blockCacheKey, blockPair.getFirst());
+            blockPair.getFirst().release();
           }
         } else {
-          blockCache.returnBlock(blockCacheKey, blockPair.getFirst());
+          blockPair.getFirst().release();
         }
       }
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index 0ed933b..101fd91 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -223,7 +223,7 @@ public class TestHFile  {
           Assert.assertTrue(hfb.isOnHeap());
         }
       } finally {
-        combined.returnBlock(key, cachedBlock);
+        cachedBlock.release();
       }
       block.release(); // return back the ByteBuffer back to allocator.
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
index 6f8d0b0..faef386 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
@@ -176,10 +176,6 @@ public class TestHFileBlockIndex {
     }
 
     @Override
-    public void returnBlock(HFileBlock block) {
-    }
-
-    @Override
     public HFileBlock readBlock(long offset, long onDiskSize,
         boolean cacheBlock, boolean pread, boolean isCompaction,
         boolean updateCacheMetrics, BlockType expectedBlockType,
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
index 8c9ce75..f8f73ca 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
@@ -735,10 +735,6 @@ public class TestHeapMemoryManager {
       return null;
     }
 
-    @Override
-    public void returnBlock(BlockCacheKey cacheKey, Cacheable buf) {
-    }
-
     public void setTestBlockSize(long testBlockSize) {
       this.testBlockSize = testBlockSize;
     }


[hbase] 18/22: HBASE-22483 It's better to use 65KB as the default buffer size in ByteBuffAllocator (#279)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit e0fd6cf101faaf4449229b0061177674e35cfd9e
Author: openinx <op...@gmail.com>
AuthorDate: Fri May 31 17:53:00 2019 +0800

    HBASE-22483 It's better to use 65KB as the default buffer size in ByteBuffAllocator (#279)
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java    | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 75a4699..c85675b 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -71,8 +71,24 @@ public class ByteBuffAllocator {
   public static final String MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.allocator.max.buffer.count";
 
   public static final String BUFFER_SIZE_KEY = "hbase.ipc.server.allocator.buffer.size";
-  // 64 KB. Making it same as the chunk size what we will write/read to/from the socket channel.
-  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
+
+  /**
+   * There're some reasons why better to choose 65KB(rather than 64KB) as the default buffer size:
+   * <p>
+   * 1. Almost all of the data blocks have the block size: 64KB + delta, whose delta is very small,
+   * depends on the size of lastKeyValue. If we set buffer.size=64KB, then each block will be
+   * allocated as a MultiByteBuff: one 64KB DirectByteBuffer and delta bytes HeapByteBuffer, the
+   * HeapByteBuffer will increase the GC pressure. Ideally, we should let the data block to be
+   * allocated as a SingleByteBuff, it has simpler data structure, faster access speed, less heap
+   * usage.
+   * <p>
+   * 2. Since the blocks are MultiByteBuff when using buffer.size=64KB, so we have to calculate the
+   * checksum by an temp heap copying (see HBASE-21917), while if it's a SingleByteBuff, we can
+   * speed the checksum by calling the hadoop' checksum in native lib, which is more faster.
+   * <p>
+   * For performance comparison, please see HBASE-22483.
+   */
+  public static final int DEFAULT_BUFFER_SIZE = 65 * 1024;
 
   public static final String MIN_ALLOCATE_SIZE_KEY =
       "hbase.ipc.server.reservoir.minimal.allocating.size";


[hbase] 04/22: HBASE-22016 Rewrite the block reading methods by using hbase.nio.ByteBuff

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 7698baea1f4a8038693d4676091c6872b26b5d2e
Author: huzheng <op...@gmail.com>
AuthorDate: Fri Mar 8 16:46:06 2019 +0800

    HBASE-22016 Rewrite the block reading methods by using hbase.nio.ByteBuff
---
 .../apache/hadoop/hbase/io/hfile/BlockIOUtils.java | 223 ++++++++++++++
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 337 ++++++++-------------
 ...ckPositionalRead.java => TestBlockIOUtils.java} | 122 ++++++--
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |  14 +-
 4 files changed, 453 insertions(+), 243 deletions(-)
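
The new BlockIOUtils helper reads straight into an hbase.nio.ByteBuff, using the ByteBuffer read path when the wrapped HDFS stream supports it and falling back to a heap read plus copy otherwise. A short sketch of how it is meant to be driven, assuming a hypothetical same-package caller (the stream and size variables are illustrative, not from this patch):

    // Allocate a destination ByteBuff and fill it from the input stream.
    ByteBuff block = new SingleByteBuff(ByteBuffer.allocateDirect(onDiskSizeWithHeader));
    // readFully uses ByteBufferReadable when the wrapped stream supports it; otherwise
    // it reads into a temporary heap array and copies into the ByteBuff.
    BlockIOUtils.readFully(block, fsDataInputStream, onDiskSizeWithHeader);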

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java
new file mode 100644
index 0000000..dbd5b2e
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java
@@ -0,0 +1,223 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.fs.ByteBufferReadable;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+class BlockIOUtils {
+
+  static boolean isByteBufferReadable(FSDataInputStream is) {
+    InputStream cur = is.getWrappedStream();
+    for (;;) {
+      if ((cur instanceof FSDataInputStream)) {
+        cur = ((FSDataInputStream) cur).getWrappedStream();
+      } else {
+        break;
+      }
+    }
+    return cur instanceof ByteBufferReadable;
+  }
+
+  /**
+   * Read length bytes into ByteBuffers directly.
+   * @param buf the destination {@link ByteBuff}
+   * @param dis the HDFS input stream which implement the ByteBufferReadable interface.
+   * @param length bytes to read.
+   * @throws IOException exception to throw if any error happen
+   */
+  static void readFully(ByteBuff buf, FSDataInputStream dis, int length) throws IOException {
+    if (!isByteBufferReadable(dis)) {
+      // If InputStream does not support the ByteBuffer read, just read to heap and copy bytes to
+      // the destination ByteBuff.
+      byte[] heapBuf = new byte[length];
+      IOUtils.readFully(dis, heapBuf, 0, length);
+      copyToByteBuff(heapBuf, 0, length, buf);
+      return;
+    }
+    ByteBuffer[] buffers = buf.nioByteBuffers();
+    int remain = length;
+    int idx = 0;
+    ByteBuffer cur = buffers[idx];
+    while (remain > 0) {
+      while (!cur.hasRemaining()) {
+        if (++idx >= buffers.length) {
+        throw new IOException(
+            "Not enough ByteBuffers to read the remaining " + remain + " bytes");
+        }
+        cur = buffers[idx];
+      }
+      cur.limit(cur.position() + Math.min(remain, cur.remaining()));
+      int bytesRead = dis.read(cur);
+      if (bytesRead < 0) {
+        throw new IOException(
+            "Premature EOF from inputStream, but still need " + remain + " " + "bytes");
+      }
+      remain -= bytesRead;
+    }
+  }
+
+  /**
+   * Read from an input stream at least <code>necessaryLen</code> and if possible,
+   * <code>extraLen</code> also if available. Analogous to
+   * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a number of "extra"
+   * bytes to also optionally read.
+   * @param in the input stream to read from
+   * @param buf the buffer to read into
+   * @param bufOffset the destination offset in the buffer
+   * @param necessaryLen the number of bytes that are absolutely necessary to read
+   * @param extraLen the number of extra bytes that would be nice to read
+   * @return true if succeeded reading the extra bytes
+   * @throws IOException if failed to read the necessary bytes
+   */
+  private static boolean readWithExtraOnHeap(InputStream in, byte[] buf, int bufOffset,
+      int necessaryLen, int extraLen) throws IOException {
+    int bytesRemaining = necessaryLen + extraLen;
+    while (bytesRemaining > 0) {
+      int ret = in.read(buf, bufOffset, bytesRemaining);
+      if (ret < 0) {
+        if (bytesRemaining <= extraLen) {
+          // We could not read the "extra data", but that is OK.
+          break;
+        }
+        throw new IOException("Premature EOF from inputStream (read " + "returned " + ret
+            + ", was trying to read " + necessaryLen + " necessary bytes and " + extraLen
+            + " extra bytes, " + "successfully read " + (necessaryLen + extraLen - bytesRemaining));
+      }
+      bufOffset += ret;
+      bytesRemaining -= ret;
+    }
+    return bytesRemaining <= 0;
+  }
+
+  /**
+   * Read bytes into ByteBuffers directly; the buffers will contain either necessaryLen bytes or
+   * necessaryLen + extraLen bytes, depending on how many bytes the last read returned.
+   * @param buf the destination {@link ByteBuff}.
+   * @param dis input stream to read.
+   * @param necessaryLen bytes which we must read
+   * @param extraLen bytes which we may read
+   * @return true if we have finished reading the extraLen bytes into the ByteBuffers, false if
+   *         the extra bytes have not been read yet.
+   * @throws IOException if failed to read the necessary bytes.
+   */
+  static boolean readWithExtra(ByteBuff buf, FSDataInputStream dis, int necessaryLen, int extraLen)
+      throws IOException {
+    if (!isByteBufferReadable(dis)) {
+      // If InputStream does not support the ByteBuffer read, just read to heap and copy bytes to
+      // the destination ByteBuff.
+      byte[] heapBuf = new byte[necessaryLen + extraLen];
+      boolean ret = readWithExtraOnHeap(dis, heapBuf, 0, necessaryLen, extraLen);
+      copyToByteBuff(heapBuf, 0, heapBuf.length, buf);
+      return ret;
+    }
+    ByteBuffer[] buffers = buf.nioByteBuffers();
+    int bytesRead = 0;
+    int remain = necessaryLen + extraLen;
+    int idx = 0;
+    ByteBuffer cur = buffers[idx];
+    while (bytesRead < necessaryLen) {
+      while (!cur.hasRemaining()) {
+        if (++idx >= buffers.length) {
+          throw new IOException("Not enough ByteBuffers to read the reminding " + remain + "bytes");
+        }
+        cur = buffers[idx];
+      }
+      cur.limit(cur.position() + Math.min(remain, cur.remaining()));
+      int ret = dis.read(cur);
+      if (ret < 0) {
+        throw new IOException("Premature EOF from inputStream (read returned " + ret
+            + ", was trying to read " + necessaryLen + " necessary bytes and " + extraLen
+            + " extra bytes, successfully read " + bytesRead);
+      }
+      bytesRead += ret;
+      remain -= ret;
+    }
+    return (extraLen > 0) && (bytesRead == necessaryLen + extraLen);
+  }
+
+  /**
+   * Read from an input stream at least <code>necessaryLen</code> and if possible,
+   * <code>extraLen</code> also if available. Analogous to
+   * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but uses positional read and
+   * specifies a number of "extra" bytes that would be desirable but not absolutely necessary to
+   * read.
+   * @param buff ByteBuff to read into.
+   * @param dis the input stream to read from
+   * @param position the position within the stream from which to start reading
+   * @param necessaryLen the number of bytes that are absolutely necessary to read
+   * @param extraLen the number of extra bytes that would be nice to read
+   * @return true if and only if extraLen is > 0 and reading those extra bytes was successful
+   * @throws IOException if failed to read the necessary bytes
+   */
+  static boolean preadWithExtra(ByteBuff buff, FSDataInputStream dis, long position,
+      int necessaryLen, int extraLen) throws IOException {
+    int remain = necessaryLen + extraLen;
+    byte[] buf = new byte[remain];
+    int bytesRead = 0;
+    while (bytesRead < necessaryLen) {
+      int ret = dis.read(position + bytesRead, buf, bytesRead, remain);
+      if (ret < 0) {
+        throw new IOException("Premature EOF from inputStream (positional read returned " + ret
+            + ", was trying to read " + necessaryLen + " necessary bytes and " + extraLen
+            + " extra bytes, successfully read " + bytesRead);
+      }
+      bytesRead += ret;
+      remain -= ret;
+    }
+    // Copy the bytes from the on-heap byte[] to the ByteBuffer[] for now; once HDFS-3246 is
+    // resolved, we will read the bytes into the ByteBuffer[] directly without allocating any
+    // on-heap byte[].
+    // TODO Keep the byte[] copy here so the ByteBuffer[]-based preadWithExtra API can be exposed
+    // to the upper layer now; only this method needs refactoring once ByteBuffer pread works.
+    copyToByteBuff(buf, 0, bytesRead, buff);
+    return (extraLen > 0) && (bytesRead == necessaryLen + extraLen);
+  }
+
+  private static int copyToByteBuff(byte[] buf, int offset, int len, ByteBuff out)
+      throws IOException {
+    if (offset < 0 || len < 0 || offset + len > buf.length) {
+      throw new IOException("Invalid offset=" + offset + " and len=" + len + ", cap=" + buf.length);
+    }
+    ByteBuffer[] buffers = out.nioByteBuffers();
+    int idx = 0, remain = len, copyLen;
+    ByteBuffer cur = buffers[idx];
+    while (remain > 0) {
+      while (!cur.hasRemaining()) {
+        if (++idx >= buffers.length) {
+          throw new IOException("Not enough ByteBuffers to read the reminding " + remain + "bytes");
+        }
+        cur = buffers[idx];
+      }
+      copyLen = Math.min(cur.remaining(), remain);
+      cur.put(buf, offset, copyLen);
+      remain -= copyLen;
+      offset += copyLen;
+    }
+    return len;
+  }
+}
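
To make the new helpers easier to follow, here is a short self-contained sketch of how readFully,
readWithExtra and preadWithExtra are meant to be driven against a ByteBuff. It is modeled on the
TestBlockIOUtils cases later in this commit and is not part of the patch; the class name and
scratch-file location are assumptions, and it has to sit in org.apache.hadoop.hbase.io.hfile
because BlockIOUtils is package-private.

package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;

// Editor's sketch, not part of the patch: exercises the three BlockIOUtils helpers above.
public final class BlockIOUtilsSketch {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    Path p = new Path(System.getProperty("java.io.tmpdir"), "block-io-utils-sketch");
    try (FSDataOutputStream out = fs.create(p, true)) {
      out.writeBytes("hello world"); // 11 bytes on disk
    }

    // readFully: the destination ByteBuff must have at least `length` bytes remaining.
    ByteBuff single = new SingleByteBuff(ByteBuffer.allocate(11));
    try (FSDataInputStream in = fs.open(p)) {
      BlockIOUtils.readFully(single, in, 11);
    }

    // readWithExtra: 8 necessary bytes plus 3 optional "extra" bytes; the boolean result says
    // whether the extra bytes were read. A MultiByteBuff shows that the helpers can scatter
    // the bytes across several backing ByteBuffers.
    ByteBuff multi =
        new MultiByteBuff(ByteBuffer.allocate(4), ByteBuffer.allocate(4), ByteBuffer.allocate(4));
    try (FSDataInputStream in = fs.open(p)) {
      System.out.println("stream extra read: " + BlockIOUtils.readWithExtra(multi, in, 8, 3));
    }

    // preadWithExtra: same idea, but using a positional read starting at the given file offset.
    ByteBuff pread = new SingleByteBuff(ByteBuffer.allocate(8));
    try (FSDataInputStream in = fs.open(p)) {
      System.out.println("pread extra read: " + BlockIOUtils.preadWithExtra(pread, in, 0L, 6, 2));
    }
  }
}
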
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 91e63fd..4773678 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -21,7 +21,6 @@ import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.Lock;
@@ -51,7 +50,6 @@ import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ChecksumType;
 import org.apache.hadoop.hbase.util.ClassSize;
-import org.apache.hadoop.io.IOUtils;
 
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
@@ -280,9 +278,7 @@ public class HFileBlock implements Cacheable {
       boolean usesChecksum = buf.get() == (byte) 1;
       long offset = buf.getLong();
       int nextBlockOnDiskSize = buf.getInt();
-      HFileBlock hFileBlock =
-          new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null);
-      return hFileBlock;
+      return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null);
     }
 
     @Override
@@ -315,9 +311,9 @@ public class HFileBlock implements Cacheable {
    * param.
    */
   private HFileBlock(HFileBlock that, boolean bufCopy) {
-    init(that.blockType, that.onDiskSizeWithoutHeader,
-        that.uncompressedSizeWithoutHeader, that.prevBlockOffset,
-        that.offset, that.onDiskDataSizeWithHeader, that.nextBlockOnDiskSize, that.fileContext);
+    init(that.blockType, that.onDiskSizeWithoutHeader, that.uncompressedSizeWithoutHeader,
+      that.prevBlockOffset, that.offset, that.onDiskDataSizeWithHeader, that.nextBlockOnDiskSize,
+      that.fileContext);
     if (bufCopy) {
       this.buf = new SingleByteBuff(ByteBuffer.wrap(that.buf.toBytes(0, that.buf.limit())));
     } else {
@@ -331,6 +327,7 @@ public class HFileBlock implements Cacheable {
    * and is sitting in a byte buffer and we want to stuff the block into cache.
    *
    * <p>TODO: The caller presumes no checksumming
+   * <p>TODO: Can the HFile block writer also be off-heap?</p>
    * required of this block instance since going into cache; checksum already verified on
    * underlying block data pulled in from filesystem. Is that correct? What if cache is SSD?
    *
@@ -349,8 +346,8 @@ public class HFileBlock implements Cacheable {
       int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader,
       long offset, final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
       HFileContext fileContext) {
-    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
-        prevBlockOffset, offset, onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
+    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
+      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
     this.buf = new SingleByteBuff(b);
     if (fillHeader) {
       overwriteHeader();
@@ -366,7 +363,8 @@ public class HFileBlock implements Cacheable {
    * @param buf Has header, content, and trailing checksums if present.
    */
   HFileBlock(ByteBuff buf, boolean usesHBaseChecksum, MemoryType memType, final long offset,
-      final int nextBlockOnDiskSize, HFileContext fileContext) throws IOException {
+      final int nextBlockOnDiskSize, HFileContext fileContext)
+      throws IOException {
     buf.rewind();
     final BlockType blockType = BlockType.read(buf);
     final int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
@@ -394,8 +392,8 @@ public class HFileBlock implements Cacheable {
     }
     fileContext = fileContextBuilder.build();
     assert usesHBaseChecksum == fileContext.isUseHBaseChecksum();
-    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
-        prevBlockOffset, offset, onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
+    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
+      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
     this.memType = memType;
     this.offset = offset;
     this.buf = buf;
@@ -406,9 +404,8 @@ public class HFileBlock implements Cacheable {
    * Called from constructors.
    */
   private void init(BlockType blockType, int onDiskSizeWithoutHeader,
-      int uncompressedSizeWithoutHeader, long prevBlockOffset,
-      long offset, int onDiskDataSizeWithHeader, final int nextBlockOnDiskSize,
-      HFileContext fileContext) {
+      int uncompressedSizeWithoutHeader, long prevBlockOffset, long offset,
+      int onDiskDataSizeWithHeader, final int nextBlockOnDiskSize, HFileContext fileContext) {
     this.blockType = blockType;
     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
@@ -425,10 +422,9 @@ public class HFileBlock implements Cacheable {
    * @param verifyChecksum true if checksum verification is in use.
    * @return Size of the block with header included.
    */
-  private static int getOnDiskSizeWithHeader(final ByteBuffer headerBuf,
+  private static int getOnDiskSizeWithHeader(final ByteBuff headerBuf,
       boolean verifyChecksum) {
-    return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) +
-      headerSize(verifyChecksum);
+    return headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX) + headerSize(verifyChecksum);
   }
 
   /**
@@ -651,9 +647,10 @@ public class HFileBlock implements Cacheable {
     ByteBuff dup = this.buf.duplicate();
     dup.position(this.headerSize());
     dup = dup.slice();
+
     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
-      unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(),
-      dup);
+      unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(), dup);
+
     return unpacked;
   }
 
@@ -667,15 +664,14 @@ public class HFileBlock implements Cacheable {
     int headerSize = headerSize();
     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
 
-    // TODO we need consider allocating offheap here?
-    ByteBuffer newBuf = ByteBuffer.allocate(capacityNeeded);
+    ByteBuff newBuf = new SingleByteBuff(ByteBuffer.allocate(capacityNeeded));
 
     // Copy header bytes into newBuf.
     // newBuf is HBB so no issue in calling array()
     buf.position(0);
-    buf.get(newBuf.array(), newBuf.arrayOffset(), headerSize);
+    newBuf.put(0, buf, 0, headerSize);
 
-    buf = new SingleByteBuff(newBuf);
+    buf = newBuf;
     // set limit to exclude next block's header
     buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
   }
@@ -692,17 +688,6 @@ public class HFileBlock implements Cacheable {
     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
   }
 
-  /** An additional sanity-check in case no compression or encryption is being used. */
-  @VisibleForTesting
-  void sanityCheckUncompressedSize() throws IOException {
-    if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + totalChecksumBytes()) {
-      throw new IOException("Using no compression but "
-          + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", "
-          + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader
-          + ", numChecksumbytes=" + totalChecksumBytes());
-    }
-  }
-
   /**
    * Cannot be {@link #UNSET}. Must be a legitimate value. Used re-making the {@link BlockCacheKey} when
    * block is returned to the cache.
@@ -748,82 +733,6 @@ public class HFileBlock implements Cacheable {
   }
 
   /**
-   * Read from an input stream at least <code>necessaryLen</code> and if possible,
-   * <code>extraLen</code> also if available. Analogous to
-   * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a
-   * number of "extra" bytes to also optionally read.
-   *
-   * @param in the input stream to read from
-   * @param buf the buffer to read into
-   * @param bufOffset the destination offset in the buffer
-   * @param necessaryLen the number of bytes that are absolutely necessary to read
-   * @param extraLen the number of extra bytes that would be nice to read
-   * @return true if succeeded reading the extra bytes
-   * @throws IOException if failed to read the necessary bytes
-   */
-  static boolean readWithExtra(InputStream in, byte[] buf,
-      int bufOffset, int necessaryLen, int extraLen) throws IOException {
-    int bytesRemaining = necessaryLen + extraLen;
-    while (bytesRemaining > 0) {
-      int ret = in.read(buf, bufOffset, bytesRemaining);
-      if (ret == -1 && bytesRemaining <= extraLen) {
-        // We could not read the "extra data", but that is OK.
-        break;
-      }
-      if (ret < 0) {
-        throw new IOException("Premature EOF from inputStream (read "
-            + "returned " + ret + ", was trying to read " + necessaryLen
-            + " necessary bytes and " + extraLen + " extra bytes, "
-            + "successfully read "
-            + (necessaryLen + extraLen - bytesRemaining));
-      }
-      bufOffset += ret;
-      bytesRemaining -= ret;
-    }
-    return bytesRemaining <= 0;
-  }
-
-  /**
-   * Read from an input stream at least <code>necessaryLen</code> and if possible,
-   * <code>extraLen</code> also if available. Analogous to
-   * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but uses
-   * positional read and specifies a number of "extra" bytes that would be
-   * desirable but not absolutely necessary to read.
-   *
-   * @param in the input stream to read from
-   * @param position the position within the stream from which to start reading
-   * @param buf the buffer to read into
-   * @param bufOffset the destination offset in the buffer
-   * @param necessaryLen the number of bytes that are absolutely necessary to
-   *     read
-   * @param extraLen the number of extra bytes that would be nice to read
-   * @return true if and only if extraLen is > 0 and reading those extra bytes
-   *     was successful
-   * @throws IOException if failed to read the necessary bytes
-   */
-  @VisibleForTesting
-  static boolean positionalReadWithExtra(FSDataInputStream in,
-      long position, byte[] buf, int bufOffset, int necessaryLen, int extraLen)
-      throws IOException {
-    int bytesRemaining = necessaryLen + extraLen;
-    int bytesRead = 0;
-    while (bytesRead < necessaryLen) {
-      int ret = in.read(position, buf, bufOffset, bytesRemaining);
-      if (ret < 0) {
-        throw new IOException("Premature EOF from inputStream (positional read "
-            + "returned " + ret + ", was trying to read " + necessaryLen
-            + " necessary bytes and " + extraLen + " extra bytes, "
-            + "successfully read " + bytesRead);
-      }
-      position += ret;
-      bufOffset += ret;
-      bytesRemaining -= ret;
-      bytesRead += ret;
-    }
-    return bytesRead != necessaryLen && bytesRemaining <= 0;
-  }
-
-  /**
    * Unified version 2 {@link HFile} block writer. The intended usage pattern
    * is as follows:
    * <ol>
@@ -988,18 +897,6 @@ public class HFileBlock implements Cacheable {
     }
 
     /**
-     * Returns the stream for the user to write to. The block writer takes care
-     * of handling compression and buffering for caching on write. Can only be
-     * called in the "writing" state.
-     *
-     * @return the data output stream for the user to write to
-     */
-    DataOutputStream getUserDataStream() {
-      expectState(State.WRITING);
-      return userDataStream;
-    }
-
-    /**
      * Transitions the block writer from the "writing" state to the "block
      * ready" state.  Does nothing if a block is already finished.
      */
@@ -1261,11 +1158,9 @@ public class HFileBlock implements Cacheable {
     }
 
     /**
-     * Clones the header followed by the on-disk (compressed/encoded/encrypted) data. This is
-     * needed for storing packed blocks in the block cache. Expects calling semantics identical to
-     * {@link #getUncompressedBufferWithHeader()}. Returns only the header and data,
-     * Does not include checksum data.
-     *
+     * Clones the header followed by the on-disk (compressed/encoded/encrypted) data. This is needed
+     * for storing packed blocks in the block cache. Returns only the header and data; does not
+     * include checksum data.
      * @return Returns a copy of block bytes for caching on write
      */
     private ByteBuffer cloneOnDiskBufferWithHeader() {
@@ -1321,11 +1216,10 @@ public class HFileBlock implements Cacheable {
                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
                                 .withIncludesTags(fileContext.isIncludesTags())
                                 .build();
-       return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
+      return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
           getUncompressedSizeWithoutHeader(), prevOffset,
-          cacheConf.shouldCacheCompressed(blockType.getCategory())?
-            cloneOnDiskBufferWithHeader() :
-            cloneUncompressedBufferWithHeader(),
+          cacheConf.shouldCacheCompressed(blockType.getCategory()) ? cloneOnDiskBufferWithHeader()
+              : cloneUncompressedBufferWithHeader(),
           FILL_HEADER, startOffset, UNSET,
           onDiskBlockBytesWithHeader.size() + onDiskChecksum.length, newContext);
     }
@@ -1415,8 +1309,8 @@ public class HFileBlock implements Cacheable {
    */
   private static class PrefetchedHeader {
     long offset = -1;
-    byte [] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
-    final ByteBuffer buf = ByteBuffer.wrap(header, 0, HConstants.HFILEBLOCK_HEADER_SIZE);
+    byte[] header = new byte[HConstants.HFILEBLOCK_HEADER_SIZE];
+    final ByteBuff buf = new SingleByteBuff(ByteBuffer.wrap(header, 0, header.length));
 
     @Override
     public String toString() {
@@ -1479,11 +1373,11 @@ public class HFileBlock implements Cacheable {
     }
 
     /**
-     * A constructor that reads files with the latest minor version.
-     * This is used by unit tests only.
+     * A constructor that reads files with the latest minor version. This is used by unit tests
+     * only.
      */
     FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
-    throws IOException {
+        throws IOException {
       this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
     }
 
@@ -1520,60 +1414,49 @@ public class HFileBlock implements Cacheable {
     }
 
     /**
-     * Does a positional read or a seek and read into the given buffer. Returns
-     * the on-disk size of the next block, or -1 if it could not be read/determined; e.g. EOF.
-     *
+     * Does a positional read or a seek and read into the given byte buffer. We must take care to
+     * call {@link ByteBuff#release()} on every exit path to deallocate the ByteBuffers, otherwise
+     * a memory leak may happen.
      * @param dest destination buffer
-     * @param destOffset offset into the destination buffer at where to put the bytes we read
      * @param size size of read
      * @param peekIntoNextBlock whether to read the next block's on-disk size
      * @param fileOffset position in the stream to read at
      * @param pread whether we should do a positional read
      * @param istream The input source of data
-     * @return the on-disk size of the next block with header size included, or
-     *         -1 if it could not be determined; if not -1, the <code>dest</code> INCLUDES the
-     *         next header
-     * @throws IOException
+     * @return true to indicate the destination buffer includes the next block header, otherwise it
+     *         only includes the current block data without the next block header.
+     * @throws IOException if any IO error happens.
      */
-    @VisibleForTesting
-    protected int readAtOffset(FSDataInputStream istream, byte[] dest, int destOffset, int size,
-        boolean peekIntoNextBlock, long fileOffset, boolean pread)
-        throws IOException {
-      if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) {
-        // We are asked to read the next block's header as well, but there is
-        // not enough room in the array.
-        throw new IOException("Attempted to read " + size + " bytes and " + hdrSize +
-            " bytes of next header into a " + dest.length + "-byte array at offset " + destOffset);
-      }
-
+    protected boolean readAtOffset(FSDataInputStream istream, ByteBuff dest, int size,
+        boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
       if (!pread) {
         // Seek + read. Better for scanning.
         HFileUtil.seekOnMultipleSources(istream, fileOffset);
-        // TODO: do we need seek time latencies?
         long realOffset = istream.getPos();
         if (realOffset != fileOffset) {
-          throw new IOException("Tried to seek to " + fileOffset + " to " + "read " + size +
-              " bytes, but pos=" + realOffset + " after seek");
+          throw new IOException("Tried to seek to " + fileOffset + " to read " + size
+              + " bytes, but pos=" + realOffset + " after seek");
         }
-
         if (!peekIntoNextBlock) {
-          IOUtils.readFully(istream, dest, destOffset, size);
-          return -1;
+          BlockIOUtils.readFully(dest, istream, size);
+          return false;
         }
 
-        // Try to read the next block header.
-        if (!readWithExtra(istream, dest, destOffset, size, hdrSize)) {
-          return -1;
+        // Try to read the next block header
+        if (!BlockIOUtils.readWithExtra(dest, istream, size, hdrSize)) {
+          // did not read the next block header.
+          return false;
         }
       } else {
         // Positional read. Better for random reads; or when the streamLock is already locked.
         int extraSize = peekIntoNextBlock ? hdrSize : 0;
-        if (!positionalReadWithExtra(istream, fileOffset, dest, destOffset, size, extraSize)) {
-          return -1;
+        if (!BlockIOUtils.preadWithExtra(dest, istream, fileOffset, size, extraSize)) {
+          // did not read the next block header.
+          return false;
         }
       }
       assert peekIntoNextBlock;
-      return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize;
+      return true;
     }
 
     /**
@@ -1672,7 +1555,7 @@ public class HFileBlock implements Cacheable {
      * is not right.
      * @throws IOException
      */
-    private void verifyOnDiskSizeMatchesHeader(final int passedIn, final ByteBuffer headerBuf,
+    private void verifyOnDiskSizeMatchesHeader(final int passedIn, final ByteBuff headerBuf,
         final long offset, boolean verifyChecksum)
     throws IOException {
       // Assert size provided aligns with what is in the header
@@ -1691,11 +1574,11 @@ public class HFileBlock implements Cacheable {
      * we have to backup the stream because we over-read (the next block's header).
      * @see PrefetchedHeader
      * @return The cached block header or null if not found.
-     * @see #cacheNextBlockHeader(long, byte[], int, int)
+     * @see #cacheNextBlockHeader(long, ByteBuff, int, int)
      */
-    private ByteBuffer getCachedHeader(final long offset) {
+    private ByteBuff getCachedHeader(final long offset) {
       PrefetchedHeader ph = this.prefetchedHeader.get();
-      return ph != null && ph.offset == offset? ph.buf: null;
+      return ph != null && ph.offset == offset ? ph.buf : null;
     }
 
     /**
@@ -1704,13 +1587,24 @@ public class HFileBlock implements Cacheable {
      * @see PrefetchedHeader
      */
     private void cacheNextBlockHeader(final long offset,
-        final byte [] header, final int headerOffset, final int headerLength) {
+        ByteBuff onDiskBlock, int onDiskSizeWithHeader, int headerLength) {
       PrefetchedHeader ph = new PrefetchedHeader();
       ph.offset = offset;
-      System.arraycopy(header, headerOffset, ph.header, 0, headerLength);
+      onDiskBlock.get(onDiskSizeWithHeader, ph.header, 0, headerLength);
       this.prefetchedHeader.set(ph);
     }
 
+    private int getNextBlockOnDiskSize(boolean readNextHeader, ByteBuff onDiskBlock,
+        int onDiskSizeWithHeader) {
+      int nextBlockOnDiskSize = -1;
+      if (readNextHeader) {
+        nextBlockOnDiskSize =
+            onDiskBlock.getIntAfterPosition(onDiskSizeWithHeader + BlockType.MAGIC_LENGTH)
+                + hdrSize;
+      }
+      return nextBlockOnDiskSize;
+    }
+
     /**
      * Reads a version 2 block.
      *
@@ -1737,7 +1631,7 @@ public class HFileBlock implements Cacheable {
       // Try and get cached header. Will serve us in rare case where onDiskSizeWithHeaderL is -1
       // and will save us having to seek the stream backwards to reread the header we
       // read the last time through here.
-      ByteBuffer headerBuf = getCachedHeader(offset);
+      ByteBuff headerBuf = getCachedHeader(offset);
       LOG.trace("Reading {} at offset={}, pread={}, verifyChecksum={}, cachedHeader={}, " +
           "onDiskSizeWithHeader={}", this.fileContext.getHFileName(), offset, pread,
           verifyChecksum, headerBuf, onDiskSizeWithHeader);
@@ -1757,9 +1651,9 @@ public class HFileBlock implements Cacheable {
           if (LOG.isTraceEnabled()) {
             LOG.trace("Extra see to get block size!", new RuntimeException());
           }
-          headerBuf = ByteBuffer.allocate(hdrSize);
-          readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), hdrSize, false,
-              offset, pread);
+          headerBuf = new SingleByteBuff(ByteBuffer.allocate(hdrSize));
+          readAtOffset(is, headerBuf, hdrSize, false, offset, pread);
+          headerBuf.rewind();
         }
         onDiskSizeWithHeader = getOnDiskSizeWithHeader(headerBuf, checksumSupport);
       }
@@ -1770,46 +1664,55 @@ public class HFileBlock implements Cacheable {
       // says where to start reading. If we have the header cached, then we don't need to read
       // it again and we can likely read from last place we left off w/o need to backup and reread
       // the header we read last time through here.
-      // TODO: Make this ByteBuffer-based. Will make it easier to go to HDFS with BBPool (offheap).
-      byte [] onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize];
-      int nextBlockOnDiskSize = readAtOffset(is, onDiskBlock, preReadHeaderSize,
+      ByteBuff onDiskBlock =
+          new SingleByteBuff(ByteBuffer.allocate(onDiskSizeWithHeader + hdrSize));
+      boolean initHFileBlockSuccess = false;
+      try {
+        if (headerBuf != null) {
+          onDiskBlock.put(0, headerBuf, 0, hdrSize).position(hdrSize);
+        }
+        boolean readNextHeader = readAtOffset(is, onDiskBlock,
           onDiskSizeWithHeader - preReadHeaderSize, true, offset + preReadHeaderSize, pread);
-      if (headerBuf != null) {
-        // The header has been read when reading the previous block OR in a distinct header-only
-        // read. Copy to this block's header.
-        System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
-      } else {
-        headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize);
-      }
-      // Do a few checks before we go instantiate HFileBlock.
-      assert onDiskSizeWithHeader > this.hdrSize;
-      verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, checksumSupport);
-      ByteBuff onDiskBlockByteBuff =
-          new SingleByteBuff(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader));
-      // Verify checksum of the data before using it for building HFileBlock.
-      if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuff, hdrSize)) {
-        return null;
-      }
-      long duration = System.currentTimeMillis() - startTime;
-      if (updateMetrics) {
-        HFile.updateReadLatency(duration, pread);
-      }
-      // The onDiskBlock will become the headerAndDataBuffer for this block.
-      // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
-      // contains the header of next block, so no need to set next block's header in it.
-      HFileBlock hFileBlock = new HFileBlock(onDiskBlockByteBuff, checksumSupport,
-          MemoryType.EXCLUSIVE, offset, nextBlockOnDiskSize, fileContext);
-      // Run check on uncompressed sizings.
-      if (!fileContext.isCompressedOrEncrypted()) {
-        hFileBlock.sanityCheckUncompressed();
-      }
-      LOG.trace("Read {} in {} ns", hFileBlock, duration);
-      // Cache next block header if we read it for the next time through here.
-      if (nextBlockOnDiskSize != -1) {
-        cacheNextBlockHeader(offset + hFileBlock.getOnDiskSizeWithHeader(),
-            onDiskBlock, onDiskSizeWithHeader, hdrSize);
+        onDiskBlock.rewind(); // in case of moving position when copying a cached header
+        int nextBlockOnDiskSize =
+            getNextBlockOnDiskSize(readNextHeader, onDiskBlock, onDiskSizeWithHeader);
+        if (headerBuf == null) {
+          headerBuf = onDiskBlock.duplicate().position(0).limit(hdrSize);
+        }
+        // Do a few checks before we go instantiate HFileBlock.
+        assert onDiskSizeWithHeader > this.hdrSize;
+        verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, checksumSupport);
+        ByteBuff curBlock = onDiskBlock.duplicate().limit(onDiskSizeWithHeader);
+        // Verify checksum of the data before using it for building HFileBlock.
+        if (verifyChecksum && !validateChecksum(offset, curBlock, hdrSize)) {
+          return null;
+        }
+        long duration = System.currentTimeMillis() - startTime;
+        if (updateMetrics) {
+          HFile.updateReadLatency(duration, pread);
+        }
+        // The onDiskBlock will become the headerAndDataBuffer for this block.
+        // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
+        // contains the header of next block, so no need to set next block's header in it.
+        HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, MemoryType.EXCLUSIVE,
+            offset, nextBlockOnDiskSize, fileContext);
+        // Run check on uncompressed sizings.
+        if (!fileContext.isCompressedOrEncrypted()) {
+          hFileBlock.sanityCheckUncompressed();
+        }
+        LOG.trace("Read {} in {} ns", hFileBlock, duration);
+        // Cache next block header if we read it for the next time through here.
+        if (nextBlockOnDiskSize != -1) {
+          cacheNextBlockHeader(offset + hFileBlock.getOnDiskSizeWithHeader(), onDiskBlock,
+            onDiskSizeWithHeader, hdrSize);
+        }
+        initHFileBlockSuccess = true;
+        return hFileBlock;
+      } finally {
+        if (!initHFileBlockSuccess) {
+          onDiskBlock.release();
+        }
       }
-      return hFileBlock;
     }
 
     @Override
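
One pattern worth highlighting in the rewritten readBlockDataInternal above: the ByteBuff it
allocates is released on every failure path, and ownership only passes on once the HFileBlock has
been built. Below is a stripped-down sketch of that shape; it is not code from the patch, and the
helper class, BlockBuilder interface and method names are the editor's placeholders.

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;

// Editor's sketch of the allocate / try / release-on-failure shape used by readBlockDataInternal;
// names other than ByteBuff, SingleByteBuff and HFileBlock are placeholders.
final class ReleaseOnFailureSketch {

  /** Stands in for the checksum check plus HFileBlock construction done in the patch. */
  interface BlockBuilder {
    HFileBlock build(ByteBuff onDiskBlock) throws IOException;
  }

  static HFileBlock readBlock(int onDiskSizeWithHeader, int hdrSize, BlockBuilder builder)
      throws IOException {
    ByteBuff onDiskBlock = new SingleByteBuff(ByteBuffer.allocate(onDiskSizeWithHeader + hdrSize));
    boolean success = false;
    try {
      HFileBlock block = builder.build(onDiskBlock); // may throw, or return null on a bad checksum
      success = (block != null);
      return block;
    } finally {
      if (!success) {
        // Every early exit (exception or failed checksum) releases the buffer so a pooled or
        // off-heap allocator is not slowly exhausted.
        onDiskBlock.release();
      }
    }
  }
}
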
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
similarity index 54%
rename from hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java
rename to hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
index a13c868..60180e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockPositionalRead.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
@@ -17,33 +17,115 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import static org.junit.Assert.*;
-import static org.mockito.Mockito.*;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.when;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
+
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.MultiByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.ClassRule;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.ExpectedException;
 
-/**
- * Unit test suite covering HFileBlock positional read logic.
- */
-@Category({IOTests.class, SmallTests.class})
-public class TestHFileBlockPositionalRead {
+@Category({ IOTests.class, SmallTests.class })
+public class TestBlockIOUtils {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestHFileBlockPositionalRead.class);
+      HBaseClassTestRule.forClass(TestBlockIOUtils.class);
 
   @Rule
   public ExpectedException exception = ExpectedException.none();
 
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+
+  @Test
+  public void testIsByteBufferReadable() throws IOException {
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    Path p = new Path(TEST_UTIL.getDataTestDirOnTestFS(), "testIsByteBufferReadable");
+    try (FSDataOutputStream out = fs.create(p)) {
+      out.writeInt(23);
+    }
+    try (FSDataInputStream is = fs.open(p)) {
+      assertFalse(BlockIOUtils.isByteBufferReadable(is));
+    }
+  }
+
+  @Test
+  public void testReadFully() throws IOException {
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    Path p = new Path(TEST_UTIL.getDataTestDirOnTestFS(), "testReadFully");
+    String s = "hello world";
+    try (FSDataOutputStream out = fs.create(p)) {
+      out.writeBytes(s);
+    }
+    ByteBuff buf = new SingleByteBuff(ByteBuffer.allocate(11));
+    try (FSDataInputStream in = fs.open(p)) {
+      BlockIOUtils.readFully(buf, in, 11);
+    }
+    buf.rewind();
+    byte[] heapBuf = new byte[s.length()];
+    buf.get(heapBuf, 0, heapBuf.length);
+    assertArrayEquals(Bytes.toBytes(s), heapBuf);
+  }
+
+  @Test
+  public void testReadWithExtra() throws IOException {
+    FileSystem fs = TEST_UTIL.getTestFileSystem();
+    Path p = new Path(TEST_UTIL.getDataTestDirOnTestFS(), "testReadWithExtra");
+    String s = "hello world";
+    try (FSDataOutputStream out = fs.create(p)) {
+      out.writeBytes(s);
+    }
+    ByteBuff buf = new SingleByteBuff(ByteBuffer.allocate(8));
+    try (FSDataInputStream in = fs.open(p)) {
+      assertTrue(BlockIOUtils.readWithExtra(buf, in, 6, 2));
+    }
+    buf.rewind();
+    byte[] heapBuf = new byte[buf.capacity()];
+    buf.get(heapBuf, 0, heapBuf.length);
+    assertArrayEquals(Bytes.toBytes("hello wo"), heapBuf);
+
+    buf = new MultiByteBuff(ByteBuffer.allocate(4), ByteBuffer.allocate(4), ByteBuffer.allocate(4));
+    try (FSDataInputStream in = fs.open(p)) {
+      assertTrue(BlockIOUtils.readWithExtra(buf, in, 8, 3));
+    }
+    buf.rewind();
+    heapBuf = new byte[11];
+    buf.get(heapBuf, 0, heapBuf.length);
+    assertArrayEquals(Bytes.toBytes("hello world"), heapBuf);
+
+    buf.position(0).limit(12);
+    try (FSDataInputStream in = fs.open(p)) {
+      try {
+        BlockIOUtils.readWithExtra(buf, in, 12, 0);
+        fail("Should only read 11 bytes");
+      } catch (IOException e) {
+        // expected: the file only contains 11 bytes, so asking for 12 necessary bytes must fail
+      }
+    }
+  }
+
   @Test
   public void testPositionalReadNoExtra() throws IOException {
     long position = 0;
@@ -52,10 +134,10 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 0;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(totalLen);
-    boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf,
-        bufOffset, necessaryLen, extraLen);
+    boolean ret = BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
     assertFalse("Expect false return when no extra bytes requested", ret);
     verify(in).read(position, buf, bufOffset, totalLen);
     verifyNoMoreInteractions(in);
@@ -69,11 +151,11 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 0;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(5);
     when(in.read(5, buf, 5, 5)).thenReturn(5);
-    boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf,
-        bufOffset, necessaryLen, extraLen);
+    boolean ret = BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
     assertFalse("Expect false return when no extra bytes requested", ret);
     verify(in).read(position, buf, bufOffset, totalLen);
     verify(in).read(5, buf, 5, 5);
@@ -88,10 +170,10 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 5;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(totalLen);
-    boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf,
-        bufOffset, necessaryLen, extraLen);
+    boolean ret = BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
     assertTrue("Expect true return when reading extra bytes succeeds", ret);
     verify(in).read(position, buf, bufOffset, totalLen);
     verifyNoMoreInteractions(in);
@@ -105,10 +187,10 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 5;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(necessaryLen);
-    boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf,
-        bufOffset, necessaryLen, extraLen);
+    boolean ret = BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
     assertFalse("Expect false return when reading extra bytes fails", ret);
     verify(in).read(position, buf, bufOffset, totalLen);
     verifyNoMoreInteractions(in);
@@ -123,11 +205,11 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 5;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(5);
     when(in.read(5, buf, 5, 10)).thenReturn(10);
-    boolean ret = HFileBlock.positionalReadWithExtra(in, position, buf,
-        bufOffset, necessaryLen, extraLen);
+    boolean ret = BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
     assertTrue("Expect true return when reading extra bytes succeeds", ret);
     verify(in).read(position, buf, bufOffset, totalLen);
     verify(in).read(5, buf, 5, 10);
@@ -142,12 +224,12 @@ public class TestHFileBlockPositionalRead {
     int extraLen = 0;
     int totalLen = necessaryLen + extraLen;
     byte[] buf = new byte[totalLen];
+    ByteBuff bb = new SingleByteBuff(ByteBuffer.wrap(buf, 0, totalLen));
     FSDataInputStream in = mock(FSDataInputStream.class);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(9);
     when(in.read(position, buf, bufOffset, totalLen)).thenReturn(-1);
     exception.expect(IOException.class);
     exception.expectMessage("EOF");
-    HFileBlock.positionalReadWithExtra(in, position, buf, bufOffset,
-        necessaryLen, extraLen);
+    BlockIOUtils.preadWithExtra(bb, in, position, necessaryLen, extraLen);
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index e93b61e..a4135d7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -398,23 +398,25 @@ public class TestChecksum {
       return b;
     }
 
+
     @Override
-    protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size,
+    protected boolean readAtOffset(FSDataInputStream istream, ByteBuff dest, int size,
         boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException {
-      int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock,
-          fileOffset, pread);
+      int destOffset = dest.position();
+      boolean returnValue =
+          super.readAtOffset(istream, dest, size, peekIntoNextBlock, fileOffset, pread);
       if (!corruptDataStream) {
         return returnValue;
       }
       // Corrupt 3rd character of block magic of next block's header.
       if (peekIntoNextBlock) {
-        dest[destOffset + size + 3] = 0b00000000;
+        dest.put(destOffset + size + 3, (byte) 0b00000000);
       }
       // We might be reading this block's header too, corrupt it.
-      dest[destOffset + 1] = 0b00000000;
+      dest.put(destOffset + 1, (byte) 0b00000000);
       // Corrupt non header data
       if (size > hdrSize) {
-        dest[destOffset + hdrSize + 1] = 0b00000000;
+        dest.put(destOffset + hdrSize + 1, (byte) 0b00000000);
       }
       return returnValue;
     }


[hbase] 17/22: HBASE-22463 Some paths in HFileScannerImpl did not consider block#release which will exhaust the ByteBuffAllocator (#257)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 3b031c6e75e0a7e807f5abfcd688cdf863a5efdb
Author: openinx <op...@gmail.com>
AuthorDate: Thu May 30 12:24:10 2019 +0800

    HBASE-22463 Some paths in HFileScannerImpl did not consider block#release which will exhaust the ByteBuffAllocator (#257)
---
 .../hadoop/hbase/io/hfile/MemcachedBlockCache.java |   4 +-
 .../apache/hadoop/hbase/io/hfile/Cacheable.java    |  14 -
 .../hbase/io/hfile/CacheableDeserializer.java      |   6 +-
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |  26 +-
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     | 154 +++++------
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |   7 -
 .../hadoop/hbase/io/hfile/TinyLfuBlockCache.java   |  15 +-
 .../hadoop/hbase/io/hfile/bucket/BucketEntry.java  |   5 +-
 .../hbase/io/hfile/bucket/ByteBufferIOEngine.java  |   8 +-
 .../hfile/bucket/ExclusiveMemoryMmapIOEngine.java  |   3 +-
 .../hadoop/hbase/io/hfile/bucket/FileIOEngine.java |   3 +-
 .../io/hfile/bucket/SharedMemoryMmapIOEngine.java  |   8 +-
 .../hadoop/hbase/io/hfile/CacheTestUtils.java      |   8 +-
 .../hadoop/hbase/io/hfile/TestCacheConfig.java     |   8 +-
 .../hbase/io/hfile/TestCachedBlockQueue.java       |   5 -
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |   3 +-
 .../hfile/TestHFileScannerImplReferenceCount.java  | 301 +++++++++++++++++++++
 .../hadoop/hbase/io/hfile/TestLruBlockCache.java   |   6 -
 .../hbase/io/hfile/TestTinyLfuBlockCache.java      |   5 -
 .../io/hfile/bucket/TestBucketCacheRefCnt.java     |   3 -
 .../io/hfile/bucket/TestByteBufferIOEngine.java    |   3 +-
 21 files changed, 403 insertions(+), 192 deletions(-)

diff --git a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
index 22abd2c..6ccd138 100644
--- a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
+++ b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
@@ -37,7 +37,6 @@ import net.spy.memcached.transcoders.Transcoder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.trace.TraceUtil;
@@ -272,8 +271,7 @@ public class MemcachedBlockCache implements BlockCache {
     public HFileBlock decode(CachedData d) {
       try {
         ByteBuff buf = new SingleByteBuff(ByteBuffer.wrap(d.getData()));
-        return (HFileBlock) HFileBlock.BLOCK_DESERIALIZER.deserialize(buf, ByteBuffAllocator.HEAP,
-          MemoryType.EXCLUSIVE);
+        return (HFileBlock) HFileBlock.BLOCK_DESERIALIZER.deserialize(buf, ByteBuffAllocator.HEAP);
       } catch (IOException e) {
         LOG.warn("Failed to deserialize data from memcached", e);
       }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
index 9b4b38f..96c8e82 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
@@ -63,20 +63,6 @@ public interface Cacheable extends HeapSize, HBaseReferenceCounted {
    */
   BlockType getBlockType();
 
-  /**
-   * @return the {@code MemoryType} of this Cacheable
-   */
-  MemoryType getMemoryType();
-
-  /**
-   * SHARED means when this Cacheable is read back from cache it refers to the same memory area as
-   * used by the cache for caching it. EXCLUSIVE means when this Cacheable is read back from cache,
-   * the data was copied to an exclusive memory area of this Cacheable.
-   */
-  enum MemoryType {
-    SHARED, EXCLUSIVE
-  }
-
   /******************************* ReferenceCounted Interfaces ***********************************/
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
index 0205097..e12173d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
@@ -20,9 +20,8 @@ package org.apache.hadoop.hbase.io.hfile;
 import java.io.IOException;
 
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.yetus.audience.InterfaceAudience;
 
 /**
  * Interface for a deserializer. Throws an IOException if the serialized data is incomplete or
@@ -33,11 +32,10 @@ public interface CacheableDeserializer<T extends Cacheable> {
   /**
    * @param b ByteBuff to deserialize the Cacheable.
    * @param allocator to manage NIO ByteBuffers for future allocation or de-allocation.
-   * @param memType the {@link MemoryType} of the buffer
    * @return T the deserialized object.
    * @throws IOException
    */
-  T deserialize(ByteBuff b, ByteBuffAllocator allocator, MemoryType memType) throws IOException;
+  T deserialize(ByteBuff b, ByteBuffAllocator allocator) throws IOException;
 
   /**
    * Get the identifier of this deserializer. Identifier is unique for each deserializer and
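
With MemoryType removed, a cache that stores serialized blocks rebuilds them from just a ByteBuff
plus an allocator, as the MemcachedBlockCache hunk above shows. The following minimal sketch of
such a call site is an illustration, not part of the patch: the class and method names are
assumptions, while HFileBlock.BLOCK_DESERIALIZER and ByteBuffAllocator.HEAP are the identifiers
this commit actually uses. It is declared in the org.apache.hadoop.hbase.io.hfile package,
mirroring MemcachedBlockCache above.

package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.SingleByteBuff;

// Editor's sketch: rebuild an HFileBlock from bytes pulled out of some external cache using the
// new two-argument deserialize(). Everything except BLOCK_DESERIALIZER and ByteBuffAllocator.HEAP
// is a placeholder.
final class DeserializeSketch {
  static HFileBlock decodeFromHeap(byte[] cachedBytes) throws IOException {
    ByteBuff buf = new SingleByteBuff(ByteBuffer.wrap(cachedBytes));
    // No MemoryType any more: the block's lifecycle is tracked through the ByteBuff refCnt and
    // the allocator it was handed, not through a SHARED/EXCLUSIVE enum.
    return (HFileBlock) HFileBlock.BLOCK_DESERIALIZER.deserialize(buf, ByteBuffAllocator.HEAP);
  }
}
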
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 079907e..452b68c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -201,8 +201,6 @@ public class HFileBlock implements Cacheable {
    */
   private long offset = UNSET;
 
-  private MemoryType memType = MemoryType.EXCLUSIVE;
-
   /**
    * The on-disk size of the next block, including the header and checksums if present.
    * UNSET if unknown.
@@ -274,7 +272,7 @@ public class HFileBlock implements Cacheable {
     }
 
     @Override
-    public HFileBlock deserialize(ByteBuff buf, ByteBuffAllocator alloc, MemoryType memType)
+    public HFileBlock deserialize(ByteBuff buf, ByteBuffAllocator alloc)
         throws IOException {
       // The buf has the file block followed by block metadata.
       // Set limit to just before the BLOCK_METADATA_SPACE then rewind.
@@ -287,8 +285,7 @@ public class HFileBlock implements Cacheable {
       boolean usesChecksum = buf.get() == (byte) 1;
       long offset = buf.getLong();
       int nextBlockOnDiskSize = buf.getInt();
-      return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null,
-          alloc);
+      return new HFileBlock(newByteBuff, usesChecksum, offset, nextBlockOnDiskSize, null, alloc);
     }
 
     @Override
@@ -366,7 +363,7 @@ public class HFileBlock implements Cacheable {
    * to that point.
    * @param buf Has header, content, and trailing checksums if present.
    */
-  HFileBlock(ByteBuff buf, boolean usesHBaseChecksum, MemoryType memType, final long offset,
+  HFileBlock(ByteBuff buf, boolean usesHBaseChecksum, final long offset,
       final int nextBlockOnDiskSize, HFileContext fileContext, ByteBuffAllocator allocator)
       throws IOException {
     buf.rewind();
@@ -398,7 +395,6 @@ public class HFileBlock implements Cacheable {
     assert usesHBaseChecksum == fileContext.isUseHBaseChecksum();
     init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
       onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext, allocator);
-    this.memType = memType;
     this.offset = offset;
     this.buf = buf;
     this.buf.rewind();
@@ -1785,8 +1781,8 @@ public class HFileBlock implements Cacheable {
         // The onDiskBlock will become the headerAndDataBuffer for this block.
         // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
         // contains the header of next block, so no need to set next block's header in it.
-        HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, MemoryType.EXCLUSIVE,
-            offset, nextBlockOnDiskSize, fileContext, intoHeap ? HEAP: allocator);
+        HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, offset,
+            nextBlockOnDiskSize, fileContext, intoHeap ? HEAP : allocator);
         // Run check on uncompressed sizings.
         if (!fileContext.isCompressedOrEncrypted()) {
           hFileBlock.sanityCheckUncompressed();
@@ -2060,18 +2056,6 @@ public class HFileBlock implements Cacheable {
     return this.fileContext;
   }
 
-  @Override
-  public MemoryType getMemoryType() {
-    return this.memType;
-  }
-
-  /**
-   * @return true if this block is backed by a shared memory area(such as that of a BucketCache).
-   */
-  boolean usesSharedMemory() {
-    return this.memType == MemoryType.SHARED;
-  }
-
   /**
    * Convert the contents of the block header into a human readable string.
    * This is mostly helpful for debugging. This assumes that the block
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index be8cabb..1157615 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -24,8 +24,6 @@ import java.security.Key;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
@@ -489,8 +487,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     private int currValueLen;
     private int currMemstoreTSLen;
     private long currMemstoreTS;
-    // Updated but never read?
-    protected AtomicInteger blockFetches = new AtomicInteger(0);
     protected final HFile.Reader reader;
     private int currTagsLen;
     // buffer backed keyonlyKV
@@ -506,7 +502,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
      * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
      */
     protected Cell nextIndexedKey;
-    // Current block being used
+    // Current block being used. NOTICE: DON'T release curBlock separately except in shipped() or
+    // close(). The shipped() or close() method always does the final release; even if an
+    // exception occurs, curBlock will still be released by close() (see
+    // RegionScannerImpl#handleException). Please call releaseIfNotCurBlock() to release any
+    // block that is not referenced by curBlock.
     protected HFileBlock curBlock;
     // Previous blocks that were used in the course of the read
     protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>();
@@ -520,12 +520,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     }
 
     void updateCurrBlockRef(HFileBlock block) {
-      if (block != null && this.curBlock != null &&
-          block.getOffset() == this.curBlock.getOffset()) {
+      if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
         return;
       }
-      // We don't have to keep ref to EXCLUSIVE type of block
-      if (this.curBlock != null && this.curBlock.usesSharedMemory()) {
+      if (this.curBlock != null) {
         prevBlocks.add(this.curBlock);
       }
       this.curBlock = block;
@@ -533,7 +531,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
 
     void reset() {
       // We don't have to keep ref to EXCLUSIVE type of block
-      if (this.curBlock != null && this.curBlock.usesSharedMemory()) {
+      if (this.curBlock != null) {
         this.prevBlocks.add(this.curBlock);
       }
       this.curBlock = null;
@@ -821,7 +819,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
             return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
                 false);
           }
-
         }
       }
       // Don't rewind on a reseek operation, because reseek implies that we are
@@ -846,20 +843,19 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     public int seekTo(Cell key, boolean rewind) throws IOException {
       HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
       BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock,
-          cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding());
+        cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding());
       if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
-        // This happens if the key e.g. falls before the beginning of the
-        // file.
+        // This happens if the key e.g. falls before the beginning of the file.
         return -1;
       }
       return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
-          blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
+        blockWithScanInfo.getNextIndexedKey(), rewind, key, false);
     }
 
     @Override
     public boolean seekBefore(Cell key) throws IOException {
       HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock,
-          cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction));
+        cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction));
       if (seekToBlock == null) {
         return false;
       }
@@ -869,22 +865,22 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         // The key we are interested in
         if (previousBlockOffset == -1) {
           // we have a 'problem', the key we want is the first of the file.
+          releaseIfNotCurBlock(seekToBlock);
           return false;
         }
 
         // The first key in the current block 'seekToBlock' is greater than the given
         // seekBefore key. We will go ahead by reading the next block that satisfies the
         // given key. Return the current block before reading the next one.
-        seekToBlock.release();
+        releaseIfNotCurBlock(seekToBlock);
         // It is important that we compute and pass onDiskSize to the block
         // reader so that it does not have to read the header separately to
-        // figure out the size.  Currently, we do not have a way to do this
+        // figure out the size. Currently, we do not have a way to do this
         // correctly in the general case however.
         // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
         int prevBlockSize = -1;
-        seekToBlock = reader.readBlock(previousBlockOffset,
-            prevBlockSize, cacheBlocks,
-            pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
+        seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread,
+          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
         // TODO shortcut: seek forward in this block to the last key of the
         // block.
       }
@@ -893,6 +889,16 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     }
 
     /**
+     * The curBlock will be released by the shipped() or close() method, so here we only need to
+     * release a block that was read from the HFile earlier and is not referenced by curBlock.
+     */
+    protected void releaseIfNotCurBlock(HFileBlock block) {
+      if (curBlock != block) {
+        block.release();
+      }
+    }
+
+    /**
      * Scans blocks in the "scanned" section of the {@link HFile} until the next
      * data block is found.
      *
@@ -903,33 +909,30 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         justification="Yeah, unnecessary null check; could do w/ clean up")
     protected HFileBlock readNextDataBlock() throws IOException {
       long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
-      if (curBlock == null)
+      if (curBlock == null) {
         return null;
-
+      }
       HFileBlock block = this.curBlock;
-
       do {
         if (block.getOffset() >= lastDataBlockOffset) {
+          releaseIfNotCurBlock(block);
           return null;
         }
-
         if (block.getOffset() < 0) {
-          throw new IOException(
-              "Invalid block file offset: " + block + ", path=" + reader.getPath());
+          releaseIfNotCurBlock(block);
+          throw new IOException("Invalid block file offset: " + block + ", path=" + reader.getPath());
         }
-
         // We are reading the next block without block type validation, because
         // it might turn out to be a non-data block.
         block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(),
-            block.getNextBlockOnDiskSize(), cacheBlocks, pread,
-            isCompaction, true, null, getEffectiveDataBlockEncoding());
-        if (block != null && !block.getBlockType().isData()) { // Findbugs: NP_NULL_ON_SOME_PATH
+          block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null,
+          getEffectiveDataBlockEncoding());
+        if (block != null && !block.getBlockType().isData()) {
           // Whatever block we read we will be returning it unless
           // it is a datablock. Just in case the blocks are non data blocks
           block.release();
         }
       } while (!block.getBlockType().isData());
-
       return block;
     }
 
@@ -1109,8 +1112,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       }
 
       long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
-      if (curBlock != null
-          && curBlock.getOffset() == firstDataBlockOffset) {
+      if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) {
         return processFirstDataBlock();
       }
 
@@ -1128,8 +1130,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
         isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
       if (newBlock.getOffset() < 0) {
-        throw new IOException(
-            "Invalid block offset: " + newBlock.getOffset() + ", path=" + reader.getPath());
+        releaseIfNotCurBlock(newBlock);
+        throw new IOException("Invalid block offset: " + newBlock.getOffset() + ", path=" + reader.getPath());
       }
       updateCurrentBlock(newBlock);
     }
@@ -1176,26 +1178,26 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     }
 
     /**
-     * Updates the current block to be the given {@link HFileBlock}. Seeks to
-     * the the first key/value pair.
-     *
-     * @param newBlock the block to make current
+     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
+     * key/value pair.
+     * @param newBlock the block read by {@link HFileReaderImpl#readBlock}; it is a brand-new block
+     *          with a newly allocated {@link ByteBuff}, so if nothing else references it, we
+     *          must take care to release it.
      */
     protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
-      // Set the active block on the reader
-      // sanity check
-      if (newBlock.getBlockType() != BlockType.DATA) {
-        throw new IllegalStateException("ScannerV2 works only on data " + "blocks, got "
-            + newBlock.getBlockType() + "; " + "HFileName=" + reader.getPath()
-            + ", " + "dataBlockEncoder=" + reader.getDataBlockEncoding() + ", " + "isCompaction="
-            + isCompaction);
+      try {
+        if (newBlock.getBlockType() != BlockType.DATA) {
+          throw new IllegalStateException(
+              "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; "
+                  + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder="
+                  + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction);
+        }
+        updateCurrBlockRef(newBlock);
+        blockBuffer = newBlock.getBufferWithoutHeader();
+        readKeyValueLen();
+      } finally {
+        releaseIfNotCurBlock(newBlock);
       }
-
-      updateCurrBlockRef(newBlock);
-      blockBuffer = newBlock.getBufferWithoutHeader();
-      readKeyValueLen();
-      blockFetches.incrementAndGet();
-
       // Reset the next indexed key
       this.nextIndexedKey = null;
     }
@@ -1643,32 +1645,33 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     }
 
     /**
-     * Updates the current block to be the given {@link HFileBlock}. Seeks to
-     * the the first key/value pair.
-     *
-     * @param newBlock the block to make current
+     * Updates the current block to be the given {@link HFileBlock}. Seeks to the first
+     * key/value pair.
+     * @param newBlock the block to make current, read by {@link HFileReaderImpl#readBlock}; it is a
+     *          brand-new block with a newly allocated {@link ByteBuff}, so if nothing else
+     *          references it, we must take care to release it.
      * @throws CorruptHFileException
      */
     @Override
     protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
-
-      // sanity checks
-      if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
-        throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
-      }
-      short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
-      if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
-        String encoderCls = dataBlockEncoder.getClass().getName();
-        throw new CorruptHFileException("Encoder " + encoderCls
-          + " doesn't support data block encoding "
-          + DataBlockEncoding.getNameFromId(dataBlockEncoderId)
-          + ", path=" + reader.getPath());
+      try {
+        // sanity checks
+        if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
+          throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
+        }
+        short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
+        if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
+          String encoderCls = dataBlockEncoder.getClass().getName();
+          throw new CorruptHFileException(
+              "Encoder " + encoderCls + " doesn't support data block encoding "
+                  + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ", path=" + reader.getPath());
+        }
+        updateCurrBlockRef(newBlock);
+        ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
+        seeker.setCurrentBuffer(encodedBuffer);
+      } finally {
+        releaseIfNotCurBlock(newBlock);
       }
-      updateCurrBlockRef(newBlock);
-      ByteBuff encodedBuffer = getEncodedBuffer(newBlock);
-      seeker.setCurrentBuffer(encodedBuffer);
-      blockFetches.incrementAndGet();
-
       // Reset the next indexed key
       this.nextIndexedKey = null;
     }
@@ -1748,8 +1751,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     @Override
     protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
         boolean rewind, Cell key, boolean seekBefore) throws IOException {
-      if (this.curBlock == null
-          || this.curBlock.getOffset() != seekToBlock.getOffset()) {
+      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
         updateCurrentBlock(seekToBlock);
       } else if (rewind) {
         seeker.rewind();
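
To make the release protocol in the curBlock comment and releaseIfNotCurBlock() above easier to
follow, here is a minimal sketch under simplified assumptions; ScannerSketch and the Block
stand-in type are illustrative only, not the actual HFileScannerImpl internals:

// Illustrative sketch only: blocks handed to the scanner are tracked in prevBlocks and
// released in shipped()/close(); any block that never became curBlock is released eagerly.
import java.util.ArrayList;
import java.util.List;

interface Block {
  long getOffset();
  boolean release();
}

class ScannerSketch {
  private Block curBlock;                             // released only through shipped()/close()
  private final List<Block> prevBlocks = new ArrayList<>();

  void updateCurrBlockRef(Block block) {
    if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
      return;                                         // already the current block
    }
    if (curBlock != null) {
      prevBlocks.add(curBlock);                       // defer its release to shipped()/close()
    }
    curBlock = block;
  }

  void releaseIfNotCurBlock(Block block) {
    if (block != curBlock) {
      block.release();                                // curBlock itself stays referenced
    }
  }

  void shipped() {
    prevBlocks.forEach(Block::release);               // the RPC response is out; free the blocks
    prevBlocks.clear();
  }

  void close() {
    if (curBlock != null) {
      prevBlocks.add(curBlock);
      curBlock = null;
    }
    shipped();                                        // close() performs the final release
  }
}
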
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index 70715ae..0ec73a3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -530,16 +530,9 @@ public class LruBlockCache implements FirstLevelBlockCache {
       if (victimHandler != null && !repeat) {
         // The handler will increase result's refCnt for RPC, so need no extra retain.
         Cacheable result = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
-
         // Promote this to L1.
         if (result != null) {
           if (caching) {
-            if (result instanceof HFileBlock && ((HFileBlock) result).usesSharedMemory()) {
-              Cacheable original = result;
-              result = ((HFileBlock) original).deepCloneOnHeap();
-              // deepClone an new one, so need to release the original one to deallocate it.
-              original.release();
-            }
             cacheBlock(cacheKey, result, /* inMemory = */ false);
           }
         }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
index fbad3e3..5e69f6c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
@@ -171,8 +171,8 @@ public final class TinyLfuBlockCache implements FirstLevelBlockCache {
       if (victimCache != null) {
         value = victimCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
         if ((value != null) && caching) {
-          if ((value instanceof HFileBlock) && ((HFileBlock) value).usesSharedMemory()) {
-            value = ((HFileBlock) value).deepClone();
+          if ((value instanceof HFileBlock) && !((HFileBlock) value).isOnHeap()) {
+            value = ((HFileBlock) value).deepCloneOnHeap();
           }
           cacheBlock(cacheKey, value);
         }
@@ -248,17 +248,6 @@ public final class TinyLfuBlockCache implements FirstLevelBlockCache {
         .iterator();
   }
 
-  @Override
-  public void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
-    // There is no SHARED type here in L1. But the block might have been served from the L2 victim
-    // cache (when the Combined mode = false). So just try return this block to the victim cache.
-    // Note : In case of CombinedBlockCache we will have this victim cache configured for L1
-    // cache. But CombinedBlockCache will only call returnBlock on L2 cache.
-    if (victimCache != null) {
-      victimCache.returnBlock(cacheKey, block);
-    }
-  }
-
   private void logStats() {
     LOG.info(
         "totalSize=" + StringUtils.byteDesc(heapSize()) + ", " +
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
index a533793..ca41eca 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
@@ -29,7 +29,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockPriority;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.nio.ByteBuff;
@@ -194,9 +193,9 @@ class BucketEntry implements HBaseReferenceCounted {
     return this.refCnt() > 1 || (evicted && refCnt() == 1);
   }
 
-  Cacheable wrapAsCacheable(ByteBuffer[] buffers, MemoryType memoryType) throws IOException {
+  Cacheable wrapAsCacheable(ByteBuffer[] buffers) throws IOException {
     ByteBuff buf = ByteBuff.wrap(buffers, this.refCnt);
-    return this.deserializerReference().deserialize(buf, allocator, memoryType);
+    return this.deserializerReference().deserialize(buf, allocator);
   }
 
   interface BucketEntryHandler<T> {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
index 4e1b913..b0415e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
 
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.util.ByteBufferAllocator;
 import org.apache.hadoop.hbase.util.ByteBufferArray;
@@ -104,10 +103,9 @@ public class ByteBufferIOEngine implements IOEngine {
     // Here the buffer that is created directly refers to the buffer in the actual buckets.
     // When any cell is referring to the blocks created out of these buckets then it means that
     // those cells are referring to a shared memory area which if evicted by the BucketCache would
-    // lead to corruption of results. Hence we set the type of the buffer as SHARED_MEMORY
-    // so that the readers using this block are aware of this fact and do the necessary action
-    // to prevent eviction till the results are either consumed or copied
-    return be.wrapAsCacheable(buffers, MemoryType.SHARED);
+    // lead to corruption of results. The readers using this block are aware of this fact and take
+    // the necessary action to prevent eviction till the results are either consumed or copied.
+    return be.wrapAsCacheable(buffers);
   }
 
   /**
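
The shared-memory caveat in the comment above implies a usage contract on the reader side. A
minimal sketch, assuming an already-constructed BucketCache and a caller-supplied consumer (both
names here are placeholders):

// Sketch of the reader-side contract: keep the block retained while its cells still point
// into the bucket's shared memory, and release it once they have been consumed or copied.
import java.util.function.Consumer;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.Cacheable;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;

final class SharedReadSketch {
  static void readAndRelease(BucketCache cache, BlockCacheKey key, Consumer<Cacheable> consumer) {
    Cacheable block = cache.getBlock(key, true, false, true);  // refCnt bumped for this reader
    if (block == null) {
      return;
    }
    try {
      consumer.accept(block);  // consume or copy results while the shared memory is pinned
    } finally {
      block.release();         // now the BucketCache is free to evict and reuse the bucket space
    }
  }
}
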
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
index af749d7..3d7f2b1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
@@ -20,7 +20,6 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -39,6 +38,6 @@ public class ExclusiveMemoryMmapIOEngine extends FileMmapIOEngine {
     ByteBuff dst = ByteBuff.wrap(ByteBuffer.allocate(be.getLength()));
     bufferArray.read(be.offset(), dst);
     dst.position(0).limit(be.getLength());
-    return be.wrapAsCacheable(dst.nioByteBuffers(), MemoryType.EXCLUSIVE);
+    return be.wrapAsCacheable(dst.nioByteBuffers());
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
index f5ab309..b3afe48 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
@@ -30,7 +30,6 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -143,7 +142,7 @@ public class FileIOEngine implements IOEngine {
       }
     }
     dstBuffer.rewind();
-    return be.wrapAsCacheable(new ByteBuffer[] { dstBuffer }, MemoryType.EXCLUSIVE);
+    return be.wrapAsCacheable(new ByteBuffer[] { dstBuffer });
   }
 
   @VisibleForTesting
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
index bd83dd4..5369060 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
@@ -54,9 +53,8 @@ public class SharedMemoryMmapIOEngine extends FileMmapIOEngine {
     // Here the buffer that is created directly refers to the buffer in the actual buckets.
     // When any cell is referring to the blocks created out of these buckets then it means that
     // those cells are referring to a shared memory area which if evicted by the BucketCache would
-    // lead to corruption of results. Hence we set the type of the buffer as SHARED_MEMORY
-    // so that the readers using this block are aware of this fact and do the necessary action
-    // to prevent eviction till the results are either consumed or copied
-    return be.wrapAsCacheable(buffers, MemoryType.SHARED);
+    // lead to corruption of results. The readers using this block are aware of this fact and take
+    // the necessary action to prevent eviction till the results are either consumed or copied.
+    return be.wrapAsCacheable(buffers);
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index 5f1f617..717e9d7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -232,8 +232,7 @@ public class CacheTestUtils {
           }
 
           @Override
-          public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
-              throws IOException {
+          public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc) throws IOException {
             int len = b.getInt();
             Thread.yield();
             byte buf[] = new byte[len];
@@ -281,11 +280,6 @@ public class CacheTestUtils {
     public BlockType getBlockType() {
       return BlockType.DATA;
     }
-
-    @Override
-    public MemoryType getMemoryType() {
-      return MemoryType.EXCLUSIVE;
-    }
   }
 
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
index eda54f7..5d66e9a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
@@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
 import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
 import org.apache.hadoop.hbase.nio.ByteBuff;
@@ -82,7 +81,7 @@ public class TestCacheConfig {
     }
 
     @Override
-    public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
+    public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc)
         throws IOException {
       LOG.info("Deserialized " + b);
       return cacheable;
@@ -144,11 +143,6 @@ public class TestCacheConfig {
     public BlockType getBlockType() {
       return BlockType.DATA;
     }
-
-    @Override
-    public MemoryType getMemoryType() {
-      return MemoryType.EXCLUSIVE;
-    }
   }
 
   static class MetaCacheEntry extends DataCacheEntry {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java
index ab039ec..b81972e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCachedBlockQueue.java
@@ -145,11 +145,6 @@ public class TestCachedBlockQueue extends TestCase {
               return BlockType.DATA;
             }
 
-            @Override
-            public MemoryType getMemoryType() {
-              return MemoryType.EXCLUSIVE;
-            }
-
           }, accessTime, false);
     }
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 538a5a6..de10ced 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -61,7 +61,6 @@ import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.MultiByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
@@ -526,7 +525,7 @@ public class TestHFileBlock {
               ByteBuffer serialized = ByteBuffer.allocate(blockFromHFile.getSerializedLength());
               blockFromHFile.serialize(serialized, true);
               HFileBlock deserialized = (HFileBlock) blockFromHFile.getDeserializer()
-                  .deserialize(new SingleByteBuff(serialized), HEAP, MemoryType.EXCLUSIVE);
+                  .deserialize(new SingleByteBuff(serialized), HEAP);
               assertEquals("Serialization did not preserve block state. reuseBuffer=" + reuseBuffer,
                 blockFromHFile, deserialized);
               // intentional reference comparison
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
new file mode 100644
index 0000000..60ee958
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
@@ -0,0 +1,301 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MAX_CHUNK_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellComparatorImpl;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.HFileScannerImpl;
+import org.apache.hadoop.hbase.testclassification.IOTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ IOTests.class, SmallTests.class })
+public class TestHFileScannerImplReferenceCount {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestHFileScannerImplReferenceCount.class);
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestHFileScannerImplReferenceCount.class);
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static final byte[] FAMILY = Bytes.toBytes("f");
+  private static final byte[] QUALIFIER = Bytes.toBytes("q");
+  private static final byte[] SUFFIX = randLongBytes();
+
+  private static byte[] randLongBytes() {
+    Random rand = new Random();
+    byte[] keys = new byte[300];
+    rand.nextBytes(keys);
+    return keys;
+  }
+
+  private Cell firstCell = null;
+  private Cell secondCell = null;
+
+  @BeforeClass
+  public static void setUp() {
+    Configuration conf = UTIL.getConfiguration();
+    // Set the max chunk size and min entries key to be very small for index block, so that we can
+    // create an index block tree with level >= 2.
+    conf.setInt(MAX_CHUNK_SIZE_KEY, 10);
+    conf.setInt(MIN_INDEX_NUM_ENTRIES_KEY, 2);
+  }
+
+  private void writeHFile(Configuration conf, FileSystem fs, Path hfilePath, Algorithm compression,
+      DataBlockEncoding encoding, int cellCount) throws IOException {
+    HFileContext context =
+        new HFileContextBuilder().withBlockSize(1).withDataBlockEncoding(DataBlockEncoding.NONE)
+            .withCompression(compression).withDataBlockEncoding(encoding).build();
+    try (HFile.Writer writer =
+        new HFile.WriterFactory(conf, new CacheConfig(conf)).withPath(fs, hfilePath)
+            .withFileContext(context).withComparator(CellComparatorImpl.COMPARATOR).create()) {
+      Random rand = new Random(9713312); // Just a fixed seed.
+      for (int i = 0; i < cellCount; ++i) {
+        byte[] keyBytes = Bytes.add(Bytes.toBytes(i), SUFFIX);
+
+        // A random-length random value.
+        byte[] valueBytes = RandomKeyValueUtil.randomValue(rand);
+        KeyValue keyValue =
+            new KeyValue(keyBytes, FAMILY, QUALIFIER, HConstants.LATEST_TIMESTAMP, valueBytes);
+        if (firstCell == null) {
+          firstCell = keyValue;
+        } else if (secondCell == null) {
+          secondCell = keyValue;
+        }
+        writer.append(keyValue);
+      }
+    }
+  }
+
+  private void testReleaseBlock(Algorithm compression, DataBlockEncoding encoding)
+      throws Exception {
+    Configuration conf = new Configuration(UTIL.getConfiguration());
+    Path dir = UTIL.getDataTestDir("testReleasingBlock");
+    FileSystem fs = dir.getFileSystem(conf);
+    try {
+      String hfileName = "testReleaseBlock_hfile_0_" + System.currentTimeMillis();
+      Path hfilePath = new Path(dir, hfileName);
+      int cellCount = 1000;
+      LOG.info("Start to write {} cells into hfile: {}", cellCount, hfilePath);
+      writeHFile(conf, fs, hfilePath, compression, encoding, cellCount);
+
+      BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+      Assert.assertNotNull(defaultBC);
+      HFile.Reader reader =
+          HFile.createReader(fs, hfilePath, new CacheConfig(conf, defaultBC), true, conf);
+      Assert.assertTrue(reader instanceof HFileReaderImpl);
+      // We've built an HFile with a data block index tree of 16 levels.
+      Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+      HFileScanner scanner = reader.getScanner(true, true, false);
+      BlockWithScanInfo scanInfo = reader.getDataBlockIndexReader()
+          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE);
+      BlockWithScanInfo scanInfo2 = reader.getDataBlockIndexReader()
+          .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE);
+      HFileBlock block = scanInfo.getHFileBlock();
+      HFileBlock block2 = scanInfo2.getHFileBlock();
+      // One refCnt for blockCache and the other refCnt for RPC path.
+      Assert.assertEquals(block.refCnt(), 2);
+      Assert.assertEquals(block2.refCnt(), 2);
+      Assert.assertFalse(block == block2);
+
+      scanner.seekTo(firstCell);
+      Assert.assertEquals(block.refCnt(), 3);
+
+      // Seek to the same block again: the curBlock won't change and nothing is read from the
+      // BlockCache, so the refCnt should be unchanged.
+      scanner.seekTo(firstCell);
+      Assert.assertEquals(block.refCnt(), 3);
+
+      scanner.seekTo(secondCell);
+      Assert.assertEquals(block.refCnt(), 3);
+      Assert.assertEquals(block2.refCnt(), 3);
+
+      // After shipped(), the block will be released, but block2 is still referenced by curBlock.
+      scanner.shipped();
+      Assert.assertEquals(block.refCnt(), 2);
+      Assert.assertEquals(block2.refCnt(), 3);
+
+      // Try to ship again, though with nothing to client.
+      scanner.shipped();
+      Assert.assertEquals(block.refCnt(), 2);
+      Assert.assertEquals(block2.refCnt(), 3);
+
+      // The curBlock(block2) will also be released.
+      scanner.close();
+      Assert.assertEquals(block2.refCnt(), 2);
+
+      // Finish the block & block2 RPC path
+      block.release();
+      block2.release();
+      Assert.assertEquals(block.refCnt(), 1);
+      Assert.assertEquals(block2.refCnt(), 1);
+
+      // Evict the LRUBlockCache
+      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 2);
+      Assert.assertEquals(block.refCnt(), 0);
+      Assert.assertEquals(block2.refCnt(), 0);
+
+      int count = 0;
+      Assert.assertTrue(scanner.seekTo());
+      ++count;
+      while (scanner.next()) {
+        count++;
+      }
+      assertEquals(cellCount, count);
+    } finally {
+      fs.delete(dir, true);
+    }
+  }
+
+  /**
+   * See HBASE-22480
+   */
+  @Test
+  public void testSeekBefore() throws IOException {
+    Configuration conf = new Configuration(UTIL.getConfiguration());
+    Path dir = UTIL.getDataTestDir("testSeekBefore");
+    FileSystem fs = dir.getFileSystem(conf);
+    try {
+      String hfileName = "testSeekBefore_hfile_0_" + System.currentTimeMillis();
+      Path hfilePath = new Path(dir, hfileName);
+      int cellCount = 1000;
+      LOG.info("Start to write {} cells into hfile: {}", cellCount, hfilePath);
+      writeHFile(conf, fs, hfilePath, Algorithm.NONE, DataBlockEncoding.NONE, cellCount);
+
+      BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+      Assert.assertNotNull(defaultBC);
+      HFile.Reader reader =
+          HFile.createReader(fs, hfilePath, new CacheConfig(conf, defaultBC), true, conf);
+      Assert.assertTrue(reader instanceof HFileReaderImpl);
+      // We've built an HFile with a data block index tree of 16 levels.
+      Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+      HFileScanner scanner = reader.getScanner(true, true, false);
+      HFileBlock block1 = reader.getDataBlockIndexReader()
+          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+          .getHFileBlock();
+      HFileBlock block2 = reader.getDataBlockIndexReader()
+          .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE)
+          .getHFileBlock();
+      Assert.assertEquals(block1.refCnt(), 2);
+      Assert.assertEquals(block2.refCnt(), 2);
+
+      // Let the curBlock refer to block2.
+      scanner.seekTo(secondCell);
+      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block2);
+      Assert.assertEquals(3, block2.refCnt());
+
+      // Release the block1, only one reference: blockCache.
+      Assert.assertFalse(block1.release());
+      Assert.assertEquals(1, block1.refCnt());
+      // Release block2, so the remaining references are: 1. scanner; 2. blockCache.
+      Assert.assertFalse(block2.release());
+      Assert.assertEquals(2, block2.refCnt());
+
+      // Do the seekBefore: the newBlock will be the previous block of curBlock.
+      Assert.assertTrue(scanner.seekBefore(secondCell));
+      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block1);
+      // Two reference for block1: 1. scanner; 2. blockCache.
+      Assert.assertEquals(2, block1.refCnt());
+      // Reference count of block2 must be unchanged because we haven't shipped.
+      Assert.assertEquals(2, block2.refCnt());
+
+      // Do the shipped
+      scanner.shipped();
+      Assert.assertEquals(2, block1.refCnt());
+      Assert.assertEquals(1, block2.refCnt());
+
+      // Do the close
+      scanner.close();
+      Assert.assertEquals(1, block1.refCnt());
+      Assert.assertEquals(1, block2.refCnt());
+
+      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 2);
+      Assert.assertEquals(0, block1.refCnt());
+      Assert.assertEquals(0, block2.refCnt());
+
+      // Reload the block1 again.
+      block1 = reader.getDataBlockIndexReader()
+          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+          .getHFileBlock();
+      Assert.assertFalse(block1.release());
+      Assert.assertEquals(1, block1.refCnt());
+      // Re-seek to the begin.
+      Assert.assertTrue(scanner.seekTo());
+      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block1);
+      Assert.assertEquals(2, block1.refCnt());
+      // Return false because firstCell <= c[0]
+      Assert.assertFalse(scanner.seekBefore(firstCell));
+      // The block1 shouldn't be released because we still don't do the shipped or close.
+      Assert.assertEquals(2, block1.refCnt());
+
+      scanner.close();
+      Assert.assertEquals(1, block1.refCnt());
+      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 1);
+      Assert.assertEquals(0, block1.refCnt());
+    } finally {
+      fs.delete(dir, true);
+    }
+  }
+
+  @Test
+  public void testDefault() throws Exception {
+    testReleaseBlock(Algorithm.NONE, DataBlockEncoding.NONE);
+  }
+
+  @Test
+  public void testCompression() throws Exception {
+    testReleaseBlock(Algorithm.GZ, DataBlockEncoding.NONE);
+  }
+
+  @Test
+  public void testDataBlockEncoding() throws Exception {
+    testReleaseBlock(Algorithm.NONE, DataBlockEncoding.ROW_INDEX_V1);
+  }
+
+  @Test
+  public void testDataBlockEncodingAndCompression() throws Exception {
+    testReleaseBlock(Algorithm.GZ, DataBlockEncoding.ROW_INDEX_V1);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
index a355ab0..9b4d768 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
@@ -955,12 +955,6 @@ public class TestLruBlockCache {
     public BlockType getBlockType() {
       return BlockType.DATA;
     }
-
-    @Override
-    public MemoryType getMemoryType() {
-      return MemoryType.EXCLUSIVE;
-    }
-
   }
 
   static void testMultiThreadGetAndEvictBlockInternal(BlockCache cache) throws Exception {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestTinyLfuBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestTinyLfuBlockCache.java
index 9a333f8..bbe3182 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestTinyLfuBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestTinyLfuBlockCache.java
@@ -296,11 +296,6 @@ public class TestTinyLfuBlockCache {
     }
 
     @Override
-    public MemoryType getMemoryType() {
-      return MemoryType.EXCLUSIVE;
-    }
-
-    @Override
     public void serialize(ByteBuffer destination, boolean includeNextBlockMetadata) {
     }
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
index 6015706..cf356f3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
@@ -32,7 +32,6 @@ import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
@@ -148,7 +147,6 @@ public class TestBucketCacheRefCnt {
       assertEquals(1, blk.refCnt());
 
       Cacheable block = cache.getBlock(key, false, false, false);
-      assertTrue(block.getMemoryType() == MemoryType.SHARED);
       assertTrue(block instanceof HFileBlock);
       assertTrue(((HFileBlock) block).getByteBuffAllocator() == alloc);
       assertEquals(2, block.refCnt());
@@ -157,7 +155,6 @@ public class TestBucketCacheRefCnt {
       assertEquals(3, block.refCnt());
 
       Cacheable newBlock = cache.getBlock(key, false, false, false);
-      assertTrue(newBlock.getMemoryType() == MemoryType.SHARED);
       assertTrue(newBlock instanceof HFileBlock);
       assertTrue(((HFileBlock) newBlock).getByteBuffAllocator() == alloc);
       assertEquals(4, newBlock.refCnt());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
index 1a8964f..2184fa5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
@@ -22,7 +22,6 @@ import java.nio.ByteBuffer;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.nio.ByteBuff;
@@ -127,7 +126,7 @@ public class TestByteBufferIOEngine {
     private int identifier;
 
     @Override
-    public Cacheable deserialize(final ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
+    public Cacheable deserialize(final ByteBuff b, ByteBuffAllocator alloc)
         throws IOException {
       this.buf = b;
       return null;


[hbase] 05/22: HBASE-22005 Use ByteBuff's refcnt to track the life cycle of data block

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 57df2d79c0fa9b69d73a96d5ec5811aee5c83709
Author: huzheng <op...@gmail.com>
AuthorDate: Sat Feb 16 21:37:18 2019 +0800

    HBASE-22005 Use ByteBuff's refcnt to track the life cycle of data block
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  |  13 ++-
 .../hadoop/hbase/io/TestByteBuffAllocator.java     |   2 +-
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |  25 +++--
 .../hadoop/hbase/io/hfile/BlockCacheUtil.java      |   5 +-
 .../apache/hadoop/hbase/io/hfile/CacheConfig.java  |  17 ++-
 .../apache/hadoop/hbase/io/hfile/Cacheable.java    |  45 +++++++-
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 115 ++++++++++++++++-----
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  80 ++++++++------
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |  42 ++++----
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  60 ++++++++++-
 .../org/apache/hadoop/hbase/ipc/RpcServer.java     |   5 +
 .../hadoop/hbase/ipc/RpcServerInterface.java       |   7 ++
 .../hadoop/hbase/regionserver/HMobStore.java       |   9 +-
 .../apache/hadoop/hbase/regionserver/HStore.java   |   3 +-
 .../regionserver/RegionServicesForStores.java      |  18 ++++
 .../io/encoding/TestLoadAndSwitchEncodeOnDisk.java |   7 +-
 .../hadoop/hbase/io/hfile/CacheTestUtils.java      |  13 ++-
 .../hadoop/hbase/io/hfile/TestCacheConfig.java     |   3 +-
 .../hadoop/hbase/io/hfile/TestCacheOnWrite.java    |  34 +++---
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |  16 +--
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    |  48 +++++++--
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |  23 +++--
 .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java |   5 +-
 .../hbase/io/hfile/TestHFileDataBlockEncoder.java  |  10 +-
 .../hadoop/hbase/io/hfile/TestHFileEncryption.java |   4 +-
 .../hadoop/hbase/io/hfile/TestHFileWriterV3.java   |   3 +-
 .../hadoop/hbase/io/hfile/TestLruBlockCache.java   |   6 +-
 .../apache/hadoop/hbase/io/hfile/TestPrefetch.java |  14 +--
 .../hbase/io/hfile/bucket/TestBucketCache.java     |  75 ++++++++++++--
 .../regionserver/TestSecureBulkLoadManager.java    |   3 +-
 30 files changed, 524 insertions(+), 186 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 1833462..0020e23 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -62,6 +62,11 @@ public class ByteBuffAllocator {
 
   private static final Logger LOG = LoggerFactory.getLogger(ByteBuffAllocator.class);
 
+  // The on-heap allocator is mostly used for testing, but also for non-test paths such as snapshot
+  // scans, where there is no RpcServer to initialize the allocator. In that case we just use the
+  // default heap allocator, which allocates ByteBuffers from the heap, wrapped in a ByteBuff.
+  public static final ByteBuffAllocator HEAP = ByteBuffAllocator.createOnHeap();
+
   public static final String MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.allocator.max.buffer.count";
 
   public static final String BUFFER_SIZE_KEY = "hbase.ipc.server.allocator.buffer.size";
@@ -131,7 +136,7 @@ public class ByteBuffAllocator {
    * designed for testing purpose or disabled reservoir case.
    * @return allocator to allocate on-heap ByteBuffer.
    */
-  public static ByteBuffAllocator createOnHeap() {
+  private static ByteBuffAllocator createOnHeap() {
     return new ByteBuffAllocator(false, 0, DEFAULT_BUFFER_SIZE, Integer.MAX_VALUE);
   }
 
@@ -167,7 +172,11 @@ public class ByteBuffAllocator {
       }
     }
     // Allocated from heap, let the JVM free its memory.
-    return new SingleByteBuff(NONE, ByteBuffer.allocate(this.bufSize));
+    return allocateOnHeap(this.bufSize);
+  }
+
+  private SingleByteBuff allocateOnHeap(int size) {
+    return new SingleByteBuff(NONE, ByteBuffer.allocate(size));
   }
 
   /**
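
The HEAP constant added above gives code paths without an RpcServer a shared fallback allocator. A
hedged usage sketch (the surrounding class and method are made up for illustration):

// Minimal sketch: allocate a ByteBuff from the shared heap allocator. Heap buffers are not
// pooled, so the JVM simply garbage-collects them once they are no longer referenced.
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.nio.ByteBuff;

final class HeapAllocSketch {
  static void example() {
    ByteBuff buf = ByteBuffAllocator.HEAP.allocateOneBuffer();  // on-heap ByteBuffer wrapped as ByteBuff
    buf.put((byte) 42);                                         // use it like any other ByteBuff
  }
}
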
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
index 0976c11..4375032 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
@@ -128,7 +128,7 @@ public class TestByteBuffAllocator {
   @Test
   public void testAllocateOneBuffer() {
     // Allocate from on-heap
-    ByteBuffAllocator allocator = ByteBuffAllocator.createOnHeap();
+    ByteBuffAllocator allocator = ByteBuffAllocator.HEAP;
     ByteBuff buf = allocator.allocateOneBuffer();
     assertTrue(buf.hasArray());
     assertEquals(ByteBuffAllocator.DEFAULT_BUFFER_SIZE, buf.remaining());
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 9756aa3..570519c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -135,14 +135,25 @@ public interface BlockCache extends Iterable<CachedBlock> {
   BlockCache [] getBlockCaches();
 
   /**
-   * Called when the scanner using the block decides to return the block once its usage
-   * is over.
-   * This API should be called after the block is used, failing to do so may have adverse effects
-   * by preventing the blocks from being evicted because of which it will prevent new hot blocks
-   * from getting added to the block cache.  The implementation of the BlockCache will decide
-   * on what to be done with the block based on the memory type of the block's {@link MemoryType}.
+   * Called when the scanner using the block decides to decrease the block's refCnt and return it
+   * once its usage is over. This API should be called after the block is used; failing to do so
+   * may have adverse effects by preventing the block from being evicted, which in turn prevents
+   * new hot blocks from getting added to the block cache. The implementation of the BlockCache
+   * will decide what to do with the block based on the memory type of the block's
+   * {@link MemoryType}. <br>
+   * <br>
+   * Note that if two handlers read from the backingMap of an off-heap BucketCache at the same
+   * time, the BucketCache returns two ByteBuffs that reference the same memory area in the buckets
+   * but are wrapped by two different ByteBuff instances, each with its own independent refCnt(=1).
+   * So if the two handlers call returnBlock with different blocks, there is no problem; but if
+   * both handlers call returnBlock with the same block, a refCnt exception will happen here. <br>
+   * TODO let's unify the ByteBuff's refCnt and the BucketEntry's refCnt in HBASE-21957; after that
+   * we'll just call Cacheable#release instead of calling release in some paths and returnBlock in
+   * others in the current version.
    * @param cacheKey the cache key of the block
    * @param block the hfileblock to be returned
    */
-  default void returnBlock(BlockCacheKey cacheKey, Cacheable block){}
+  default void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
+    block.release();
+  }
 }
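
Since the default returnBlock implementation above now calls block.release(), the pairing rule is
that every getBlock in a handler must be matched by exactly one returnBlock for that handler's
reference. A minimal sketch, with the handler body left as a placeholder:

// Sketch of the getBlock/returnBlock pairing described above; the default returnBlock now
// simply calls block.release(), dropping this handler's reference.
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.Cacheable;

final class ReturnBlockSketch {
  static void handle(BlockCache cache, BlockCacheKey key) {
    Cacheable block = cache.getBlock(key, true, false, true);
    if (block == null) {
      return;
    }
    try {
      // ... read cells out of the block for this RPC (placeholder) ...
    } finally {
      cache.returnBlock(key, block);  // exactly one return per successful getBlock
    }
  }
}
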
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
index 0cb2bd1..02c7b17 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
@@ -229,10 +229,7 @@ public class BlockCacheUtil {
   public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
       BlockCacheKey cacheKey, Cacheable newBlock) {
     Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
-    if (null == existingBlock) {
-      // Not exist now.
-      return true;
-    }
+    existingBlock.retain();
     try {
       int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
       if (comparison < 0) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
index cd9303d..53c216f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
@@ -21,6 +21,7 @@ import java.util.Optional;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -128,6 +129,8 @@ public class CacheConfig {
   // Local reference to the block cache
   private final BlockCache blockCache;
 
+  private final ByteBuffAllocator byteBuffAllocator;
+
   /**
    * Create a cache configuration using the specified configuration object and
    * defaults for family level settings. Only use if no column family context.
@@ -138,7 +141,7 @@ public class CacheConfig {
   }
 
   public CacheConfig(Configuration conf, BlockCache blockCache) {
-    this(conf, null, blockCache);
+    this(conf, null, blockCache, ByteBuffAllocator.HEAP);
   }
 
   /**
@@ -147,7 +150,8 @@ public class CacheConfig {
    * @param conf hbase configuration
    * @param family column family configuration
    */
-  public CacheConfig(Configuration conf, ColumnFamilyDescriptor family, BlockCache blockCache) {
+  public CacheConfig(Configuration conf, ColumnFamilyDescriptor family, BlockCache blockCache,
+      ByteBuffAllocator byteBuffAllocator) {
     this.cacheDataOnRead = conf.getBoolean(CACHE_DATA_ON_READ_KEY, DEFAULT_CACHE_DATA_ON_READ) &&
         (family == null ? true : family.isBlockCacheEnabled());
     this.inMemory = family == null ? DEFAULT_IN_MEMORY : family.isInMemory();
@@ -171,6 +175,7 @@ public class CacheConfig {
     this.prefetchOnOpen = conf.getBoolean(PREFETCH_BLOCKS_ON_OPEN_KEY, DEFAULT_PREFETCH_ON_OPEN) ||
         (family == null ? false : family.isPrefetchBlocksOnOpen());
     this.blockCache = blockCache;
+    this.byteBuffAllocator = byteBuffAllocator;
     LOG.info("Created cacheConfig: " + this + (family == null ? "" : " for family " + family) +
         " with blockCache=" + blockCache);
   }
@@ -190,6 +195,7 @@ public class CacheConfig {
     this.prefetchOnOpen = cacheConf.prefetchOnOpen;
     this.dropBehindCompaction = cacheConf.dropBehindCompaction;
     this.blockCache = cacheConf.blockCache;
+    this.byteBuffAllocator = cacheConf.byteBuffAllocator;
   }
 
   private CacheConfig() {
@@ -203,6 +209,7 @@ public class CacheConfig {
     this.prefetchOnOpen = false;
     this.dropBehindCompaction = false;
     this.blockCache = null;
+    this.byteBuffAllocator = ByteBuffAllocator.HEAP;
   }
 
   /**
@@ -360,6 +367,10 @@ public class CacheConfig {
     return Optional.ofNullable(this.blockCache);
   }
 
+  public ByteBuffAllocator getByteBuffAllocator() {
+    return this.byteBuffAllocator;
+  }
+
   @Override
   public String toString() {
     return "cacheDataOnRead=" + shouldCacheDataOnRead() + ", cacheDataOnWrite="
@@ -368,4 +379,4 @@ public class CacheConfig {
         + shouldEvictOnClose() + ", cacheDataCompressed=" + shouldCacheDataCompressed()
         + ", prefetchOnOpen=" + shouldPrefetchOnOpen();
   }
-}
\ No newline at end of file
+}
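
For context, a minimal sketch (not part of this patch) of constructing a CacheConfig with the new
allocator parameter, using only the constructor and getter added above. Real server code passes the
RPC server's pooled allocator instead of the heap default, as the HStore/RegionServicesForStores
changes further down show.

    // Sketch only: wires the allocator-aware CacheConfig constructor added in this patch.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;

    public class CacheConfigAllocatorSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // No column family context, no block cache, and the on-heap allocator: blocks read
        // through this CacheConfig are backed by plain heap ByteBuffers, as before.
        CacheConfig cacheConf = new CacheConfig(conf, null, null, ByteBuffAllocator.HEAP);
        System.out.println("allocator = " + cacheConf.getByteBuffAllocator());
      }
    }
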
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
index a842967..93b520e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
@@ -24,6 +24,8 @@ import java.nio.ByteBuffer;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.HeapSize;
 
+import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
+
 /**
  * Cacheable is an interface that allows for an object to be cached. If using an
  * on heap cache, just use heapsize. If using an off heap cache, Cacheable
@@ -34,7 +36,7 @@ import org.apache.hadoop.hbase.io.HeapSize;
  *
  */
 @InterfaceAudience.Private
-public interface Cacheable extends HeapSize {
+public interface Cacheable extends HeapSize, ReferenceCounted {
   /**
    * Returns the length of the ByteBuffer required to serialized the object. If the
    * object cannot be serialized, it should return 0.
@@ -75,4 +77,45 @@ public interface Cacheable extends HeapSize {
   enum MemoryType {
     SHARED, EXCLUSIVE
   }
+
+  /******************************* ReferenceCounted Interfaces ***********************************/
+
+  /**
+   * Increase the reference count; the memory can only be freed once no references remain.
+   */
+  default Cacheable retain() {
+    return this;
+  }
+
+  default Cacheable retain(int increment) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Reference count of this Cacheable.
+   */
+  default int refCnt() {
+    return 0;
+  }
+
+  /**
+   * Decrease the reference count, and if no references remain, free the memory of this object. Its
+   * backing store is usually a {@link org.apache.hadoop.hbase.nio.ByteBuff}, whose NIO ByteBuffers
+   * will be returned to the {@link org.apache.hadoop.hbase.io.ByteBuffAllocator}.
+   */
+  default boolean release() {
+    return false;
+  }
+
+  default boolean release(int increment) {
+    throw new UnsupportedOperationException();
+  }
+
+  default ReferenceCounted touch() {
+    throw new UnsupportedOperationException();
+  }
+
+  default ReferenceCounted touch(Object hint) {
+    throw new UnsupportedOperationException();
+  }
 }
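
To make the contract above concrete, here is a purely hypothetical, self-contained model (not HBase
code) of the retain()/release()/refCnt() semantics the interface now exposes: a block starts with
one reference, each additional holder retains it, and the backing memory is only reclaimed when the
last holder releases it.

    import java.util.concurrent.atomic.AtomicInteger;

    // Hypothetical stand-in for a reference-counted block; real Cacheable implementations
    // delegate to the refCnt of their backing ByteBuff instead.
    class RefCountedBlock {
      private final AtomicInteger refCnt = new AtomicInteger(1); // creator holds the first ref

      RefCountedBlock retain() {
        refCnt.incrementAndGet();
        return this;
      }

      /** @return true if this call dropped the last reference and the memory was reclaimed. */
      boolean release() {
        // In HBase, hitting zero is where the ByteBuff would hand its NIO ByteBuffers back
        // to the ByteBuffAllocator.
        return refCnt.decrementAndGet() == 0;
      }

      int refCnt() {
        return refCnt.get();
      }

      public static void main(String[] args) {
        RefCountedBlock block = new RefCountedBlock();  // refCnt = 1 (reader)
        block.retain();                                 // refCnt = 2 (e.g. block cache)
        System.out.println(block.release());            // false, refCnt = 1
        System.out.println(block.release());            // true,  refCnt = 0 -> reclaimed
      }
    }
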
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 4773678..22a8295 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -22,6 +22,8 @@ import java.io.DataOutput;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
@@ -31,6 +33,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -201,6 +204,8 @@ public class HFileBlock implements Cacheable {
    */
   private int nextBlockOnDiskSize = UNSET;
 
+  private ByteBuffAllocator allocator;
+
   /**
    * On a checksum failure, do these many succeeding read requests using hdfs checksums before
    * auto-reenabling hbase checksum verification.
@@ -278,7 +283,10 @@ public class HFileBlock implements Cacheable {
       boolean usesChecksum = buf.get() == (byte) 1;
       long offset = buf.getLong();
       int nextBlockOnDiskSize = buf.getInt();
-      return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null);
+      // TODO: make the newly created HFileBlock use the off-heap allocator. This needs a change
+      // to the deserializer or to the deserialize interface.
+      return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null,
+          ByteBuffAllocator.HEAP);
     }
 
     @Override
@@ -313,7 +321,7 @@ public class HFileBlock implements Cacheable {
   private HFileBlock(HFileBlock that, boolean bufCopy) {
     init(that.blockType, that.onDiskSizeWithoutHeader, that.uncompressedSizeWithoutHeader,
       that.prevBlockOffset, that.offset, that.onDiskDataSizeWithHeader, that.nextBlockOnDiskSize,
-      that.fileContext);
+      that.fileContext, that.allocator);
     if (bufCopy) {
       this.buf = new SingleByteBuff(ByteBuffer.wrap(that.buf.toBytes(0, that.buf.limit())));
     } else {
@@ -345,9 +353,9 @@ public class HFileBlock implements Cacheable {
   public HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
       int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader,
       long offset, final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
-      HFileContext fileContext) {
+      HFileContext fileContext, ByteBuffAllocator allocator) {
     init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
-      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
+      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext, allocator);
     this.buf = new SingleByteBuff(b);
     if (fillHeader) {
       overwriteHeader();
@@ -363,7 +371,7 @@ public class HFileBlock implements Cacheable {
    * @param buf Has header, content, and trailing checksums if present.
    */
   HFileBlock(ByteBuff buf, boolean usesHBaseChecksum, MemoryType memType, final long offset,
-      final int nextBlockOnDiskSize, HFileContext fileContext)
+      final int nextBlockOnDiskSize, HFileContext fileContext, ByteBuffAllocator allocator)
       throws IOException {
     buf.rewind();
     final BlockType blockType = BlockType.read(buf);
@@ -393,7 +401,7 @@ public class HFileBlock implements Cacheable {
     fileContext = fileContextBuilder.build();
     assert usesHBaseChecksum == fileContext.isUseHBaseChecksum();
     init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
-      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext);
+      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext, allocator);
     this.memType = memType;
     this.offset = offset;
     this.buf = buf;
@@ -405,7 +413,8 @@ public class HFileBlock implements Cacheable {
    */
   private void init(BlockType blockType, int onDiskSizeWithoutHeader,
       int uncompressedSizeWithoutHeader, long prevBlockOffset, long offset,
-      int onDiskDataSizeWithHeader, final int nextBlockOnDiskSize, HFileContext fileContext) {
+      int onDiskDataSizeWithHeader, final int nextBlockOnDiskSize, HFileContext fileContext,
+      ByteBuffAllocator allocator) {
     this.blockType = blockType;
     this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
     this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
@@ -414,6 +423,7 @@ public class HFileBlock implements Cacheable {
     this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
     this.nextBlockOnDiskSize = nextBlockOnDiskSize;
     this.fileContext = fileContext;
+    this.allocator = allocator;
   }
 
   /**
@@ -441,6 +451,26 @@ public class HFileBlock implements Cacheable {
     return blockType;
   }
 
+  @Override
+  public int refCnt() {
+    return buf.refCnt();
+  }
+
+  @Override
+  public HFileBlock retain() {
+    buf.retain();
+    return this;
+  }
+
+  /**
+   * Call {@link ByteBuff#release()} to decrease the reference count; if no other references
+   * remain, the {@link ByteBuffer} is returned to the {@link org.apache.hadoop.hbase.io.ByteBuffAllocator}.
+   */
+  @Override
+  public boolean release() {
+    return buf.release();
+  }
+
   /** @return get data block encoding id that was used to encode this block */
   short getDataBlockEncodingId() {
     if (blockType != BlockType.ENCODED_DATA) {
@@ -664,7 +694,7 @@ public class HFileBlock implements Cacheable {
     int headerSize = headerSize();
     int capacityNeeded = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
 
-    ByteBuff newBuf = new SingleByteBuff(ByteBuffer.allocate(capacityNeeded));
+    ByteBuff newBuf = allocator.allocate(capacityNeeded);
 
     // Copy header bytes into newBuf.
     // newBuf is HBB so no issue in calling array()
@@ -684,7 +714,7 @@ public class HFileBlock implements Cacheable {
     final int cksumBytes = totalChecksumBytes();
     final int headerSize = headerSize();
     final int expectedCapacity = headerSize + uncompressedSizeWithoutHeader + cksumBytes;
-    final int bufCapacity = buf.capacity();
+    final int bufCapacity = buf.remaining();
     return bufCapacity == expectedCapacity || bufCapacity == expectedCapacity + headerSize;
   }
 
@@ -1221,7 +1251,8 @@ public class HFileBlock implements Cacheable {
           cacheConf.shouldCacheCompressed(blockType.getCategory()) ? cloneOnDiskBufferWithHeader()
               : cloneUncompressedBufferWithHeader(),
           FILL_HEADER, startOffset, UNSET,
-          onDiskBlockBytesWithHeader.size() + onDiskChecksum.length, newContext);
+          onDiskBlockBytesWithHeader.size() + onDiskChecksum.length, newContext,
+          cacheConf.getByteBuffAllocator());
     }
   }
 
@@ -1239,7 +1270,10 @@ public class HFileBlock implements Cacheable {
     void writeToBlock(DataOutput out) throws IOException;
   }
 
-  /** Iterator for {@link HFileBlock}s. */
+  /**
+   * Iterator for reading {@link HFileBlock}s in the load-on-open section, such as the root data
+   * index block, meta index block, file info block, etc.
+   */
   interface BlockIterator {
     /**
      * Get the next block, or null if there are no more blocks to iterate.
@@ -1247,10 +1281,20 @@ public class HFileBlock implements Cacheable {
     HFileBlock nextBlock() throws IOException;
 
     /**
-     * Similar to {@link #nextBlock()} but checks block type, throws an
-     * exception if incorrect, and returns the HFile block
+     * Similar to {@link #nextBlock()} but checks block type, throws an exception if incorrect, and
+     * returns the HFile block
      */
     HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
+
+    /**
+     * Now we use the {@link ByteBuffAllocator} to manage the nio ByteBuffers for HFileBlocks, so
+     * all of those ByteBuffers must be deallocated at the end of their life. The BlockIterator's
+     * life cycle starts when an HFileReader is opened and ends at HFileReader#close, so we keep
+     * track of all blocks read until {@link BlockIterator#freeBlocks()} is called when closing the
+     * HFileReader. The total size of the blocks in the load-on-open section should be quite small,
+     * so tracking them should be OK.
+     */
+    void freeBlocks();
   }
 
   /** An HFile block reader with iteration ability. */
@@ -1353,10 +1397,12 @@ public class HFileBlock implements Cacheable {
     // Cache the fileName
     private String pathName;
 
+    private final ByteBuffAllocator allocator;
+
     private final Lock streamLock = new ReentrantLock();
 
     FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
-        HFileContext fileContext) throws IOException {
+        HFileContext fileContext, ByteBuffAllocator allocator) throws IOException {
       this.fileSize = fileSize;
       this.hfs = hfs;
       if (path != null) {
@@ -1364,6 +1410,7 @@ public class HFileBlock implements Cacheable {
       }
       this.fileContext = fileContext;
       this.hdrSize = headerSize(fileContext.isUseHBaseChecksum());
+      this.allocator = allocator;
 
       this.streamWrapper = stream;
       // Older versions of HBase didn't support checksum.
@@ -1376,15 +1423,18 @@ public class HFileBlock implements Cacheable {
      * A constructor that reads files with the latest minor version. This is used by unit tests
      * only.
      */
-    FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext)
-        throws IOException {
-      this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
+    FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext,
+        ByteBuffAllocator allocator) throws IOException {
+      this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext, allocator);
     }
 
     @Override
     public BlockIterator blockRange(final long startOffset, final long endOffset) {
       final FSReader owner = this; // handle for inner class
       return new BlockIterator() {
+        private volatile boolean freed = false;
+        // Tracking all read blocks until we call freeBlocks.
+        private List<HFileBlock> blockTracker = new ArrayList<>();
         private long offset = startOffset;
         // Cache length of next block. Current block has the length of next block in it.
         private long length = -1;
@@ -1397,19 +1447,33 @@ public class HFileBlock implements Cacheable {
           HFileBlock b = readBlockData(offset, length, false, false);
           offset += b.getOnDiskSizeWithHeader();
           length = b.getNextBlockOnDiskSize();
-          return b.unpack(fileContext, owner);
+          HFileBlock uncompressed = b.unpack(fileContext, owner);
+          if (uncompressed != b) {
+            b.release(); // Need to release the compressed Block now.
+          }
+          blockTracker.add(uncompressed);
+          return uncompressed;
         }
 
         @Override
-        public HFileBlock nextBlockWithBlockType(BlockType blockType)
-            throws IOException {
+        public HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException {
           HFileBlock blk = nextBlock();
           if (blk.getBlockType() != blockType) {
-            throw new IOException("Expected block of type " + blockType
-                + " but found " + blk.getBlockType());
+            throw new IOException(
+                "Expected block of type " + blockType + " but found " + blk.getBlockType());
           }
           return blk;
         }
+
+        @Override
+        public void freeBlocks() {
+          if (freed) {
+            return;
+          }
+          blockTracker.forEach(HFileBlock::release);
+          blockTracker = null;
+          freed = true;
+        }
       };
     }
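
A short sketch (assuming only the FSReader/BlockIterator methods shown in this diff) of how the new
freeBlocks() is meant to be paired with blockRange()/nextBlock(). The real caller, HFileReaderImpl
below, keeps the iterator as a field and calls freeBlocks() from close(), since the load-on-open
blocks stay referenced for the reader's whole lifetime.

    // Sketch only; compiles against the package-private API added in this patch.
    static void readLoadOnOpenSection(HFileBlock.FSReader fsBlockReader, long startOffset,
        long endOffset) throws IOException {
      HFileBlock.BlockIterator iter = fsBlockReader.blockRange(startOffset, endOffset);
      try {
        for (HFileBlock b = iter.nextBlock(); b != null; b = iter.nextBlock()) {
          // ... parse root data index / meta index / file info from b ...
        }
      } finally {
        // Releases every block handed out by this iterator, returning the pooled buffers
        // to the ByteBuffAllocator.
        iter.freeBlocks();
      }
    }
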
 
@@ -1664,8 +1728,7 @@ public class HFileBlock implements Cacheable {
       // says where to start reading. If we have the header cached, then we don't need to read
       // it again and we can likely read from last place we left off w/o need to backup and reread
       // the header we read last time through here.
-      ByteBuff onDiskBlock =
-          new SingleByteBuff(ByteBuffer.allocate(onDiskSizeWithHeader + hdrSize));
+      ByteBuff onDiskBlock = allocator.allocate(onDiskSizeWithHeader + hdrSize);
       boolean initHFileBlockSuccess = false;
       try {
         if (headerBuf != null) {
@@ -1682,7 +1745,7 @@ public class HFileBlock implements Cacheable {
         // Do a few checks before we go instantiate HFileBlock.
         assert onDiskSizeWithHeader > this.hdrSize;
         verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, checksumSupport);
-        ByteBuff curBlock = onDiskBlock.duplicate().limit(onDiskSizeWithHeader);
+        ByteBuff curBlock = onDiskBlock.duplicate().position(0).limit(onDiskSizeWithHeader);
         // Verify checksum of the data before using it for building HFileBlock.
         if (verifyChecksum && !validateChecksum(offset, curBlock, hdrSize)) {
           return null;
@@ -1695,7 +1758,7 @@ public class HFileBlock implements Cacheable {
         // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
         // contains the header of next block, so no need to set next block's header in it.
         HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, MemoryType.EXCLUSIVE,
-            offset, nextBlockOnDiskSize, fileContext);
+            offset, nextBlockOnDiskSize, fileContext, allocator);
         // Run check on uncompressed sizings.
         if (!fileContext.isCompressedOrEncrypted()) {
           hFileBlock.sanityCheckUncompressed();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 69f45be..5fdb66f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -24,6 +24,7 @@ import java.security.Key;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.conf.Configurable;
@@ -138,6 +139,13 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
   private IdLock offsetLock = new IdLock();
 
   /**
+   * The iterator will track all blocks in the load-on-open section. Since we now use the
+   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} to manage the ByteBuffers backing blocks,
+   * we must ensure that all of those ByteBuffers are deallocated in the end.
+   */
+  private final HFileBlock.BlockIterator blockIter;
+
+  /**
    * Blocks read from the load-on-open section, excluding data root index, meta
    * index, and file info.
    */
@@ -199,7 +207,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     this.primaryReplicaReader = primaryReplicaReader;
     checkFileVersion();
     this.hfileContext = createHFileContext(fsdis, fileSize, hfs, path, trailer);
-    this.fsBlockReader = new HFileBlock.FSReaderImpl(fsdis, fileSize, hfs, path, hfileContext);
+    this.fsBlockReader = new HFileBlock.FSReaderImpl(fsdis, fileSize, hfs, path, hfileContext,
+        cacheConf.getByteBuffAllocator());
 
     // Comparator class name is stored in the trailer in version 2.
     comparator = trailer.createComparator();
@@ -207,11 +216,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         trailer.getNumDataIndexLevels(), this);
     metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
 
-    // Parse load-on-open data.
-
-    HFileBlock.BlockIterator blockIter = fsBlockReader.blockRange(
-        trailer.getLoadOnOpenDataOffset(),
-        fileSize - trailer.getTrailerSize());
+    // Initialize a block iterator, and parse the load-on-open blocks in the following.
+    blockIter = fsBlockReader.blockRange(trailer.getLoadOnOpenDataOffset(),
+      fileSize - trailer.getTrailerSize());
 
     // Data index. We also read statistics about the block index written after
     // the root level.
@@ -372,12 +379,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
   @Override
   public void returnBlock(HFileBlock block) {
     if (block != null) {
-      this.cacheConf.getBlockCache().ifPresent(blockCache -> {
-        BlockCacheKey cacheKey =
-            new BlockCacheKey(this.getFileContext().getHFileName(), block.getOffset(),
-                this.isPrimaryReplicaReader(), block.getBlockType());
-        blockCache.returnBlock(cacheKey, block);
-      });
+      if (this.cacheConf.getBlockCache().isPresent()) {
+        BlockCacheKey cacheKey = new BlockCacheKey(this.getFileContext().getHFileName(),
+            block.getOffset(), this.isPrimaryReplicaReader(), block.getBlockType());
+        cacheConf.getBlockCache().get().returnBlock(cacheKey, block);
+      } else {
+        // Release the block here; it means the RPC path no longer references this block.
+        block.release();
+      }
     }
   }
 
@@ -543,7 +552,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       this.curBlock = null;
     }
 
-    private void returnBlockToCache(HFileBlock block) {
+    private void returnBlock(HFileBlock block) {
       if (LOG.isTraceEnabled()) {
         LOG.trace("Returning the block : " + block);
       }
@@ -552,11 +561,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
 
     private void returnBlocks(boolean returnAll) {
       for (int i = 0; i < this.prevBlocks.size(); i++) {
-        returnBlockToCache(this.prevBlocks.get(i));
+        returnBlock(this.prevBlocks.get(i));
       }
       this.prevBlocks.clear();
       if (returnAll && this.curBlock != null) {
-        returnBlockToCache(this.curBlock);
+        returnBlock(this.curBlock);
         this.curBlock = null;
       }
     }
@@ -1136,10 +1145,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       return true;
     }
 
-    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException,
-        CorruptHFileException {
+    protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException {
       HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
-          isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
+        isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
       if (newBlock.getOffset() < 0) {
         throw new IOException(
             "Invalid block offset: " + newBlock.getOffset() + ", path=" + reader.getPath());
@@ -1393,12 +1401,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     // Per meta key from any given file, synchronize reads for said block. This
     // is OK to do for meta blocks because the meta block index is always
     // single-level.
-    synchronized (metaBlockIndexReader
-        .getRootBlockKey(block)) {
+    synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
       // Check cache for block. If found return.
       long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block);
-      BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset,
-        this.isPrimaryReplicaReader(), BlockType.META);
+      BlockCacheKey cacheKey =
+          new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META);
 
       cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory());
       HFileBlock cachedBlock =
@@ -1411,15 +1418,19 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       }
       // Cache Miss, please load.
 
-      HFileBlock metaBlock = fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false).
-          unpack(hfileContext, fsBlockReader);
+      HFileBlock compressedBlock =
+          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false);
+      HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
+      if (compressedBlock != uncompressedBlock) {
+        compressedBlock.release();
+      }
 
       // Cache the block
       if (cacheBlock) {
-        cacheConf.getBlockCache()
-            .ifPresent(cache -> cache.cacheBlock(cacheKey, metaBlock, cacheConf.isInMemory()));
+        cacheConf.getBlockCache().ifPresent(
+          cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory()));
       }
-      return metaBlock;
+      return uncompressedBlock;
     }
   }
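
The unpack-then-release pattern used in getMetaBlock() above (and again in readBlock() below, and in
BlockIterator#nextBlock) can be summarized by the following sketch, which assumes only the
readBlockData()/unpack()/release() calls visible in this diff: unpack() may return either the same
block or a new uncompressed copy, and in the latter case the compressed block's pooled buffer must
be released explicitly.

    // Sketch only: read a block and unpack it, releasing the compressed copy if a new block
    // was created. The caller owns (and must eventually release) the returned block.
    static HFileBlock readAndUnpack(HFileBlock.FSReader reader, HFileContext ctx, long offset,
        long size) throws IOException {
      HFileBlock compressed = reader.readBlockData(offset, size, true, false);
      HFileBlock unpacked = compressed.unpack(ctx, reader);
      if (unpacked != compressed) {
        compressed.release(); // the uncompressed copy has its own buffer now
      }
      return unpacked;
    }
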
 
@@ -1501,14 +1512,18 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
 
         // Cache the block if necessary
+        AtomicBoolean cachedRaw = new AtomicBoolean(false);
         cacheConf.getBlockCache().ifPresent(cache -> {
           if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
-            cache.cacheBlock(cacheKey,
-                cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
-                cacheConf.isInMemory());
+            cachedRaw.set(cacheConf.shouldCacheCompressed(category));
+            cache.cacheBlock(cacheKey, cachedRaw.get() ? hfileBlock : unpacked,
+              cacheConf.isInMemory());
           }
         });
-
+        if (unpacked != hfileBlock && !cachedRaw.get()) {
+          // If hfileBlock is a separate block that was not cached, its life ends here.
+          hfileBlock.release();
+        }
         if (updateCacheMetrics && hfileBlock.getBlockType().isData()) {
           HFile.DATABLOCK_READ_COUNT.increment();
         }
@@ -1581,6 +1596,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
   @Override
   public void close(boolean evictOnClose) throws IOException {
     PrefetchExecutor.cancel(path);
+    // Deallocate blocks in load-on-open section
+    blockIter.freeBlocks();
+    // Deallocate data blocks
     cacheConf.getBlockCache().ifPresent(cache -> {
       if (evictOnClose) {
         int numEvicted = cache.evictBlocksByHfileName(name);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index ecbf37c..c2f07cd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -402,6 +402,8 @@ public class LruBlockCache implements FirstLevelBlockCache {
       }
       return;
     }
+    // The block will be referenced by the LRUBlockCache, so increase its refCnt here.
+    buf.retain();
     cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
     long newSize = updateSizeMetrics(cb, false);
     map.put(cacheKey, cb);
@@ -440,9 +442,12 @@ public class LruBlockCache implements FirstLevelBlockCache {
   /**
    * Cache the block with the specified name and buffer.
    * <p>
-   *
+   * TODO after HBASE-22005, we may cache a block that was allocated off-heap, but our LRU cache
+   * sizing is based on heap size, so we should handle this in HBASE-22127. That will introduce a
+   * switch controlling whether the LRU cache is on-heap or not; if it is, we may need to copy the
+   * memory on-heap, otherwise the caching size is based on the off-heap size.
    * @param cacheKey block's cache key
-   * @param buf      block buffer
+   * @param buf block buffer
    */
   @Override
   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
@@ -490,14 +495,20 @@ public class LruBlockCache implements FirstLevelBlockCache {
       // However if this is a retry ( second time in double checked locking )
       // And it's already a miss then the l2 will also be a miss.
       if (victimHandler != null && !repeat) {
+        // The handler will increase the result's refCnt for the RPC path, so no extra retain is needed.
         Cacheable result = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
 
         // Promote this to L1.
-        if (result != null && caching) {
-          if (result instanceof HFileBlock && ((HFileBlock) result).usesSharedMemory()) {
-            result = ((HFileBlock) result).deepClone();
+        if (result != null) {
+          if (caching) {
+            if (result instanceof HFileBlock && ((HFileBlock) result).usesSharedMemory()) {
+              Cacheable original = result;
+              result = ((HFileBlock) original).deepClone();
+              // deepClone creates a new copy, so return the original one in order to free it.
+              victimHandler.returnBlock(cacheKey, original);
+            }
+            cacheBlock(cacheKey, result, /* inMemory = */ false);
           }
-          cacheBlock(cacheKey, result, /* inMemory = */ false);
         }
         return result;
       }
@@ -505,6 +516,8 @@ public class LruBlockCache implements FirstLevelBlockCache {
     }
     if (updateCacheMetrics) stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
     cb.access(count.incrementAndGet());
+    // It will be referenced by the RPC path, so increase the refCnt here.
+    cb.getBuffer().retain();
     return cb.getBuffer();
   }
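
As a purely illustrative walk-through (not HBase code, and simplified), the retains added in
cacheBlock() and getBlock() above, the release in evictBlock() just below, and the release done by
the RPC path via the default BlockCache#returnBlock are expected to balance roughly like this:

    public class LruRefCntWalkthrough {
      public static void main(String[] args) {
        int refCnt = 1;  // block created by the reader, backed by an allocator buffer
        refCnt++;        // cacheBlock(): the LRU cache now references it           -> 2
        refCnt++;        // getBlock(): retained on behalf of the RPC path          -> 3
        refCnt--;        // returnBlock()/release(): the RPC response was shipped   -> 2
        refCnt--;        // evictBlock(): the cache drops its reference             -> 1
        refCnt--;        // the reader/scanner releases its own reference           -> 0,
                         // at which point the buffers go back to the ByteBuffAllocator
        System.out.println("final refCnt = " + refCnt); // 0
      }
    }
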
 
@@ -558,10 +571,12 @@ public class LruBlockCache implements FirstLevelBlockCache {
    * @return the heap size of evicted block
    */
   protected long evictBlock(LruCachedBlock block, boolean evictedByEvictionProcess) {
-    boolean found = map.remove(block.getCacheKey()) != null;
-    if (!found) {
+    LruCachedBlock previous = map.remove(block.getCacheKey());
+    if (previous == null) {
       return 0;
     }
+    // Decrease the block's reference count, and if refCount is 0, then it'll auto-deallocate.
+    previous.getBuffer().release();
     updateSizeMetrics(block, true);
     long val = elements.decrementAndGet();
     if (LOG.isTraceEnabled()) {
@@ -1143,17 +1158,6 @@ public class LruBlockCache implements FirstLevelBlockCache {
   }
 
   @VisibleForTesting
-  Map<BlockType, Integer> getBlockTypeCountsForTest() {
-    Map<BlockType, Integer> counts = new EnumMap<>(BlockType.class);
-    for (LruCachedBlock cb : map.values()) {
-      BlockType blockType = cb.getBuffer().getBlockType();
-      Integer count = counts.get(blockType);
-      counts.put(blockType, (count == null ? 0 : count) + 1);
-    }
-    return counts;
-  }
-
-  @VisibleForTesting
   public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
     Map<DataBlockEncoding, Integer> counts = new EnumMap<>(DataBlockEncoding.class);
     for (LruCachedBlock block : map.values()) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 009b294..0f3446e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -135,7 +135,7 @@ public class BucketCache implements BlockCache, HeapSize {
 
   // Store the block in this map before writing it to cache
   @VisibleForTesting
-  transient final ConcurrentMap<BlockCacheKey, RAMQueueEntry> ramCache;
+  transient final RAMCache ramCache;
   // In this map, store the block's meta data like offset, length
   @VisibleForTesting
   transient ConcurrentMap<BlockCacheKey, BucketEntry> backingMap;
@@ -289,7 +289,7 @@ public class BucketCache implements BlockCache, HeapSize {
     }
 
     assert writerQueues.size() == writerThreads.length;
-    this.ramCache = new ConcurrentHashMap<>();
+    this.ramCache = new RAMCache();
 
     this.backingMap = new ConcurrentHashMap<>((int) blockNumCapacity);
 
@@ -959,9 +959,8 @@ public class BucketCache implements BlockCache, HeapSize {
             index++;
             continue;
           }
-          BucketEntry bucketEntry =
-            re.writeToCache(ioEngine, bucketAllocator, realCacheSize);
-          // Successfully added.  Up index and add bucketEntry. Clear io exceptions.
+          BucketEntry bucketEntry = re.writeToCache(ioEngine, bucketAllocator, realCacheSize);
+          // Successfully added. Up index and add bucketEntry. Clear io exceptions.
           bucketEntries[index] = bucketEntry;
           if (ioErrorStartTime > 0) {
             ioErrorStartTime = -1;
@@ -1539,6 +1538,7 @@ public class BucketCache implements BlockCache, HeapSize {
           ioEngine.write(sliceBuf, offset);
           ioEngine.write(metadata, offset + len - metadata.limit());
         } else {
+          // Only used for testing.
           ByteBuffer bb = ByteBuffer.allocate(len);
           data.serialize(bb, true);
           ioEngine.write(bb, offset);
@@ -1664,6 +1664,7 @@ public class BucketCache implements BlockCache, HeapSize {
 
   @Override
   public void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
+    block.release();
     if (block.getMemoryType() == MemoryType.SHARED) {
       BucketEntry bucketEntry = backingMap.get(cacheKey);
       if (bucketEntry != null) {
@@ -1707,4 +1708,53 @@ public class BucketCache implements BlockCache, HeapSize {
   float getMemoryFactor() {
     return memoryFactor;
   }
+
+  /**
+   * Wraps the delegate ConcurrentMap and maintains the reference count of each cached block.
+   */
+  static class RAMCache {
+    final ConcurrentMap<BlockCacheKey, RAMQueueEntry> delegate = new ConcurrentHashMap<>();
+
+    public boolean containsKey(BlockCacheKey key) {
+      return delegate.containsKey(key);
+    }
+
+    public RAMQueueEntry get(BlockCacheKey key) {
+      RAMQueueEntry re = delegate.get(key);
+      if (re != null) {
+        // It'll be referenced by RPC, so retain here.
+        re.getData().retain();
+      }
+      return re;
+    }
+
+    public RAMQueueEntry putIfAbsent(BlockCacheKey key, RAMQueueEntry entry) {
+      RAMQueueEntry previous = delegate.putIfAbsent(key, entry);
+      if (previous == null) {
+        // The RAMCache now references this entry, so its reference count should be incremented.
+        entry.getData().retain();
+      }
+      return previous;
+    }
+
+    public RAMQueueEntry remove(BlockCacheKey key) {
+      RAMQueueEntry previous = delegate.remove(key);
+      if (previous != null) {
+        previous.getData().release();
+      }
+      return previous;
+    }
+
+    public boolean isEmpty() {
+      return delegate.isEmpty();
+    }
+
+    public void clear() {
+      Iterator<Map.Entry<BlockCacheKey, RAMQueueEntry>> it = delegate.entrySet().iterator();
+      while (it.hasNext()) {
+        it.next().getValue().getData().release();
+        it.remove();
+      }
+    }
+  }
 }
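
For illustration only, a self-contained sketch of the "map that owns a reference" idea behind
RAMCache, written over a generic value type rather than RAMQueueEntry. It uses computeIfPresent so
the get-time retain cannot race with a concurrent remove; the RAMCache above retains after a plain
get instead. This is offered as an illustration of the idea, not as a change to this patch.

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;
    import java.util.concurrent.atomic.AtomicInteger;

    // Hypothetical, simplified refcount-maintaining map wrapper.
    class RefCountingCache<K> {
      static final class Entry {
        final AtomicInteger refCnt = new AtomicInteger(1);
        void retain() { refCnt.incrementAndGet(); }
        void release() { refCnt.decrementAndGet(); } // at 0 the real cache would free buffers
      }

      private final ConcurrentMap<K, Entry> delegate = new ConcurrentHashMap<>();

      Entry putIfAbsent(K key, Entry entry) {
        Entry previous = delegate.putIfAbsent(key, entry);
        if (previous == null) {
          entry.retain(); // the cache itself now holds a reference
        }
        return previous;
      }

      Entry get(K key) {
        // Retain atomically with the lookup so a concurrent remove cannot free it underneath us.
        return delegate.computeIfPresent(key, (k, e) -> { e.retain(); return e; });
      }

      Entry remove(K key) {
        Entry previous = delegate.remove(key);
        if (previous != null) {
          previous.release(); // drop the cache's reference
        }
        return previous;
      }
    }
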
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
index ac8c26c..8fdc8d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
@@ -728,6 +728,11 @@ public abstract class RpcServer implements RpcServerInterface,
   }
 
   @Override
+  public ByteBuffAllocator getByteBuffAllocator() {
+    return this.bbAllocator;
+  }
+
+  @Override
   public void setRsRpcServices(RSRpcServices rsRpcServices) {
     this.rsRpcServices = rsRpcServices;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
index cf67e98..0f875d8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
@@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.ipc;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.CellScanner;
 import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
@@ -88,5 +89,11 @@ public interface RpcServerInterface {
 
   RpcScheduler getScheduler();
 
+  /**
+   * Allocator used to allocate and free ByteBuffers; those ByteBuffers can be on-heap or off-heap.
+   * @return byte buffer allocator
+   */
+  ByteBuffAllocator getByteBuffAllocator();
+
   void setRsRpcServices(RSRpcServices rsRpcServices);
 }
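
A minimal sketch of the allocate/release round trip behind the allocator exposed here, using only
ByteBuffAllocator calls that appear elsewhere in this patch (create, allocateOneBuffer, release,
getQueueSize). The meaning of the boolean flag and of the pool accounting is inferred from their
usage in TestHFile and is an assumption, not documentation.

    // Sketch only; compiles against this branch.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.nio.ByteBuff;

    public class ByteBuffAllocatorSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true); // pooled allocator
        ByteBuff buf = alloc.allocateOneBuffer();  // take one pooled buffer (may be off-heap)
        try {
          // ... fill the buffer with block bytes / an RPC response ...
        } finally {
          buf.release();                           // hand the buffer back to the pool
        }
        System.out.println("buffers back in pool: " + alloc.getQueueSize());
      }
    }
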
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
index 632642f..596aa3d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Tag;
 import org.apache.hadoop.hbase.TagType;
@@ -391,8 +392,12 @@ public class HMobStore extends HStore {
       Path path = new Path(location, fileName);
       try {
         file = mobFileCache.openFile(fs, path, cacheConf);
-        return readPt != -1 ? file.readCell(search, cacheMobBlocks, readPt) : file.readCell(search,
-          cacheMobBlocks);
+        Cell cell = readPt != -1 ? file.readCell(search, cacheMobBlocks, readPt)
+            : file.readCell(search, cacheMobBlocks);
+        // Now we will return the blocks for mob cells to the allocator before shipping the
+        // response to the RPC client, which would otherwise be a memory leak, so just copy the
+        // cell as an on-heap KV here. Will remove this in HBASE-22122 (TODO).
+        return KeyValueUtil.copyToNewKeyValue(cell);
       } catch (IOException e) {
         mobFileCache.evictFile(fileName);
         throwable = e;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
index 11509dd..7e796d8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java
@@ -377,7 +377,8 @@ public class HStore implements Store, HeapSize, StoreConfigInformation, Propagat
    * @param family The current column family.
    */
   protected void createCacheConf(final ColumnFamilyDescriptor family) {
-    this.cacheConf = new CacheConfig(conf, family, region.getBlockCache());
+    this.cacheConf = new CacheConfig(conf, family, region.getBlockCache(),
+        region.getRegionServicesForStores().getByteBuffAllocator());
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServicesForStores.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServicesForStores.java
index 595ae7a..36392d7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServicesForStores.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionServicesForStores.java
@@ -23,6 +23,7 @@ import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.executor.ExecutorType;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.wal.WAL;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -66,6 +67,23 @@ public class RegionServicesForStores {
     return region.getWAL();
   }
 
+  private static ByteBuffAllocator ALLOCATOR_FOR_TEST;
+
+  private static synchronized ByteBuffAllocator getAllocatorForTest() {
+    if (ALLOCATOR_FOR_TEST == null) {
+      ALLOCATOR_FOR_TEST = ByteBuffAllocator.HEAP;
+    }
+    return ALLOCATOR_FOR_TEST;
+  }
+
+  public ByteBuffAllocator getByteBuffAllocator() {
+    if (rsServices != null && rsServices.getRpcServer() != null) {
+      return rsServices.getRpcServer().getByteBuffAllocator();
+    } else {
+      return getAllocatorForTest();
+    }
+  }
+
   private static ThreadPoolExecutor INMEMORY_COMPACTION_POOL_FOR_TEST;
 
   private static synchronized ThreadPoolExecutor getInMemoryCompactionPoolForTest() {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
index 0fd6e7b..fb9e44f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.TestMiniClusterLoadSequential;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.ClassRule;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.runners.Parameterized.Parameters;
@@ -45,9 +46,8 @@ import org.junit.runners.Parameterized.Parameters;
 /**
  * Uses the load tester
  */
-@Category({IOTests.class, MediumTests.class})
-public class TestLoadAndSwitchEncodeOnDisk extends
-    TestMiniClusterLoadSequential {
+@Category({ IOTests.class, MediumTests.class })
+public class TestLoadAndSwitchEncodeOnDisk extends TestMiniClusterLoadSequential {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
@@ -74,6 +74,7 @@ public class TestLoadAndSwitchEncodeOnDisk extends
 
   @Override
   @Test
+  @Ignore("TODO Ignore this UT temporarily, will fix this in the critical HBASE-21937.")
   public void loadTest() throws Exception {
     Admin admin = TEST_UTIL.getAdmin();
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index 3c4ae78..6d6f2a7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.MultithreadedTestUtil;
 import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
@@ -366,11 +367,10 @@ public class CacheTestUtils {
                           .withBytesPerCheckSum(0)
                           .withChecksumType(ChecksumType.NULL)
                           .build();
-      HFileBlock generated = new HFileBlock(BlockType.DATA,
-          onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
-          prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER,
-          blockSize,
-          onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, -1, meta);
+      HFileBlock generated = new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader,
+          uncompressedSizeWithoutHeader, prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER,
+          blockSize, onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, -1, meta,
+          ByteBuffAllocator.HEAP);
 
       String strKey;
       /* No conflicting keys */
@@ -401,8 +401,7 @@ public class CacheTestUtils {
   }
 
   public static void getBlockAndAssertEquals(BlockCache cache, BlockCacheKey key,
-                                             Cacheable blockToCache, ByteBuffer destBuffer,
-                                             ByteBuffer expectedBuffer) {
+      Cacheable blockToCache, ByteBuffer destBuffer, ByteBuffer expectedBuffer) {
     destBuffer.clear();
     cache.cacheBlock(key, blockToCache);
     Cacheable actualBlock = cache.getBlock(key, false, false, false);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
index 0b9cc19..3dae278 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
@@ -250,7 +251,7 @@ public class TestCacheConfig {
     HColumnDescriptor family = new HColumnDescriptor("testDisableCacheDataBlock");
     family.setBlockCacheEnabled(false);
 
-    cacheConfig = new CacheConfig(conf, family, null);
+    cacheConfig = new CacheConfig(conf, family, null, ByteBuffAllocator.HEAP);
     assertFalse(cacheConfig.shouldCacheBlockOnRead(BlockCategory.DATA));
     assertFalse(cacheConfig.shouldCacheCompressed(BlockCategory.DATA));
     assertFalse(cacheConfig.shouldCacheDataCompressed());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
index 115e765..60a4445 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
@@ -60,6 +60,7 @@ import org.apache.hadoop.hbase.util.BloomFilterFactory;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ChecksumType;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -112,7 +113,7 @@ public class TestCacheOnWrite {
   private static final int NUM_VALID_KEY_TYPES =
       KeyValue.Type.values().length - 2;
 
-  private static enum CacheOnWriteType {
+  private enum CacheOnWriteType {
     DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY,
         BlockType.DATA, BlockType.ENCODED_DATA),
     BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
@@ -124,12 +125,11 @@ public class TestCacheOnWrite {
     private final BlockType blockType1;
     private final BlockType blockType2;
 
-    private CacheOnWriteType(String confKey, BlockType blockType) {
+    CacheOnWriteType(String confKey, BlockType blockType) {
       this(confKey, blockType, blockType);
     }
 
-    private CacheOnWriteType(String confKey, BlockType blockType1,
-        BlockType blockType2) {
+    CacheOnWriteType(String confKey, BlockType blockType1, BlockType blockType2) {
       this.blockType1 = blockType1;
       this.blockType2 = blockType2;
       this.confKey = confKey;
@@ -269,18 +269,17 @@ public class TestCacheOnWrite {
 
     DataBlockEncoding encodingInCache = NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding();
     List<Long> cachedBlocksOffset = new ArrayList<>();
-    Map<Long, HFileBlock> cachedBlocks = new HashMap<>();
+    Map<Long, Pair<HFileBlock, HFileBlock>> cachedBlocks = new HashMap<>();
     while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
       // Flags: don't cache the block, use pread, this is not a compaction.
       // Also, pass null for expected block type to avoid checking it.
       HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null,
           encodingInCache);
-      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
-          offset);
+      BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), offset);
       HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
       boolean isCached = fromCache != null;
       cachedBlocksOffset.add(offset);
-      cachedBlocks.put(offset, fromCache);
+      cachedBlocks.put(offset, fromCache == null ? null : Pair.newPair(block, fromCache));
       boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
       assertTrue("shouldBeCached: " + shouldBeCached+ "\n" +
           "isCached: " + isCached + "\n" +
@@ -332,19 +331,20 @@ public class TestCacheOnWrite {
       Long entry = iterator.next();
       BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
           entry);
-      HFileBlock hFileBlock = cachedBlocks.get(entry);
-      if (hFileBlock != null) {
-        // call return twice because for the isCache cased the counter would have got incremented
-        // twice
-        blockCache.returnBlock(blockCacheKey, hFileBlock);
-        if(cacheCompressedData) {
+      Pair<HFileBlock, HFileBlock> blockPair = cachedBlocks.get(entry);
+      if (blockPair != null) {
+        // Call return twice because for the isCached case the counter would have been incremented
+        // twice. Notice that here we need to call returnBlock with different blocks; see comments
+        // in BucketCache#returnBlock.
+        blockCache.returnBlock(blockCacheKey, blockPair.getSecond());
+        if (cacheCompressedData) {
           if (this.compress == Compression.Algorithm.NONE
               || cowType == CacheOnWriteType.INDEX_BLOCKS
               || cowType == CacheOnWriteType.BLOOM_BLOCKS) {
-            blockCache.returnBlock(blockCacheKey, hFileBlock);
+            blockCache.returnBlock(blockCacheKey, blockPair.getFirst());
           }
         } else {
-          blockCache.returnBlock(blockCacheKey, hFileBlock);
+          blockCache.returnBlock(blockCacheKey, blockPair.getFirst());
         }
       }
     }
@@ -457,7 +457,7 @@ public class TestCacheOnWrite {
       assertNotEquals(BlockType.ENCODED_DATA, block.getBlockType());
       assertNotEquals(BlockType.DATA, block.getBlockType());
     }
-    ((HRegion)region).close();
+    region.close();
   }
 
   @Test
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index a4135d7..c432fa9 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.nio.ByteBuff;
@@ -97,8 +98,8 @@ public class TestChecksum {
 
     FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
     meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
-    HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(
-        is, totalSize, (HFileSystem) fs, path, meta);
+    HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
+        meta, ByteBuffAllocator.HEAP);
     HFileBlock b = hbr.readBlockData(0, -1, false, false);
     assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
   }
@@ -143,8 +144,8 @@ public class TestChecksum {
 
       FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
       meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
-      HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(
-            is, totalSize, (HFileSystem) fs, path, meta);
+      HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
+          meta, ByteBuffAllocator.HEAP);
       HFileBlock b = hbr.readBlockData(0, -1, false, false);
 
       // verify SingleByteBuff checksum.
@@ -339,8 +340,9 @@ public class TestChecksum {
                .withHBaseCheckSum(true)
                .withBytesPerCheckSum(bytesPerChecksum)
                .build();
-        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(new FSDataInputStreamWrapper(
-            is, nochecksum), totalSize, hfs, path, meta);
+        HFileBlock.FSReader hbr =
+            new HFileBlock.FSReaderImpl(new FSDataInputStreamWrapper(is, nochecksum), totalSize,
+                hfs, path, meta, ByteBuffAllocator.HEAP);
         HFileBlock b = hbr.readBlockData(0, -1, pread, false);
         assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         is.close();
@@ -382,7 +384,7 @@ public class TestChecksum {
 
     public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
         Path path, HFileContext meta) throws IOException {
-      super(istream, fileSize, (HFileSystem) fs, path, meta);
+      super(istream, fileSize, (HFileSystem) fs, path, meta, ByteBuffAllocator.HEAP);
     }
 
     @Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index bc608be..f58fe3e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
@@ -27,7 +29,9 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Objects;
 import java.util.Random;
 import org.apache.hadoop.conf.Configuration;
@@ -42,6 +46,7 @@ import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
+import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
@@ -49,6 +54,7 @@ import org.apache.hadoop.hbase.KeyValue.Type;
 import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
@@ -58,6 +64,7 @@ import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.Writable;
+import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.junit.Rule;
@@ -99,24 +106,45 @@ public class TestHFile  {
 
   @Test
   public void testReaderWithoutBlockCache() throws Exception {
-     Path path = writeStoreFile();
-     try{
-       readStoreFile(path);
-     } catch (Exception e) {
-       // fail test
-       assertTrue(false);
-     }
+    int bufCount = 32;
+    Configuration that = HBaseConfiguration.create(conf);
+    that.setInt(MAX_BUFFER_COUNT_KEY, bufCount);
+    // All ByteBuffs will be allocated from the pooled buffers.
+    that.setInt(MIN_ALLOCATE_SIZE_KEY, 0);
+    ByteBuffAllocator alloc = ByteBuffAllocator.create(that, true);
+    List<ByteBuff> buffs = new ArrayList<>();
+    // Fill the allocator with bufCount ByteBuffers.
+    for (int i = 0; i < bufCount; i++) {
+      buffs.add(alloc.allocateOneBuffer());
+    }
+    Assert.assertEquals(alloc.getQueueSize(), 0);
+    for (ByteBuff buf : buffs) {
+      buf.release();
+    }
+    Assert.assertEquals(alloc.getQueueSize(), bufCount);
+    // start write to store file.
+    Path path = writeStoreFile();
+    try {
+      readStoreFile(path, that, alloc);
+    } catch (Exception e) {
+      // fail test
+      assertTrue(false);
+    }
+    Assert.assertEquals(bufCount, alloc.getQueueSize());
   }
 
-
-  private void readStoreFile(Path storeFilePath) throws Exception {
+  private void readStoreFile(Path storeFilePath, Configuration conf, ByteBuffAllocator alloc)
+      throws Exception {
     // Open the file reader with block cache disabled.
-    HFile.Reader reader = HFile.createReader(fs, storeFilePath, conf);
+    CacheConfig cache = new CacheConfig(conf, null, null, alloc);
+    HFile.Reader reader = HFile.createReader(fs, storeFilePath, cache, true, conf);
     long offset = 0;
     while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
       HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, null);
       offset += block.getOnDiskSizeWithHeader();
+      block.release(); // return the ByteBuffer back to the allocator.
     }
+    reader.close();
   }
 
   private Path writeStoreFile() throws IOException {
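
The rewritten testReaderWithoutBlockCache above pins down the pooled-allocation contract of ByteBuffAllocator: with MIN_ALLOCATE_SIZE_KEY set to 0 every allocation is served from the reservoir, the free queue drains while buffers are handed out, and it only refills once every block holding a pooled buffer has been released. A minimal sketch of that pattern (illustrative only; it assumes an existing Configuration named conf and the same imports the test uses):

    Configuration copy = HBaseConfiguration.create(conf);
    copy.setInt(MAX_BUFFER_COUNT_KEY, 4);      // cap the reservoir at 4 pooled ByteBuffers
    copy.setInt(MIN_ALLOCATE_SIZE_KEY, 0);     // serve every allocation from the reservoir
    ByteBuffAllocator alloc = ByteBuffAllocator.create(copy, true);
    ByteBuff buf = alloc.allocateOneBuffer();  // handed out to the caller, refCnt == 1
    try {
      // ... use the buffer, e.g. hand it to a block reader ...
    } finally {
      buf.release();                           // refCnt -> 0, the ByteBuffer returns to the queue
    }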
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 32160a1..efdae16 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.Tag;
 import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -326,7 +327,8 @@ public class TestHFileBlock {
         .withIncludesMvcc(includesMemstoreTS)
         .withIncludesTags(includesTag)
         .withCompression(algo).build();
-        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta);
+        HFileBlock.FSReader hbr =
+            new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
         HFileBlock b = hbr.readBlockData(0, -1, pread, false);
         is.close();
         assertEquals(0, HFile.getAndResetChecksumFailuresCount());
@@ -339,7 +341,7 @@ public class TestHFileBlock {
 
         if (algo == GZ) {
           is = fs.open(path);
-          hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta);
+          hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
           b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE +
                                 b.totalChecksumBytes(), pread, false);
           assertEquals(expected, b);
@@ -425,7 +427,8 @@ public class TestHFileBlock {
                 .withIncludesMvcc(includesMemstoreTS)
                 .withIncludesTags(includesTag)
                 .build();
-          HFileBlock.FSReaderImpl hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta);
+          HFileBlock.FSReaderImpl hbr =
+              new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
           hbr.setDataBlockEncoder(dataBlockEncoder);
           hbr.setIncludesMemStoreTS(includesMemstoreTS);
           HFileBlock blockFromHFile, blockUnpacked;
@@ -553,7 +556,8 @@ public class TestHFileBlock {
                               .withIncludesMvcc(includesMemstoreTS)
                               .withIncludesTags(includesTag)
                               .withCompression(algo).build();
-          HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta);
+          HFileBlock.FSReader hbr =
+              new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
           long curOffset = 0;
           for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
             if (!pread) {
@@ -737,7 +741,8 @@ public class TestHFileBlock {
                           .withIncludesTags(includesTag)
                           .withCompression(compressAlgo)
                           .build();
-      HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, fileSize, meta);
+      HFileBlock.FSReader hbr =
+          new HFileBlock.FSReaderImpl(is, fileSize, meta, ByteBuffAllocator.HEAP);
 
       Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
       ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<>(exec);
@@ -845,8 +850,8 @@ public class TestHFileBlock {
                           .withCompression(Algorithm.NONE)
                           .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                           .withChecksumType(ChecksumType.NULL).build();
-      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-          HFileBlock.FILL_HEADER, -1, 0, -1, meta);
+      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
+          -1, 0, -1, meta, ByteBuffAllocator.HEAP);
       long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(
           new MultiByteBuff(buf).getClass(), true)
           + HConstants.HFILEBLOCK_HEADER_SIZE + size);
@@ -869,9 +874,9 @@ public class TestHFileBlock {
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
     HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta);
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, ByteBuffAllocator.HEAP);
     HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta);
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, ByteBuffAllocator.HEAP);
     ByteBuffer buff1 = ByteBuffer.allocate(length);
     ByteBuffer buff2 = ByteBuffer.allocate(length);
     blockWithNextBlockMetadata.serialize(buff1, true);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
index 937b641..73f1c24 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -211,8 +212,8 @@ public class TestHFileBlockIndex {
                         .withIncludesTags(useTags)
                         .withCompression(compr)
                         .build();
-    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(istream, fs.getFileStatus(path)
-        .getLen(), meta);
+    HFileBlock.FSReader blockReader = new HFileBlock.FSReaderImpl(istream,
+        fs.getFileStatus(path).getLen(), meta, ByteBuffAllocator.HEAP);
 
     BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
     HFileBlockIndex.BlockIndexReader indexReader =
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
index 27f9b7a..5a6042c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.ByteArrayOutputStream;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -131,9 +132,8 @@ public class TestHFileDataBlockEncoder {
                         .withBlockSize(0)
                         .withChecksumType(ChecksumType.NULL)
                         .build();
-    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, 0,
-        0, -1, hfileContext);
+    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
+        0, 0, -1, hfileContext, ByteBuffAllocator.HEAP);
     HFileBlock cacheBlock = createBlockOnDisk(kvs, block, useTags);
     assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length);
   }
@@ -200,7 +200,7 @@ public class TestHFileDataBlockEncoder {
                         .build();
     HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf,
         HFileBlock.FILL_HEADER, 0,
-         0, -1, meta);
+         0, -1, meta, ByteBuffAllocator.HEAP);
     return b;
   }
 
@@ -223,7 +223,7 @@ public class TestHFileDataBlockEncoder {
     size = encodedBytes.length - block.getDummyHeaderForVersion().length;
     return new HFileBlock(context.getBlockType(), size, size, -1, ByteBuffer.wrap(encodedBytes),
         HFileBlock.FILL_HEADER, 0, block.getOnDiskDataSizeWithHeader(), -1,
-        block.getHFileContext());
+        block.getHFileContext(), ByteBuffAllocator.HEAP);
   }
 
   private void writeBlock(List<Cell> kvs, HFileContext fileContext, boolean useTags)
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
index d77af6d..1222d07 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.crypto.Cipher;
 import org.apache.hadoop.hbase.io.crypto.Encryption;
@@ -152,7 +153,8 @@ public class TestHFileEncryption {
       }
       FSDataInputStream is = fs.open(path);
       try {
-        HFileBlock.FSReaderImpl hbr = new HFileBlock.FSReaderImpl(is, totalSize, fileContext);
+        HFileBlock.FSReaderImpl hbr = new HFileBlock.FSReaderImpl(is, totalSize, fileContext,
+            ByteBuffAllocator.HEAP);
         long pos = 0;
         for (int i = 0; i < blocks; i++) {
           pos += readAndVerifyBlock(pos, fileContext, hbr, blockSizes[i]);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
index 0a1af87..b92f7c6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.Tag;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
@@ -181,7 +182,7 @@ public class TestHFileWriterV3 {
                         .withIncludesTags(useTags)
                         .withHBaseCheckSum(true).build();
     HFileBlock.FSReader blockReader =
-        new HFileBlock.FSReaderImpl(fsdis, fileSize, meta);
+        new HFileBlock.FSReaderImpl(fsdis, fileSize, meta, ByteBuffAllocator.HEAP);
     // Comparator class name is stored in the trailer in version 3.
     CellComparator comparator = trailer.createComparator();
     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
index df0bed5..3317a4d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache.EvictionThread;
 import org.apache.hadoop.hbase.testclassification.IOTests;
@@ -813,10 +814,11 @@ public class TestLruBlockCache {
     byte[] byteArr = new byte[length];
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
+    ByteBuffAllocator alloc = ByteBuffAllocator.HEAP;
     HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta);
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, alloc);
     HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta);
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, alloc);
 
     LruBlockCache cache = new LruBlockCache(maxSize, blockSize, false,
         (int)Math.ceil(1.2*maxSize/blockSize),
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java
index 9986bba..1365680 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestPrefetch.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -73,12 +74,12 @@ public class TestPrefetch {
 
   @Test
   public void testPrefetchSetInHCDWorks() {
-    ColumnFamilyDescriptor columnFamilyDescriptor =
-        ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("f")).setPrefetchBlocksOnOpen(true)
-            .build();
+    ColumnFamilyDescriptor columnFamilyDescriptor = ColumnFamilyDescriptorBuilder
+        .newBuilder(Bytes.toBytes("f")).setPrefetchBlocksOnOpen(true).build();
     Configuration c = HBaseConfiguration.create();
     assertFalse(c.getBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, false));
-    CacheConfig cc = new CacheConfig(c, columnFamilyDescriptor, blockCache);
+    CacheConfig cc =
+        new CacheConfig(c, columnFamilyDescriptor, blockCache, ByteBuffAllocator.HEAP);
     assertTrue(cc.shouldPrefetchOnOpen());
   }
 
@@ -129,9 +130,8 @@ public class TestPrefetch {
       HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, null);
       BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), offset);
       boolean isCached = blockCache.getBlock(blockCacheKey, true, false, true) != null;
-      if (block.getBlockType() == BlockType.DATA ||
-          block.getBlockType() == BlockType.ROOT_INDEX ||
-          block.getBlockType() == BlockType.INTERMEDIATE_INDEX) {
+      if (block.getBlockType() == BlockType.DATA || block.getBlockType() == BlockType.ROOT_INDEX
+          || block.getBlockType() == BlockType.INTERMEDIATE_INDEX) {
         assertTrue(isCached);
       }
       offset += block.getOnDiskSizeWithHeader();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index 19c1d66..1029a77 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -20,11 +20,11 @@ package org.apache.hadoop.hbase.io.hfile.bucket;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheTestUtils;
@@ -50,8 +51,9 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketAllocator.BucketSizeInfo;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketAllocator.IndexStatistics;
-import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
 import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMCache;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.junit.After;
@@ -114,8 +116,7 @@ public class TestBucketCache {
   private static class MockedBucketCache extends BucketCache {
 
     public MockedBucketCache(String ioEngineName, long capacity, int blockSize, int[] bucketSizes,
-        int writerThreads, int writerQLen, String persistencePath) throws FileNotFoundException,
-        IOException {
+        int writerThreads, int writerQLen, String persistencePath) throws IOException {
       super(ioEngineName, capacity, blockSize, bucketSizes, writerThreads, writerQLen,
           persistencePath);
       super.wait_when_cache = true;
@@ -133,10 +134,9 @@ public class TestBucketCache {
   }
 
   @Before
-  public void setup() throws FileNotFoundException, IOException {
-    cache =
-        new MockedBucketCache(ioEngineName, capacitySize, constructedBlockSize,
-            constructedBlockSizes, writeThreads, writerQLen, persistencePath);
+  public void setup() throws IOException {
+    cache = new MockedBucketCache(ioEngineName, capacitySize, constructedBlockSize,
+        constructedBlockSizes, writeThreads, writerQLen, persistencePath);
   }
 
   @After
@@ -430,10 +430,11 @@ public class TestBucketCache {
     byte[] byteArr = new byte[length];
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
+    ByteBuffAllocator allocator = ByteBuffAllocator.HEAP;
     HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta);
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, allocator);
     HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta);
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, allocator);
 
     BlockCacheKey key = new BlockCacheKey("key1", 0);
     ByteBuffer actualBuffer = ByteBuffer.allocate(length);
@@ -447,22 +448,74 @@ public class TestBucketCache {
       block1Buffer);
 
     waitUntilFlushedToBucket(cache, key);
+    assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
 
     // Add blockWithoutNextBlockMetada, expect blockWithNextBlockMetadata back.
     CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithoutNextBlockMetadata, actualBuffer,
       block1Buffer);
+    assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
 
     // Clear and add blockWithoutNextBlockMetadata
     cache.evictBlock(key);
+    assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
+
     assertNull(cache.getBlock(key, false, false, false));
     CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithoutNextBlockMetadata, actualBuffer,
       block2Buffer);
 
     waitUntilFlushedToBucket(cache, key);
+    assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
 
     // Add blockWithNextBlockMetadata, expect blockWithNextBlockMetadata to replace.
     CacheTestUtils.getBlockAndAssertEquals(cache, key, blockWithNextBlockMetadata, actualBuffer,
       block1Buffer);
+
+    waitUntilFlushedToBucket(cache, key);
+    assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
+  }
+
+  @Test
+  public void testRAMCache() {
+    int size = 100;
+    int length = HConstants.HFILEBLOCK_HEADER_SIZE + size;
+    byte[] byteArr = new byte[length];
+    ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
+    HFileContext meta = new HFileContextBuilder().build();
+
+    RAMCache cache = new RAMCache();
+    BlockCacheKey key1 = new BlockCacheKey("file-1", 1);
+    BlockCacheKey key2 = new BlockCacheKey("file-2", 2);
+    HFileBlock blk1 = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
+        -1, 52, -1, meta, ByteBuffAllocator.HEAP);
+    HFileBlock blk2 = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
+        -1, -1, -1, meta, ByteBuffAllocator.HEAP);
+    RAMQueueEntry re1 = new RAMQueueEntry(key1, blk1, 1, false);
+    RAMQueueEntry re2 = new RAMQueueEntry(key1, blk2, 1, false);
+
+    assertFalse(cache.containsKey(key1));
+    assertNull(cache.putIfAbsent(key1, re1));
+    assertEquals(2, ((HFileBlock) re1.getData()).getBufferReadOnly().refCnt());
+
+    assertNotNull(cache.putIfAbsent(key1, re2));
+    assertEquals(2, ((HFileBlock) re1.getData()).getBufferReadOnly().refCnt());
+    assertEquals(1, ((HFileBlock) re2.getData()).getBufferReadOnly().refCnt());
+
+    assertNull(cache.putIfAbsent(key2, re2));
+    assertEquals(2, ((HFileBlock) re1.getData()).getBufferReadOnly().refCnt());
+    assertEquals(2, ((HFileBlock) re2.getData()).getBufferReadOnly().refCnt());
+
+    cache.remove(key1);
+    assertEquals(1, ((HFileBlock) re1.getData()).getBufferReadOnly().refCnt());
+    assertEquals(2, ((HFileBlock) re2.getData()).getBufferReadOnly().refCnt());
+
+    cache.clear();
+    assertEquals(1, ((HFileBlock) re1.getData()).getBufferReadOnly().refCnt());
+    assertEquals(1, ((HFileBlock) re2.getData()).getBufferReadOnly().refCnt());
   }
 
   @Test
@@ -473,7 +526,7 @@ public class TestBucketCache {
     ByteBuffer buf = ByteBuffer.allocate(length);
     HFileContext meta = new HFileContextBuilder().build();
     HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-        offset, 52, -1, meta);
+        offset, 52, -1, meta, ByteBuffAllocator.HEAP);
 
     // initialize an mocked ioengine.
     IOEngine ioEngine = Mockito.mock(IOEngine.class);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSecureBulkLoadManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSecureBulkLoadManager.java
index eb25806..34da4d8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSecureBulkLoadManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSecureBulkLoadManager.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Table;
 import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.crypto.Encryption;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
@@ -214,7 +215,7 @@ public class TestSecureBulkLoadManager {
     ColumnFamilyDescriptor family = desc.getColumnFamily(FAMILY);
     Compression.Algorithm compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
 
-    CacheConfig writerCacheConf = new CacheConfig(conf, family, null);
+    CacheConfig writerCacheConf = new CacheConfig(conf, family, null, ByteBuffAllocator.HEAP);
     writerCacheConf.setCacheDataOnWrite(false);
     HFileContext hFileContext = new HFileContextBuilder()
         .withIncludesMvcc(false)


[hbase] 07/22: HBASE-22159 ByteBufferIOEngine should support write off-heap ByteBuff to the bufferArray


openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 0a105dc8d1a6f2cab20077bc92c8bf3c85fe8f9b
Author: huzheng <op...@gmail.com>
AuthorDate: Wed Apr 3 22:29:31 2019 +0800

    HBASE-22159 ByteBufferIOEngine should support write off-heap ByteBuff to the bufferArray
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  |  29 +-
 .../java/org/apache/hadoop/hbase/nio/ByteBuff.java |  33 ++
 .../apache/hadoop/hbase/util/ByteBufferArray.java  | 398 ++++++++++-----------
 .../hadoop/hbase/util/TestByteBufferArray.java     | 183 +++++++---
 .../hbase/io/hfile/bucket/ByteBufferIOEngine.java  | 100 +++---
 .../hfile/bucket/ExclusiveMemoryMmapIOEngine.java  |  15 +-
 .../hadoop/hbase/io/hfile/bucket/FileIOEngine.java |  12 +-
 .../hbase/io/hfile/bucket/FileMmapIOEngine.java    |   9 +-
 .../io/hfile/bucket/TestByteBufferIOEngine.java    |  58 +--
 .../bucket/TestExclusiveMemoryMmapEngine.java      |  31 +-
 .../hbase/io/hfile/bucket/TestFileIOEngine.java    |  24 +-
 11 files changed, 485 insertions(+), 407 deletions(-)
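
At a high level this commit switches ByteBufferArray from the byte[]-based getMultiple/putMultiple calls to ByteBuff-based read/write, so a bucket cache IO engine can hand an off-heap ByteBuff straight to the backing buffer array. A hedged sketch of the new surface (the capacity and the allocator lambda below are made-up values, mirroring the test code in this patch):

    ByteBufferArray array = new ByteBufferArray(8L * 1024 * 1024,
        size -> ByteBuffer.allocateDirect((int) size));    // constructor throws IOException
    ByteBuff src = ByteBuff.wrap(ByteBuffer.wrap(new byte[4096]));
    array.write(0, src);                 // copies src.remaining() bytes in, advancing src
    ByteBuff dst = ByteBuff.wrap(ByteBuffer.allocate(4096));
    array.read(0, dst);                  // copies dst.remaining() bytes back out, advancing dst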

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 984d46d..51de22a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -29,7 +29,6 @@ import sun.nio.ch.DirectBuffer;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.MultiByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -175,7 +174,7 @@ public class ByteBuffAllocator {
     return allocateOnHeap(this.bufSize);
   }
 
-  private SingleByteBuff allocateOnHeap(int size) {
+  private static SingleByteBuff allocateOnHeap(int size) {
     return new SingleByteBuff(NONE, ByteBuffer.allocate(size));
   }
 
@@ -213,7 +212,7 @@ public class ByteBuffAllocator {
       // just allocate the ByteBuffer from on-heap.
       bbs.add(ByteBuffer.allocate(remain));
     }
-    ByteBuff bb = wrap(bbs, () -> {
+    ByteBuff bb = ByteBuff.wrap(bbs, () -> {
       for (int i = 0; i < lenFromReservoir; i++) {
         this.putbackBuffer(bbs.get(i));
       }
@@ -238,30 +237,6 @@ public class ByteBuffAllocator {
     }
   }
 
-  public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
-    if (buffers == null || buffers.length == 0) {
-      throw new IllegalArgumentException("buffers shouldn't be null or empty");
-    }
-    return buffers.length == 1 ? new SingleByteBuff(recycler, buffers[0])
-        : new MultiByteBuff(recycler, buffers);
-  }
-
-  public static ByteBuff wrap(ByteBuffer[] buffers) {
-    return wrap(buffers, NONE);
-  }
-
-  public static ByteBuff wrap(List<ByteBuffer> buffers, Recycler recycler) {
-    if (buffers == null || buffers.size() == 0) {
-      throw new IllegalArgumentException("buffers shouldn't be null or empty");
-    }
-    return buffers.size() == 1 ? new SingleByteBuff(recycler, buffers.get(0))
-        : new MultiByteBuff(recycler, buffers.toArray(new ByteBuffer[0]));
-  }
-
-  public static ByteBuff wrap(List<ByteBuffer> buffers) {
-    return wrap(buffers, NONE);
-  }
-
   /**
    * @return One free DirectByteBuffer from the pool. If no free ByteBuffer and we have not reached
    *         the maximum pool size, it will create a new one and return. In case of max pool size
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
index 1ee3607..9339f43 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
@@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.nio;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
+import java.util.List;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ObjectIntPair;
@@ -557,4 +560,34 @@ public abstract class ByteBuff implements ReferenceCounted {
     return this.getClass().getSimpleName() + "[pos=" + position() + ", lim=" + limit() +
         ", cap= " + capacity() + "]";
   }
+
+  /********************************* ByteBuff wrapper methods ***********************************/
+
+  public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
+    if (buffers == null || buffers.length == 0) {
+      throw new IllegalArgumentException("buffers shouldn't be null or empty");
+    }
+    return buffers.length == 1 ? new SingleByteBuff(recycler, buffers[0])
+        : new MultiByteBuff(recycler, buffers);
+  }
+
+  public static ByteBuff wrap(ByteBuffer[] buffers) {
+    return wrap(buffers, ByteBuffAllocator.NONE);
+  }
+
+  public static ByteBuff wrap(List<ByteBuffer> buffers, Recycler recycler) {
+    if (buffers == null || buffers.size() == 0) {
+      throw new IllegalArgumentException("buffers shouldn't be null or empty");
+    }
+    return buffers.size() == 1 ? new SingleByteBuff(recycler, buffers.get(0))
+        : new MultiByteBuff(recycler, buffers.toArray(new ByteBuffer[0]));
+  }
+
+  public static ByteBuff wrap(List<ByteBuffer> buffers) {
+    return wrap(buffers, ByteBuffAllocator.NONE);
+  }
+
+  public static ByteBuff wrap(ByteBuffer buffer) {
+    return new SingleByteBuff(ByteBuffAllocator.NONE, buffer);
+  }
 }
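
The wrap helpers moved here from ByteBuffAllocator pick SingleByteBuff or MultiByteBuff based on how many ByteBuffers are passed, and accept an optional Recycler that runs once the wrapped buff's refCnt drops to zero. A small usage sketch (the buffers and the recycler body are illustrative; java.util.Arrays is assumed imported):

    ByteBuffer heap = ByteBuffer.allocate(1024);
    ByteBuffer direct = ByteBuffer.allocateDirect(1024);
    ByteBuff single = ByteBuff.wrap(heap);                             // SingleByteBuff, no recycler
    ByteBuff multi = ByteBuff.wrap(new ByteBuffer[] { heap, direct }); // MultiByteBuff, no recycler
    ByteBuff pooled = ByteBuff.wrap(Arrays.asList(heap, direct),
        () -> { /* put the buffers back into a pool here */ });
    pooled.release();   // last reference released, so the recycler above is invoked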
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
index d023339..e5a0b13 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
@@ -20,15 +20,14 @@ package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
+import java.util.function.BiConsumer;
 
-import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -38,279 +37,248 @@ import org.slf4j.LoggerFactory;
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 
 /**
- * This class manages an array of ByteBuffers with a default size 4MB. These
- * buffers are sequential and could be considered as a large buffer.It supports
- * reading/writing data from this large buffer with a position and offset
+ * This class manages an array of ByteBuffers with a default size 4MB. These buffers are sequential
+ * and can be considered as one large buffer. It supports reading/writing data from this large
+ * buffer with a position and offset.
  */
 @InterfaceAudience.Private
 public class ByteBufferArray {
   private static final Logger LOG = LoggerFactory.getLogger(ByteBufferArray.class);
 
   public static final int DEFAULT_BUFFER_SIZE = 4 * 1024 * 1024;
-  @VisibleForTesting
-  ByteBuffer buffers[];
-  private int bufferSize;
-  @VisibleForTesting
-  int bufferCount;
+  private final int bufferSize;
+  private final int bufferCount;
+  final ByteBuffer[] buffers;
 
   /**
-   * We allocate a number of byte buffers as the capacity. In order not to out
-   * of the array bounds for the last byte(see {@link ByteBufferArray#multiple}),
-   * we will allocate one additional buffer with capacity 0;
+   * We allocate a number of byte buffers as the capacity.
    * @param capacity total size of the byte buffer array
    * @param allocator the ByteBufferAllocator that will create the buffers
    * @throws IOException throws IOException if there is an exception thrown by the allocator
    */
-  public ByteBufferArray(long capacity, ByteBufferAllocator allocator)
-      throws IOException {
-    this.bufferSize = DEFAULT_BUFFER_SIZE;
-    if (this.bufferSize > (capacity / 16))
-      this.bufferSize = (int) roundUp(capacity / 16, 32768);
-    this.bufferCount = (int) (roundUp(capacity, bufferSize) / bufferSize);
-    LOG.info("Allocating buffers total=" + StringUtils.byteDesc(capacity)
-        + ", sizePerBuffer=" + StringUtils.byteDesc(bufferSize) + ", count="
-        + bufferCount);
-    buffers = new ByteBuffer[bufferCount + 1];
-    createBuffers(allocator);
+  public ByteBufferArray(long capacity, ByteBufferAllocator allocator) throws IOException {
+    this(getBufferSize(capacity), getBufferCount(capacity),
+        Runtime.getRuntime().availableProcessors(), capacity, allocator);
   }
 
   @VisibleForTesting
-  void createBuffers(ByteBufferAllocator allocator)
-      throws IOException {
-    int threadCount = getThreadCount();
-    ExecutorService service = new ThreadPoolExecutor(threadCount, threadCount, 0L,
-        TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());
-    int perThreadCount = (int)Math.floor((double) (bufferCount) / threadCount);
-    int lastThreadCount = bufferCount - (perThreadCount * (threadCount - 1));
-    Future<ByteBuffer[]>[] futures = new Future[threadCount];
+  ByteBufferArray(int bufferSize, int bufferCount, int threadCount, long capacity,
+      ByteBufferAllocator alloc) throws IOException {
+    this.bufferSize = bufferSize;
+    this.bufferCount = bufferCount;
+    LOG.info("Allocating buffers total={}, sizePerBuffer={}, count={}",
+      StringUtils.byteDesc(capacity), StringUtils.byteDesc(bufferSize), bufferCount);
+    this.buffers = new ByteBuffer[bufferCount];
+    createBuffers(threadCount, alloc);
+  }
+
+  private void createBuffers(int threadCount, ByteBufferAllocator alloc) throws IOException {
+    ExecutorService pool = Executors.newFixedThreadPool(threadCount);
+    int perThreadCount = bufferCount / threadCount;
+    int reminder = bufferCount % threadCount;
     try {
+      List<Future<ByteBuffer[]>> futures = new ArrayList<>(threadCount);
+      // Dispatch the creation task to each thread.
       for (int i = 0; i < threadCount; i++) {
-        // Last thread will have to deal with a different number of buffers
-        int buffersToCreate = (i == threadCount - 1) ? lastThreadCount : perThreadCount;
-        futures[i] = service.submit(
-          new BufferCreatorCallable(bufferSize, buffersToCreate, allocator));
+        final int chunkSize = perThreadCount + ((i == threadCount - 1) ? reminder : 0);
+        futures.add(pool.submit(() -> {
+          ByteBuffer[] chunk = new ByteBuffer[chunkSize];
+          for (int k = 0; k < chunkSize; k++) {
+            chunk[k] = alloc.allocate(bufferSize);
+          }
+          return chunk;
+        }));
       }
+      // Append the buffers created by each thread.
       int bufferIndex = 0;
-      for (Future<ByteBuffer[]> future : futures) {
-        try {
-          ByteBuffer[] buffers = future.get();
-          for (ByteBuffer buffer : buffers) {
-            this.buffers[bufferIndex++] = buffer;
+      try {
+        for (Future<ByteBuffer[]> f : futures) {
+          for (ByteBuffer b : f.get()) {
+            this.buffers[bufferIndex++] = b;
           }
-        } catch (InterruptedException | ExecutionException e) {
-          LOG.error("Buffer creation interrupted", e);
-          throw new IOException(e);
         }
+        assert bufferIndex == bufferCount;
+      } catch (Exception e) {
+        LOG.error("Buffer creation interrupted", e);
+        throw new IOException(e);
       }
     } finally {
-      service.shutdownNow();
+      pool.shutdownNow();
     }
-    // always create on heap empty dummy buffer at last
-    this.buffers[bufferCount] = ByteBuffer.allocate(0);
   }
 
   @VisibleForTesting
-  int getThreadCount() {
-    return Runtime.getRuntime().availableProcessors();
-  }
-
-  /**
-   * A callable that creates buffers of the specified length either onheap/offheap using the
-   * {@link ByteBufferAllocator}
-   */
-  private static class BufferCreatorCallable implements Callable<ByteBuffer[]> {
-    private final int bufferCapacity;
-    private final int bufferCount;
-    private final ByteBufferAllocator allocator;
-
-    BufferCreatorCallable(int bufferCapacity, int bufferCount, ByteBufferAllocator allocator) {
-      this.bufferCapacity = bufferCapacity;
-      this.bufferCount = bufferCount;
-      this.allocator = allocator;
+  static int getBufferSize(long capacity) {
+    int bufferSize = DEFAULT_BUFFER_SIZE;
+    if (bufferSize > (capacity / 16)) {
+      bufferSize = (int) roundUp(capacity / 16, 32768);
     }
+    return bufferSize;
+  }
 
-    @Override
-    public ByteBuffer[] call() throws Exception {
-      ByteBuffer[] buffers = new ByteBuffer[this.bufferCount];
-      for (int i = 0; i < this.bufferCount; i++) {
-        buffers[i] = allocator.allocate(this.bufferCapacity);
-      }
-      return buffers;
-    }
+  private static int getBufferCount(long capacity) {
+    int bufferSize = getBufferSize(capacity);
+    return (int) (roundUp(capacity, bufferSize) / bufferSize);
   }
 
-  private long roundUp(long n, long to) {
+  private static long roundUp(long n, long to) {
     return ((n + to - 1) / to) * to;
   }
 
   /**
-   * Transfers bytes from this buffer array into the given destination array
-   * @param start start position in the ByteBufferArray
-   * @param len The maximum number of bytes to be written to the given array
-   * @param dstArray The array into which bytes are to be written
+   * Transfers bytes from this buffer array into the given destination {@link ByteBuff}.
+   * @param offset start position in this big logical array.
+   * @param dst the destination ByteBuff. Notice that its position will be advanced.
    * @return number of bytes read
    */
-  public int getMultiple(long start, int len, byte[] dstArray) {
-    return getMultiple(start, len, dstArray, 0);
+  public int read(long offset, ByteBuff dst) {
+    return internalTransfer(offset, dst, READER);
   }
 
   /**
-   * Transfers bytes from this buffer array into the given destination array
-   * @param start start offset of this buffer array
-   * @param len The maximum number of bytes to be written to the given array
-   * @param dstArray The array into which bytes are to be written
-   * @param dstOffset The offset within the given array of the first byte to be
-   *          written
-   * @return number of bytes read
+   * Transfers bytes from the given source {@link ByteBuff} into this buffer array
+   * @param offset start offset of this big logical array.
+   * @param src the source ByteBuff. Notice that its position will be advanced.
+   * @return the number of bytes written
    */
-  public int getMultiple(long start, int len, byte[] dstArray, int dstOffset) {
-    multiple(start, len, dstArray, dstOffset, GET_MULTIPLE_VISTOR);
-    return len;
+  public int write(long offset, ByteBuff src) {
+    return internalTransfer(offset, src, WRITER);
   }
 
-  private final static Visitor GET_MULTIPLE_VISTOR = new Visitor() {
-    @Override
-    public void visit(ByteBuffer bb, int pos, byte[] array, int arrayIdx, int len) {
-      ByteBufferUtils.copyFromBufferToArray(array, bb, pos, arrayIdx, len);
-    }
+  /**
+   * Transfers bytes from the source {@link ByteBuff} to the destination {@link ByteBuffer}. The
+   * positions of both the source and the destination will be advanced.
+   */
+  private static final BiConsumer<ByteBuffer, ByteBuff> WRITER = (dst, src) -> {
+    int off = src.position(), len = dst.remaining();
+    src.get(dst, off, len);
+    src.position(off + len);
   };
 
   /**
-   * Transfers bytes from the given source array into this buffer array
-   * @param start start offset of this buffer array
-   * @param len The maximum number of bytes to be read from the given array
-   * @param srcArray The array from which bytes are to be read
+   * Transfers bytes from the source {@link ByteBuffer} to the destination {@link ByteBuff}. The
+   * positions of both the source and the destination will be advanced.
    */
-  public void putMultiple(long start, int len, byte[] srcArray) {
-    putMultiple(start, len, srcArray, 0);
-  }
+  private static final BiConsumer<ByteBuffer, ByteBuff> READER = (src, dst) -> {
+    int off = dst.position(), len = src.remaining(), srcOff = src.position();
+    dst.put(off, ByteBuff.wrap(src), srcOff, len);
+    src.position(srcOff + len);
+    dst.position(off + len);
+  };
 
   /**
-   * Transfers bytes from the given source array into this buffer array
-   * @param start start offset of this buffer array
-   * @param len The maximum number of bytes to be read from the given array
-   * @param srcArray The array from which bytes are to be read
-   * @param srcOffset The offset within the given array of the first byte to be
-   *          read
+   * Transfers all remaining bytes from b into this buffer array starting at the given offset, or
+   * transfers bytes from this buffer array at the given offset into b until b is filled. Notice
+   * that the position of ByteBuff b will be advanced.
+   * @param offset where we start in the big logical array.
+   * @param b the ByteBuff to transfer from or to
+   * @param transfer the transfer interface.
+   * @return the number of bytes transferred.
    */
-  public void putMultiple(long start, int len, byte[] srcArray, int srcOffset) {
-    multiple(start, len, srcArray, srcOffset, PUT_MULTIPLE_VISITOR);
-  }
-
-  private final static Visitor PUT_MULTIPLE_VISITOR = new Visitor() {
-    @Override
-    public void visit(ByteBuffer bb, int pos, byte[] array, int arrayIdx, int len) {
-      ByteBufferUtils.copyFromArrayToBuffer(bb, pos, array, arrayIdx, len);
+  private int internalTransfer(long offset, ByteBuff b, BiConsumer<ByteBuffer, ByteBuff> transfer) {
+    int expectedTransferLen = b.remaining();
+    if (expectedTransferLen == 0) {
+      return 0;
     }
-  };
-
-  private interface Visitor {
-    /**
-     * Visit the given byte buffer, if it is a read action, we will transfer the
-     * bytes from the buffer to the destination array, else if it is a write
-     * action, we will transfer the bytes from the source array to the buffer
-     * @param bb byte buffer
-     * @param pos Start position in ByteBuffer
-     * @param array a source or destination byte array
-     * @param arrayOffset offset of the byte array
-     * @param len read/write length
-     */
-    void visit(ByteBuffer bb, int pos, byte[] array, int arrayOffset, int len);
+    BufferIterator it = new BufferIterator(offset, expectedTransferLen);
+    while (it.hasNext()) {
+      ByteBuffer a = it.next();
+      transfer.accept(a, b);
+      assert !a.hasRemaining();
+    }
+    assert expectedTransferLen == it.getSum() : "Expected transfer length (=" + expectedTransferLen
+        + ") doesn't match the actual transfer length (=" + it.getSum() + ")";
+    return expectedTransferLen;
   }
 
   /**
-   * Access(read or write) this buffer array with a position and length as the
-   * given array. Here we will only lock one buffer even if it may be need visit
-   * several buffers. The consistency is guaranteed by the caller.
-   * @param start start offset of this buffer array
-   * @param len The maximum number of bytes to be accessed
-   * @param array The array from/to which bytes are to be read/written
-   * @param arrayOffset The offset within the given array of the first byte to
-   *          be read or written
-   * @param visitor implement of how to visit the byte buffer
+   * Creates a ByteBuff from the given array of ByteBuffers, covering the given offset for the
+   * length specified. For example, if there are 4 buffers forming the array, each with length 10,
+   * and we call asSubByteBuff(5, 10), then we create an MBB consisting of two BBs: the first a BB
+   * from 'position' 5 with 'length' 5, and the second a BB from 'position' 0 with 'length' 5.
+   * @param offset the position in the whole array, which is composed of multiple byte buffers.
+   * @param len the length of bytes
+   * @return a ByteBuff formed from the underlying ByteBuffers
    */
-  void multiple(long start, int len, byte[] array, int arrayOffset, Visitor visitor) {
-    assert len >= 0;
-    long end = start + len;
-    int startBuffer = (int) (start / bufferSize), startOffset = (int) (start % bufferSize);
-    int endBuffer = (int) (end / bufferSize), endOffset = (int) (end % bufferSize);
-    assert array.length >= len + arrayOffset;
-    assert startBuffer >= 0 && startBuffer < bufferCount;
-    assert (endBuffer >= 0 && endBuffer < bufferCount)
-        || (endBuffer == bufferCount && endOffset == 0);
-    if (startBuffer >= buffers.length || startBuffer < 0) {
-      String msg = "Failed multiple, start=" + start + ",startBuffer="
-          + startBuffer + ",bufferSize=" + bufferSize;
-      LOG.error(msg);
-      throw new RuntimeException(msg);
+  public ByteBuff asSubByteBuff(long offset, final int len) {
+    BufferIterator it = new BufferIterator(offset, len);
+    ByteBuffer[] mbb = new ByteBuffer[it.getBufferCount()];
+    for (int i = 0; i < mbb.length; i++) {
+      assert it.hasNext();
+      mbb[i] = it.next();
     }
-    int srcIndex = 0, cnt = -1;
-    for (int i = startBuffer; i <= endBuffer; ++i) {
-      ByteBuffer bb = buffers[i].duplicate();
-      int pos = 0;
-      if (i == startBuffer) {
-        cnt = bufferSize - startOffset;
-        if (cnt > len) cnt = len;
-        pos = startOffset;
-      } else if (i == endBuffer) {
-        cnt = endOffset;
-      } else {
-        cnt = bufferSize;
-      }
-      visitor.visit(bb, pos, array, srcIndex + arrayOffset, cnt);
-      srcIndex += cnt;
-    }
-    assert srcIndex == len;
+    assert it.getSum() == len;
+    return ByteBuff.wrap(mbb);
   }
 
   /**
-   * Creates a ByteBuff from a given array of ByteBuffers from the given offset to the
-   * length specified. For eg, if there are 4 buffers forming an array each with length 10 and
-   * if we call asSubBuffer(5, 10) then we will create an MBB consisting of two BBs
-   * and the first one be a BB from 'position' 5 to a 'length' 5 and the 2nd BB will be from
-   * 'position' 0 to 'length' 5.
-   * @param offset
-   * @param len
-   * @return a ByteBuff formed from the underlying ByteBuffers
+   * Iterator over the ByteBuffers that back the given offset and length in this big logical array.
    */
-  public ByteBuff asSubByteBuff(long offset, int len) {
-    assert len >= 0;
-    long end = offset + len;
-    int startBuffer = (int) (offset / bufferSize), startBufferOffset = (int) (offset % bufferSize);
-    int endBuffer = (int) (end / bufferSize), endBufferOffset = (int) (end % bufferSize);
-    // Last buffer in the array is a dummy one with 0 capacity. Avoid sending back that
-    if (endBuffer == this.bufferCount) {
-      endBuffer--;
-      endBufferOffset = bufferSize;
+  private class BufferIterator implements Iterator<ByteBuffer> {
+    private final int len;
+    private int startBuffer, startOffset, endBuffer, endOffset;
+    private int curIndex, sum = 0;
+
+    private int index(long pos) {
+      return (int) (pos / bufferSize);
+    }
+
+    private int offset(long pos) {
+      return (int) (pos % bufferSize);
+    }
+
+    public BufferIterator(long offset, int len) {
+      assert len >= 0 && offset >= 0;
+      this.len = len;
+
+      this.startBuffer = index(offset);
+      this.startOffset = offset(offset);
+
+      this.endBuffer = index(offset + len);
+      this.endOffset = offset(offset + len);
+      if (startBuffer < endBuffer && endOffset == 0) {
+        endBuffer--;
+        endOffset = bufferSize;
+      }
+      assert startBuffer >= 0 && startBuffer < bufferCount;
+      assert endBuffer >= 0 && endBuffer < bufferCount;
+
+      // initialize the index to the first buffer index.
+      this.curIndex = startBuffer;
     }
-    assert startBuffer >= 0 && startBuffer < bufferCount;
-    assert (endBuffer >= 0 && endBuffer < bufferCount)
-        || (endBuffer == bufferCount && endBufferOffset == 0);
-    if (startBuffer >= buffers.length || startBuffer < 0) {
-      String msg = "Failed subArray, start=" + offset + ",startBuffer=" + startBuffer
-          + ",bufferSize=" + bufferSize;
-      LOG.error(msg);
-      throw new RuntimeException(msg);
+
+    @Override
+    public boolean hasNext() {
+      return this.curIndex <= endBuffer;
     }
-    int srcIndex = 0, cnt = -1;
-    ByteBuffer[] mbb = new ByteBuffer[endBuffer - startBuffer + 1];
-    for (int i = startBuffer, j = 0; i <= endBuffer; ++i, j++) {
-      ByteBuffer bb = buffers[i].duplicate();
-      if (i == startBuffer) {
-        cnt = bufferSize - startBufferOffset;
-        if (cnt > len) cnt = len;
-        bb.limit(startBufferOffset + cnt).position(startBufferOffset);
-      } else if (i == endBuffer) {
-        cnt = endBufferOffset;
-        bb.position(0).limit(cnt);
+
+    /**
+     * The returned ByteBuffer is an sliced one, it won't affect the position or limit of the
+     * original one.
+     */
+    @Override
+    public ByteBuffer next() {
+      ByteBuffer bb = buffers[curIndex].duplicate();
+      if (curIndex == startBuffer) {
+        bb.position(startOffset).limit(Math.min(bufferSize, startOffset + len));
+      } else if (curIndex == endBuffer) {
+        bb.position(0).limit(endOffset);
       } else {
-        cnt = bufferSize;
-        bb.position(0).limit(cnt);
+        bb.position(0).limit(bufferSize);
       }
-      mbb[j] = bb.slice();
-      srcIndex += cnt;
+      curIndex++;
+      sum += bb.remaining();
+      // Slice so that its pos is zero; this is important because MBB counts from zero for all nio
+      // ByteBuffers.
+      return bb.slice();
+    }
+
+    int getSum() {
+      return sum;
+    }
+
+    int getBufferCount() {
+      return this.endBuffer - this.startBuffer + 1;
     }
-    assert srcIndex == len;
-    return ByteBuffAllocator.wrap(mbb);
   }
 }
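
Beyond read/write, asSubByteBuff now builds its view through the same BufferIterator, returning sliced duplicates of the backing buffers so the caller gets a zero-copy window onto the array. A short sketch, with an assumed capacity and direct-buffer allocator as in the tests:

    ByteBufferArray array = new ByteBufferArray(8L * 1024 * 1024,
        size -> ByteBuffer.allocateDirect((int) size));
    ByteBuff view = array.asSubByteBuff(100, 1024);  // 1 KB view starting at logical offset 100
    // view.remaining() == 1024; it is a SingleByteBuff or a MultiByteBuff depending on whether
    // the requested range crosses a backing-buffer boundary.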
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
index 3fc1c23..0534924 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
@@ -20,34 +20,37 @@ package org.apache.hadoop.hbase.util;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.Random;
+
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.MultiByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.testclassification.MiscTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-@Category({MiscTests.class, SmallTests.class})
+@Category({ MiscTests.class, SmallTests.class })
 public class TestByteBufferArray {
 
+  private static final Random RANDOM = new Random(System.currentTimeMillis());
+
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestByteBufferArray.class);
 
+  private static final ByteBufferAllocator ALLOC = (size) -> ByteBuffer.allocateDirect((int) size);
+
   @Test
   public void testAsSubBufferWhenEndOffsetLandInLastBuffer() throws Exception {
     int capacity = 4 * 1024 * 1024;
-    ByteBufferAllocator allocator = new ByteBufferAllocator() {
-      @Override
-      public ByteBuffer allocate(long size) throws IOException {
-        return ByteBuffer.allocateDirect((int) size);
-      }
-    };
-    ByteBufferArray array = new ByteBufferArray(capacity, allocator);
+    ByteBufferArray array = new ByteBufferArray(capacity, ALLOC);
     ByteBuff subBuf = array.asSubByteBuff(0, capacity);
     subBuf.position(capacity - 1);// Position to the last byte
     assertTrue(subBuf.hasRemaining());
@@ -59,54 +62,148 @@ public class TestByteBufferArray {
   @Test
   public void testByteBufferCreation() throws Exception {
     int capacity = 470 * 1021 * 1023;
-    ByteBufferAllocator allocator = new ByteBufferAllocator() {
-      @Override
-      public ByteBuffer allocate(long size) throws IOException {
-        return ByteBuffer.allocateDirect((int) size);
-      }
-    };
-    ByteBufferArray array = new ByteBufferArray(capacity, allocator);
-    assertEquals(119, array.buffers.length);
+    ByteBufferArray array = new ByteBufferArray(capacity, ALLOC);
+    assertEquals(118, array.buffers.length);
     for (int i = 0; i < array.buffers.length; i++) {
-      if (i == array.buffers.length - 1) {
-        assertEquals(0, array.buffers[i].capacity());
-      } else {
-        assertEquals(ByteBufferArray.DEFAULT_BUFFER_SIZE, array.buffers[i].capacity());
-      }
+      assertEquals(ByteBufferArray.DEFAULT_BUFFER_SIZE, array.buffers[i].capacity());
     }
   }
 
   @Test
   public void testByteBufferCreation1() throws Exception {
-    ByteBufferAllocator allocator = new ByteBufferAllocator() {
-      @Override
-      public ByteBuffer allocate(long size) throws IOException {
-        return ByteBuffer.allocateDirect((int) size);
-      }
-    };
-    ByteBufferArray array = new DummyByteBufferArray(7 * 1024 * 1024, allocator);
-    // overwrite
-    array.bufferCount = 25;
-    array.buffers = new ByteBuffer[array.bufferCount + 1];
-    array.createBuffers(allocator);
+    long cap = 7 * 1024L * 1024L;
+    int bufferSize = ByteBufferArray.getBufferSize(cap), bufferCount = 25;
+    ByteBufferArray array = new ByteBufferArray(bufferSize, bufferCount, 16, cap, ALLOC);
     for (int i = 0; i < array.buffers.length; i++) {
-      if (i == array.buffers.length - 1) {
-        assertEquals(0, array.buffers[i].capacity());
-      } else {
-        assertEquals(458752, array.buffers[i].capacity());
-      }
+      assertEquals(458752, array.buffers[i].capacity());
+    }
+  }
+
+  private static void fill(ByteBuff buf, byte val) {
+    for (int i = buf.position(); i < buf.limit(); i++) {
+      buf.put(i, val);
+    }
+  }
+
+  private ByteBuff createByteBuff(int len) {
+    assert len >= 0;
+    int pos = len == 0 ? 0 : RANDOM.nextInt(len);
+    ByteBuff b = ByteBuff.wrap(ByteBuffer.allocate(2 * len));
+    b.position(pos).limit(pos + len);
+    return b;
+  }
+
+  private interface Call {
+    void run() throws IOException;
+  }
+
+  private void expectedAssert(Call r) throws IOException {
+    try {
+      r.run();
+      fail();
+    } catch (AssertionError e) {
+      // Ignore
+    }
+  }
+
+
+  @Test
+  public void testArrayIO() throws IOException {
+    int cap = 9 * 1024 * 1024, bufferSize = ByteBufferArray.getBufferSize(cap);
+    ByteBufferArray array = new ByteBufferArray(cap, ALLOC);
+    testReadAndWrite(array, 0, 512, (byte) 2);
+    testReadAndWrite(array, cap - 512, 512, (byte) 3);
+    testReadAndWrite(array, 4 * 1024 * 1024, 5 * 1024 * 1024, (byte) 4);
+    testReadAndWrite(array, 256, 256, (byte) 5);
+    testReadAndWrite(array, 257, 513, (byte) 6);
+    testReadAndWrite(array, 0, cap, (byte) 7);
+    testReadAndWrite(array, cap, 0, (byte) 8);
+    testReadAndWrite(array, cap - 1, 1, (byte) 9);
+    testReadAndWrite(array, cap - 2, 2, (byte) 10);
+
+    expectedAssert(() -> testReadAndWrite(array, cap - 2, 3, (byte) 11));
+    expectedAssert(() -> testReadAndWrite(array, cap + 1, 0, (byte) 12));
+    expectedAssert(() -> testReadAndWrite(array, 0, cap + 1, (byte) 12));
+    expectedAssert(() -> testReadAndWrite(array, -1, 0, (byte) 13));
+    expectedAssert(() -> testReadAndWrite(array, 0, -23, (byte) 14));
+    expectedAssert(() -> testReadAndWrite(array, 0, 0, (byte) 15));
+    expectedAssert(() -> testReadAndWrite(array, 4096, cap - 4096 + 1, (byte) 16));
+
+    testAsSubByteBuff(array, 0, cap, true);
+    testAsSubByteBuff(array, 0, 0, false);
+    testAsSubByteBuff(array, 0, 1, false);
+    testAsSubByteBuff(array, 0, bufferSize - 1, false);
+    testAsSubByteBuff(array, 0, bufferSize, false);
+    testAsSubByteBuff(array, 0, bufferSize + 1, true);
+    testAsSubByteBuff(array, 0, 2 * bufferSize, true);
+    testAsSubByteBuff(array, 0, 5 * bufferSize, true);
+    testAsSubByteBuff(array, cap - bufferSize - 1, bufferSize, true);
+    testAsSubByteBuff(array, cap - bufferSize, bufferSize, false);
+    testAsSubByteBuff(array, cap - bufferSize, 0, false);
+    testAsSubByteBuff(array, cap - bufferSize, 1, false);
+    testAsSubByteBuff(array, cap - bufferSize, bufferSize - 1, false);
+    testAsSubByteBuff(array, cap - 2 * bufferSize, 2 * bufferSize, true);
+    testAsSubByteBuff(array, cap - 2 * bufferSize, bufferSize + 1, true);
+    testAsSubByteBuff(array, cap - 2 * bufferSize, bufferSize - 1, false);
+    testAsSubByteBuff(array, cap - 2 * bufferSize, 0, false);
+
+    expectedAssert(() -> testAsSubByteBuff(array, 0, cap + 1, false));
+    expectedAssert(() -> testAsSubByteBuff(array, 0, -1, false));
+    expectedAssert(() -> testAsSubByteBuff(array, -1, -1, false));
+    expectedAssert(() -> testAsSubByteBuff(array, cap - bufferSize, bufferSize + 1, false));
+    expectedAssert(() -> testAsSubByteBuff(array, 2 * bufferSize, cap - 2 * bufferSize + 1, false));
+  }
+
+  private void testReadAndWrite(ByteBufferArray array, int off, int dataSize, byte val) {
+    ByteBuff src = createByteBuff(dataSize);
+    int pos = src.position(), lim = src.limit();
+    fill(src, val);
+    assertEquals(src.remaining(), dataSize);
+    try {
+      assertEquals(dataSize, array.write(off, src));
+      assertEquals(0, src.remaining());
+    } finally {
+      src.position(pos).limit(lim);
+    }
+
+    ByteBuff dst = createByteBuff(dataSize);
+    pos = dst.position();
+    lim = dst.limit();
+    try {
+      assertEquals(dataSize, array.read(off, dst));
+      assertEquals(0, dst.remaining());
+    } finally {
+      dst.position(pos).limit(lim);
     }
+    assertByteBuffEquals(src, dst);
   }
 
-  private static class DummyByteBufferArray extends ByteBufferArray {
+  private void testAsSubByteBuff(ByteBufferArray array, int off, int len, boolean isMulti) {
+    ByteBuff ret = array.asSubByteBuff(off, len);
+    if (isMulti) {
+      assertTrue(ret instanceof MultiByteBuff);
+    } else {
+      assertTrue(ret instanceof SingleByteBuff);
+    }
+    assertTrue(!ret.hasArray());
+    assertEquals(len, ret.remaining());
 
-    public DummyByteBufferArray(long capacity, ByteBufferAllocator allocator) throws IOException {
-      super(capacity, allocator);
+    ByteBuff tmp = createByteBuff(len);
+    int pos = tmp.position(), lim = tmp.limit();
+    try {
+      assertEquals(len, array.read(off, tmp));
+      assertEquals(0, tmp.remaining());
+    } finally {
+      tmp.position(pos).limit(lim);
     }
 
-    @Override
-    int getThreadCount() {
-      return 16;
+    assertByteBuffEquals(ret, tmp);
+  }
+
+  private void assertByteBuffEquals(ByteBuff a, ByteBuff b) {
+    assertEquals(a.remaining(), b.remaining());
+    for (int i = a.position(), j = b.position(); i < a.limit(); i++, j++) {
+      assertEquals(a.get(i), b.get(j));
     }
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
index 3b832fe..fa8b184 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
@@ -30,39 +30,37 @@ import org.apache.hadoop.hbase.util.ByteBufferAllocator;
 import org.apache.hadoop.hbase.util.ByteBufferArray;
 
 /**
- * IO engine that stores data in memory using an array of ByteBuffers
- * {@link ByteBufferArray}.
- *
- *<h2>How it Works</h2>
- * First, see {@link ByteBufferArray} and how it gives a view across multiple ByteBuffers managed
- * by it internally. This class does the physical BB create and the write and read to the
- * underlying BBs. So we will create N BBs based on the total BC capacity specified on create
- * of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560 such
- * BBs inside our ByteBufferArray.
- * 
- * <p>Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by
- * default from 5 KB to 513 KB. Within each bucket of a particular size, there are
- * usually more than one bucket 'block'. The way it is calculate in bucketcache is that the total
- * bucketcache size is divided by 4 (hard-coded currently) * max size option. So using defaults,
- * buckets will be is 4 * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset
- * zero will serve out bucket 'blocks' of 5kb, the next bucket will do the next size up and so on
- * up to the maximum (default) of 513kb).
- * 
- * <p>When we write blocks to the bucketcache, we will see which bucket size group it best fits.
- * So a 4 KB block size goes to the 5 KB size group. Each of the block writes, writes within its
- * appropriate bucket. Though the bucket is '4kb' in size, it will occupy one of the 
- * 5 KB bucket 'blocks' (even if actual size of the bucket is less). Bucket 'blocks' will not span
- * buckets.
- * 
- * <p>But you can see the physical memory under the bucket 'blocks' can be split across the
- * underlying backing BBs from ByteBufferArray. All is split into 4 MB sized BBs.
- * 
- * <p>Each Bucket knows its offset in the entire space of BC and when block is written the offset
+ * IO engine that stores data in memory using an array of ByteBuffers {@link ByteBufferArray}.
+ * <p>
+ * <h2>How it Works</h2> First, see {@link ByteBufferArray} and how it gives a view across multiple
+ * ByteBuffers managed by it internally. This class does the physical BB create and the write and
+ * read to the underlying BBs. So we will create N BBs based on the total BC capacity specified on
+ * create of the ByteBufferArray. So say we have 10 GB of off heap BucketCache, we will create 2560
+ * such BBs inside our ByteBufferArray. <br>
+ * <p>
+ * Now the way BucketCache works is that the entire 10 GB is split into diff sized buckets: by
+ * default from 5 KB to 513 KB. Within each bucket of a particular size, there are usually more than
+ * one bucket 'block'. The way it is calculated in bucketcache is that the total bucketcache size is
+ * divided by 4 (hard-coded currently) * the max size option. So using defaults, buckets will be 4 *
+ * 513kb (the biggest default value) = 2052kb. A bucket of 2052kb at offset zero will serve out
+ * bucket 'blocks' of 5kb, the next bucket will do the next size up and so on up to the maximum
+ * (default) of 513kb). <br>
+ * <p>
+ * When we write blocks to the bucketcache, we will see which bucket size group it best fits. So a 4
+ * KB block size goes to the 5 KB size group. Each block write goes into its
+ * appropriate bucket. Though the block is '4kb' in size, it will occupy one of the 5 KB bucket
+ * 'blocks' (even if the actual size of the block is less). Bucket 'blocks' will not span buckets. <br>
+ * <p>
+ * But you can see the physical memory under the bucket 'blocks' can be split across the underlying
+ * backing BBs from ByteBufferArray. All is split into 4 MB sized BBs. <br>
+ * <p>
+ * Each Bucket knows its offset in the entire space of BC and when block is written the offset
  * arrives at ByteBufferArray and it figures which BB to write to. It may so happen that the entire
  * block to be written does not fit a particular backing ByteBufferArray so the remainder goes to
- * another BB. See {@link ByteBufferArray#putMultiple(long, int, byte[])}.
-
-So said all these, when we read a block it may be possible that the bytes of that blocks is physically placed in 2 adjucent BBs.  In such case also, we avoid any copy need by having the MBB...
+ * another BB. See {@link ByteBufferArray#write(long, ByteBuff)}. <br>
+ * All that said, when we read a block it may be possible that the bytes of that block are
+ * physically placed in 2 adjacent BBs. In such a case we still avoid any copy by having the
+ * MBB...
  */
 @InterfaceAudience.Private
 public class ByteBufferIOEngine implements IOEngine {
@@ -74,15 +72,9 @@ public class ByteBufferIOEngine implements IOEngine {
    * @param capacity
    * @throws IOException ideally here no exception to be thrown from the allocator
    */
-  public ByteBufferIOEngine(long capacity)
-      throws IOException {
+  public ByteBufferIOEngine(long capacity) throws IOException {
     this.capacity = capacity;
-    ByteBufferAllocator allocator = new ByteBufferAllocator() {
-      @Override
-      public ByteBuffer allocate(long size) throws IOException {
-        return ByteBuffer.allocateDirect((int) size);
-      }
-    };
+    ByteBufferAllocator allocator = (size) -> ByteBuffer.allocateDirect((int) size);
     bufferArray = new ByteBufferArray(capacity, allocator);
   }
 
@@ -121,27 +113,29 @@ public class ByteBufferIOEngine implements IOEngine {
   }
 
   /**
-   * Transfers data from the given byte buffer to the buffer array
-   * @param srcBuffer the given byte buffer from which bytes are to be read
-   * @param offset The offset in the ByteBufferArray of the first byte to be
-   *          written
+   * Transfers data from the given {@link ByteBuffer} to the buffer array. Position of source will
+   * be advanced by the {@link ByteBuffer#remaining()}.
+   * @param src the given byte buffer from which bytes are to be read.
+   * @param offset The offset in the ByteBufferArray of the first byte to be written
    * @throws IOException throws IOException if writing to the array throws exception
    */
   @Override
-  public void write(ByteBuffer srcBuffer, long offset) throws IOException {
-    assert srcBuffer.hasArray();
-    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
-        srcBuffer.arrayOffset());
+  public void write(ByteBuffer src, long offset) throws IOException {
+    bufferArray.write(offset, ByteBuff.wrap(src));
   }
 
+  /**
+   * Transfers data from the given {@link ByteBuff} to the buffer array. Position of source will be
+   * advanced by the {@link ByteBuffer#remaining()}.
+   * @param src the given byte buffer from which bytes are to be read.
+   * @param offset The offset in the ByteBufferArray of the first byte to be written
+   * @throws IOException throws IOException if writing to the array throws exception
+   */
   @Override
-  public void write(ByteBuff srcBuffer, long offset) throws IOException {
-    // When caching block into BucketCache there will be single buffer backing for this HFileBlock.
-    // This will work for now. But from the DFS itself if we get DBB then this may not hold true.
-    assert srcBuffer.hasArray();
-    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
-        srcBuffer.arrayOffset());
+  public void write(ByteBuff src, long offset) throws IOException {
+    bufferArray.write(offset, src);
   }
+
   /**
    * No operation for the sync in the memory IO engine
    */
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
index 8b024f0..b8e29c6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
@@ -16,16 +16,15 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
+
 import java.io.IOException;
-import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * IO engine that stores data to a file on the local block device using memory mapping
@@ -33,7 +32,6 @@ import org.slf4j.LoggerFactory;
  */
 @InterfaceAudience.Private
 public class ExclusiveMemoryMmapIOEngine extends FileMmapIOEngine {
-  static final Logger LOG = LoggerFactory.getLogger(ExclusiveMemoryMmapIOEngine.class);
 
   public ExclusiveMemoryMmapIOEngine(String filePath, long capacity) throws IOException {
     super(filePath, capacity);
@@ -42,9 +40,8 @@ public class ExclusiveMemoryMmapIOEngine extends FileMmapIOEngine {
   @Override
   public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
       throws IOException {
-    byte[] dst = new byte[length];
-    bufferArray.getMultiple(offset, length, dst);
-    return deserializer.deserialize(new SingleByteBuff(ByteBuffer.wrap(dst)), true,
-      MemoryType.EXCLUSIVE);
+    ByteBuff dst = HEAP.allocate(length);
+    bufferArray.read(offset, dst);
+    return deserializer.deserialize(dst.position(0).limit(length), true, MemoryType.EXCLUSIVE);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
index 0710d26..f6e49cf 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
@@ -143,6 +143,7 @@ public class FileIOEngine implements IOEngine {
             + " expected");
       }
     }
+    dstBuffer.rewind();
     return deserializer.deserialize(new SingleByteBuff(dstBuffer), true, MemoryType.EXCLUSIVE);
   }
 
@@ -210,10 +211,8 @@ public class FileIOEngine implements IOEngine {
 
   @Override
   public void write(ByteBuff srcBuffer, long offset) throws IOException {
-    // When caching block into BucketCache there will be single buffer backing for this HFileBlock.
-    assert srcBuffer.hasArray();
-    write(ByteBuffer.wrap(srcBuffer.array(), srcBuffer.arrayOffset(),
-            srcBuffer.remaining()), offset);
+    ByteBuffer dup = srcBuffer.asSubByteBuffer(srcBuffer.remaining()).duplicate();
+    write(dup, offset);
   }
 
   private void accessFile(FileAccessor accessor, ByteBuffer buffer,
@@ -229,8 +228,7 @@ public class FileIOEngine implements IOEngine {
       int accessLen = 0;
       if (endFileNum > accessFileNum) {
         // short the limit;
-        buffer.limit((int) (buffer.limit() - remainingAccessDataLen
-            + sizePerFile - accessOffset));
+        buffer.limit((int) (buffer.limit() - remainingAccessDataLen + sizePerFile - accessOffset));
       }
       try {
         accessLen = accessor.access(fileChannel, buffer, accessOffset);
@@ -307,7 +305,7 @@ public class FileIOEngine implements IOEngine {
     }
   }
 
-  private static interface FileAccessor {
+  private interface FileAccessor {
     int access(FileChannel fileChannel, ByteBuffer byteBuffer, long accessOffset)
         throws IOException;
   }
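
As background for the FileIOEngine#write change above, which now writes a duplicate() of the
source buffer's current window: a duplicate shares the backing bytes but carries its own
position/limit, so consuming it (for example during a channel write) does not disturb the
caller's cursors. The snippet below is plain java.nio, not HBase code:

    import java.nio.ByteBuffer;

    // Plain java.nio illustration: a duplicate has independent position/limit over the same bytes.
    public class DuplicateSketch {
      public static void main(String[] args) {
        ByteBuffer src = ByteBuffer.allocate(16);
        src.position(4).limit(12);             // caller's window: 8 readable bytes

        ByteBuffer dup = src.duplicate();      // same bytes, independent cursors
        while (dup.hasRemaining()) {
          dup.get();                           // consume the duplicate, e.g. via a channel write
        }

        System.out.println(src.remaining());   // 8 -- the caller's window is untouched
        System.out.println(dup.remaining());   // 0
      }
    }
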
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
index 9580efe..bd17fd5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
@@ -112,17 +112,12 @@ public abstract class FileMmapIOEngine implements IOEngine {
    */
   @Override
   public void write(ByteBuffer srcBuffer, long offset) throws IOException {
-    assert srcBuffer.hasArray();
-    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
-      srcBuffer.arrayOffset());
+    bufferArray.write(offset, ByteBuff.wrap(srcBuffer));
   }
 
   @Override
   public void write(ByteBuff srcBuffer, long offset) throws IOException {
-    // This singleByteBuff can be considered to be array backed
-    assert srcBuffer.hasArray();
-    bufferArray.putMultiple(offset, srcBuffer.remaining(), srcBuffer.array(),
-      srcBuffer.arrayOffset());
+    bufferArray.write(offset, srcBuffer);
   }
 
   /**
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
index bb58b4e..a06d86d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
@@ -17,8 +17,6 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
-import static org.junit.Assert.assertTrue;
-
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -28,6 +26,7 @@ import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -56,12 +55,10 @@ public class TestByteBufferIOEngine {
       if (blockSize == 0) {
         blockSize = 1;
       }
-      byte[] byteArray = new byte[blockSize];
-      for (int j = 0; j < byteArray.length; ++j) {
-        byteArray[j] = val;
-      }
-      ByteBuffer srcBuffer = ByteBuffer.wrap(byteArray);
-      int offset = 0;
+
+      ByteBuff src = createByteBuffer(blockSize, val, i % 2 == 0);
+      int pos = src.position(), lim = src.limit();
+      int offset;
       if (testOffsetAtStartNum > 0) {
         testOffsetAtStartNum--;
         offset = 0;
@@ -71,13 +68,16 @@ public class TestByteBufferIOEngine {
       } else {
         offset = (int) (Math.random() * (capacity - maxBlockSize));
       }
-      ioEngine.write(srcBuffer, offset);
+      ioEngine.write(src, offset);
+      src.position(pos).limit(lim);
+
       BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
       ioEngine.read(offset, blockSize, deserializer);
-      ByteBuff dstBuffer = deserializer.buf;
-      for (int j = 0; j < byteArray.length; ++j) {
-        assertTrue(byteArray[j] == dstBuffer.get(j));
-      }
+      ByteBuff dst = deserializer.buf;
+      Assert.assertEquals(src.remaining(), blockSize);
+      Assert.assertEquals(dst.remaining(), blockSize);
+      Assert.assertEquals(0, ByteBuff.compareTo(src, src.position(), src.remaining(), dst,
+        dst.position(), dst.remaining()));
     }
     assert testOffsetAtStartNum == 0;
     assert testOffsetAtEndNum == 0;
@@ -112,6 +112,16 @@ public class TestByteBufferIOEngine {
     }
   }
 
+  static ByteBuff createByteBuffer(int len, int val, boolean useHeap) {
+    ByteBuffer b = useHeap ? ByteBuffer.allocate(2 * len) : ByteBuffer.allocateDirect(2 * len);
+    int pos = (int) (Math.random() * len);
+    b.position(pos).limit(pos + len);
+    for (int i = pos; i < pos + len; i++) {
+      b.put(i, (byte) val);
+    }
+    return ByteBuff.wrap(b);
+  }
+
   @Test
   public void testByteBufferIOEngineWithMBB() throws Exception {
     int capacity = 32 * 1024 * 1024; // 32 MB
@@ -126,12 +136,9 @@ public class TestByteBufferIOEngine {
       if (blockSize == 0) {
         blockSize = 1;
       }
-      byte[] byteArray = new byte[blockSize];
-      for (int j = 0; j < byteArray.length; ++j) {
-        byteArray[j] = val;
-      }
-      ByteBuffer srcBuffer = ByteBuffer.wrap(byteArray);
-      int offset = 0;
+      ByteBuff src = createByteBuffer(blockSize, val, i % 2 == 0);
+      int pos = src.position(), lim = src.limit();
+      int offset;
       if (testOffsetAtStartNum > 0) {
         testOffsetAtStartNum--;
         offset = 0;
@@ -141,13 +148,16 @@ public class TestByteBufferIOEngine {
       } else {
         offset = (int) (Math.random() * (capacity - maxBlockSize));
       }
-      ioEngine.write(srcBuffer, offset);
+      ioEngine.write(src, offset);
+      src.position(pos).limit(lim);
+
       BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
       ioEngine.read(offset, blockSize, deserializer);
-      ByteBuff dstBuffer = deserializer.buf;
-      for (int j = 0; j < byteArray.length; ++j) {
-        assertTrue(srcBuffer.get(j) == dstBuffer.get(j));
-      }
+      ByteBuff dst = deserializer.buf;
+      Assert.assertEquals(src.remaining(), blockSize);
+      Assert.assertEquals(dst.remaining(), blockSize);
+      Assert.assertEquals(0, ByteBuff.compareTo(src, src.position(), src.remaining(), dst,
+        dst.position(), dst.remaining()));
     }
     assert testOffsetAtStartNum == 0;
     assert testOffsetAtEndNum == 0;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
index d0d8c8a..79d58f0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
@@ -17,16 +17,14 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
-import static org.junit.Assert.assertTrue;
-
 import java.io.File;
 import java.io.IOException;
-import java.nio.ByteBuffer;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.BufferGrabbingDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -34,7 +32,7 @@ import org.junit.experimental.categories.Category;
 /**
  * Basic test for {@link ExclusiveMemoryMmapIOEngine}
  */
-@Category({IOTests.class, SmallTests.class})
+@Category({ IOTests.class, SmallTests.class })
 public class TestExclusiveMemoryMmapEngine {
 
   @ClassRule
@@ -50,17 +48,23 @@ public class TestExclusiveMemoryMmapEngine {
       for (int i = 0; i < 50; i++) {
         int len = (int) Math.floor(Math.random() * 100);
         long offset = (long) Math.floor(Math.random() * size % (size - len));
-        byte[] data1 = new byte[len];
-        for (int j = 0; j < data1.length; ++j) {
-          data1[j] = (byte) (Math.random() * 255);
-        }
-        fileMmapEngine.write(ByteBuffer.wrap(data1), offset);
+        int val = (int) (Math.random() * 255);
+
+        // write
+        ByteBuff src = TestByteBufferIOEngine.createByteBuffer(len, val, i % 2 == 0);
+        int pos = src.position(), lim = src.limit();
+        fileMmapEngine.write(src, offset);
+        src.position(pos).limit(lim);
+
+        // read
         BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
         fileMmapEngine.read(offset, len, deserializer);
-        ByteBuff data2 = deserializer.getDeserializedByteBuff();
-        for (int j = 0; j < data1.length; ++j) {
-          assertTrue(data1[j] == data2.get(j));
-        }
+        ByteBuff dst = deserializer.getDeserializedByteBuff();
+
+        Assert.assertEquals(src.remaining(), len);
+        Assert.assertEquals(dst.remaining(), len);
+        Assert.assertEquals(0,
+          ByteBuff.compareTo(src, pos, len, dst, dst.position(), dst.remaining()));
       }
     } finally {
       File file = new File(filePath);
@@ -68,6 +72,5 @@ public class TestExclusiveMemoryMmapEngine {
         file.delete();
       }
     }
-
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
index efb8145..6b0d603 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -132,15 +133,22 @@ public class TestFileIOEngine {
     fileIOEngine.closeFileChannels();
     int len = 5;
     long offset = 0L;
-    byte[] data1 = new byte[len];
-    for (int j = 0; j < data1.length; ++j) {
-      data1[j] = (byte) (Math.random() * 255);
+    int val = (int) (Math.random() * 255);
+    for (int i = 0; i < 2; i++) {
+      ByteBuff src = TestByteBufferIOEngine.createByteBuffer(len, val, i % 2 == 0);
+      int pos = src.position(), lim = src.limit();
+      fileIOEngine.write(src, offset);
+      src.position(pos).limit(lim);
+
+      BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
+      fileIOEngine.read(offset, len, deserializer);
+      ByteBuff dst = deserializer.getDeserializedByteBuff();
+
+      Assert.assertEquals(src.remaining(), len);
+      Assert.assertEquals(dst.remaining(), len);
+      Assert.assertEquals(0,
+        ByteBuff.compareTo(src, pos, len, dst, dst.position(), dst.remaining()));
     }
-    fileIOEngine.write(ByteBuffer.wrap(data1), offset);
-    BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-    fileIOEngine.read(offset, len, deserializer);
-    ByteBuff data2 = deserializer.getDeserializedByteBuff();
-    assertArrayEquals(data1, data2.array());
   }
 
   @Test


[hbase] 12/22: HBASE-21921 Notify users if the ByteBufAllocator is always allocating ByteBuffers from heap which means increasing GC pressure

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit d3c3a3f184c30384e612ff99b9e7e327db709450
Author: huzheng <op...@gmail.com>
AuthorDate: Mon Apr 29 15:31:19 2019 +0800

    HBASE-21921 Notify users if the ByteBufAllocator is always allocating ByteBuffers from heap which means increasing GC pressure
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  | 48 ++++++++++++--
 .../hadoop/hbase/io/TestByteBuffAllocator.java     | 75 ++++++++++++++--------
 .../hbase/io/TestByteBufferListOutputStream.java   |  4 +-
 .../regionserver/MetricsRegionServerSource.java    | 15 +++++
 .../regionserver/MetricsRegionServerWrapper.java   | 28 +++++---
 .../MetricsRegionServerSourceImpl.java             | 17 ++++-
 .../hbase/tmpl/regionserver/RSStatusTmpl.jamon     |  3 +-
 .../tmpl/regionserver/ServerMetricsTmpl.jamon      | 28 ++++++++
 .../MetricsRegionServerWrapperImpl.java            | 33 +++++++++-
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    | 12 ++--
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |  2 +-
 .../MetricsRegionServerWrapperStub.java            | 25 ++++++++
 .../hbase/regionserver/TestRSStatusServlet.java    | 29 +++------
 13 files changed, 244 insertions(+), 75 deletions(-)
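
As a rough illustration of the idea behind this commit (the threshold and numbers below are
made up; the actual patch exposes the counters as region server metrics and on the RS status
page rather than hard-coding a warning):

    // Hypothetical, simplified sketch: sample the two allocation counters and flag the case
    // where most ByteBuff allocations fall back to the heap (which raises GC pressure).
    public class HeapPressureSketch {
      public static void main(String[] args) {
        long poolAllocations = 120;   // made-up sample values
        long heapAllocations = 880;
        double heapRatio = 100.0 * heapAllocations / (heapAllocations + poolAllocations);
        if (heapRatio > 50.0) {       // threshold chosen only for this example
          System.out.println(String.format(
              "WARN: %.1f%% of ByteBuff allocations came from heap; consider a larger buffer pool.",
              heapRatio));
        }
      }
    }
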

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 51de22a..5939d4a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -24,6 +24,8 @@ import java.util.List;
 import java.util.Queue;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.LongAdder;
+
 import sun.nio.ch.DirectBuffer;
 
 import org.apache.hadoop.conf.Configuration;
@@ -95,6 +97,13 @@ public class ByteBuffAllocator {
 
   private final Queue<ByteBuffer> buffers = new ConcurrentLinkedQueue<>();
 
+  // Metrics to track the pool allocation number and heap allocation number. If the heap allocation
+  // number keeps increasing, then we may need to increase the max.buffer.count.
+  private final LongAdder poolAllocationNum = new LongAdder();
+  private final LongAdder heapAllocationNum = new LongAdder();
+  private long lastPoolAllocationNum = 0;
+  private long lastHeapAllocationNum = 0;
+
   /**
    * Initialize an {@link ByteBuffAllocator} which will try to allocate ByteBuffers from off-heap if
    * reservoir is enabled and the reservoir has enough buffers, otherwise the allocator will just
@@ -152,11 +161,35 @@ public class ByteBuffAllocator {
     return reservoirEnabled;
   }
 
+  public long getHeapAllocationNum() {
+    return heapAllocationNum.sum();
+  }
+
+  public long getPoolAllocationNum() {
+    return poolAllocationNum.sum();
+  }
+
   @VisibleForTesting
-  public int getQueueSize() {
+  public int getFreeBufferCount() {
     return this.buffers.size();
   }
 
+  public int getTotalBufferCount() {
+    return maxBufCount;
+  }
+
+  public double getHeapAllocationRatio() {
+    long heapAllocNum = heapAllocationNum.sum(), poolAllocNum = poolAllocationNum.sum();
+    double heapDelta = heapAllocNum - lastHeapAllocationNum;
+    double poolDelta = poolAllocNum - lastPoolAllocationNum;
+    lastHeapAllocationNum = heapAllocNum;
+    lastPoolAllocationNum = poolAllocNum;
+    if (Math.abs(heapDelta + poolDelta) < 1e-3) {
+      return 0.0;
+    }
+    return heapDelta / (heapDelta + poolDelta) * 100;
+  }
+
   /**
    * Allocate an buffer with buffer size from ByteBuffAllocator, Note to call the
    * {@link ByteBuff#release()} if no need any more, otherwise the memory leak happen in NIO
@@ -171,11 +204,12 @@ public class ByteBuffAllocator {
       }
     }
     // Allocated from heap, let the JVM free its memory.
-    return allocateOnHeap(this.bufSize);
+    return (SingleByteBuff) ByteBuff.wrap(allocateOnHeap(bufSize));
   }
 
-  private static SingleByteBuff allocateOnHeap(int size) {
-    return new SingleByteBuff(NONE, ByteBuffer.allocate(size));
+  private ByteBuffer allocateOnHeap(int size) {
+    heapAllocationNum.increment();
+    return ByteBuffer.allocate(size);
   }
 
   /**
@@ -190,7 +224,7 @@ public class ByteBuffAllocator {
     }
     // If disabled the reservoir, just allocate it from on-heap.
     if (!isReservoirEnabled() || size == 0) {
-      return allocateOnHeap(size);
+      return ByteBuff.wrap(allocateOnHeap(size));
     }
     int reminder = size % bufSize;
     int len = size / bufSize + (reminder > 0 ? 1 : 0);
@@ -210,7 +244,7 @@ public class ByteBuffAllocator {
     if (remain > 0) {
       // If the last ByteBuffer is too small or the reservoir can not provide more ByteBuffers, we
       // just allocate the ByteBuffer from on-heap.
-      bbs.add(ByteBuffer.allocate(remain));
+      bbs.add(allocateOnHeap(remain));
     }
     ByteBuff bb = ByteBuff.wrap(bbs, () -> {
       for (int i = 0; i < lenFromReservoir; i++) {
@@ -248,6 +282,7 @@ public class ByteBuffAllocator {
     if (bb != null) {
       // To reset the limit to capacity and position to 0, must clear here.
       bb.clear();
+      poolAllocationNum.increment();
       return bb;
     }
     while (true) {
@@ -264,6 +299,7 @@ public class ByteBuffAllocator {
       if (!this.usedBufCount.compareAndSet(c, c + 1)) {
         continue;
       }
+      poolAllocationNum.increment();
       return ByteBuffer.allocateDirect(bufSize);
     }
   }
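
To make the delta-based getHeapAllocationRatio() above easier to follow, here is a worked
standalone sketch with made-up numbers (it mirrors the patched method but is not HBase code;
in the patch the counters come from LongAdders):

    // Standalone sketch of the delta-based ratio sampling used by getHeapAllocationRatio().
    public class HeapRatioSketch {
      private static long lastHeap = 0, lastPool = 0;

      static double heapAllocationRatio(long heapNow, long poolNow) {
        double heapDelta = heapNow - lastHeap;
        double poolDelta = poolNow - lastPool;
        lastHeap = heapNow;
        lastPool = poolNow;
        if (Math.abs(heapDelta + poolDelta) < 1e-3) {
          return 0.0;                          // nothing allocated since the last sample
        }
        return heapDelta / (heapDelta + poolDelta) * 100;
      }

      public static void main(String[] args) {
        // First sampling period: 5 heap vs 95 pool allocations -> 5.0
        System.out.println(heapAllocationRatio(5, 95));
        // Next period adds 45 heap and 5 pool allocations -> 90.0 for that period,
        // even though the cumulative heap share is only one third.
        System.out.println(heapAllocationRatio(50, 100));
      }
    }
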
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
index 4375032..0d0da80 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
@@ -48,19 +48,26 @@ public class TestByteBuffAllocator {
     int bufSize = 6 * 1024;
     ByteBuffAllocator alloc = new ByteBuffAllocator(true, maxBuffersInPool, bufSize, bufSize / 6);
     ByteBuff buff = alloc.allocate(10 * bufSize);
+    assertEquals(10, alloc.getPoolAllocationNum());
+    assertEquals(0, alloc.getHeapAllocationNum());
     buff.release();
     // When the request size is less than 1/6th of the pool buffer size. We should use on demand
     // created on heap Buffer
     buff = alloc.allocate(200);
     assertTrue(buff.hasArray());
-    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
+    assertEquals(maxBuffersInPool, alloc.getTotalBufferCount());
+    assertEquals(10, alloc.getPoolAllocationNum());
+    assertEquals(1, alloc.getHeapAllocationNum());
     buff.release();
     // When the request size is > 1/6th of the pool buffer size.
     buff = alloc.allocate(1024);
     assertFalse(buff.hasArray());
-    assertEquals(maxBuffersInPool - 1, alloc.getQueueSize());
-    buff.release();// ByteBuffDeallocaor#free should put back the BB to pool.
-    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool - 1, alloc.getFreeBufferCount());
+    assertEquals(11, alloc.getPoolAllocationNum());
+    assertEquals(1, alloc.getHeapAllocationNum());
+    buff.release();// ByteBuff Recycler#free should put back the BB to pool.
+    assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
     // Request size> pool buffer size
     buff = alloc.allocate(7 * 1024);
     assertFalse(buff.hasArray());
@@ -71,9 +78,11 @@ public class TestByteBuffAllocator {
     assertTrue(bbs[1].isDirect());
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(1024, bbs[1].limit());
-    assertEquals(maxBuffersInPool - 2, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool - 2, alloc.getFreeBufferCount());
+    assertEquals(13, alloc.getPoolAllocationNum());
+    assertEquals(1, alloc.getHeapAllocationNum());
     buff.release();
-    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
 
     buff = alloc.allocate(6 * 1024 + 200);
     assertFalse(buff.hasArray());
@@ -84,11 +93,16 @@ public class TestByteBuffAllocator {
     assertFalse(bbs[1].isDirect());
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(200, bbs[1].limit());
-    assertEquals(maxBuffersInPool - 1, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool - 1, alloc.getFreeBufferCount());
+    assertEquals(14, alloc.getPoolAllocationNum());
+    assertEquals(2, alloc.getHeapAllocationNum());
     buff.release();
-    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
 
     alloc.allocate(bufSize * (maxBuffersInPool - 1));
+    assertEquals(23, alloc.getPoolAllocationNum());
+    assertEquals(2, alloc.getHeapAllocationNum());
+
     buff = alloc.allocate(20 * 1024);
     assertFalse(buff.hasArray());
     assertTrue(buff instanceof MultiByteBuff);
@@ -98,23 +112,29 @@ public class TestByteBuffAllocator {
     assertFalse(bbs[1].isDirect());
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(14 * 1024, bbs[1].limit());
-    assertEquals(0, alloc.getQueueSize());
+    assertEquals(0, alloc.getFreeBufferCount());
+    assertEquals(24, alloc.getPoolAllocationNum());
+    assertEquals(3, alloc.getHeapAllocationNum());
+
     buff.release();
-    assertEquals(1, alloc.getQueueSize());
+    assertEquals(1, alloc.getFreeBufferCount());
     alloc.allocateOneBuffer();
+    assertEquals(25, alloc.getPoolAllocationNum());
+    assertEquals(3, alloc.getHeapAllocationNum());
 
     buff = alloc.allocate(7 * 1024);
     assertTrue(buff.hasArray());
     assertTrue(buff instanceof SingleByteBuff);
     assertEquals(7 * 1024, buff.nioByteBuffers()[0].limit());
+    assertEquals(25, alloc.getPoolAllocationNum());
+    assertEquals(4, alloc.getHeapAllocationNum());
     buff.release();
   }
 
   @Test
   public void testNegativeAllocatedSize() {
     int maxBuffersInPool = 10;
-    ByteBuffAllocator allocator =
-        new ByteBuffAllocator(true, maxBuffersInPool, 6 * 1024, 1024);
+    ByteBuffAllocator allocator = new ByteBuffAllocator(true, maxBuffersInPool, 6 * 1024, 1024);
     try {
       allocator.allocate(-1);
       fail("Should throw exception when size < 0");
@@ -122,6 +142,7 @@ public class TestByteBuffAllocator {
       // expected exception
     }
     ByteBuff bb = allocator.allocate(0);
+    assertEquals(1, allocator.getHeapAllocationNum());
     bb.release();
   }
 
@@ -169,7 +190,7 @@ public class TestByteBuffAllocator {
     dup2.release();
     assertEquals(0, buf2.refCnt());
     assertEquals(0, dup2.refCnt());
-    assertEquals(0, alloc.getQueueSize());
+    assertEquals(0, alloc.getFreeBufferCount());
     assertException(dup2::position);
     assertException(buf2::position);
 
@@ -178,7 +199,7 @@ public class TestByteBuffAllocator {
     dup1.release();
     assertEquals(0, buf1.refCnt());
     assertEquals(0, dup1.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
     assertException(dup1::position);
     assertException(buf1::position);
 
@@ -189,7 +210,7 @@ public class TestByteBuffAllocator {
     slice3.release();
     assertEquals(0, buf3.refCnt());
     assertEquals(0, slice3.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // slice the buf4, if the slice4 released, buf4 will also be released (MultipleByteBuffer)
     ByteBuff buf4 = alloc.allocate(bufSize * 2);
@@ -198,7 +219,7 @@ public class TestByteBuffAllocator {
     slice4.release();
     assertEquals(0, buf4.refCnt());
     assertEquals(0, slice4.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // Test multiple reference for the same ByteBuff (SingleByteBuff)
     ByteBuff buf5 = alloc.allocateOneBuffer();
@@ -206,7 +227,7 @@ public class TestByteBuffAllocator {
     slice5.release();
     assertEquals(0, buf5.refCnt());
     assertEquals(0, slice5.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
     assertException(slice5::position);
     assertException(buf5::position);
 
@@ -216,7 +237,7 @@ public class TestByteBuffAllocator {
     slice6.release();
     assertEquals(0, buf6.refCnt());
     assertEquals(0, slice6.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // Test retain the parent SingleByteBuff (duplicate)
     ByteBuff parent = alloc.allocateOneBuffer();
@@ -225,11 +246,11 @@ public class TestByteBuffAllocator {
     parent.release();
     assertEquals(1, child.refCnt());
     assertEquals(1, parent.refCnt());
-    assertEquals(1, alloc.getQueueSize());
+    assertEquals(1, alloc.getFreeBufferCount());
     parent.release();
     assertEquals(0, child.refCnt());
     assertEquals(0, parent.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // Test retain parent MultiByteBuff (duplicate)
     parent = alloc.allocate(bufSize << 1);
@@ -238,11 +259,11 @@ public class TestByteBuffAllocator {
     parent.release();
     assertEquals(1, child.refCnt());
     assertEquals(1, parent.refCnt());
-    assertEquals(0, alloc.getQueueSize());
+    assertEquals(0, alloc.getFreeBufferCount());
     parent.release();
     assertEquals(0, child.refCnt());
     assertEquals(0, parent.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // Test retain the parent SingleByteBuff (slice)
     parent = alloc.allocateOneBuffer();
@@ -251,11 +272,11 @@ public class TestByteBuffAllocator {
     parent.release();
     assertEquals(1, child.refCnt());
     assertEquals(1, parent.refCnt());
-    assertEquals(1, alloc.getQueueSize());
+    assertEquals(1, alloc.getFreeBufferCount());
     parent.release();
     assertEquals(0, child.refCnt());
     assertEquals(0, parent.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
 
     // Test retain parent MultiByteBuff (slice)
     parent = alloc.allocate(bufSize << 1);
@@ -264,11 +285,11 @@ public class TestByteBuffAllocator {
     parent.release();
     assertEquals(1, child.refCnt());
     assertEquals(1, parent.refCnt());
-    assertEquals(0, alloc.getQueueSize());
+    assertEquals(0, alloc.getFreeBufferCount());
     parent.release();
     assertEquals(0, child.refCnt());
     assertEquals(0, parent.refCnt());
-    assertEquals(2, alloc.getQueueSize());
+    assertEquals(2, alloc.getFreeBufferCount());
   }
 
   @Test
@@ -282,7 +303,7 @@ public class TestByteBuffAllocator {
     buf1.release();
     assertEquals(0, buf1.refCnt());
     assertEquals(0, dup1.refCnt());
-    assertEquals(1, alloc.getQueueSize());
+    assertEquals(1, alloc.getFreeBufferCount());
     assertException(buf1::position);
     assertException(dup1::position);
   }
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
index 3ac7a75..1943a43 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
@@ -55,7 +55,7 @@ public class TestByteBufferListOutputStream {
     bb1.release();
     bbos.writeInt(123);
     bbos.writeInt(124);
-    assertEquals(0, alloc.getQueueSize());
+    assertEquals(0, alloc.getFreeBufferCount());
     List<ByteBuffer> allBufs = bbos.getByteBuffers();
     assertEquals(4, allBufs.size());
     assertEquals(4, bbos.allBufs.size());
@@ -80,6 +80,6 @@ public class TestByteBufferListOutputStream {
     assertEquals(4, b4.remaining());
     assertEquals(124, b4.getInt());
     bbos.releaseResources();
-    assertEquals(3, alloc.getQueueSize());
+    assertEquals(3, alloc.getFreeBufferCount());
   }
 }
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 8a7e647..68548c8 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -558,4 +558,19 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
   String AVERAGE_REGION_SIZE = "averageRegionSize";
   String AVERAGE_REGION_SIZE_DESC =
       "Average region size over the RegionServer including memstore and storefile sizes.";
+
+  /** Metrics for {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} **/
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM = "ByteBuffAllocatorHeapAllocationNum";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM_DESC =
+      "Number of heap allocation from ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM = "ByteBuffAllocatorPoolAllocationNum";
+  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM_DESC =
+      "Number of pool allocation from ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO = "ByteBuffAllocatorHeapAllocationRatio";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO_DESC =
+      "Ratio of heap allocation from ByteBuffAllocator, means heapAllocation/totalAllocation";
+  String BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT = "ByteBuffAllocatorTotalBufferCount";
+  String BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT_DESC = "Total buffer count in ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT = "ByteBuffAllocatorFreeBufferCount";
+  String BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT_DESC = "Free buffer count in ByteBuffAllocator";
 }
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
index 03ebc4c..c196cda 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
@@ -230,7 +230,7 @@ public interface MetricsRegionServerWrapper {
    */
   int getFlushQueueSize();
 
-  public long getMemStoreLimit();
+  long getMemStoreLimit();
   /**
    * Get the size (in bytes) of the block cache that is free.
    */
@@ -295,42 +295,42 @@ public interface MetricsRegionServerWrapper {
   /**
    * Hit count of L1 cache.
    */
-  public long getL1CacheHitCount();
+  long getL1CacheHitCount();
 
   /**
    * Miss count of L1 cache.
    */
-  public long getL1CacheMissCount();
+  long getL1CacheMissCount();
 
   /**
    * Hit ratio of L1 cache.
    */
-  public double getL1CacheHitRatio();
+  double getL1CacheHitRatio();
 
   /**
    * Miss ratio of L1 cache.
    */
-  public double getL1CacheMissRatio();
+  double getL1CacheMissRatio();
 
   /**
    * Hit count of L2 cache.
    */
-  public long getL2CacheHitCount();
+  long getL2CacheHitCount();
 
   /**
    * Miss count of L2 cache.
    */
-  public long getL2CacheMissCount();
+  long getL2CacheMissCount();
 
   /**
    * Hit ratio of L2 cache.
    */
-  public double getL2CacheHitRatio();
+  double getL2CacheHitRatio();
 
   /**
    * Miss ratio of L2 cache.
    */
-  public double getL2CacheMissRatio();
+  double getL2CacheMissRatio();
 
   /**
    * Force a re-computation of the metrics.
@@ -523,4 +523,14 @@ public interface MetricsRegionServerWrapper {
   long getTrailerHitCount();
 
   long getTotalRowActionRequestCount();
+
+  long getByteBuffAllocatorHeapAllocationNum();
+
+  long getByteBuffAllocatorPoolAllocationNum();
+
+  double getByteBuffAllocatorHeapAllocRatio();
+
+  long getByteBuffAllocatorTotalBufferCount();
+
+  long getByteBuffAllocatorFreeBufferCount();
 }
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
index 58c42a5..e259022 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
@@ -553,7 +553,22 @@ public class MetricsRegionServerSourceImpl
             .addGauge(Interns.info(READ_REQUEST_RATE_PER_SECOND, READ_REQUEST_RATE_DESC),
                     rsWrap.getReadRequestsRatePerSecond())
             .addGauge(Interns.info(WRITE_REQUEST_RATE_PER_SECOND, WRITE_REQUEST_RATE_DESC),
-                    rsWrap.getWriteRequestsRatePerSecond());
+                    rsWrap.getWriteRequestsRatePerSecond())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM,
+                BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM_DESC),
+                rsWrap.getByteBuffAllocatorHeapAllocationNum())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM,
+                BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM_DESC),
+                rsWrap.getByteBuffAllocatorPoolAllocationNum())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO,
+                BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO_DESC),
+                rsWrap.getByteBuffAllocatorHeapAllocRatio())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT,
+                BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT_DESC),
+                rsWrap.getByteBuffAllocatorTotalBufferCount())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT,
+                BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT_DESC),
+                rsWrap.getByteBuffAllocatorFreeBufferCount());
   }
 
   @Override
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/RSStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/RSStatusTmpl.jamon
index 89de208..a4ef63a 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/RSStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/RSStatusTmpl.jamon
@@ -134,7 +134,8 @@ org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
     <section>
     <h2>Server Metrics</h2>
     <& ServerMetricsTmpl; mWrap = regionServer.getRegionServerMetrics().getRegionServerWrapper();
-      mServerWrap = regionServer.getRpcServer().getMetrics().getHBaseServerWrapper(); &>
+      mServerWrap = regionServer.getRpcServer().getMetrics().getHBaseServerWrapper();
+      bbAllocator = regionServer.getRpcServer().getByteBuffAllocator(); &>
     </section>
 
     <section>
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
index adcfff1..5ace343 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
@@ -19,10 +19,12 @@ limitations under the License.
 <%args>
 MetricsRegionServerWrapper mWrap;
 MetricsHBaseServerWrapper mServerWrap;
+ByteBuffAllocator bbAllocator;
 </%args>
 <%import>
 java.util.*;
 org.apache.hadoop.hbase.regionserver.HRegionServer;
+org.apache.hadoop.hbase.io.ByteBuffAllocator;
 org.apache.hadoop.hbase.ipc.MetricsHBaseServerWrapper;
 org.apache.hadoop.hbase.regionserver.MetricsRegionServerWrapper;
 org.apache.hadoop.hbase.util.Bytes;
@@ -45,6 +47,7 @@ org.apache.hadoop.hbase.io.util.MemorySizeUtil;
         <li class=""><a href="#tab_walStats" data-toggle="tab">WALs</a></li>
         <li class=""><a href="#tab_storeStats" data-toggle="tab">Storefiles</a></li>
         <li class=""><a href="#tab_queueStats" data-toggle="tab">Queues</a></li>
+        <li class=""><a href="#tab_byteBuffAllocatorStats" data-toggle="tab">ByteBuffAllocator Stats</a></li>
     </ul>
     <div class="tab-content" style="padding-bottom: 9px; border-bottom: 1px solid #ddd;">
         <div class="tab-pane active" id="tab_baseStats">
@@ -65,6 +68,9 @@ org.apache.hadoop.hbase.io.util.MemorySizeUtil;
         <div class="tab-pane" id="tab_queueStats">
             <& queueStats; mWrap = mWrap; mServerWrap = mServerWrap; &>
         </div>
+        <div class="tab-pane" id="tab_byteBuffAllocatorStats">
+            <& byteBuffAllocatorStats; bbAllocator = bbAllocator; &>
+        </div>
     </div>
 </div>
 
@@ -225,3 +231,25 @@ MetricsHBaseServerWrapper mServerWrap;
 </tr>
 </table>
 </%def>
+
+<%def byteBuffAllocatorStats>
+<%args>
+ByteBuffAllocator bbAllocator;
+</%args>
+<table class="table table-striped">
+<tr>
+    <th>Number of Heap Allocation</th>
+    <th>Number of Pool Allocation</th>
+    <th>Heap Allocation Ratio</th>
+    <th>Total Buffer Count</th>
+    <th>Free Buffer Count</th>
+</tr>
+<tr>
+    <td><% bbAllocator.getHeapAllocationNum() %></td>
+    <td><% bbAllocator.getPoolAllocationNum() %></td>
+    <td><% bbAllocator.getHeapAllocationRatio() %>%</td>
+    <td><% bbAllocator.getTotalBufferCount() %></td>
+    <td><% bbAllocator.getFreeBufferCount() %></td>
+</tr>
+</table>
+</%def>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index 33a6ee0..db72d11 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.CacheStats;
 import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache;
@@ -58,6 +59,7 @@ class MetricsRegionServerWrapperImpl
 
   private final HRegionServer regionServer;
   private final MetricsWALSource metricsWALSource;
+  private final ByteBuffAllocator allocator;
 
   private Optional<BlockCache> blockCache;
   private Optional<MobFileCache> mobFileCache;
@@ -129,15 +131,15 @@ class MetricsRegionServerWrapperImpl
     initBlockCache();
     initMobFileCache();
 
-    this.period =
-        regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
-          HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
+    this.period = regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
+      HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
 
     this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor();
     this.runnable = new RegionServerMetricsWrapperRunnable();
     this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period,
       TimeUnit.MILLISECONDS);
     this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class);
+    this.allocator = regionServer.getRpcServer().getByteBuffAllocator();
 
     try {
       this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration());
@@ -1006,4 +1008,29 @@ class MetricsRegionServerWrapperImpl
   public long getTrailerHitCount() {
     return this.cacheStats.map(CacheStats::getTrailerHitCount).orElse(0L);
   }
+
+  @Override
+  public long getByteBuffAllocatorHeapAllocationNum() {
+    return this.allocator.getHeapAllocationNum();
+  }
+
+  @Override
+  public long getByteBuffAllocatorPoolAllocationNum() {
+    return this.allocator.getPoolAllocationNum();
+  }
+
+  @Override
+  public double getByteBuffAllocatorHeapAllocRatio() {
+    return this.allocator.getHeapAllocationRatio();
+  }
+
+  @Override
+  public long getByteBuffAllocatorTotalBufferCount() {
+    return this.allocator.getTotalBufferCount();
+  }
+
+  @Override
+  public long getByteBuffAllocatorFreeBufferCount() {
+    return this.allocator.getFreeBufferCount();
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index 101fd91..84e24e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -123,10 +123,10 @@ public class TestHFile  {
     List<ByteBuff> buffs = new ArrayList<>();
     for (int i = 0; i < bufCount; i++) {
       buffs.add(alloc.allocateOneBuffer());
-      Assert.assertEquals(alloc.getQueueSize(), 0);
+      Assert.assertEquals(alloc.getFreeBufferCount(), 0);
     }
     buffs.forEach(ByteBuff::release);
-    Assert.assertEquals(alloc.getQueueSize(), bufCount);
+    Assert.assertEquals(alloc.getFreeBufferCount(), bufCount);
   }
 
   @Test
@@ -143,7 +143,7 @@ public class TestHFile  {
       // fail test
       assertTrue(false);
     }
-    Assert.assertEquals(bufCount, alloc.getQueueSize());
+    Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
     alloc.clean();
   }
 
@@ -171,11 +171,11 @@ public class TestHFile  {
       Assert.assertTrue(cachedBlock instanceof HFileBlock);
       Assert.assertTrue(((HFileBlock) cachedBlock).isOnHeap());
       // Should never allocate off-heap block from allocator because ensure that it's LRU.
-      Assert.assertEquals(bufCount, alloc.getQueueSize());
+      Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
       block.release(); // return back the ByteBuffer back to allocator.
     }
     reader.close();
-    Assert.assertEquals(bufCount, alloc.getQueueSize());
+    Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
     alloc.clean();
     lru.shutdown();
   }
@@ -229,7 +229,7 @@ public class TestHFile  {
     }
     reader.close();
     combined.shutdown();
-    Assert.assertEquals(bufCount, alloc.getQueueSize());
+    Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
     alloc.clean();
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index af42a24..5fdd7a4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -156,7 +156,7 @@ public class TestHFileBlock {
 
   private void assertAllocator() {
     if (!useHeapAllocator) {
-      assertEquals(MAX_BUFFER_COUNT, alloc.getQueueSize());
+      assertEquals(MAX_BUFFER_COUNT, alloc.getFreeBufferCount());
     }
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
index b003b44..035167a 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
@@ -116,6 +116,31 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
   }
 
   @Override
+  public long getByteBuffAllocatorHeapAllocationNum() {
+    return 0;
+  }
+
+  @Override
+  public long getByteBuffAllocatorPoolAllocationNum() {
+    return 0;
+  }
+
+  @Override
+  public double getByteBuffAllocatorHeapAllocRatio() {
+    return 0;
+  }
+
+  @Override
+  public long getByteBuffAllocatorTotalBufferCount() {
+    return 0;
+  }
+
+  @Override
+  public long getByteBuffAllocatorFreeBufferCount() {
+    return 0;
+  }
+
+  @Override
   public long getReadRequestsCount() {
     return 997;
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSStatusServlet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSStatusServlet.java
index fd3a56d..c3cf9d3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSStatusServlet.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRSStatusServlet.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.ipc.MetricsHBaseServer;
 import org.apache.hadoop.hbase.ipc.MetricsHBaseServerWrapperStub;
 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
@@ -50,12 +51,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
-import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
 import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
 
 import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetOnlineRegionRequest;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoRequest;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoResponse;
 
 /**
@@ -92,12 +90,10 @@ public class TestRSStatusServlet {
     rs = Mockito.mock(HRegionServer.class);
     rpcServices = Mockito.mock(RSRpcServices.class);
     rpcServer = Mockito.mock(RpcServerInterface.class);
-    Mockito.doReturn(HBaseConfiguration.create())
-      .when(rs).getConfiguration();
+    Mockito.doReturn(HBaseConfiguration.create()).when(rs).getConfiguration();
     Mockito.doReturn(rpcServices).when(rs).getRSRpcServices();
     Mockito.doReturn(rpcServer).when(rs).getRpcServer();
-    Mockito.doReturn(fakeResponse).when(rpcServices).getServerInfo(
-      (RpcController)Mockito.any(), (GetServerInfoRequest)Mockito.any());
+    Mockito.doReturn(fakeResponse).when(rpcServices).getServerInfo(Mockito.any(), Mockito.any());
     // Fake ZKW
     ZKWatcher zkw = Mockito.mock(ZKWatcher.class);
     Mockito.doReturn("fakequorum").when(zkw).getQuorum();
@@ -119,6 +115,7 @@ public class TestRSStatusServlet {
     MetricsHBaseServer ms = Mockito.mock(MetricsHBaseServer.class);
     Mockito.doReturn(new MetricsHBaseServerWrapperStub()).when(ms).getHBaseServerWrapper();
     Mockito.doReturn(ms).when(rpcServer).getMetrics();
+    Mockito.doReturn(ByteBuffAllocator.HEAP).when(rpcServer).getByteBuffAllocator();
   }
 
   @Test
@@ -130,18 +127,12 @@ public class TestRSStatusServlet {
   public void testWithRegions() throws IOException, ServiceException {
     HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(name.getMethodName()));
     List<RegionInfo> regions = Lists.newArrayList(
-        RegionInfoBuilder.newBuilder(htd.getTableName())
-            .setStartKey(Bytes.toBytes("a"))
-            .setEndKey(Bytes.toBytes("d"))
-            .build(),
-        RegionInfoBuilder.newBuilder(htd.getTableName())
-            .setStartKey(Bytes.toBytes("d"))
-            .setEndKey(Bytes.toBytes("z"))
-            .build()
-        );
-    Mockito.doReturn(ResponseConverter.buildGetOnlineRegionResponse(
-      regions)).when(rpcServices).getOnlineRegion((RpcController)Mockito.any(),
-        (GetOnlineRegionRequest)Mockito.any());
+      RegionInfoBuilder.newBuilder(htd.getTableName()).setStartKey(Bytes.toBytes("a"))
+          .setEndKey(Bytes.toBytes("d")).build(),
+      RegionInfoBuilder.newBuilder(htd.getTableName()).setStartKey(Bytes.toBytes("d"))
+          .setEndKey(Bytes.toBytes("z")).build());
+    Mockito.doReturn(ResponseConverter.buildGetOnlineRegionResponse(regions)).when(rpcServices)
+        .getOnlineRegion(Mockito.any(), Mockito.any());
     new RSStatusTmpl().render(new StringWriter(), rs);
   }
 }
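
A side note on the metrics wired up above: all five counters come straight from the
ByteBuffAllocator getters used in MetricsRegionServerWrapperImpl. The following is a
minimal sketch (not part of the patch) of reading them directly from an allocator
instance; the class name, the println reporting and the reservoir-enabled create()
call are assumptions made only to keep the example self-contained and runnable.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;

    public class AllocatorStatsProbe {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Create a reservoir-enabled allocator from a plain Configuration.
        ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true);
        // The five counters surfaced to MetricsRegionServerWrapperImpl in this patch:
        System.out.println("heapAllocationNum=" + alloc.getHeapAllocationNum());
        System.out.println("poolAllocationNum=" + alloc.getPoolAllocationNum());
        // The RS status template above renders this value with a '%' suffix.
        System.out.println("heapAllocationRatio=" + alloc.getHeapAllocationRatio());
        System.out.println("totalBufferCount=" + alloc.getTotalBufferCount());
        System.out.println("freeBufferCount=" + alloc.getFreeBufferCount());
      }
    }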


[hbase] 22/22: HBASE-22598 Deprecated the hbase.ipc.server.reservoir.initial.buffer.size & hbase.ipc.server.reservoir.initial.max for HBase2.x compatibility (#318)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit afaf7a9e351b59241d0d7441a05c947223bbd064
Author: openinx <op...@gmail.com>
AuthorDate: Mon Jun 17 21:36:22 2019 +0800

    HBASE-22598 Deprecated the hbase.ipc.server.reservoir.initial.buffer.size & hbase.ipc.server.reservoir.initial.max for HBase2.x compatibility (#318)
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  | 34 ++++++++++++++++++++--
 .../hadoop/hbase/io/TestByteBuffAllocator.java     | 19 ++++++++++++
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index e8e77dc..5c2c8ff 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -68,9 +68,32 @@ public class ByteBuffAllocator {
   // default heap allocator, it will just allocate ByteBuffers from heap but wrapped by an ByteBuff.
   public static final ByteBuffAllocator HEAP = ByteBuffAllocator.createOnHeap();
 
-  public static final String MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.allocator.max.buffer.count";
+  public static final String MAX_BUFFER_COUNT_KEY = "hbase.server.allocator.max.buffer.count";
 
-  public static final String BUFFER_SIZE_KEY = "hbase.ipc.server.allocator.buffer.size";
+  public static final String BUFFER_SIZE_KEY = "hbase.server.allocator.buffer.size";
+
+  /**
+   * @deprecated use {@link ByteBuffAllocator#MAX_BUFFER_COUNT_KEY} instead.
+   */
+  @Deprecated
+  static final String DEPRECATED_MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.reservoir.initial.max";
+
+  /**
+   * @deprecated use {@link ByteBuffAllocator#BUFFER_SIZE_KEY} instead.
+   */
+  @Deprecated
+  static final String DEPRECATED_BUFFER_SIZE_KEY = "hbase.ipc.server.reservoir.initial.buffer.size";
+
+  /**
+   * The hbase.ipc.server.reservoir.initial.max and hbase.ipc.server.reservoir.initial.buffer.size
+   * were introduced in HBase2.0.0, while in HBase3.0.0 the two config keys will be replaced by
+   * {@link ByteBuffAllocator#MAX_BUFFER_COUNT_KEY} and {@link ByteBuffAllocator#BUFFER_SIZE_KEY}.
+   * Keep the two old config keys here for HBase2.x compatibility.
+   */
+  static {
+    Configuration.addDeprecation(DEPRECATED_MAX_BUFFER_COUNT_KEY, MAX_BUFFER_COUNT_KEY);
+    Configuration.addDeprecation(DEPRECATED_BUFFER_SIZE_KEY, BUFFER_SIZE_KEY);
+  }
 
   /**
    * There're some reasons why better to choose 65KB(rather than 64KB) as the default buffer size:
@@ -129,6 +152,13 @@ public class ByteBuffAllocator {
    * @return ByteBuffAllocator to manage the byte buffers.
    */
   public static ByteBuffAllocator create(Configuration conf, boolean reservoirEnabled) {
+    if (conf.get(DEPRECATED_BUFFER_SIZE_KEY) != null
+        || conf.get(DEPRECATED_MAX_BUFFER_COUNT_KEY) != null) {
+      LOG.warn("The config keys {} and {} are deprecated now, instead please use {} and {}. In "
+            + "future release we will remove the two deprecated configs.",
+        DEPRECATED_BUFFER_SIZE_KEY, DEPRECATED_MAX_BUFFER_COUNT_KEY, BUFFER_SIZE_KEY,
+        MAX_BUFFER_COUNT_KEY);
+    }
     int poolBufSize = conf.getInt(BUFFER_SIZE_KEY, DEFAULT_BUFFER_SIZE);
     if (reservoirEnabled) {
       // The max number of buffers to be pooled in the ByteBufferPool. The default value been
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
index 9186be4..4c88b8f 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
@@ -25,12 +25,14 @@ import static org.junit.Assert.fail;
 
 import java.nio.ByteBuffer;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.MultiByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.testclassification.RPCTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -338,4 +340,21 @@ public class TestByteBuffAllocator {
       // expected exception.
     }
   }
+
+  @Test
+  public void testDeprecatedConfigs() {
+    Configuration conf = new Configuration();
+    conf.setInt(ByteBuffAllocator.DEPRECATED_MAX_BUFFER_COUNT_KEY, 10);
+    conf.setInt(ByteBuffAllocator.DEPRECATED_BUFFER_SIZE_KEY, 1024);
+    ByteBuffAllocator allocator = ByteBuffAllocator.create(conf, true);
+    Assert.assertEquals(1024, allocator.getBufferSize());
+    Assert.assertEquals(10, allocator.getTotalBufferCount());
+
+    conf = new Configuration();
+    conf.setInt(ByteBuffAllocator.MAX_BUFFER_COUNT_KEY, 11);
+    conf.setInt(ByteBuffAllocator.BUFFER_SIZE_KEY, 2048);
+    allocator = ByteBuffAllocator.create(conf, true);
+    Assert.assertEquals(2048, allocator.getBufferSize());
+    Assert.assertEquals(11, allocator.getTotalBufferCount());
+  }
 }
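
A short, self-contained sketch (not part of the patch) of the compatibility path this
commit adds: values set under the old HBase 2.x reservoir keys are carried over to the
new hbase.server.allocator.* keys by the Configuration.addDeprecation calls in
ByteBuffAllocator's static initializer. The key strings below are copied verbatim from
the patch; the class name, the chosen values, and the initial ByteBuffAllocator.HEAP
reference (used only to force the static initializer to run before the keys are set)
are illustrative assumptions.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;

    public class DeprecatedAllocatorKeysSketch {
      public static void main(String[] args) {
        // Touch the class so its static block registers the old-to-new key mapping.
        ByteBuffAllocator ignored = ByteBuffAllocator.HEAP;

        Configuration conf = new Configuration();
        // Legacy HBase 2.x names; addDeprecation maps them onto the new keys.
        conf.setInt("hbase.ipc.server.reservoir.initial.max", 10);
        conf.setInt("hbase.ipc.server.reservoir.initial.buffer.size", 65 * 1024);

        ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true);
        // Expected to be sized from the deprecated keys: 10 buffers of 65 * 1024 bytes.
        System.out.println("bufferSize=" + alloc.getBufferSize()
            + ", totalBufferCount=" + alloc.getTotalBufferCount());
      }
    }

The deprecation warning added to create() should also be logged here, pointing
operators at the new key names.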


[hbase] 16/22: HBASE-22422 Retain an ByteBuff with refCnt=0 when getBlock from LRUCache (#242)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 6028c5cba3a3fabb8c3f7ec40539c069eb07c477
Author: openinx <op...@gmail.com>
AuthorDate: Tue May 28 10:10:34 2019 +0800

    HBASE-22422 Retain an ByteBuff with refCnt=0 when getBlock from LRUCache (#242)
---
 .../hadoop/hbase/io/hfile/BlockCacheUtil.java      |   3 +
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |  40 ++++---
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java     |  26 ++---
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  22 ++--
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |  23 ++--
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  29 +++--
 .../hbase/io/hfile/TestCombinedBlockCache.java     |  17 +++
 .../hadoop/hbase/io/hfile/TestLruBlockCache.java   |  83 +++++++++++++-
 .../hadoop/hbase/io/hfile/bucket/TestRAMCache.java | 126 +++++++++++++++++++++
 9 files changed, 306 insertions(+), 63 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
index 46e8e24..2672992 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
@@ -230,6 +230,9 @@ public class BlockCacheUtil {
       BlockCacheKey cacheKey, Cacheable newBlock) {
     // NOTICE: The getBlock has retained the existingBlock inside.
     Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
+    if (existingBlock == null) {
+      return true;
+    }
     try {
       int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
       if (comparison < 0) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 846460f..079907e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
+
 import java.io.DataInputStream;
 import java.io.DataOutput;
 import java.io.DataOutputStream;
@@ -677,18 +679,24 @@ public class HFileBlock implements Cacheable {
 
     HFileBlock unpacked = new HFileBlock(this);
     unpacked.allocateBuffer(); // allocates space for the decompressed block
-
-    HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA
-        ? reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
-
-    ByteBuff dup = this.buf.duplicate();
-    dup.position(this.headerSize());
-    dup = dup.slice();
-
-    ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
-      unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(true), dup);
-
-    return unpacked;
+    boolean succ = false;
+    try {
+      HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA
+          ? reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
+      // Create a duplicated buffer without the header part.
+      ByteBuff dup = this.buf.duplicate();
+      dup.position(this.headerSize());
+      dup = dup.slice();
+      // Decode the dup into unpacked#buf
+      ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
+        unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(true), dup);
+      succ = true;
+      return unpacked;
+    } finally {
+      if (!succ) {
+        unpacked.release();
+      }
+    }
   }
 
   /**
@@ -709,7 +717,7 @@ public class HFileBlock implements Cacheable {
 
     buf = newBuf;
     // set limit to exclude next block's header
-    buf.limit(headerSize + uncompressedSizeWithoutHeader + cksumBytes);
+    buf.limit(capacityNeeded);
   }
 
   /**
@@ -1685,7 +1693,7 @@ public class HFileBlock implements Cacheable {
     }
 
     private ByteBuff allocate(int size, boolean intoHeap) {
-      return intoHeap ? ByteBuffAllocator.HEAP.allocate(size) : allocator.allocate(size);
+      return intoHeap ? HEAP.allocate(size) : allocator.allocate(size);
     }
 
     /**
@@ -1735,7 +1743,7 @@ public class HFileBlock implements Cacheable {
           if (LOG.isTraceEnabled()) {
             LOG.trace("Extra see to get block size!", new RuntimeException());
           }
-          headerBuf = new SingleByteBuff(ByteBuffer.allocate(hdrSize));
+          headerBuf = HEAP.allocate(hdrSize);
           readAtOffset(is, headerBuf, hdrSize, false, offset, pread);
           headerBuf.rewind();
         }
@@ -1778,7 +1786,7 @@ public class HFileBlock implements Cacheable {
         // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
         // contains the header of next block, so no need to set next block's header in it.
         HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, MemoryType.EXCLUSIVE,
-            offset, nextBlockOnDiskSize, fileContext, allocator);
+            offset, nextBlockOnDiskSize, fileContext, intoHeap ? HEAP: allocator);
         // Run check on uncompressed sizings.
         if (!fileContext.isCompressedOrEncrypted()) {
           hFileBlock.sanityCheckUncompressed();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index ad61839..8396192 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -313,10 +313,13 @@ public class HFileBlockIndex {
       int index = -1;
 
       HFileBlock block = null;
-      boolean dataBlock = false;
       KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
       while (true) {
         try {
+          // Must initialize it with null here, because if we don't and an exception happens in
+          // readBlock, then we'll release the previously assigned block twice in the finally
+          // block. (See HBASE-22422)
+          block = null;
           if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
             // Avoid reading the same block again, even with caching turned off.
             // This is crucial for compaction-type workload which might have
@@ -336,9 +339,8 @@ public class HFileBlockIndex {
               // this also accounts for ENCODED_DATA
               expectedBlockType = BlockType.DATA;
             }
-            block =
-                cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache, pread,
-                  isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
+            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
+              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
           }
 
           if (block == null) {
@@ -348,7 +350,6 @@ public class HFileBlockIndex {
 
           // Found a data block, break the loop and check our level in the tree.
           if (block.getBlockType().isData()) {
-            dataBlock = true;
             break;
           }
 
@@ -381,7 +382,7 @@ public class HFileBlockIndex {
             nextIndexedKey = tmpNextIndexKV;
           }
         } finally {
-          if (!dataBlock && block != null) {
+          if (block != null && !block.getBlockType().isData()) {
             // Release the block immediately if it is not the data block
             block.release();
           }
@@ -389,7 +390,7 @@ public class HFileBlockIndex {
       }
 
       if (lookupLevel != searchTreeLevel) {
-        assert dataBlock == true;
+        assert block.getBlockType().isData();
         // Though we have retrieved a data block we have found an issue
         // in the retrieved data block. Hence returned the block so that
         // the ref count can be decremented
@@ -401,8 +402,7 @@ public class HFileBlockIndex {
       }
 
       // set the next indexed key for the current block.
-      BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
-      return blockWithScanInfo;
+      return new BlockWithScanInfo(block, nextIndexedKey);
     }
 
     @Override
@@ -576,8 +576,7 @@ public class HFileBlockIndex {
         boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
         throws IOException {
       BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock,
-          cacheBlocks,
-          pread, isCompaction, expectedDataBlockEncoding);
+        cacheBlocks, pread, isCompaction, expectedDataBlockEncoding);
       if (blockWithScanInfo == null) {
         return null;
       } else {
@@ -600,9 +599,8 @@ public class HFileBlockIndex {
      * @throws IOException
      */
     public abstract BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
-        boolean cacheBlocks,
-        boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding)
-        throws IOException;
+        boolean cacheBlocks, boolean pread, boolean isCompaction,
+        DataBlockEncoding expectedDataBlockEncoding) throws IOException;
 
     /**
      * An approximation to the {@link HFile}'s mid-key. Operates on block
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 02e56e9..be8cabb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -1134,15 +1134,13 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       updateCurrentBlock(newBlock);
     }
 
-    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
-        boolean rewind, Cell key, boolean seekBefore) throws IOException {
-      if (this.curBlock == null
-          || this.curBlock.getOffset() != seekToBlock.getOffset()) {
+    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind,
+        Cell key, boolean seekBefore) throws IOException {
+      if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) {
         updateCurrentBlock(seekToBlock);
       } else if (rewind) {
         blockBuffer.rewind();
       }
-
       // Update the nextIndexedKey
       this.nextIndexedKey = nextIndexedKey;
       return blockSeek(key, seekBefore);
@@ -1480,10 +1478,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
               // Validate encoding type for data blocks. We include encoding
               // type in the cache key, and we expect it to match on a cache hit.
               if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) {
+                // Remember to release the block when in exceptional path.
+                cachedBlock.release();
                 throw new IOException("Cached block under key " + cacheKey + " "
-                  + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
-                  + dataBlockEncoder.getDataBlockEncoding() + ")"
-                  + ", path=" + path);
+                    + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: "
+                    + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path);
               }
             }
             // Cache-hit. Return!
@@ -1507,15 +1506,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
         BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
 
         // Cache the block if necessary
-        AtomicBoolean cachedRaw = new AtomicBoolean(false);
         cacheConf.getBlockCache().ifPresent(cache -> {
           if (cacheBlock && cacheConf.shouldCacheBlockOnRead(category)) {
-            cachedRaw.set(cacheConf.shouldCacheCompressed(category));
-            cache.cacheBlock(cacheKey, cachedRaw.get() ? hfileBlock : unpacked,
+            cache.cacheBlock(cacheKey,
+              cacheConf.shouldCacheCompressed(category) ? hfileBlock : unpacked,
               cacheConf.isInMemory());
           }
         });
-        if (unpacked != hfileBlock && !cachedRaw.get()) {
+        if (unpacked != hfileBlock) {
           // End of life here if hfileBlock is an independent block.
           hfileBlock.release();
         }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index 82e64e7..70715ae 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -512,7 +512,14 @@ public class LruBlockCache implements FirstLevelBlockCache {
   @Override
   public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
       boolean updateCacheMetrics) {
-    LruCachedBlock cb = map.get(cacheKey);
+    LruCachedBlock cb = map.computeIfPresent(cacheKey, (key, val) -> {
+      // It will be referenced by the RPC path, so increase the refCnt here. NOTICE: the retain must
+      // happen inside this block, because if we retain outside map#computeIfPresent, evictBlock may
+      // remove the block and release it first, and we would then retain a block with refCnt=0,
+      // which is disallowed. See HBASE-22422.
+      val.getBuffer().retain();
+      return val;
+    });
     if (cb == null) {
       if (!repeat && updateCacheMetrics) {
         stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
@@ -540,10 +547,10 @@ public class LruBlockCache implements FirstLevelBlockCache {
       }
       return null;
     }
-    if (updateCacheMetrics) stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
+    if (updateCacheMetrics) {
+      stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
+    }
     cb.access(count.incrementAndGet());
-    // It will be referenced by RPC path, so increase here.
-    cb.getBuffer().retain();
     return cb.getBuffer();
   }
 
@@ -601,8 +608,6 @@ public class LruBlockCache implements FirstLevelBlockCache {
     if (previous == null) {
       return 0;
     }
-    // Decrease the block's reference count, and if refCount is 0, then it'll auto-deallocate.
-    previous.getBuffer().release();
     updateSizeMetrics(block, true);
     long val = elements.decrementAndGet();
     if (LOG.isTraceEnabled()) {
@@ -610,7 +615,7 @@ public class LruBlockCache implements FirstLevelBlockCache {
       assertCounterSanity(size, val);
     }
     if (block.getBuffer().getBlockType().isData()) {
-       dataBlockElements.decrement();
+      dataBlockElements.decrement();
     }
     if (evictedByEvictionProcess) {
       // When the eviction of the block happened because of invalidation of HFiles, no need to
@@ -620,6 +625,10 @@ public class LruBlockCache implements FirstLevelBlockCache {
         victimHandler.cacheBlock(block.getCacheKey(), block.getBuffer());
       }
     }
+    // Decrease the block's reference count, and if refCount is 0, then it'll auto-deallocate. DO
+    // NOT move this up, because if we do that then the victimHandler may access the buffer with
+    // refCnt = 0, which is disallowed.
+    previous.getBuffer().release();
     return block.heapSize();
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index bb0b79c..83cd90b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -42,6 +42,7 @@ import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.LongAdder;
 import java.util.concurrent.locks.Lock;
@@ -1532,21 +1533,28 @@ public class BucketCache implements BlockCache, HeapSize {
     }
 
     public RAMQueueEntry get(BlockCacheKey key) {
-      RAMQueueEntry re = delegate.get(key);
-      if (re != null) {
-        // It'll be referenced by RPC, so retain here.
+      return delegate.computeIfPresent(key, (k, re) -> {
+        // It'll be referenced by RPC, so retain atomically here. If the get and retain were not
+        // atomic, another thread could remove and release the block first, and this thread would
+        // then retain a block with refCnt=0, which is disallowed. (See HBASE-22422)
         re.getData().retain();
-      }
-      return re;
+        return re;
+      });
     }
 
+    /**
+     * Return the previous associated value, or null if absent. It has the same meaning as
+     * {@link ConcurrentMap#putIfAbsent(Object, Object)}
+     */
     public RAMQueueEntry putIfAbsent(BlockCacheKey key, RAMQueueEntry entry) {
-      RAMQueueEntry previous = delegate.putIfAbsent(key, entry);
-      if (previous == null) {
+      AtomicBoolean absent = new AtomicBoolean(false);
+      RAMQueueEntry re = delegate.computeIfAbsent(key, k -> {
         // The RAMCache reference to this entry, so reference count should be increment.
         entry.getData().retain();
-      }
-      return previous;
+        absent.set(true);
+        return entry;
+      });
+      return absent.get() ? null : re;
     }
 
     public boolean remove(BlockCacheKey key) {
@@ -1575,8 +1583,9 @@ public class BucketCache implements BlockCache, HeapSize {
     public void clear() {
       Iterator<Map.Entry<BlockCacheKey, RAMQueueEntry>> it = delegate.entrySet().iterator();
       while (it.hasNext()) {
-        it.next().getValue().getData().release();
+        RAMQueueEntry re = it.next().getValue();
         it.remove();
+        re.getData().release();
       }
     }
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCombinedBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCombinedBlockCache.java
index f4dc38a..a086a3b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCombinedBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCombinedBlockCache.java
@@ -17,11 +17,16 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_IOENGINE_KEY;
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
 import static org.junit.Assert.assertEquals;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache.CombinedCacheStats;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -33,6 +38,8 @@ public class TestCombinedBlockCache {
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestCombinedBlockCache.class);
 
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+
   @Test
   public void testCombinedCacheStats() {
     CacheStats lruCacheStats = new CacheStats("lruCacheStats", 2);
@@ -102,4 +109,14 @@ public class TestCombinedBlockCache {
     assertEquals(0.75, stats.getHitRatioPastNPeriods(), delta);
     assertEquals(0.8, stats.getHitCachingRatioPastNPeriods(), delta);
   }
+
+  @Test
+  public void testMultiThreadGetAndEvictBlock() throws Exception {
+    Configuration conf = UTIL.getConfiguration();
+    conf.set(BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    conf.setInt(BUCKET_CACHE_SIZE_KEY, 32);
+    BlockCache blockCache = BlockCacheFactory.createBlockCache(conf);
+    Assert.assertTrue(blockCache instanceof CombinedBlockCache);
+    TestLruBlockCache.testMultiThreadGetAndEvictBlockInternal(blockCache);
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
index 3317a4d..a355ab0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
@@ -27,6 +28,7 @@ import java.util.Random;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -34,15 +36,17 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
-import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache.EvictionThread;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.ClassSize;
+import org.junit.Assert;
 import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Tests the concurrent LruBlockCache.<p>
@@ -58,6 +62,8 @@ public class TestLruBlockCache {
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestLruBlockCache.class);
 
+  private static final Logger LOG = LoggerFactory.getLogger(TestLruBlockCache.class);
+
   @Test
   public void testCacheEvictionThreadSafe() throws Exception {
     long maxSize = 100000;
@@ -814,11 +820,10 @@ public class TestLruBlockCache {
     byte[] byteArr = new byte[length];
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
-    ByteBuffAllocator alloc = ByteBuffAllocator.HEAP;
     HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta, alloc);
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, HEAP);
     HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta, alloc);
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, HEAP);
 
     LruBlockCache cache = new LruBlockCache(maxSize, blockSize, false,
         (int)Math.ceil(1.2*maxSize/blockSize),
@@ -958,5 +963,75 @@ public class TestLruBlockCache {
 
   }
 
+  static void testMultiThreadGetAndEvictBlockInternal(BlockCache cache) throws Exception {
+    int size = 100;
+    int length = HConstants.HFILEBLOCK_HEADER_SIZE + size;
+    byte[] byteArr = new byte[length];
+    HFileContext meta = new HFileContextBuilder().build();
+    BlockCacheKey key = new BlockCacheKey("key1", 0);
+    HFileBlock blk = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuffer.wrap(byteArr, 0, size), HFileBlock.FILL_HEADER, -1, 52, -1, meta, HEAP);
+    AtomicBoolean err1 = new AtomicBoolean(false);
+    Thread t1 = new Thread(() -> {
+      for (int i = 0; i < 10000 && !err1.get(); i++) {
+        try {
+          cache.getBlock(key, false, false, true);
+        } catch (Exception e) {
+          err1.set(true);
+          LOG.info("Cache block or get block failure: ", e);
+        }
+      }
+    });
+
+    AtomicBoolean err2 = new AtomicBoolean(false);
+    Thread t2 = new Thread(() -> {
+      for (int i = 0; i < 10000 && !err2.get(); i++) {
+        try {
+          cache.evictBlock(key);
+        } catch (Exception e) {
+          err2.set(true);
+          LOG.info("Evict block failure: ", e);
+        }
+      }
+    });
+
+    AtomicBoolean err3 = new AtomicBoolean(false);
+    Thread t3 = new Thread(() -> {
+      for (int i = 0; i < 10000 && !err3.get(); i++) {
+        try {
+          cache.cacheBlock(key, blk);
+        } catch (Exception e) {
+          err3.set(true);
+          LOG.info("Cache block failure: ", e);
+        }
+      }
+    });
+    t1.start();
+    t2.start();
+    t3.start();
+    t1.join();
+    t2.join();
+    t3.join();
+    Assert.assertFalse(err1.get());
+    Assert.assertFalse(err2.get());
+    Assert.assertFalse(err3.get());
+  }
+
+  @Test
+  public void testMultiThreadGetAndEvictBlock() throws Exception {
+    long maxSize = 100000;
+    long blockSize = calculateBlockSize(maxSize, 10);
+    LruBlockCache cache =
+        new LruBlockCache(maxSize, blockSize, false, (int) Math.ceil(1.2 * maxSize / blockSize),
+            LruBlockCache.DEFAULT_LOAD_FACTOR, LruBlockCache.DEFAULT_CONCURRENCY_LEVEL,
+            0.66f, // min
+            0.99f, // acceptable
+            0.33f, // single
+            0.33f, // multi
+            0.34f, // memory
+            1.2f, // limit
+            false, 1024);
+    testMultiThreadGetAndEvictBlockInternal(cache);
+  }
 }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java
new file mode 100644
index 0000000..5c5dda6
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile.bucket;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMCache;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
+import org.apache.hadoop.hbase.testclassification.IOTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.Assert;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+@Category({ IOTests.class, MediumTests.class })
+public class TestRAMCache {
+  private static final Logger LOG = LoggerFactory.getLogger(TestRAMCache.class);
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestRAMCache.class);
+
+  // Define a mock HFileBlock.
+  private static class MockHFileBlock extends HFileBlock {
+
+    private volatile CountDownLatch latch;
+
+    MockHFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
+        int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader,
+        long offset, int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
+        HFileContext fileContext, ByteBuffAllocator allocator) {
+      super(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, b,
+          fillHeader, offset, nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext,
+          allocator);
+    }
+
+    public void setLatch(CountDownLatch latch) {
+      this.latch = latch;
+    }
+
+    public MockHFileBlock retain() {
+      try {
+        if (latch != null) {
+          latch.await();
+        }
+      } catch (InterruptedException e) {
+        LOG.info("Interrupted exception error: ", e);
+      }
+      super.retain();
+      return this;
+    }
+  }
+
+  @Test
+  public void testAtomicRAMCache() throws Exception {
+    int size = 100;
+    int length = HConstants.HFILEBLOCK_HEADER_SIZE + size;
+    byte[] byteArr = new byte[length];
+
+    RAMCache cache = new RAMCache();
+    BlockCacheKey key = new BlockCacheKey("file-1", 1);
+    MockHFileBlock blk = new MockHFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuffer.wrap(byteArr, 0, size), HFileBlock.FILL_HEADER, -1, 52, -1,
+        new HFileContextBuilder().build(), ByteBuffAllocator.HEAP);
+    RAMQueueEntry re = new RAMQueueEntry(key, blk, 1, false, ByteBuffAllocator.NONE);
+
+    Assert.assertNull(cache.putIfAbsent(key, re));
+    Assert.assertEquals(cache.putIfAbsent(key, re), re);
+
+    CountDownLatch latch = new CountDownLatch(1);
+    blk.setLatch(latch);
+
+    AtomicBoolean error = new AtomicBoolean(false);
+    Thread t1 = new Thread(() -> {
+      try {
+        cache.get(key);
+      } catch (Exception e) {
+        error.set(true);
+      }
+    });
+    t1.start();
+    Thread.sleep(200);
+
+    AtomicBoolean removed = new AtomicBoolean(false);
+    Thread t2 = new Thread(() -> {
+      cache.remove(key);
+      removed.set(true);
+    });
+    t2.start();
+    Thread.sleep(200);
+    Assert.assertFalse(removed.get());
+
+    latch.countDown();
+    Thread.sleep(200);
+    Assert.assertTrue(removed.get());
+    Assert.assertFalse(error.get());
+  }
+}
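
Both the LruBlockCache and the RAMCache changes above move the retain() call inside
ConcurrentHashMap#computeIfPresent so that the lookup and the refCnt increment happen
atomically with respect to eviction. A minimal, standalone sketch of that pattern
follows; it is not HBase code, and the RefCounted class, method names and counts are
stand-ins for the real ref-counted ByteBuff/HFileBlock.

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.atomic.AtomicInteger;

    public class AtomicRetainSketch {
      static final class RefCounted {
        final AtomicInteger refCnt = new AtomicInteger(1); // 1 = the cache's own reference
        void retain() {
          if (refCnt.incrementAndGet() <= 1) {
            refCnt.decrementAndGet();
            throw new IllegalStateException("retain on an already released object");
          }
        }
        void release() {
          if (refCnt.decrementAndGet() == 0) {
            // deallocate the backing buffer here
          }
        }
      }

      static final ConcurrentHashMap<String, RefCounted> cache = new ConcurrentHashMap<>();

      // Reader path: retain inside computeIfPresent, so a concurrent evict cannot release
      // the entry between the lookup and the retain (the map entry pins refCnt >= 1).
      static RefCounted getAndRetain(String key) {
        return cache.computeIfPresent(key, (k, v) -> {
          v.retain();
          return v;
        });
      }

      // Eviction path: remove the mapping first, then drop the cache's own reference.
      static void evict(String key) {
        RefCounted prev = cache.remove(key);
        if (prev != null) {
          prev.release();
        }
      }

      public static void main(String[] args) {
        cache.put("block-1", new RefCounted());
        RefCounted blk = getAndRetain("block-1"); // refCnt = 2: cache + caller
        evict("block-1");                         // cache reference dropped, caller still safe
        if (blk != null) {
          blk.release();                          // refCnt reaches 0 here, buffer is freed
        }
      }
    }

If the retain were done after computeIfPresent returned, the evict thread could run in
between and drop refCnt to 0, which is exactly the HBASE-22422 race the patch closes.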


[hbase] 19/22: HBASE-22504 Optimize the MultiByteBuff#get(ByteBuffer, offset, len)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 66c0ffdec578cea22c57cb336628496538ecceb6
Author: huzheng <op...@gmail.com>
AuthorDate: Thu May 30 22:17:05 2019 +0800

    HBASE-22504 Optimize the MultiByteBuff#get(ByteBuffer, offset, len)
---
 .../org/apache/hadoop/hbase/nio/MultiByteBuff.java | 31 +++++++++++++---------
 .../apache/hadoop/hbase/util/ByteBufferUtils.java  | 25 ++---------------
 .../apache/hadoop/hbase/nio/TestMultiByteBuff.java | 25 +++++++++++++++++
 3 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
index 186d9ba..3ce1709 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
@@ -580,7 +580,7 @@ public class MultiByteBuff extends ByteBuff {
     while (length > 0) {
       int toRead = Math.min(length, this.curItem.remaining());
       ByteBufferUtils.copyFromBufferToArray(dst, this.curItem, this.curItem.position(), offset,
-          toRead);
+        toRead);
       this.curItem.position(this.curItem.position() + toRead);
       length -= toRead;
       if (length == 0) break;
@@ -598,8 +598,7 @@ public class MultiByteBuff extends ByteBuff {
     sourceOffset = sourceOffset - this.itemBeginPos[itemIndex];
     while (length > 0) {
       int toRead = Math.min((item.limit() - sourceOffset), length);
-      ByteBufferUtils.copyFromBufferToArray(dst, item, sourceOffset, offset,
-          toRead);
+      ByteBufferUtils.copyFromBufferToArray(dst, item, sourceOffset, offset, toRead);
       length -= toRead;
       if (length == 0) break;
       itemIndex++;
@@ -1020,24 +1019,30 @@ public class MultiByteBuff extends ByteBuff {
     }
     pair.setFirst(ByteBuffer.wrap(dst));
     pair.setSecond(0);
-    return;
   }
 
   /**
   * Copies the content from this MBB to a ByteBuffer
-   * @param out the ByteBuffer to which the copy has to happen
-   * @param sourceOffset the offset in the MBB from which the elements has
-   * to be copied
+   * @param out the ByteBuffer to which the copy happens; its position will be advanced.
+   * @param sourceOffset the offset in the MBB from which the bytes have to be copied
+   * @param length the number of bytes to copy from the MBB
    */
   @Override
-  public void get(ByteBuffer out, int sourceOffset,
-      int length) {
+  public void get(ByteBuffer out, int sourceOffset, int length) {
     checkRefCount();
-      // Not used from real read path actually. So not going with
-      // optimization
-    for (int i = 0; i < length; ++i) {
-      out.put(this.get(sourceOffset + i));
+    int itemIndex = getItemIndex(sourceOffset);
+    ByteBuffer in = this.items[itemIndex];
+    sourceOffset = sourceOffset - this.itemBeginPos[itemIndex];
+    while (length > 0) {
+      int toRead = Math.min(in.limit() - sourceOffset, length);
+      ByteBufferUtils.copyFromBufferToBuffer(in, out, sourceOffset, toRead);
+      length -= toRead;
+      if (length == 0) {
+        break;
+      }
+      itemIndex++;
+      in = this.items[itemIndex];
+      sourceOffset = 0;
     }
   }
 
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 98bc88a..08c35be 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -719,8 +719,8 @@ public final class ByteBufferUtils {
    * @param sourceOffset offset in the source buffer
    * @param length how many bytes to copy
    */
-  public static void copyFromBufferToBuffer(ByteBuffer in,
-      ByteBuffer out, int sourceOffset, int length) {
+  public static void copyFromBufferToBuffer(ByteBuffer in, ByteBuffer out, int sourceOffset,
+      int length) {
     if (in.hasArray() && out.hasArray()) {
       System.arraycopy(in.array(), sourceOffset + in.arrayOffset(), out.array(), out.position()
           + out.arrayOffset(), length);
@@ -736,27 +736,6 @@ public final class ByteBufferUtils {
   }
 
   /**
-   * Find length of common prefix of two parts in the buffer
-   * @param buffer Where parts are located.
-   * @param offsetLeft Offset of the first part.
-   * @param offsetRight Offset of the second part.
-   * @param limit Maximal length of common prefix.
-   * @return Length of prefix.
-   */
-  public static int findCommonPrefix(ByteBuffer buffer, int offsetLeft,
-      int offsetRight, int limit) {
-    int prefix = 0;
-
-    for (; prefix < limit; ++prefix) {
-      if (buffer.get(offsetLeft + prefix) != buffer.get(offsetRight + prefix)) {
-        break;
-      }
-    }
-
-    return prefix;
-  }
-
-  /**
    * Find length of common prefix in two arrays.
    * @param left Array to be compared.
    * @param leftOffset Offset in left array.
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
index fcfb77a..74d0940 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
@@ -457,4 +457,29 @@ public class TestMultiByteBuff {
     assertEquals(i, mbb.getInt());
     assertEquals(l, mbb.getLong());
   }
+
+  @Test
+  public void testGetByteBufferWithOffsetAndPos() {
+    byte[] a = Bytes.toBytes("abcd");
+    byte[] b = Bytes.toBytes("efghijkl");
+    ByteBuffer aa = ByteBuffer.wrap(a);
+    ByteBuffer bb = ByteBuffer.wrap(b);
+    MultiByteBuff mbb = new MultiByteBuff(aa, bb);
+    ByteBuffer out = ByteBuffer.allocate(12);
+    mbb.get(out, 0, 1);
+    assertEquals(out.position(), 1);
+    assertTrue(Bytes.equals(Bytes.toBytes("a"), 0, 1, out.array(), 0, 1));
+
+    mbb.get(out, 1, 4);
+    assertEquals(out.position(), 5);
+    assertTrue(Bytes.equals(Bytes.toBytes("abcde"), 0, 5, out.array(), 0, 5));
+
+    mbb.get(out, 10, 1);
+    assertEquals(out.position(), 6);
+    assertTrue(Bytes.equals(Bytes.toBytes("abcdek"), 0, 6, out.array(), 0, 6));
+
+    mbb.get(out, 0, 6);
+    assertEquals(out.position(), 12);
+    assertTrue(Bytes.equals(Bytes.toBytes("abcdekabcdef"), 0, 12, out.array(), 0, 12));
+  }
 }
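
For reference, a standalone sketch of the chunk-wise copy that replaces the former
byte-by-byte loop in MultiByteBuff#get(ByteBuffer, offset, len): find the segment that
contains the absolute source offset, then copy whole runs from each segment until the
requested length is satisfied. This uses plain java.nio only; the begin[] array plays
the role of MultiByteBuff's itemBeginPos and everything else is an assumption made to
keep the example self-contained.

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    public class SegmentedCopySketch {
      // items[i] covers absolute offsets [begin[i], begin[i] + items[i].limit()).
      static void copy(ByteBuffer[] items, int[] begin, ByteBuffer out, int srcOffset, int length) {
        int idx = findItem(begin, srcOffset);
        int offsetInItem = srcOffset - begin[idx];
        while (length > 0) {
          ByteBuffer in = items[idx];
          int toRead = Math.min(in.limit() - offsetInItem, length);
          ByteBuffer dup = in.duplicate();      // leave the source buffer's position untouched
          dup.position(offsetInItem);
          dup.limit(offsetInItem + toRead);
          out.put(dup);                         // advances out.position() by toRead
          length -= toRead;
          idx++;
          offsetInItem = 0;
        }
      }

      static int findItem(int[] begin, int offset) {
        int i = 0;
        while (i + 1 < begin.length && begin[i + 1] <= offset) {
          i++;
        }
        return i;
      }

      public static void main(String[] args) {
        ByteBuffer a = ByteBuffer.wrap("abcd".getBytes(StandardCharsets.UTF_8));
        ByteBuffer b = ByteBuffer.wrap("efghijkl".getBytes(StandardCharsets.UTF_8));
        ByteBuffer out = ByteBuffer.allocate(6);
        // Copy 6 bytes starting at absolute offset 2, spanning both segments.
        copy(new ByteBuffer[] { a, b }, new int[] { 0, 4 }, out, 2, 6);
        System.out.println(new String(out.array(), 0, out.position(), StandardCharsets.UTF_8)); // cdefgh
      }
    }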


[hbase] 10/22: HBASE-21937 Make the Compression#decompress can accept ByteBuff as input

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 6b584dea512cfaa6d1f56da581fc0823d7e55adb
Author: huzheng <op...@gmail.com>
AuthorDate: Tue Apr 2 20:44:08 2019 +0800

    HBASE-21937 Make the Compression#decompress can accept ByteBuff as input
---
 .../hadoop/hbase/io/compress/Compression.java      | 51 ++++++++--------------
 .../encoding/HFileBlockDefaultDecodingContext.java | 11 +++--
 .../apache/hadoop/hbase/io/util}/BlockIOUtils.java | 45 ++++++++++++++++---
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 29 +++++++-----
 .../io/encoding/TestLoadAndSwitchEncodeOnDisk.java |  2 -
 .../hadoop/hbase/io/hfile/TestBlockIOUtils.java    |  1 +
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      | 18 ++++----
 7 files changed, 90 insertions(+), 67 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java
index d258ba2..3004973 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/compress/Compression.java
@@ -25,7 +25,8 @@ import java.io.OutputStream;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.hbase.io.util.BlockIOUtils;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.io.compress.CodecPool;
 import org.apache.hadoop.io.compress.CompressionCodec;
 import org.apache.hadoop.io.compress.CompressionInputStream;
@@ -438,45 +439,29 @@ public final class Compression {
   }
 
   /**
-   * Decompresses data from the given stream using the configured compression
-   * algorithm. It will throw an exception if the dest buffer does not have
-   * enough space to hold the decompressed data.
-   *
-   * @param dest
-   *          the output bytes buffer
-   * @param destOffset
-   *          start writing position of the output buffer
-   * @param bufferedBoundedStream
-   *          a stream to read compressed data from, bounded to the exact amount
+   * Decompresses data from the given stream using the configured compression algorithm. It will
+   * throw an exception if the dest buffer does not have enough space to hold the decompressed data.
+   * @param dest the output buffer
+   * @param bufferedBoundedStream a stream to read compressed data from, bounded to the exact amount
    *          of compressed data
-   * @param compressedSize
-   *          compressed data size, header not included
-   * @param uncompressedSize
-   *          uncompressed data size, header not included
-   * @param compressAlgo
-   *          compression algorithm used
-   * @throws IOException
+   * @param uncompressedSize uncompressed data size, header not included
+   * @param compressAlgo compression algorithm used
+   * @throws IOException if any IO error happens
    */
-  public static void decompress(byte[] dest, int destOffset,
-      InputStream bufferedBoundedStream, int compressedSize,
-      int uncompressedSize, Compression.Algorithm compressAlgo)
-      throws IOException {
-
-    if (dest.length - destOffset < uncompressedSize) {
-      throw new IllegalArgumentException(
-          "Output buffer does not have enough space to hold "
-              + uncompressedSize + " decompressed bytes, available: "
-              + (dest.length - destOffset));
+  public static void decompress(ByteBuff dest, InputStream bufferedBoundedStream,
+      int uncompressedSize, Compression.Algorithm compressAlgo) throws IOException {
+    if (dest.remaining() < uncompressedSize) {
+      throw new IllegalArgumentException("Output buffer does not have enough space to hold "
+          + uncompressedSize + " decompressed bytes, available: " + dest.remaining());
     }
 
     Decompressor decompressor = null;
     try {
       decompressor = compressAlgo.getDecompressor();
-      InputStream is = compressAlgo.createDecompressionStream(
-          bufferedBoundedStream, decompressor, 0);
-
-      IOUtils.readFully(is, dest, destOffset, uncompressedSize);
-      is.close();
+      try (InputStream is =
+          compressAlgo.createDecompressionStream(bufferedBoundedStream, decompressor, 0)) {
+        BlockIOUtils.readFullyWithHeapBuffer(is, dest, uncompressedSize);
+      }
     } finally {
       if (decompressor != null) {
         compressAlgo.returnDecompressor(decompressor);
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java
index d5bf58c..97d0e6b 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hbase.io.crypto.Cipher;
 import org.apache.hadoop.hbase.io.crypto.Decryptor;
 import org.apache.hadoop.hbase.io.crypto.Encryption;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.util.BlockIOUtils;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -87,14 +88,12 @@ public class HFileBlockDefaultDecodingContext implements
       }
 
       Compression.Algorithm compression = fileContext.getCompression();
-      assert blockBufferWithoutHeader.hasArray();
       if (compression != Compression.Algorithm.NONE) {
-        Compression.decompress(blockBufferWithoutHeader.array(),
-            blockBufferWithoutHeader.arrayOffset(), dataInputStream, onDiskSizeWithoutHeader,
-            uncompressedSizeWithoutHeader, compression);
+        Compression.decompress(blockBufferWithoutHeader, dataInputStream,
+          uncompressedSizeWithoutHeader, compression);
       } else {
-        IOUtils.readFully(dataInputStream, blockBufferWithoutHeader.array(),
-            blockBufferWithoutHeader.arrayOffset(), onDiskSizeWithoutHeader);
+        BlockIOUtils.readFullyWithHeapBuffer(dataInputStream, blockBufferWithoutHeader,
+          onDiskSizeWithoutHeader);
       }
     } finally {
       byteBuffInputStream.close();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/BlockIOUtils.java
similarity index 86%
rename from hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java
rename to hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/BlockIOUtils.java
index dbd5b2e..a98a478 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockIOUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/util/BlockIOUtils.java
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.hadoop.hbase.io.hfile;
+package org.apache.hadoop.hbase.io.util;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -29,9 +29,14 @@ import org.apache.hadoop.io.IOUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 
 @InterfaceAudience.Private
-class BlockIOUtils {
+public final class BlockIOUtils {
 
-  static boolean isByteBufferReadable(FSDataInputStream is) {
+  // Disallow instantiation
+  private BlockIOUtils() {
+
+  }
+
+  public static boolean isByteBufferReadable(FSDataInputStream is) {
     InputStream cur = is.getWrappedStream();
     for (;;) {
       if ((cur instanceof FSDataInputStream)) {
@@ -50,7 +55,7 @@ class BlockIOUtils {
    * @param length bytes to read.
    * @throws IOException exception to throw if any error happen
    */
-  static void readFully(ByteBuff buf, FSDataInputStream dis, int length) throws IOException {
+  public static void readFully(ByteBuff buf, FSDataInputStream dis, int length) throws IOException {
     if (!isByteBufferReadable(dis)) {
       // If InputStream does not support the ByteBuffer read, just read to heap and copy bytes to
       // the destination ByteBuff.
@@ -82,6 +87,32 @@ class BlockIOUtils {
   }
 
   /**
+   * Copies bytes from the InputStream into the given {@link ByteBuff} by using a temporary heap
+   * byte[] (default size is 1024 now).
+   * @param in the InputStream to read from
+   * @param out the destination {@link ByteBuff}
+   * @param length the number of bytes to read
+   * @throws IOException if any IO error is encountered.
+   */
+  public static void readFullyWithHeapBuffer(InputStream in, ByteBuff out, int length)
+      throws IOException {
+    byte[] buffer = new byte[1024];
+    if (length < 0) {
+      throw new IllegalArgumentException("Length must not be negative: " + length);
+    }
+    int remain = length, count;
+    while (remain > 0) {
+      count = in.read(buffer, 0, Math.min(remain, buffer.length));
+      if (count < 0) {
+        throw new IOException(
+            "Premature EOF from inputStream, but still need " + remain + " bytes");
+      }
+      out.put(buffer, 0, count);
+      remain -= count;
+    }
+  }
+
+  /**
    * Read from an input stream at least <code>necessaryLen</code> and if possible,
    * <code>extraLen</code> also if available. Analogous to
    * {@link IOUtils#readFully(InputStream, byte[], int, int)}, but specifies a number of "extra"
@@ -125,8 +156,8 @@ class BlockIOUtils {
    *         ByteBuffers, otherwise we've not read the extraLen bytes yet.
    * @throws IOException if failed to read the necessary bytes.
    */
-  static boolean readWithExtra(ByteBuff buf, FSDataInputStream dis, int necessaryLen, int extraLen)
-      throws IOException {
+  public static boolean readWithExtra(ByteBuff buf, FSDataInputStream dis, int necessaryLen,
+      int extraLen) throws IOException {
     if (!isByteBufferReadable(dis)) {
       // If InputStream does not support the ByteBuffer read, just read to heap and copy bytes to
       // the destination ByteBuff.
@@ -174,7 +205,7 @@ class BlockIOUtils {
    * @return true if and only if extraLen is > 0 and reading those extra bytes was successful
    * @throws IOException if failed to read the necessary bytes
    */
-  static boolean preadWithExtra(ByteBuff buff, FSDataInputStream dis, long position,
+  public static boolean preadWithExtra(ByteBuff buff, FSDataInputStream dis, long position,
       int necessaryLen, int extraLen) throws IOException {
     int remain = necessaryLen + extraLen;
     byte[] buf = new byte[remain];
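
A minimal, illustrative sketch (not part of this patch) of the new public
BlockIOUtils.readFullyWithHeapBuffer helper shown above: it drains `length` bytes from a plain
InputStream into a ByteBuff through a small temporary heap array. The MultiByteBuff built from two
heap ByteBuffers and the class name are assumptions made only for this example.

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.io.util.BlockIOUtils;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;

public final class ReadFullySketch {
  public static void main(String[] args) throws IOException {
    byte[] payload = new byte[2048]; // stands in for an on-disk block payload
    // Destination spanning two heap ByteBuffers; any ByteBuff implementation works the same way.
    ByteBuff out = new MultiByteBuff(ByteBuffer.allocate(1024), ByteBuffer.allocate(1024));
    // Copies all 2048 bytes via the internal 1024-byte temporary heap buffer.
    BlockIOUtils.readFullyWithHeapBuffer(new ByteArrayInputStream(payload), out, payload.length);
  }
}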
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 2c8fa4d..a3738d6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.io.util.BlockIOUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -527,14 +528,22 @@ public class HFileBlock implements Cacheable {
   }
 
   /**
-   * Returns a buffer that does not include the header or checksum.
-   *
+   * Returns a buffer that does not include the header and checksum.
    * @return the buffer with header skipped and checksum omitted.
    */
   public ByteBuff getBufferWithoutHeader() {
+    return this.getBufferWithoutHeader(false);
+  }
+
+  /**
+   * Returns a buffer that does not include the header, optionally keeping the checksum.
+   * @param withChecksum whether to include the checksum in the returned buffer.
+   * @return the buffer with the header skipped and the checksum included or omitted as requested.
+   */
+  public ByteBuff getBufferWithoutHeader(boolean withChecksum) {
     ByteBuff dup = getBufferReadOnly();
-    // Now set it up so Buffer spans content only -- no header or no checksums.
-    return dup.position(headerSize()).limit(buf.limit() - totalChecksumBytes()).slice();
+    int delta = withChecksum ? 0 : totalChecksumBytes();
+    return dup.position(headerSize()).limit(buf.limit() - delta).slice();
   }
 
   /**
@@ -608,8 +617,9 @@ public class HFileBlock implements Cacheable {
     // We might optionally allocate HFILEBLOCK_HEADER_SIZE more bytes to read the next
     // block's header, so there are two sensible values for buffer capacity.
     int hdrSize = headerSize();
-    if (dup.capacity() != expectedBufLimit && dup.capacity() != expectedBufLimit + hdrSize) {
-      throw new AssertionError("Invalid buffer capacity: " + dup.capacity() +
+    dup.rewind();
+    if (dup.remaining() != expectedBufLimit && dup.remaining() != expectedBufLimit + hdrSize) {
+      throw new AssertionError("Invalid buffer capacity: " + dup.remaining() +
           ", expected " + expectedBufLimit + " or " + (expectedBufLimit + hdrSize));
     }
   }
@@ -671,15 +681,15 @@ public class HFileBlock implements Cacheable {
     HFileBlock unpacked = new HFileBlock(this);
     unpacked.allocateBuffer(); // allocates space for the decompressed block
 
-    HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA ?
-      reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
+    HFileBlockDecodingContext ctx = blockType == BlockType.ENCODED_DATA
+        ? reader.getBlockDecodingContext() : reader.getDefaultBlockDecodingContext();
 
     ByteBuff dup = this.buf.duplicate();
     dup.position(this.headerSize());
     dup = dup.slice();
 
     ctx.prepareDecoding(unpacked.getOnDiskSizeWithoutHeader(),
-      unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(), dup);
+      unpacked.getUncompressedSizeWithoutHeader(), unpacked.getBufferWithoutHeader(true), dup);
 
     return unpacked;
   }
@@ -697,7 +707,6 @@ public class HFileBlock implements Cacheable {
     ByteBuff newBuf = allocator.allocate(capacityNeeded);
 
     // Copy header bytes into newBuf.
-    // newBuf is HBB so no issue in calling array()
     buf.position(0);
     newBuf.put(0, buf, 0, headerSize);
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
index fb9e44f..d53d24e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestLoadAndSwitchEncodeOnDisk.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.TestMiniClusterLoadSequential;
 import org.apache.hadoop.hbase.util.Threads;
 import org.junit.ClassRule;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.runners.Parameterized.Parameters;
@@ -74,7 +73,6 @@ public class TestLoadAndSwitchEncodeOnDisk extends TestMiniClusterLoadSequential
 
   @Override
   @Test
-  @Ignore("TODO Ignore this UT temporarily, will fix this in the critical HBASE-21937.")
   public void loadTest() throws Exception {
     Admin admin = TEST_UTIL.getAdmin();
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
index 60180e6..a386f49 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestBlockIOUtils.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.io.util.BlockIOUtils;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.MultiByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 2733ca2..af42a24 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -97,8 +97,7 @@ public class TestHFileBlock {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlock.class);
 
-  // TODO let uncomment the GZ algorithm in HBASE-21937, because no support BB unpack yet.
-  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, /* GZ */ };
+  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, GZ };
 
   private static final int NUM_TEST_BLOCKS = 1000;
   private static final int NUM_READER_THREADS = 26;
@@ -623,7 +622,7 @@ public class TestHFileBlock {
             if (detailedLogging) {
               LOG.info("Reading block #" + i + " at offset " + curOffset);
             }
-            HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, true);
+            HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, false);
             if (detailedLogging) {
               LOG.info("Block #" + i + ": " + b);
             }
@@ -638,7 +637,7 @@ public class TestHFileBlock {
             // Now re-load this block knowing the on-disk size. This tests a
             // different branch in the loader.
             HFileBlock b2 =
-                hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, true);
+                hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, false);
             b2.sanityCheck();
 
             assertEquals(b.getBlockType(), b2.getBlockType());
@@ -667,11 +666,10 @@ public class TestHFileBlock {
               // expectedContents have header + data only
               ByteBuff bufRead = newBlock.getBufferReadOnly();
               ByteBuffer bufExpected = expectedContents.get(i);
-              boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
-                  bufRead.arrayOffset(),
-                  bufRead.limit() - newBlock.totalChecksumBytes(),
-                  bufExpected.array(), bufExpected.arrayOffset(),
-                  bufExpected.limit()) == 0;
+              byte[] tmp = new byte[bufRead.limit() - newBlock.totalChecksumBytes()];
+              bufRead.get(tmp, 0, tmp.length);
+              boolean bytesAreCorrect = Bytes.compareTo(tmp, 0, tmp.length, bufExpected.array(),
+                bufExpected.arrayOffset(), bufExpected.limit()) == 0;
               String wrongBytesMsg = "";
 
               if (!bytesAreCorrect) {
@@ -702,6 +700,8 @@ public class TestHFileBlock {
               if (newBlock != b) {
                 assertTrue(b.release());
               }
+            } else {
+              assertTrue(b.release());
             }
           }
           assertEquals(curOffset, fs.getFileStatus(path).getLen());


[hbase] 11/22: HBASE-22122 Change to release mob hfile's block after rpc server shipped response to client

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit c5895049c642cbae3544b8f7ab11dc7683e376fa
Author: huzheng <op...@gmail.com>
AuthorDate: Wed Apr 17 11:54:15 2019 +0800

    HBASE-22122 Change to release mob hfile's block after rpc server shipped response to client
---
 .../hadoop/hbase/mob/DefaultMobStoreCompactor.java |  28 ++---
 .../java/org/apache/hadoop/hbase/mob/MobCell.java  |  74 +++++++++++++
 .../java/org/apache/hadoop/hbase/mob/MobFile.java  |  21 ++--
 .../hadoop/hbase/regionserver/HMobStore.java       |  63 ++++++------
 .../hadoop/hbase/regionserver/MobStoreScanner.java |  41 +++++++-
 .../hadoop/hbase/regionserver/RSRpcServices.java   |   2 +-
 .../regionserver/ReversedMobStoreScanner.java      |  47 +++++++--
 .../hbase/regionserver/StoreFileScanner.java       |  12 ---
 .../apache/hadoop/hbase/mob/TestCachedMobFile.java |  19 ++--
 .../org/apache/hadoop/hbase/mob/TestMobFile.java   |  26 ++---
 .../hbase/mob/TestMobWithByteBuffAllocator.java    | 114 +++++++++++++++++++++
 .../hadoop/hbase/regionserver/TestHMobStore.java   |  19 ++--
 12 files changed, 347 insertions(+), 119 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
index 062bec6..ee1a53f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/DefaultMobStoreCompactor.java
@@ -244,19 +244,21 @@ public class DefaultMobStoreCompactor extends DefaultCompactor {
                 writer.append(c);
               } else {
                 // If the value is not larger than the threshold, it's not regarded a mob. Retrieve
-                // the mob cell from the mob file, and write it back to the store file.
-                Cell mobCell = mobStore.resolve(c, false);
-                if (mobCell.getValueLength() != 0) {
-                  // put the mob data back to the store file
-                  PrivateCellUtil.setSequenceId(mobCell, c.getSequenceId());
-                  writer.append(mobCell);
-                  cellsCountCompactedFromMob++;
-                  cellsSizeCompactedFromMob += mobCell.getValueLength();
-                } else {
-                  // If the value of a file is empty, there might be issues when retrieving,
-                  // directly write the cell to the store file, and leave it to be handled by the
-                  // next compaction.
-                  writer.append(c);
+                // the mob cell from the mob file, and write it back to the store file. Must
+                // close the mob scanner once its life cycle has finished.
+                try (MobCell mobCell = mobStore.resolve(c, false)) {
+                  if (mobCell.getCell().getValueLength() != 0) {
+                    // put the mob data back to the store file
+                    PrivateCellUtil.setSequenceId(mobCell.getCell(), c.getSequenceId());
+                    writer.append(mobCell.getCell());
+                    cellsCountCompactedFromMob++;
+                    cellsSizeCompactedFromMob += mobCell.getCell().getValueLength();
+                  } else {
+                    // If the value of a file is empty, there might be issues when retrieving,
+                    // directly write the cell to the store file, and leave it to be handled by the
+                    // next compaction.
+                    writer.append(c);
+                  }
                 }
               }
             } else {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobCell.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobCell.java
new file mode 100644
index 0000000..ec956a2
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobCell.java
@@ -0,0 +1,74 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mob;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.regionserver.StoreFileScanner;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * The MobCell will maintain a {@link Cell} and a {@link StoreFileScanner} inside. Now the mob cell
+ * is backed by NIO ByteBuffers which are allocated from the ByteBuffAllocator, so we cannot just
+ * read the cell and close the MOB file scanner, because closing the MOB file scanner will
+ * deallocate the NIO ByteBuffers, resulting in a memory leak.
+ * <p>
+ * Actually, the right solution is: <br>
+ * 1. Read the normal cell; <br>
+ * 2. Parse the value of normal cell and get MOB fileName,offset,length; <br>
+ * 3. Open scanner to read the mob value; <br>
+ * 4. Construct the response cell whose key is from the normal cell and value is from the mob cell.
+ * <br>
+ * 5. Ship the response cell to HBase client. <br>
+ * 6. Release both normal cell's block and mob cell's block. <br>
+ * <p>
+ * For a mob cell, releasing the block just means closing the mob scanner, so here we need to keep
+ * the {@link StoreFileScanner} inside and close it only once we're sure that the MobCell has been
+ * shipped to the RPC client.
+ */
+@InterfaceAudience.Private
+public class MobCell implements Closeable {
+
+  private final Cell cell;
+  private final StoreFileScanner sfScanner;
+
+  public MobCell(Cell cell) {
+    this.cell = cell;
+    this.sfScanner = null;
+  }
+
+  public MobCell(Cell cell, StoreFileScanner sfScanner) {
+    this.cell = cell;
+    this.sfScanner = sfScanner;
+  }
+
+  public Cell getCell() {
+    return cell;
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (this.sfScanner != null) {
+      this.sfScanner.close();
+    }
+  }
+}
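
A minimal usage sketch (not part of this patch) of the new MobCell lifecycle, following the same
try-with-resources pattern the DefaultMobStoreCompactor change above uses. The HMobStore instance
and the helper method name are assumptions for illustration only.

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.mob.MobCell;
import org.apache.hadoop.hbase.regionserver.HMobStore;

public final class MobCellUsageSketch {
  // Resolves a mob reference cell and reads its value length; closing the MobCell also closes the
  // underlying StoreFileScanner, which is what releases the backing blocks.
  static int resolveAndMeasure(HMobStore mobStore, Cell reference) throws IOException {
    try (MobCell mobCell = mobStore.resolve(reference, false)) {
      return mobCell.getCell().getValueLength();
    }
  }
}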
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
index 1d0d5ff..43abd39 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobFile.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.mob;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
@@ -70,7 +71,7 @@ public class MobFile {
    * @return The cell in the mob file.
    * @throws IOException
    */
-  public Cell readCell(Cell search, boolean cacheMobBlocks) throws IOException {
+  public MobCell readCell(Cell search, boolean cacheMobBlocks) throws IOException {
     return readCell(search, cacheMobBlocks, sf.getMaxMemStoreTS());
   }
 
@@ -82,26 +83,26 @@ public class MobFile {
    * @return The cell in the mob file.
    * @throws IOException
    */
-  public Cell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws IOException {
-    Cell result = null;
+  public MobCell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws IOException {
     StoreFileScanner scanner = null;
-    List<HStoreFile> sfs = new ArrayList<>();
-    sfs.add(sf);
+    boolean succ = false;
     try {
-      List<StoreFileScanner> sfScanners = StoreFileScanner.getScannersForStoreFiles(sfs,
-        cacheMobBlocks, true, false, false, readPt);
+      List<StoreFileScanner> sfScanners = StoreFileScanner.getScannersForStoreFiles(
+        Collections.singletonList(sf), cacheMobBlocks, true, false, false, readPt);
       if (!sfScanners.isEmpty()) {
         scanner = sfScanners.get(0);
         if (scanner.seek(search)) {
-          result = scanner.peek();
+          MobCell mobCell = new MobCell(scanner.peek(), scanner);
+          succ = true;
+          return mobCell;
         }
       }
+      return null;
     } finally {
-      if (scanner != null) {
+      if (scanner != null && !succ) {
         scanner.close();
       }
     }
-    return result;
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
index 596aa3d..b8ea960 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HMobStore.java
@@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Tag;
 import org.apache.hadoop.hbase.TagType;
@@ -49,6 +48,7 @@ import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.filter.FilterList;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.hfile.CorruptHFileException;
+import org.apache.hadoop.hbase.mob.MobCell;
 import org.apache.hadoop.hbase.mob.MobConstants;
 import org.apache.hadoop.hbase.mob.MobFile;
 import org.apache.hadoop.hbase.mob.MobFileCache;
@@ -298,14 +298,14 @@ public class HMobStore extends HStore {
   }
 
   /**
-   * Reads the cell from the mob file, and the read point does not count.
-   * This is used for DefaultMobStoreCompactor where we can read empty value for the missing cell.
+   * Reads the cell from the mob file, and the read point does not count. This is used for
+   * DefaultMobStoreCompactor where we can read an empty value for the missing cell.
    * @param reference The cell found in the HBase, its value is a path to a mob file.
    * @param cacheBlocks Whether the scanner should cache blocks.
    * @return The cell found in the mob file.
    * @throws IOException
    */
-  public Cell resolve(Cell reference, boolean cacheBlocks) throws IOException {
+  public MobCell resolve(Cell reference, boolean cacheBlocks) throws IOException {
     return resolve(reference, cacheBlocks, -1, true);
   }
 
@@ -314,14 +314,14 @@ public class HMobStore extends HStore {
    * @param reference The cell found in the HBase, its value is a path to a mob file.
    * @param cacheBlocks Whether the scanner should cache blocks.
    * @param readPt the read point.
-   * @param readEmptyValueOnMobCellMiss Whether return null value when the mob file is
-   *        missing or corrupt.
+   * @param readEmptyValueOnMobCellMiss Whether to return a null value when the mob file is missing
+   *          or corrupt.
    * @return The cell found in the mob file.
    * @throws IOException
    */
-  public Cell resolve(Cell reference, boolean cacheBlocks, long readPt,
-    boolean readEmptyValueOnMobCellMiss) throws IOException {
-    Cell result = null;
+  public MobCell resolve(Cell reference, boolean cacheBlocks, long readPt,
+      boolean readEmptyValueOnMobCellMiss) throws IOException {
+    MobCell mobCell = null;
     if (MobUtils.hasValidMobRefCellValue(reference)) {
       String fileName = MobUtils.getMobFileName(reference);
       Tag tableNameTag = MobUtils.getTableNameTag(reference);
@@ -336,35 +336,34 @@ public class HMobStore extends HStore {
               locations = new ArrayList<>(2);
               TableName tn = TableName.valueOf(tableNameString);
               locations.add(MobUtils.getMobFamilyPath(conf, tn, family.getNameAsString()));
-              locations.add(HFileArchiveUtil.getStoreArchivePath(conf, tn, MobUtils
-                  .getMobRegionInfo(tn).getEncodedName(), family.getNameAsString()));
+              locations.add(HFileArchiveUtil.getStoreArchivePath(conf, tn,
+                MobUtils.getMobRegionInfo(tn).getEncodedName(), family.getNameAsString()));
               map.put(tableNameString, locations);
             }
           } finally {
             keyLock.releaseLockEntry(lockEntry);
           }
         }
-        result = readCell(locations, fileName, reference, cacheBlocks, readPt,
+        mobCell = readCell(locations, fileName, reference, cacheBlocks, readPt,
           readEmptyValueOnMobCellMiss);
       }
     }
-    if (result == null) {
+    if (mobCell == null) {
       LOG.warn("The Cell result is null, assemble a new Cell with the same row,family,"
           + "qualifier,timestamp,type and tags but with an empty value to return.");
-      result = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
-              .setRow(reference.getRowArray(), reference.getRowOffset(), reference.getRowLength())
-              .setFamily(reference.getFamilyArray(), reference.getFamilyOffset(),
-                reference.getFamilyLength())
-              .setQualifier(reference.getQualifierArray(),
-                reference.getQualifierOffset(), reference.getQualifierLength())
-              .setTimestamp(reference.getTimestamp())
-              .setType(reference.getTypeByte())
-              .setValue(HConstants.EMPTY_BYTE_ARRAY)
-              .setTags(reference.getTagsArray(), reference.getTagsOffset(),
-                reference.getTagsLength())
-              .build();
+      Cell cell = ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
+          .setRow(reference.getRowArray(), reference.getRowOffset(), reference.getRowLength())
+          .setFamily(reference.getFamilyArray(), reference.getFamilyOffset(),
+            reference.getFamilyLength())
+          .setQualifier(reference.getQualifierArray(), reference.getQualifierOffset(),
+            reference.getQualifierLength())
+          .setTimestamp(reference.getTimestamp()).setType(reference.getTypeByte())
+          .setValue(HConstants.EMPTY_BYTE_ARRAY)
+          .setTags(reference.getTagsArray(), reference.getTagsOffset(), reference.getTagsLength())
+          .build();
+      mobCell = new MobCell(cell);
     }
-    return result;
+    return mobCell;
   }
 
   /**
@@ -383,8 +382,8 @@ public class HMobStore extends HStore {
    * @return The found cell. Null if there's no such a cell.
    * @throws IOException
    */
-  private Cell readCell(List<Path> locations, String fileName, Cell search, boolean cacheMobBlocks,
-    long readPt, boolean readEmptyValueOnMobCellMiss) throws IOException {
+  private MobCell readCell(List<Path> locations, String fileName, Cell search,
+      boolean cacheMobBlocks, long readPt, boolean readEmptyValueOnMobCellMiss) throws IOException {
     FileSystem fs = getFileSystem();
     Throwable throwable = null;
     for (Path location : locations) {
@@ -392,12 +391,8 @@ public class HMobStore extends HStore {
       Path path = new Path(location, fileName);
       try {
         file = mobFileCache.openFile(fs, path, cacheConf);
-        Cell cell = readPt != -1 ? file.readCell(search, cacheMobBlocks, readPt)
+        return readPt != -1 ? file.readCell(search, cacheMobBlocks, readPt)
             : file.readCell(search, cacheMobBlocks);
-        // Now we will return blocks to allocator for mob cells before shipping to rpc client.
-        // it will be memory leak. so just copy cell as an on-heap KV here. will remove this in
-        // HBASE-22122 (TODO)
-        return KeyValueUtil.copyToNewKeyValue(cell);
       } catch (IOException e) {
         mobFileCache.evictFile(fileName);
         throwable = e;
@@ -425,7 +420,7 @@ public class HMobStore extends HStore {
       }
     }
     LOG.error("The mob file " + fileName + " could not be found in the locations " + locations
-      + " or it is corrupt");
+        + " or it is corrupt");
     if (readEmptyValueOnMobCellMiss) {
       return null;
     } else if ((throwable instanceof FileNotFoundException)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MobStoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MobStoreScanner.java
index b9f9af8..76144f0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MobStoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MobStoreScanner.java
@@ -19,13 +19,17 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.NavigableSet;
 
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.mob.MobCell;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Scanner scans both the memstore and the MOB Store. Coalesce KeyValue stream into
@@ -34,10 +38,13 @@ import org.apache.yetus.audience.InterfaceAudience;
 @InterfaceAudience.Private
 public class MobStoreScanner extends StoreScanner {
 
+  private static final Logger LOG = LoggerFactory.getLogger(MobStoreScanner.class);
+
   private boolean cacheMobBlocks = false;
   private boolean rawMobScan = false;
   private boolean readEmptyValueOnMobCellMiss = false;
   private final HMobStore mobStore;
+  private final List<MobCell> referencedMobCells;
 
   public MobStoreScanner(HStore store, ScanInfo scanInfo, Scan scan,
       final NavigableSet<byte[]> columns, long readPt) throws IOException {
@@ -49,6 +56,7 @@ public class MobStoreScanner extends StoreScanner {
       throw new IllegalArgumentException("The store " + store + " is not a HMobStore");
     }
     mobStore = (HMobStore) store;
+    this.referencedMobCells = new ArrayList<>();
   }
 
   /**
@@ -69,11 +77,13 @@ public class MobStoreScanner extends StoreScanner {
       for (int i = 0; i < outResult.size(); i++) {
         Cell cell = outResult.get(i);
         if (MobUtils.isMobReferenceCell(cell)) {
-          Cell mobCell = mobStore
-            .resolve(cell, cacheMobBlocks, readPt, readEmptyValueOnMobCellMiss);
+          MobCell mobCell =
+              mobStore.resolve(cell, cacheMobBlocks, readPt, readEmptyValueOnMobCellMiss);
           mobKVCount++;
-          mobKVSize += mobCell.getValueLength();
-          outResult.set(i, mobCell);
+          mobKVSize += mobCell.getCell().getValueLength();
+          outResult.set(i, mobCell.getCell());
+          // Keep the MobCell here until we have shipped the RPC response or closed the scanner.
+          referencedMobCells.add(mobCell);
         }
       }
       mobStore.updateMobScanCellsCount(mobKVCount);
@@ -81,4 +91,27 @@ public class MobStoreScanner extends StoreScanner {
     }
     return result;
   }
+
+  private void freeAllReferencedMobCells() throws IOException {
+    for (MobCell cell : referencedMobCells) {
+      cell.close();
+    }
+    referencedMobCells.clear();
+  }
+
+  @Override
+  public void shipped() throws IOException {
+    super.shipped();
+    this.freeAllReferencedMobCells();
+  }
+
+  @Override
+  public void close() {
+    super.close();
+    try {
+      this.freeAllReferencedMobCells();
+    } catch (IOException e) {
+      LOG.warn("Failed to free referenced mob cells: ", e);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 1586f1c..2b8bba0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -291,7 +291,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
    */
   static final int BATCH_ROWS_THRESHOLD_DEFAULT = 5000;
 
-  protected static final String RESERVOIR_ENABLED_KEY = "hbase.ipc.server.reservoir.enabled";
+  public static final String RESERVOIR_ENABLED_KEY = "hbase.ipc.server.reservoir.enabled";
 
   // Request counter. (Includes requests that are not serviced by regions.)
   // Count only once for requests with multiple actions like multi/caching-scan/replayBatch
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedMobStoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedMobStoreScanner.java
index d64c372..a3d779c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedMobStoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedMobStoreScanner.java
@@ -19,26 +19,31 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.NavigableSet;
 
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.mob.MobCell;
 import org.apache.hadoop.hbase.mob.MobUtils;
 import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
- * ReversedMobStoreScanner extends from ReversedStoreScanner, and is used to support
- * reversed scanning in both the memstore and the MOB store.
- *
+ * ReversedMobStoreScanner extends from ReversedStoreScanner, and is used to support reversed
+ * scanning in both the memstore and the MOB store.
  */
 @InterfaceAudience.Private
 public class ReversedMobStoreScanner extends ReversedStoreScanner {
 
+  private static final Logger LOG = LoggerFactory.getLogger(ReversedMobStoreScanner.class);
   private boolean cacheMobBlocks = false;
   private boolean rawMobScan = false;
   private boolean readEmptyValueOnMobCellMiss = false;
-  protected final HMobStore mobStore;
+  private final HMobStore mobStore;
+  private final List<MobCell> referencedMobCells;
 
   ReversedMobStoreScanner(HStore store, ScanInfo scanInfo, Scan scan, NavigableSet<byte[]> columns,
       long readPt) throws IOException {
@@ -50,6 +55,7 @@ public class ReversedMobStoreScanner extends ReversedStoreScanner {
       throw new IllegalArgumentException("The store " + store + " is not a HMobStore");
     }
     mobStore = (HMobStore) store;
+    this.referencedMobCells = new ArrayList<>();
   }
 
   /**
@@ -70,11 +76,13 @@ public class ReversedMobStoreScanner extends ReversedStoreScanner {
       for (int i = 0; i < outResult.size(); i++) {
         Cell cell = outResult.get(i);
         if (MobUtils.isMobReferenceCell(cell)) {
-          Cell mobCell = mobStore
-            .resolve(cell, cacheMobBlocks, readPt, readEmptyValueOnMobCellMiss);
+          MobCell mobCell =
+              mobStore.resolve(cell, cacheMobBlocks, readPt, readEmptyValueOnMobCellMiss);
           mobKVCount++;
-          mobKVSize += mobCell.getValueLength();
-          outResult.set(i, mobCell);
+          mobKVSize += mobCell.getCell().getValueLength();
+          outResult.set(i, mobCell.getCell());
+          // Keep the MobCell here until we have shipped the RPC response or closed the scanner.
+          referencedMobCells.add(mobCell);
         }
       }
       mobStore.updateMobScanCellsCount(mobKVCount);
@@ -82,4 +90,27 @@ public class ReversedMobStoreScanner extends ReversedStoreScanner {
     }
     return result;
   }
+
+  private void freeAllReferencedMobCells() throws IOException {
+    for (MobCell mobCell : referencedMobCells) {
+      mobCell.close();
+    }
+    referencedMobCells.clear();
+  }
+
+  @Override
+  public void shipped() throws IOException {
+    super.shipped();
+    this.freeAllReferencedMobCells();
+  }
+
+  @Override
+  public void close() {
+    super.close();
+    try {
+      this.freeAllReferencedMobCells();
+    } catch (IOException e) {
+      LOG.warn("Failed to free referenced mob cells: ", e);
+    }
+  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
index b5b853a..6e70c5b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -97,18 +97,6 @@ public class StoreFileScanner implements KeyValueScanner {
     this.reader.incrementRefCount();
   }
 
-  boolean isPrimaryReplica() {
-    return reader.isPrimaryReplicaReader();
-  }
-
-  /**
-   * Return an array of scanners corresponding to the given set of store files.
-   */
-  public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
-      boolean cacheBlocks, boolean usePread, long readPt) throws IOException {
-    return getScannersForStoreFiles(files, cacheBlocks, usePread, false, false, readPt);
-  }
-
   /**
    * Return an array of scanners corresponding to the given set of store files.
    */
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestCachedMobFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestCachedMobFile.java
index bb194b6..d274db3 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestCachedMobFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestCachedMobFile.java
@@ -113,46 +113,45 @@ public class TestCachedMobFile {
     Path testDir = TEST_UTIL.getDataTestDir();
     FileSystem fs = testDir.getFileSystem(conf);
     HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
-    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs)
-        .withOutputDir(testDir).withFileContext(meta).build();
+    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs).withOutputDir(testDir)
+        .withFileContext(meta).build();
     String caseName = testName.getMethodName();
     MobTestUtil.writeStoreFile(writer, caseName);
     CachedMobFile cachedMobFile = CachedMobFile.create(fs, writer.getPath(), conf, cacheConf);
     byte[] family = Bytes.toBytes(caseName);
     byte[] qualify = Bytes.toBytes(caseName);
     // Test the start key
-    byte[] startKey = Bytes.toBytes("aa");  // The start key bytes
+    byte[] startKey = Bytes.toBytes("aa"); // The start key bytes
     KeyValue expectedKey =
         new KeyValue(startKey, family, qualify, Long.MAX_VALUE, Type.Put, startKey);
     KeyValue seekKey = expectedKey.createKeyOnly(false);
-    Cell cell = cachedMobFile.readCell(seekKey, false);
+    Cell cell = cachedMobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the end key
-    byte[] endKey = Bytes.toBytes("zz");  // The end key bytes
+    byte[] endKey = Bytes.toBytes("zz"); // The end key bytes
     expectedKey = new KeyValue(endKey, family, qualify, Long.MAX_VALUE, Type.Put, endKey);
     seekKey = expectedKey.createKeyOnly(false);
-    cell = cachedMobFile.readCell(seekKey, false);
+    cell = cachedMobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the random key
     byte[] randomKey = Bytes.toBytes(MobTestUtil.generateRandomString(2));
     expectedKey = new KeyValue(randomKey, family, qualify, Long.MAX_VALUE, Type.Put, randomKey);
     seekKey = expectedKey.createKeyOnly(false);
-    cell = cachedMobFile.readCell(seekKey, false);
+    cell = cachedMobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the key which is less than the start key
     byte[] lowerKey = Bytes.toBytes("a1"); // Smaller than "aa"
     expectedKey = new KeyValue(startKey, family, qualify, Long.MAX_VALUE, Type.Put, startKey);
     seekKey = new KeyValue(lowerKey, family, qualify, Long.MAX_VALUE, Type.Put, lowerKey);
-    cell = cachedMobFile.readCell(seekKey, false);
+    cell = cachedMobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the key which is more than the end key
     byte[] upperKey = Bytes.toBytes("z{"); // Bigger than "zz"
     seekKey = new KeyValue(upperKey, family, qualify, Long.MAX_VALUE, Type.Put, upperKey);
-    cell = cachedMobFile.readCell(seekKey, false);
-    Assert.assertNull(cell);
+    Assert.assertNull(cachedMobFile.readCell(seekKey, false));
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobFile.java
index c22ca98..297c19f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobFile.java
@@ -43,8 +43,6 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 @Category(SmallTests.class)
 public class TestMobFile {
@@ -53,7 +51,6 @@ public class TestMobFile {
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestMobFile.class);
 
-  static final Logger LOG = LoggerFactory.getLogger(TestMobFile.class);
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private Configuration conf = TEST_UTIL.getConfiguration();
   private CacheConfig cacheConf =  new CacheConfig(conf);
@@ -64,11 +61,9 @@ public class TestMobFile {
   public void testReadKeyValue() throws Exception {
     Path testDir = TEST_UTIL.getDataTestDir();
     FileSystem fs = testDir.getFileSystem(conf);
-    HFileContext meta = new HFileContextBuilder().withBlockSize(8*1024).build();
-    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs)
-            .withOutputDir(testDir)
-            .withFileContext(meta)
-            .build();
+    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
+    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, fs).withOutputDir(testDir)
+        .withFileContext(meta).build();
     String caseName = testName.getMethodName();
     MobTestUtil.writeStoreFile(writer, caseName);
 
@@ -78,39 +73,38 @@ public class TestMobFile {
     byte[] qualify = Bytes.toBytes(caseName);
 
     // Test the start key
-    byte[] startKey = Bytes.toBytes("aa");  // The start key bytes
+    byte[] startKey = Bytes.toBytes("aa"); // The start key bytes
     KeyValue expectedKey =
         new KeyValue(startKey, family, qualify, Long.MAX_VALUE, Type.Put, startKey);
     KeyValue seekKey = expectedKey.createKeyOnly(false);
-    Cell cell = mobFile.readCell(seekKey, false);
+    Cell cell = mobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the end key
-    byte[] endKey = Bytes.toBytes("zz");  // The end key bytes
+    byte[] endKey = Bytes.toBytes("zz"); // The end key bytes
     expectedKey = new KeyValue(endKey, family, qualify, Long.MAX_VALUE, Type.Put, endKey);
     seekKey = expectedKey.createKeyOnly(false);
-    cell = mobFile.readCell(seekKey, false);
+    cell = mobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the random key
     byte[] randomKey = Bytes.toBytes(MobTestUtil.generateRandomString(2));
     expectedKey = new KeyValue(randomKey, family, qualify, Long.MAX_VALUE, Type.Put, randomKey);
     seekKey = expectedKey.createKeyOnly(false);
-    cell = mobFile.readCell(seekKey, false);
+    cell = mobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the key which is less than the start key
     byte[] lowerKey = Bytes.toBytes("a1"); // Smaller than "aa"
     expectedKey = new KeyValue(startKey, family, qualify, Long.MAX_VALUE, Type.Put, startKey);
     seekKey = new KeyValue(lowerKey, family, qualify, Long.MAX_VALUE, Type.Put, lowerKey);
-    cell = mobFile.readCell(seekKey, false);
+    cell = mobFile.readCell(seekKey, false).getCell();
     MobTestUtil.assertCellEquals(expectedKey, cell);
 
     // Test the key which is more than the end key
     byte[] upperKey = Bytes.toBytes("z{"); // Bigger than "zz"
     seekKey = new KeyValue(upperKey, family, qualify, Long.MAX_VALUE, Type.Put, upperKey);
-    cell = mobFile.readCell(seekKey, false);
-    assertNull(cell);
+    assertNull(mobFile.readCell(seekKey, false));
   }
 
   @Test
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobWithByteBuffAllocator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobWithByteBuffAllocator.java
new file mode 100644
index 0000000..a527740
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mob/TestMobWithByteBuffAllocator.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.mob;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.regionserver.RSRpcServices;
+import org.apache.hadoop.hbase.snapshot.MobSnapshotTestingUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test the MOB feature when the RPC ByteBuffAllocator is enabled (HBASE-22122)
+ */
+@Category({ MediumTests.class })
+public class TestMobWithByteBuffAllocator {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestMobWithByteBuffAllocator.class);
+
+  private static final String TABLE_NAME = "TestMobWithByteBuffAllocator";
+  private static final Logger LOG = LoggerFactory.getLogger(TestMobWithByteBuffAllocator.class);
+
+  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
+  private static final Configuration CONF = UTIL.getConfiguration();
+  private static final byte[] FAMILY = Bytes.toBytes("f");
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    // Must use the ByteBuffAllocator here
+    CONF.setBoolean(RSRpcServices.RESERVOIR_ENABLED_KEY, true);
+    // Must use OFF-HEAP BucketCache here.
+    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.1f);
+    CONF.set(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    // 32MB for BucketCache.
+    CONF.setFloat(HConstants.BUCKET_CACHE_SIZE_KEY, 32);
+    CONF.setInt(MobConstants.MOB_FILE_CACHE_SIZE_KEY, 0);
+    UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void tearDown() throws Exception {
+    UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testReadingCellsFromHFile() throws Exception {
+    TableName tableName = TableName.valueOf(TABLE_NAME);
+    MobSnapshotTestingUtils.createMobTable(UTIL, tableName, 1, FAMILY);
+    LOG.info("Create an mob table {} successfully.", tableName);
+
+    int expectedRows = 500;
+    SnapshotTestingUtils.loadData(UTIL, tableName, expectedRows, FAMILY);
+    LOG.info("Load 500 rows data into table {} successfully.", tableName);
+
+    // Flush all the data into HFiles.
+    try (Admin admin = UTIL.getConnection().getAdmin()) {
+      admin.flush(tableName);
+    }
+
+    // Scan the rows
+    MobSnapshotTestingUtils.verifyMobRowCount(UTIL, tableName, expectedRows);
+
+    // Reversed scan the rows
+    int rows = 0;
+    try (Table table = UTIL.getConnection().getTable(tableName)) {
+      try (ResultScanner scanner = table.getScanner(new Scan().setReversed(true))) {
+        for (Result res; (res = scanner.next()) != null;) {
+          rows++;
+          for (Cell cell : res.listCells()) {
+            Assert.assertTrue(CellUtil.cloneValue(cell).length > 0);
+          }
+        }
+      }
+    }
+    Assert.assertEquals(expectedRows, rows);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java
index bf1f18e..152ea87 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHMobStore.java
@@ -448,17 +448,14 @@ public class TestHMobStore {
     String targetPathName = MobUtils.formatDate(currentDate);
     Path targetPath = new Path(store.getPath(), targetPathName);
     store.commitFile(mobFilePath, targetPath);
-    //resolve
-    Cell resultCell1 = store.resolve(seekKey1, false);
-    Cell resultCell2 = store.resolve(seekKey2, false);
-    Cell resultCell3 = store.resolve(seekKey3, false);
-    //compare
-    Assert.assertEquals(Bytes.toString(value),
-        Bytes.toString(CellUtil.cloneValue(resultCell1)));
-    Assert.assertEquals(Bytes.toString(value),
-        Bytes.toString(CellUtil.cloneValue(resultCell2)));
-    Assert.assertEquals(Bytes.toString(value2),
-        Bytes.toString(CellUtil.cloneValue(resultCell3)));
+    // resolve
+    Cell resultCell1 = store.resolve(seekKey1, false).getCell();
+    Cell resultCell2 = store.resolve(seekKey2, false).getCell();
+    Cell resultCell3 = store.resolve(seekKey3, false).getCell();
+    // compare
+    Assert.assertEquals(Bytes.toString(value), Bytes.toString(CellUtil.cloneValue(resultCell1)));
+    Assert.assertEquals(Bytes.toString(value), Bytes.toString(CellUtil.cloneValue(resultCell2)));
+    Assert.assertEquals(Bytes.toString(value2), Bytes.toString(CellUtil.cloneValue(resultCell3)));
   }
 
   /**


[hbase] 20/22: HBASE-22491 Separate the heap HFileBlock and offheap HFileBlock because the heap block won't need refCnt and save into prevBlocks list before shipping (#268)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit c3c78a74dad3ac69fb8f6d9e8970091329e13298
Author: openinx <op...@gmail.com>
AuthorDate: Thu Jun 13 14:34:34 2019 +0800

    HBASE-22491 Separate the heap HFileBlock and offheap HFileBlock because the heap block won't need refCnt and save into prevBlocks list before shipping (#268)
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  |   6 +
 .../hbase/io/hfile/ExclusiveMemHFileBlock.java     |  69 ++++
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 166 ++++----
 .../hadoop/hbase/io/hfile/HFileBlockBuilder.java   | 114 ++++++
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |   6 +-
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |   4 +-
 .../hadoop/hbase/io/hfile/SharedMemHFileBlock.java |  46 +++
 .../hadoop/hbase/io/hfile/TinyLfuBlockCache.java   |   4 +-
 .../org/apache/hadoop/hbase/io/TestHeapSize.java   |  10 +
 .../hadoop/hbase/io/hfile/CacheTestUtils.java      |  10 +-
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |   4 +-
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    |   6 +-
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |  49 ++-
 .../hbase/io/hfile/TestHFileDataBlockEncoder.java  |  16 +-
 .../hfile/TestHFileScannerImplReferenceCount.java  | 450 +++++++++++++--------
 .../hadoop/hbase/io/hfile/TestLruBlockCache.java   |  12 +-
 .../hbase/io/hfile/bucket/TestBucketCache.java     |  29 +-
 .../io/hfile/bucket/TestBucketCacheRefCnt.java     |   3 +-
 .../hadoop/hbase/io/hfile/bucket/TestRAMCache.java |   7 +-
 19 files changed, 706 insertions(+), 305 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index c85675b..e8e77dc 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -297,6 +297,12 @@ public class ByteBuffAllocator {
         }
       }
     }
+    this.usedBufCount.set(0);
+    this.maxPoolSizeInfoLevelLogged = false;
+    this.poolAllocationBytes.reset();
+    this.heapAllocationBytes.reset();
+    this.lastPoolAllocationBytes = 0;
+    this.lastHeapAllocationBytes = 0;
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ExclusiveMemHFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ExclusiveMemHFileBlock.java
new file mode 100644
index 0000000..73c0db4
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ExclusiveMemHFileBlock.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * The {@link ByteBuffAllocator} won't allocate pooled heap {@link ByteBuff} now; at the same time,
+ * any off-heap {@link ByteBuff} allocated from the allocator must be a pooled one. That is to say,
+ * an exclusive memory HFileBlock must be a heap block and a shared memory HFileBlock must be an
+ * off-heap block.
+ * <p>
+ * The exclusive memory HFileBlock does nothing when retain or release is called, because its
+ * memory will be garbage collected by the JVM; even if its reference count dropped to zero, there
+ * would be nothing for us to de-allocate.
+ * <p>
+ * @see org.apache.hadoop.hbase.io.hfile.SharedMemHFileBlock
+ */
+@InterfaceAudience.Private
+public class ExclusiveMemHFileBlock extends HFileBlock {
+
+  ExclusiveMemHFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
+      int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuff buf, boolean fillHeader,
+      long offset, int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
+      HFileContext fileContext) {
+    super(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, buf,
+        fillHeader, offset, nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext,
+        ByteBuffAllocator.HEAP);
+  }
+
+  @Override
+  public int refCnt() {
+    return 0;
+  }
+
+  @Override
+  public ExclusiveMemHFileBlock retain() {
+    // do nothing
+    return this;
+  }
+
+  @Override
+  public boolean release() {
+    // do nothing
+    return false;
+  }
+
+  @Override
+  public boolean isSharedMem() {
+    return false;
+  }
+}
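
For illustration only (not part of the patch; the helper name is hypothetical): a minimal sketch of what the no-op contract above means for callers. retain() and release() may be invoked unconditionally; on an exclusive block they never change any state.

    import org.apache.hadoop.hbase.io.hfile.HFileBlock;

    final class ExclusiveBlockContractSketch {
      static void touch(HFileBlock block) {
        block.retain();                          // no-op for ExclusiveMemHFileBlock
        boolean deallocated = block.release();
        // For an exclusive block release() always returns false and refCnt() stays 0;
        // for a shared block it reports whether the pooled memory was actually freed.
        assert block.isSharedMem() || (!deallocated && block.refCnt() == 0);
      }
    }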
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 452b68c..14ed275 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -285,7 +285,7 @@ public class HFileBlock implements Cacheable {
       boolean usesChecksum = buf.get() == (byte) 1;
       long offset = buf.getLong();
       int nextBlockOnDiskSize = buf.getInt();
-      return new HFileBlock(newByteBuff, usesChecksum, offset, nextBlockOnDiskSize, null, alloc);
+      return createFromBuff(newByteBuff, usesChecksum, offset, nextBlockOnDiskSize, null, alloc);
     }
 
     @Override
@@ -301,28 +301,6 @@ public class HFileBlock implements Cacheable {
   }
 
   /**
-   * Copy constructor. Creates a shallow copy of {@code that}'s buffer.
-   */
-  private HFileBlock(HFileBlock that) {
-    this(that, false);
-  }
-
-  /**
-   * Copy constructor. Creates a shallow/deep copy of {@code that}'s buffer as per the boolean
-   * param.
-   */
-  private HFileBlock(HFileBlock that, boolean bufCopy) {
-    init(that.blockType, that.onDiskSizeWithoutHeader, that.uncompressedSizeWithoutHeader,
-      that.prevBlockOffset, that.offset, that.onDiskDataSizeWithHeader, that.nextBlockOnDiskSize,
-      that.fileContext, that.allocator);
-    if (bufCopy) {
-      this.buf = ByteBuff.wrap(ByteBuffer.wrap(that.buf.toBytes(0, that.buf.limit())));
-    } else {
-      this.buf = that.buf.duplicate();
-    }
-  }
-
-  /**
    * Creates a new {@link HFile} block from the given fields. This constructor
    * is used only while writing blocks and caching,
    * and is sitting in a byte buffer and we want to stuff the block into cache.
@@ -336,7 +314,7 @@ public class HFileBlock implements Cacheable {
    * @param onDiskSizeWithoutHeader see {@link #onDiskSizeWithoutHeader}
    * @param uncompressedSizeWithoutHeader see {@link #uncompressedSizeWithoutHeader}
    * @param prevBlockOffset see {@link #prevBlockOffset}
-   * @param b block header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes)
+   * @param buf block buffer with header ({@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes)
    * @param fillHeader when true, write the first 4 header fields into passed buffer.
    * @param offset the file offset the block was read from
    * @param onDiskDataSizeWithHeader see {@link #onDiskDataSizeWithHeader}
@@ -344,12 +322,19 @@ public class HFileBlock implements Cacheable {
    */
   @VisibleForTesting
   public HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
-      int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader,
-      long offset, final int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
-      HFileContext fileContext, ByteBuffAllocator allocator) {
-    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
-      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext, allocator);
-    this.buf = new SingleByteBuff(b);
+      int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuff buf, boolean fillHeader,
+      long offset, int nextBlockOnDiskSize, int onDiskDataSizeWithHeader, HFileContext fileContext,
+      ByteBuffAllocator allocator) {
+    this.blockType = blockType;
+    this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
+    this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
+    this.prevBlockOffset = prevBlockOffset;
+    this.offset = offset;
+    this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
+    this.nextBlockOnDiskSize = nextBlockOnDiskSize;
+    this.fileContext = fileContext;
+    this.allocator = allocator;
+    this.buf = buf;
     if (fillHeader) {
       overwriteHeader();
     }
@@ -363,7 +348,7 @@ public class HFileBlock implements Cacheable {
    * to that point.
    * @param buf Has header, content, and trailing checksums if present.
    */
-  HFileBlock(ByteBuff buf, boolean usesHBaseChecksum, final long offset,
+  static HFileBlock createFromBuff(ByteBuff buf, boolean usesHBaseChecksum, final long offset,
       final int nextBlockOnDiskSize, HFileContext fileContext, ByteBuffAllocator allocator)
       throws IOException {
     buf.rewind();
@@ -374,15 +359,15 @@ public class HFileBlock implements Cacheable {
     final long prevBlockOffset = buf.getLong(Header.PREV_BLOCK_OFFSET_INDEX);
     // This constructor is called when we deserialize a block from cache and when we read a block in
     // from the fs. fileCache is null when deserialized from cache so need to make up one.
-    HFileContextBuilder fileContextBuilder = fileContext != null?
-        new HFileContextBuilder(fileContext): new HFileContextBuilder();
+    HFileContextBuilder fileContextBuilder =
+        fileContext != null ? new HFileContextBuilder(fileContext) : new HFileContextBuilder();
     fileContextBuilder.withHBaseCheckSum(usesHBaseChecksum);
     int onDiskDataSizeWithHeader;
     if (usesHBaseChecksum) {
       byte checksumType = buf.get(Header.CHECKSUM_TYPE_INDEX);
       int bytesPerChecksum = buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX);
       onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
-      // Use the checksum type and bytes per checksum from header, not from filecontext.
+      // Use the checksum type and bytes per checksum from header, not from fileContext.
       fileContextBuilder.withChecksumType(ChecksumType.codeToType(checksumType));
       fileContextBuilder.withBytesPerCheckSum(bytesPerChecksum);
     } else {
@@ -393,29 +378,19 @@ public class HFileBlock implements Cacheable {
     }
     fileContext = fileContextBuilder.build();
     assert usesHBaseChecksum == fileContext.isUseHBaseChecksum();
-    init(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, offset,
-      onDiskDataSizeWithHeader, nextBlockOnDiskSize, fileContext, allocator);
-    this.offset = offset;
-    this.buf = buf;
-    this.buf.rewind();
-  }
-
-  /**
-   * Called from constructors.
-   */
-  private void init(BlockType blockType, int onDiskSizeWithoutHeader,
-      int uncompressedSizeWithoutHeader, long prevBlockOffset, long offset,
-      int onDiskDataSizeWithHeader, final int nextBlockOnDiskSize, HFileContext fileContext,
-      ByteBuffAllocator allocator) {
-    this.blockType = blockType;
-    this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
-    this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
-    this.prevBlockOffset = prevBlockOffset;
-    this.offset = offset;
-    this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
-    this.nextBlockOnDiskSize = nextBlockOnDiskSize;
-    this.fileContext = fileContext;
-    this.allocator = allocator;
+    return new HFileBlockBuilder()
+        .withBlockType(blockType)
+        .withOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader)
+        .withUncompressedSizeWithoutHeader(uncompressedSizeWithoutHeader)
+        .withPrevBlockOffset(prevBlockOffset)
+        .withOffset(offset)
+        .withOnDiskDataSizeWithHeader(onDiskDataSizeWithHeader)
+        .withNextBlockOnDiskSize(nextBlockOnDiskSize)
+        .withHFileContext(fileContext)
+        .withByteBuffAllocator(allocator)
+        .withByteBuff(buf.rewind())
+        .withShared(!buf.hasArray())
+        .build();
   }
 
   /**
@@ -639,7 +614,7 @@ public class HFileBlock implements Cacheable {
         .append("(").append(onDiskSizeWithoutHeader)
         .append("+").append(HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM).append(")");
     }
-    String dataBegin = null;
+    String dataBegin;
     if (buf.hasArray()) {
       dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset() + headerSize(),
           Math.min(32, buf.limit() - buf.arrayOffset() - headerSize()));
@@ -673,7 +648,7 @@ public class HFileBlock implements Cacheable {
       return this;
     }
 
-    HFileBlock unpacked = new HFileBlock(this);
+    HFileBlock unpacked = shallowClone(this);
     unpacked.allocateBuffer(); // allocates space for the decompressed block
     boolean succ = false;
     try {
@@ -761,10 +736,16 @@ public class HFileBlock implements Cacheable {
   }
 
   /**
-   * @return true to indicate the block is allocated from JVM heap, otherwise from off-heap.
+   * Will be overridden by {@link SharedMemHFileBlock} or {@link ExclusiveMemHFileBlock}. Returns
+   * true by default.
    */
-  boolean isOnHeap() {
-    return buf.hasArray();
+  public boolean isSharedMem() {
+    if (this instanceof SharedMemHFileBlock) {
+      return true;
+    } else if (this instanceof ExclusiveMemHFileBlock) {
+      return false;
+    }
+    return true;
   }
 
   /**
@@ -1039,8 +1020,7 @@ public class HFileBlock implements Cacheable {
             + offset);
       }
       startOffset = offset;
-
-      finishBlockAndWriteHeaderAndData((DataOutputStream) out);
+      finishBlockAndWriteHeaderAndData(out);
     }
 
     /**
@@ -1251,13 +1231,27 @@ public class HFileBlock implements Cacheable {
                                 .withIncludesMvcc(fileContext.isIncludesMvcc())
                                 .withIncludesTags(fileContext.isIncludesTags())
                                 .build();
-      return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
-          getUncompressedSizeWithoutHeader(), prevOffset,
-          cacheConf.shouldCacheCompressed(blockType.getCategory()) ? cloneOnDiskBufferWithHeader()
-              : cloneUncompressedBufferWithHeader(),
-          FILL_HEADER, startOffset, UNSET,
-          onDiskBlockBytesWithHeader.size() + onDiskChecksum.length, newContext,
-          cacheConf.getByteBuffAllocator());
+      // Build the HFileBlock.
+      HFileBlockBuilder builder = new HFileBlockBuilder();
+      ByteBuffer buffer;
+      if (cacheConf.shouldCacheCompressed(blockType.getCategory())) {
+        buffer = cloneOnDiskBufferWithHeader();
+      } else {
+        buffer = cloneUncompressedBufferWithHeader();
+      }
+      return builder.withBlockType(blockType)
+          .withOnDiskSizeWithoutHeader(getOnDiskSizeWithoutHeader())
+          .withUncompressedSizeWithoutHeader(getUncompressedSizeWithoutHeader())
+          .withPrevBlockOffset(prevOffset)
+          .withByteBuff(ByteBuff.wrap(buffer))
+          .withFillHeader(FILL_HEADER)
+          .withOffset(startOffset)
+          .withNextBlockOnDiskSize(UNSET)
+          .withOnDiskDataSizeWithHeader(onDiskBlockBytesWithHeader.size() + onDiskChecksum.length)
+          .withHFileContext(newContext)
+          .withByteBuffAllocator(cacheConf.getByteBuffAllocator())
+          .withShared(!buffer.hasArray())
+          .build();
     }
   }
 
@@ -1781,8 +1775,8 @@ public class HFileBlock implements Cacheable {
         // The onDiskBlock will become the headerAndDataBuffer for this block.
         // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
         // contains the header of next block, so no need to set next block's header in it.
-        HFileBlock hFileBlock = new HFileBlock(curBlock, checksumSupport, offset,
-            nextBlockOnDiskSize, fileContext, intoHeap ? HEAP : allocator);
+        HFileBlock hFileBlock = createFromBuff(curBlock, checksumSupport, offset,
+          nextBlockOnDiskSize, fileContext, intoHeap ? HEAP : allocator);
         // Run check on uncompressed sizings.
         if (!fileContext.isCompressedOrEncrypted()) {
           hFileBlock.sanityCheckUncompressed();
@@ -1947,7 +1941,7 @@ public class HFileBlock implements Cacheable {
     if (comparison == null) {
       return false;
     }
-    if (comparison.getClass() != this.getClass()) {
+    if (!(comparison instanceof HFileBlock)) {
       return false;
     }
 
@@ -2084,7 +2078,27 @@ public class HFileBlock implements Cacheable {
                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
   }
 
-  public HFileBlock deepCloneOnHeap() {
-    return new HFileBlock(this, true);
+  private static HFileBlockBuilder createBuilder(HFileBlock blk) {
+    return new HFileBlockBuilder()
+          .withBlockType(blk.blockType)
+          .withOnDiskSizeWithoutHeader(blk.onDiskSizeWithoutHeader)
+          .withUncompressedSizeWithoutHeader(blk.uncompressedSizeWithoutHeader)
+          .withPrevBlockOffset(blk.prevBlockOffset)
+          .withByteBuff(blk.buf.duplicate()) // Duplicate the buffer.
+          .withOffset(blk.offset)
+          .withOnDiskDataSizeWithHeader(blk.onDiskDataSizeWithHeader)
+          .withNextBlockOnDiskSize(blk.nextBlockOnDiskSize)
+          .withHFileContext(blk.fileContext)
+          .withByteBuffAllocator(blk.allocator)
+          .withShared(blk.isSharedMem());
+  }
+
+  static HFileBlock shallowClone(HFileBlock blk) {
+    return createBuilder(blk).build();
+  }
+
+  static HFileBlock deepCloneOnHeap(HFileBlock blk) {
+    ByteBuff deepCloned = ByteBuff.wrap(ByteBuffer.wrap(blk.buf.toBytes(0, blk.buf.limit())));
+    return createBuilder(blk).withByteBuff(deepCloned).withShared(false).build();
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockBuilder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockBuilder.java
new file mode 100644
index 0000000..4ed50e1
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockBuilder.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
+
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.yetus.audience.InterfaceAudience;
+
+@InterfaceAudience.Private
+public class HFileBlockBuilder {
+
+  private BlockType blockType;
+  private int onDiskSizeWithoutHeader;
+  private int onDiskDataSizeWithHeader;
+  private int uncompressedSizeWithoutHeader;
+  private long prevBlockOffset;
+  private ByteBuff buf;
+  private boolean fillHeader = false;
+  private long offset = -1;
+  private int nextBlockOnDiskSize = -1;
+  private HFileContext fileContext;
+  private ByteBuffAllocator allocator = HEAP;
+  private boolean isShared;
+
+  public HFileBlockBuilder withBlockType(BlockType blockType) {
+    this.blockType = blockType;
+    return this;
+  }
+
+  public HFileBlockBuilder withOnDiskSizeWithoutHeader(int onDiskSizeWithoutHeader) {
+    this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
+    return this;
+  }
+
+  public HFileBlockBuilder withOnDiskDataSizeWithHeader(int onDiskDataSizeWithHeader) {
+    this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
+    return this;
+  }
+
+  public HFileBlockBuilder withUncompressedSizeWithoutHeader(int uncompressedSizeWithoutHeader) {
+    this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
+    return this;
+  }
+
+  public HFileBlockBuilder withPrevBlockOffset(long prevBlockOffset) {
+    this.prevBlockOffset = prevBlockOffset;
+    return this;
+  }
+
+  public HFileBlockBuilder withByteBuff(ByteBuff buf) {
+    this.buf = buf;
+    return this;
+  }
+
+  public HFileBlockBuilder withFillHeader(boolean fillHeader) {
+    this.fillHeader = fillHeader;
+    return this;
+  }
+
+  public HFileBlockBuilder withOffset(long offset) {
+    this.offset = offset;
+    return this;
+  }
+
+  public HFileBlockBuilder withNextBlockOnDiskSize(int nextBlockOnDiskSize) {
+    this.nextBlockOnDiskSize = nextBlockOnDiskSize;
+    return this;
+  }
+
+  public HFileBlockBuilder withHFileContext(HFileContext fileContext) {
+    this.fileContext = fileContext;
+    return this;
+  }
+
+  public HFileBlockBuilder withByteBuffAllocator(ByteBuffAllocator allocator) {
+    this.allocator = allocator;
+    return this;
+  }
+
+  public HFileBlockBuilder withShared(boolean isShared) {
+    this.isShared = isShared;
+    return this;
+  }
+
+  public HFileBlock build() {
+    if (isShared) {
+      return new SharedMemHFileBlock(blockType, onDiskSizeWithoutHeader,
+          uncompressedSizeWithoutHeader, prevBlockOffset, buf, fillHeader, offset,
+          nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext, allocator);
+    } else {
+      return new ExclusiveMemHFileBlock(blockType, onDiskSizeWithoutHeader,
+          uncompressedSizeWithoutHeader, prevBlockOffset, buf, fillHeader, offset,
+          nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext);
+    }
+  }
+}
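
For illustration only (not part of the patch): a minimal usage sketch of the builder. The class name, the sizes, and the empty HFileContext are illustrative assumptions; withShared(false) yields an ExclusiveMemHFileBlock backed by heap memory, while withShared(true) would yield a SharedMemHFileBlock backed by a pooled buffer.

    package org.apache.hadoop.hbase.io.hfile;

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.nio.ByteBuff;

    final class BuilderUsageSketch {
      static HFileBlock newHeapDataBlock(byte[] bytes) {
        return new HFileBlockBuilder()
            .withBlockType(BlockType.DATA)
            .withOnDiskSizeWithoutHeader(bytes.length)
            .withUncompressedSizeWithoutHeader(bytes.length)
            .withPrevBlockOffset(-1)
            .withByteBuff(ByteBuff.wrap(ByteBuffer.wrap(bytes)))
            .withFillHeader(false)
            .withOffset(0)
            .withNextBlockOnDiskSize(-1)
            .withOnDiskDataSizeWithHeader(bytes.length)
            .withHFileContext(new HFileContextBuilder().build())
            .withByteBuffAllocator(ByteBuffAllocator.HEAP)
            .withShared(false) // false -> ExclusiveMemHFileBlock, true -> SharedMemHFileBlock
            .build();
      }
    }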
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 1157615..0dae13c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -523,15 +523,15 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) {
         return;
       }
-      if (this.curBlock != null) {
+      if (this.curBlock != null && this.curBlock.isSharedMem()) {
         prevBlocks.add(this.curBlock);
       }
       this.curBlock = block;
     }
 
     void reset() {
-      // We don't have to keep ref to EXCLUSIVE type of block
-      if (this.curBlock != null) {
+      // We don't have to keep ref to heap block
+      if (this.curBlock != null && this.curBlock.isSharedMem()) {
         this.prevBlocks.add(this.curBlock);
       }
       this.curBlock = null;
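
For illustration only (not part of the patch; the class and method names are hypothetical): a minimal sketch of the bookkeeping shown above. Only shared (ref-counted, pooled) blocks are remembered so they can be released once the response is shipped; heap blocks are simply dropped for the garbage collector.

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hbase.io.hfile.HFileBlock;

    final class ShippedTrackerSketch {
      private final List<HFileBlock> prevBlocks = new ArrayList<>();
      private HFileBlock curBlock;

      void setCurrentBlock(HFileBlock block) {
        // Only shared blocks need a deferred release; exclusive heap blocks don't.
        if (curBlock != null && curBlock.isSharedMem()) {
          prevBlocks.add(curBlock);
        }
        curBlock = block;
      }

      void shipped() {
        for (HFileBlock block : prevBlocks) {
          block.release(); // hand the pooled buffers back after the RPC response is out
        }
        prevBlocks.clear();
      }
    }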
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index 0ec73a3..7740460 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -372,8 +372,8 @@ public class LruBlockCache implements FirstLevelBlockCache {
   private Cacheable asReferencedHeapBlock(Cacheable buf) {
     if (buf instanceof HFileBlock) {
       HFileBlock blk = ((HFileBlock) buf);
-      if (!blk.isOnHeap()) {
-        return blk.deepCloneOnHeap();
+      if (blk.isSharedMem()) {
+        return HFileBlock.deepCloneOnHeap(blk);
       }
     }
     // The block will be referenced by this LRUBlockCache, so should increase its refCnt here.
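
For illustration only (not part of the patch): a minimal sketch of the rule applied above, assumed to live in the org.apache.hadoop.hbase.io.hfile package so the package-private HFileBlock.deepCloneOnHeap is visible. Before an on-heap cache keeps a block, any shared (pooled, off-heap) block is deep-copied onto the heap.

    package org.apache.hadoop.hbase.io.hfile;

    final class HeapCachingSketch {
      static Cacheable toHeapForCaching(Cacheable buf) {
        if (buf instanceof HFileBlock && ((HFileBlock) buf).isSharedMem()) {
          // The pooled buffer behind a shared block may be recycled once it is
          // released, so copy the bytes onto the heap before caching.
          return HFileBlock.deepCloneOnHeap((HFileBlock) buf);
        }
        return buf; // already an exclusive heap block; safe to cache as-is
      }
    }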
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/SharedMemHFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/SharedMemHFileBlock.java
new file mode 100644
index 0000000..0d2217e
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/SharedMemHFileBlock.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * The {@link ByteBuffAllocator} won't allocate pooled heap {@link ByteBuff} now; at the same time,
+ * any off-heap {@link ByteBuff} allocated from the allocator must be a pooled one. That is to say,
+ * an exclusive memory HFileBlock must be a heap block and a shared memory HFileBlock must be an
+ * off-heap block.
+ * @see org.apache.hadoop.hbase.io.hfile.ExclusiveMemHFileBlock
+ */
+@InterfaceAudience.Private
+public class SharedMemHFileBlock extends HFileBlock {
+
+  SharedMemHFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
+      int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuff buf, boolean fillHeader,
+      long offset, int nextBlockOnDiskSize, int onDiskDataSizeWithHeader, HFileContext fileContext,
+      ByteBuffAllocator alloc) {
+    super(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, buf,
+        fillHeader, offset, nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext, alloc);
+  }
+
+  @Override
+  public boolean isSharedMem() {
+    return true;
+  }
+}
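
For illustration only (not part of the patch; the helper name and the retain-per-consumer policy are assumptions, not something this commit mandates): a minimal read-path sketch for a shared block. A consumer retains the block while using it and releases it when done; both calls are harmless no-ops if the block happens to be an exclusive heap block.

    import org.apache.hadoop.hbase.io.hfile.HFileBlock;

    final class SharedBlockLifecycleSketch {
      static void useBlock(HFileBlock block) {
        block.retain();      // +1 while this consumer holds it (no-op on heap blocks)
        try {
          // ... read cells out of the block's buffer here ...
        } finally {
          block.release();   // -1; the pooled ByteBuff goes back to the pool at zero
        }
      }
    }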
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
index 5e69f6c..a90c5a3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/TinyLfuBlockCache.java
@@ -171,8 +171,8 @@ public final class TinyLfuBlockCache implements FirstLevelBlockCache {
       if (victimCache != null) {
         value = victimCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
         if ((value != null) && caching) {
-          if ((value instanceof HFileBlock) && !((HFileBlock) value).isOnHeap()) {
-            value = ((HFileBlock) value).deepCloneOnHeap();
+          if ((value instanceof HFileBlock) && ((HFileBlock) value).isSharedMem()) {
+            value = HFileBlock.deepCloneOnHeap((HFileBlock) value);
           }
           cacheBlock(cacheKey, value);
         }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
index 71ffb87..108de70 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
@@ -43,10 +43,12 @@ import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Mutation;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.ExclusiveMemHFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
 import org.apache.hadoop.hbase.io.hfile.LruCachedBlock;
+import org.apache.hadoop.hbase.io.hfile.SharedMemHFileBlock;
 import org.apache.hadoop.hbase.regionserver.CSLMImmutableSegment;
 import org.apache.hadoop.hbase.regionserver.CellArrayImmutableSegment;
 import org.apache.hadoop.hbase.regionserver.CellArrayMap;
@@ -529,6 +531,14 @@ public class TestHeapSize  {
     actual = HFileBlock.FIXED_OVERHEAD;
     expected = ClassSize.estimateBase(HFileBlock.class, false);
     assertEquals(expected, actual);
+
+    actual = ExclusiveMemHFileBlock.FIXED_OVERHEAD;
+    expected = ClassSize.estimateBase(ExclusiveMemHFileBlock.class, false);
+    assertEquals(expected, actual);
+
+    actual = SharedMemHFileBlock.FIXED_OVERHEAD;
+    expected = ClassSize.estimateBase(SharedMemHFileBlock.class, false);
+    assertEquals(expected, actual);
   }
 
   @Test
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index 717e9d7..a7bb8e6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -148,7 +148,6 @@ public class CacheTestUtils {
       if (buf != null) {
         assertEquals(block.block, buf);
       }
-
     }
 
     // Re-add some duplicate blocks. Hope nothing breaks.
@@ -307,10 +306,11 @@ public class CacheTestUtils {
                           .withBytesPerCheckSum(0)
                           .withChecksumType(ChecksumType.NULL)
                           .build();
-      HFileBlock generated = new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader,
-          uncompressedSizeWithoutHeader, prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER,
-          blockSize, onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, -1, meta,
-          ByteBuffAllocator.HEAP);
+      HFileBlock generated =
+          new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
+              prevBlockOffset, ByteBuff.wrap(cachedBuffer), HFileBlock.DONT_FILL_HEADER, blockSize,
+              onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, -1, meta,
+              ByteBuffAllocator.HEAP);
 
       String strKey;
       /* No conflicting keys */
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index 2aebc8c..6d02854 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -101,7 +101,7 @@ public class TestChecksum {
     HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
         meta, ByteBuffAllocator.HEAP);
     HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
-    assertTrue(b.isOnHeap());
+    assertTrue(!b.isSharedMem());
     assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
   }
 
@@ -148,7 +148,7 @@ public class TestChecksum {
       HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
           meta, ByteBuffAllocator.HEAP);
       HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
-      assertTrue(b.isOnHeap());
+      assertTrue(!b.isSharedMem());
 
       // verify SingleByteBuff checksum.
       verifySBBCheckSum(b.getBufferReadOnly());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index 84e24e6..a504442 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -169,7 +169,7 @@ public class TestHFile  {
       Cacheable cachedBlock = lru.getBlock(key, false, false, true);
       Assert.assertNotNull(cachedBlock);
       Assert.assertTrue(cachedBlock instanceof HFileBlock);
-      Assert.assertTrue(((HFileBlock) cachedBlock).isOnHeap());
+      Assert.assertFalse(((HFileBlock) cachedBlock).isSharedMem());
       // Should never allocate off-heap block from allocator because ensure that it's LRU.
       Assert.assertEquals(bufCount, alloc.getFreeBufferCount());
       block.release(); // return back the ByteBuffer back to allocator.
@@ -217,10 +217,10 @@ public class TestHFile  {
         HFileBlock hfb = (HFileBlock) cachedBlock;
         // Data block will be cached in BucketCache, so it should be an off-heap block.
         if (hfb.getBlockType().isData()) {
-          Assert.assertFalse(hfb.isOnHeap());
+          Assert.assertTrue(hfb.isSharedMem());
         } else {
           // Non-data block will be cached in LRUBlockCache, so it must be an on-heap block.
-          Assert.assertTrue(hfb.isOnHeap());
+          Assert.assertFalse(hfb.isSharedMem());
         }
       } finally {
         cachedBlock.release();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index de10ced..d0e98fd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -340,6 +340,14 @@ public class TestHFileBlock {
     testReaderV2Internals();
   }
 
+  private void assertRelease(HFileBlock blk) {
+    if (blk instanceof ExclusiveMemHFileBlock) {
+      assertFalse(blk.release());
+    } else {
+      assertTrue(blk.release());
+    }
+  }
+
   protected void testReaderV2Internals() throws IOException {
     if(includesTag) {
       TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
@@ -403,10 +411,10 @@ public class TestHFileBlock {
                 + "'.\nMessage is expected to start with: '" + expectedPrefix
                 + "'", ex.getMessage().startsWith(expectedPrefix));
           }
-          assertTrue(b.release());
+          assertRelease(b);
           is.close();
         }
-        assertTrue(expected.release());
+        assertRelease(expected);
       }
     }
   }
@@ -534,7 +542,7 @@ public class TestHFileBlock {
                   deserialized.unpack(meta, hbr));
               }
             }
-            assertTrue(blockUnpacked.release());
+            assertRelease(blockUnpacked);
             if (blockFromHFile != blockUnpacked) {
               blockFromHFile.release();
             }
@@ -651,7 +659,7 @@ public class TestHFileBlock {
             assertEquals(b.getOnDiskDataSizeWithHeader(),
                          b2.getOnDiskDataSizeWithHeader());
             assertEquals(0, HFile.getAndResetChecksumFailuresCount());
-            assertTrue(b2.release());
+            assertRelease(b2);
 
             curOffset += b.getOnDiskSizeWithHeader();
 
@@ -694,12 +702,12 @@ public class TestHFileBlock {
                 }
               }
               assertTrue(wrongBytesMsg, bytesAreCorrect);
-              assertTrue(newBlock.release());
+              assertRelease(newBlock);
               if (newBlock != b) {
-                assertTrue(b.release());
+                assertRelease(b);
               }
             } else {
-              assertTrue(b.release());
+              assertRelease(b);
             }
           }
           assertEquals(curOffset, fs.getFileStatus(path).getLen());
@@ -750,9 +758,9 @@ public class TestHFileBlock {
           long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
           b = hbr.readBlockData(offset, onDiskSizeArg, pread, false, false);
           if (useHeapAllocator) {
-            assertTrue(b.isOnHeap());
+            assertTrue(!b.isSharedMem());
           } else {
-            assertTrue(!b.getBlockType().isData() || !b.isOnHeap());
+            assertTrue(!b.getBlockType().isData() || b.isSharedMem());
           }
           assertEquals(types.get(blockId), b.getBlockType());
           assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
@@ -913,14 +921,13 @@ public class TestHFileBlock {
                           .withCompression(Algorithm.NONE)
                           .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                           .withChecksumType(ChecksumType.NULL).build();
-      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-          -1, 0, -1, meta, HEAP);
-      long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(
-          new MultiByteBuff(buf).getClass(), true)
-          + HConstants.HFILEBLOCK_HEADER_SIZE + size);
-      long hfileMetaSize =  ClassSize.align(ClassSize.estimateBase(HFileContext.class, true));
-      long hfileBlockExpectedSize =
-          ClassSize.align(ClassSize.estimateBase(HFileBlock.class, true));
+      HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+          HFileBlock.FILL_HEADER, -1, 0, -1, meta, HEAP);
+      long byteBufferExpectedSize =
+          ClassSize.align(ClassSize.estimateBase(new MultiByteBuff(buf).getClass(), true)
+              + HConstants.HFILEBLOCK_HEADER_SIZE + size);
+      long hfileMetaSize = ClassSize.align(ClassSize.estimateBase(HFileContext.class, true));
+      long hfileBlockExpectedSize = ClassSize.align(ClassSize.estimateBase(HFileBlock.class, true));
       long expected = hfileBlockExpectedSize + byteBufferExpectedSize + hfileMetaSize;
       assertEquals("Block data size: " + size + ", byte buffer expected " +
           "size: " + byteBufferExpectedSize + ", HFileBlock class expected " +
@@ -936,10 +943,10 @@ public class TestHFileBlock {
     byte[] byteArr = new byte[length];
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
-    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta, alloc);
-    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta, alloc);
+    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf), HFileBlock.FILL_HEADER, -1, 52, -1, meta, alloc);
+    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf), HFileBlock.FILL_HEADER, -1, -1, -1, meta, alloc);
     ByteBuffer buff1 = ByteBuffer.allocate(length);
     ByteBuffer buff2 = ByteBuffer.allocate(length);
     blockWithNextBlockMetadata.serialize(buff1, true);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
index 5a6042c..2f249c8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
 import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.ChecksumType;
@@ -132,8 +133,8 @@ public class TestHFileDataBlockEncoder {
                         .withBlockSize(0)
                         .withChecksumType(ChecksumType.NULL)
                         .build();
-    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-        0, 0, -1, hfileContext, ByteBuffAllocator.HEAP);
+    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+        HFileBlock.FILL_HEADER, 0, 0, -1, hfileContext, ByteBuffAllocator.HEAP);
     HFileBlock cacheBlock = createBlockOnDisk(kvs, block, useTags);
     assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length);
   }
@@ -198,9 +199,8 @@ public class TestHFileDataBlockEncoder {
                         .withBlockSize(0)
                         .withChecksumType(ChecksumType.NULL)
                         .build();
-    HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, 0,
-         0, -1, meta, ByteBuffAllocator.HEAP);
+    HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+        HFileBlock.FILL_HEADER, 0, 0, -1, meta, ByteBuffAllocator.HEAP);
     return b;
   }
 
@@ -221,9 +221,9 @@ public class TestHFileDataBlockEncoder {
     blockEncoder.endBlockEncoding(context, dos, baos.getBuffer(), BlockType.DATA);
     byte[] encodedBytes = baos.toByteArray();
     size = encodedBytes.length - block.getDummyHeaderForVersion().length;
-    return new HFileBlock(context.getBlockType(), size, size, -1, ByteBuffer.wrap(encodedBytes),
-        HFileBlock.FILL_HEADER, 0, block.getOnDiskDataSizeWithHeader(), -1,
-        block.getHFileContext(), ByteBuffAllocator.HEAP);
+    return new HFileBlock(context.getBlockType(), size, size, -1,
+        ByteBuff.wrap(ByteBuffer.wrap(encodedBytes)), HFileBlock.FILL_HEADER, 0,
+        block.getOnDiskDataSizeWithHeader(), -1, block.getHFileContext(), ByteBuffAllocator.HEAP);
   }
 
   private void writeBlock(List<Cell> kvs, HFileContext fileContext, boolean useTags)
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
index 60ee958..87dd29e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
@@ -18,6 +18,11 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_IOENGINE_KEY;
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MAX_CHUNK_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY;
 import static org.junit.Assert.assertEquals;
@@ -34,17 +39,24 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.HFileScannerImpl;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import org.apache.hadoop.hbase.io.hfile.bucket.TestBucketCache;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
 import org.junit.Assert;
+import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
+import org.junit.Rule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -55,30 +67,74 @@ public class TestHFileScannerImplReferenceCount {
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestHFileScannerImplReferenceCount.class);
 
+  @Rule
+  public TestName CASE = new TestName();
+
   private static final Logger LOG =
       LoggerFactory.getLogger(TestHFileScannerImplReferenceCount.class);
   private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
   private static final byte[] FAMILY = Bytes.toBytes("f");
   private static final byte[] QUALIFIER = Bytes.toBytes("q");
   private static final byte[] SUFFIX = randLongBytes();
+  private static final int CELL_COUNT = 1000;
 
   private static byte[] randLongBytes() {
     Random rand = new Random();
-    byte[] keys = new byte[300];
+    byte[] keys = new byte[30];
     rand.nextBytes(keys);
     return keys;
   }
 
+  // It's a deep copy of the configuration of UTIL, DON'T use a shallow copy.
+  private Configuration conf;
+  private Path workDir;
+  private FileSystem fs;
+  private Path hfilePath;
   private Cell firstCell = null;
   private Cell secondCell = null;
+  private ByteBuffAllocator allocator;
 
   @BeforeClass
-  public static void setUp() {
+  public static void setUpBeforeClass() {
     Configuration conf = UTIL.getConfiguration();
     // Set the max chunk size and min entries key to be very small for index block, so that we can
     // create an index block tree with level >= 2.
     conf.setInt(MAX_CHUNK_SIZE_KEY, 10);
     conf.setInt(MIN_INDEX_NUM_ENTRIES_KEY, 2);
+    // Create a bucket cache with 32MB.
+    conf.set(BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    conf.setInt(BUCKET_CACHE_SIZE_KEY, 32);
+    conf.setInt(BUFFER_SIZE_KEY, 1024);
+    conf.setInt(MAX_BUFFER_COUNT_KEY, 32 * 1024);
+    // All allocated ByteBuff are pooled ByteBuff.
+    conf.setInt(MIN_ALLOCATE_SIZE_KEY, 0);
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    this.firstCell = null;
+    this.secondCell = null;
+    this.allocator = ByteBuffAllocator.create(UTIL.getConfiguration(), true);
+    this.conf = new Configuration(UTIL.getConfiguration());
+    String caseName = CASE.getMethodName();
+    this.workDir = UTIL.getDataTestDir(caseName);
+    this.fs = this.workDir.getFileSystem(conf);
+    this.hfilePath = new Path(this.workDir, caseName + System.currentTimeMillis());
+    LOG.info("Start to write {} cells into hfile: {}, case:{}", CELL_COUNT, hfilePath, caseName);
+  }
+
+  @After
+  public void tearDown() throws IOException {
+    this.allocator.clean();
+    this.fs.delete(this.workDir, true);
+  }
+
+  private void waitBucketCacheFlushed(BlockCache cache) throws InterruptedException {
+    Assert.assertTrue(cache instanceof CombinedBlockCache);
+    BlockCache[] blockCaches = cache.getBlockCaches();
+    Assert.assertEquals(blockCaches.length, 2);
+    Assert.assertTrue(blockCaches[1] instanceof BucketCache);
+    TestBucketCache.waitUntilAllFlushedToBucket((BucketCache) blockCaches[1]);
   }
 
   private void writeHFile(Configuration conf, FileSystem fs, Path hfilePath, Algorithm compression,
@@ -107,176 +163,192 @@ public class TestHFileScannerImplReferenceCount {
     }
   }
 
+  /**
+   * A careful UT for validating the reference count mechanism: if you want to change this UT,
+   * please read the design doc in HBASE-21879 first and make sure that you understand the refCnt
+   * design.
+   */
   private void testReleaseBlock(Algorithm compression, DataBlockEncoding encoding)
       throws Exception {
-    Configuration conf = new Configuration(UTIL.getConfiguration());
-    Path dir = UTIL.getDataTestDir("testReleasingBlock");
-    FileSystem fs = dir.getFileSystem(conf);
-    try {
-      String hfileName = "testReleaseBlock_hfile_0_" + System.currentTimeMillis();
-      Path hfilePath = new Path(dir, hfileName);
-      int cellCount = 1000;
-      LOG.info("Start to write {} cells into hfile: {}", cellCount, hfilePath);
-      writeHFile(conf, fs, hfilePath, compression, encoding, cellCount);
-
-      BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
-      Assert.assertNotNull(defaultBC);
-      HFile.Reader reader =
-          HFile.createReader(fs, hfilePath, new CacheConfig(conf, defaultBC), true, conf);
-      Assert.assertTrue(reader instanceof HFileReaderImpl);
-      // We've build a HFile tree with index = 16.
-      Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
-
-      HFileScanner scanner = reader.getScanner(true, true, false);
-      BlockWithScanInfo scanInfo = reader.getDataBlockIndexReader()
-          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE);
-      BlockWithScanInfo scanInfo2 = reader.getDataBlockIndexReader()
-          .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE);
-      HFileBlock block = scanInfo.getHFileBlock();
-      HFileBlock block2 = scanInfo2.getHFileBlock();
-      // One refCnt for blockCache and the other refCnt for RPC path.
-      Assert.assertEquals(block.refCnt(), 2);
-      Assert.assertEquals(block2.refCnt(), 2);
-      Assert.assertFalse(block == block2);
-
-      scanner.seekTo(firstCell);
-      Assert.assertEquals(block.refCnt(), 3);
-
-      // Seek to the block again, the curBlock won't change and won't read from BlockCache. so
-      // refCnt should be unchanged.
-      scanner.seekTo(firstCell);
-      Assert.assertEquals(block.refCnt(), 3);
-
-      scanner.seekTo(secondCell);
-      Assert.assertEquals(block.refCnt(), 3);
-      Assert.assertEquals(block2.refCnt(), 3);
-
-      // After shipped, the block will be release, but block2 is still referenced by the curBlock.
-      scanner.shipped();
-      Assert.assertEquals(block.refCnt(), 2);
-      Assert.assertEquals(block2.refCnt(), 3);
-
-      // Try to ship again, though with nothing to client.
-      scanner.shipped();
-      Assert.assertEquals(block.refCnt(), 2);
-      Assert.assertEquals(block2.refCnt(), 3);
-
-      // The curBlock(block2) will also be released.
-      scanner.close();
-      Assert.assertEquals(block2.refCnt(), 2);
-
-      // Finish the block & block2 RPC path
-      block.release();
-      block2.release();
-      Assert.assertEquals(block.refCnt(), 1);
-      Assert.assertEquals(block2.refCnt(), 1);
-
-      // Evict the LRUBlockCache
-      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 2);
-      Assert.assertEquals(block.refCnt(), 0);
-      Assert.assertEquals(block2.refCnt(), 0);
-
-      int count = 0;
-      Assert.assertTrue(scanner.seekTo());
-      ++count;
-      while (scanner.next()) {
-        count++;
-      }
-      assertEquals(cellCount, count);
-    } finally {
-      fs.delete(dir, true);
+    writeHFile(conf, fs, hfilePath, compression, encoding, CELL_COUNT);
+    HFileBlock curBlock, prevBlock;
+    BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, defaultBC, allocator);
+    Assert.assertNotNull(defaultBC);
+    Assert.assertTrue(cacheConfig.isCombinedBlockCache());
+    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConfig, true, conf);
+    Assert.assertTrue(reader instanceof HFileReaderImpl);
+    // We've built an HFile tree with index = 16.
+    Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+    HFileScannerImpl scanner = (HFileScannerImpl) reader.getScanner(true, true, false);
+    HFileBlock block1 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    waitBucketCacheFlushed(defaultBC);
+    Assert.assertTrue(block1.getBlockType().isData());
+    Assert.assertFalse(block1 instanceof ExclusiveMemHFileBlock);
+
+    HFileBlock block2 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    waitBucketCacheFlushed(defaultBC);
+    Assert.assertTrue(block2.getBlockType().isData());
+    Assert.assertFalse(block2 instanceof ExclusiveMemHFileBlock);
+    // Only one refCnt for RPC path.
+    Assert.assertEquals(block1.refCnt(), 1);
+    Assert.assertEquals(block2.refCnt(), 1);
+    Assert.assertFalse(block1 == block2);
+
+    scanner.seekTo(firstCell);
+    curBlock = scanner.curBlock;
+    Assert.assertEquals(curBlock.refCnt(), 2);
+
+    // Seek to the block again, the curBlock won't change and won't read from BlockCache. so
+    // refCnt should be unchanged.
+    scanner.seekTo(firstCell);
+    Assert.assertTrue(curBlock == scanner.curBlock);
+    Assert.assertEquals(curBlock.refCnt(), 2);
+    prevBlock = curBlock;
+
+    scanner.seekTo(secondCell);
+    curBlock = scanner.curBlock;
+    Assert.assertEquals(prevBlock.refCnt(), 2);
+    Assert.assertEquals(curBlock.refCnt(), 2);
+
+    // After shipped, the prevBlock will be released, but curBlock is still referenced by the
+    // scanner as its current block.
+    scanner.shipped();
+    Assert.assertEquals(prevBlock.refCnt(), 1);
+    Assert.assertEquals(curBlock.refCnt(), 2);
+
+    // Try to ship again, though with nothing to client.
+    scanner.shipped();
+    Assert.assertEquals(prevBlock.refCnt(), 1);
+    Assert.assertEquals(curBlock.refCnt(), 2);
+
+    // The curBlock will also be released.
+    scanner.close();
+    Assert.assertEquals(curBlock.refCnt(), 1);
+
+    // Finish the block & block2 RPC path
+    Assert.assertTrue(block1.release());
+    Assert.assertTrue(block2.release());
+
+    // Evict the LRUBlockCache
+    Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfilePath.getName()) >= 2);
+    Assert.assertEquals(prevBlock.refCnt(), 0);
+    Assert.assertEquals(curBlock.refCnt(), 0);
+
+    int count = 0;
+    Assert.assertTrue(scanner.seekTo());
+    ++count;
+    while (scanner.next()) {
+      count++;
     }
+    assertEquals(CELL_COUNT, count);
   }
 
   /**
    * See HBASE-22480
    */
   @Test
-  public void testSeekBefore() throws IOException {
-    Configuration conf = new Configuration(UTIL.getConfiguration());
-    Path dir = UTIL.getDataTestDir("testSeekBefore");
-    FileSystem fs = dir.getFileSystem(conf);
-    try {
-      String hfileName = "testSeekBefore_hfile_0_" + System.currentTimeMillis();
-      Path hfilePath = new Path(dir, hfileName);
-      int cellCount = 1000;
-      LOG.info("Start to write {} cells into hfile: {}", cellCount, hfilePath);
-      writeHFile(conf, fs, hfilePath, Algorithm.NONE, DataBlockEncoding.NONE, cellCount);
-
-      BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
-      Assert.assertNotNull(defaultBC);
-      HFile.Reader reader =
-          HFile.createReader(fs, hfilePath, new CacheConfig(conf, defaultBC), true, conf);
-      Assert.assertTrue(reader instanceof HFileReaderImpl);
-      // We've build a HFile tree with index = 16.
-      Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
-
-      HFileScanner scanner = reader.getScanner(true, true, false);
-      HFileBlock block1 = reader.getDataBlockIndexReader()
-          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
-          .getHFileBlock();
-      HFileBlock block2 = reader.getDataBlockIndexReader()
-          .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE)
-          .getHFileBlock();
-      Assert.assertEquals(block1.refCnt(), 2);
-      Assert.assertEquals(block2.refCnt(), 2);
-
-      // Let the curBlock refer to block2.
-      scanner.seekTo(secondCell);
-      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block2);
-      Assert.assertEquals(3, block2.refCnt());
-
-      // Release the block1, only one reference: blockCache.
-      Assert.assertFalse(block1.release());
-      Assert.assertEquals(1, block1.refCnt());
-      // Release the block2, so the remain references are: 1. scanner; 2. blockCache.
-      Assert.assertFalse(block2.release());
-      Assert.assertEquals(2, block2.refCnt());
-
-      // Do the seekBefore: the newBlock will be the previous block of curBlock.
-      Assert.assertTrue(scanner.seekBefore(secondCell));
-      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block1);
-      // Two reference for block1: 1. scanner; 2. blockCache.
-      Assert.assertEquals(2, block1.refCnt());
-      // Reference count of block2 must be unchanged because we haven't shipped.
-      Assert.assertEquals(2, block2.refCnt());
-
-      // Do the shipped
-      scanner.shipped();
-      Assert.assertEquals(2, block1.refCnt());
-      Assert.assertEquals(1, block2.refCnt());
-
-      // Do the close
-      scanner.close();
-      Assert.assertEquals(1, block1.refCnt());
-      Assert.assertEquals(1, block2.refCnt());
-
-      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 2);
-      Assert.assertEquals(0, block1.refCnt());
-      Assert.assertEquals(0, block2.refCnt());
-
-      // Reload the block1 again.
-      block1 = reader.getDataBlockIndexReader()
-          .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
-          .getHFileBlock();
-      Assert.assertFalse(block1.release());
-      Assert.assertEquals(1, block1.refCnt());
-      // Re-seek to the begin.
-      Assert.assertTrue(scanner.seekTo());
-      Assert.assertTrue(((HFileScannerImpl) scanner).curBlock == block1);
-      Assert.assertEquals(2, block1.refCnt());
-      // Return false because firstCell <= c[0]
-      Assert.assertFalse(scanner.seekBefore(firstCell));
-      // The block1 shouldn't be released because we still don't do the shipped or close.
-      Assert.assertEquals(2, block1.refCnt());
-
-      scanner.close();
-      Assert.assertEquals(1, block1.refCnt());
-      Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfileName) >= 1);
-      Assert.assertEquals(0, block1.refCnt());
-    } finally {
-      fs.delete(dir, true);
-    }
+  public void testSeekBefore() throws Exception {
+    HFileBlock curBlock, prevBlock;
+    writeHFile(conf, fs, hfilePath, Algorithm.NONE, DataBlockEncoding.NONE, CELL_COUNT);
+    BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, defaultBC, allocator);
+    Assert.assertNotNull(defaultBC);
+    Assert.assertTrue(cacheConfig.isCombinedBlockCache());
+    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConfig, true, conf);
+    Assert.assertTrue(reader instanceof HFileReaderImpl);
+    // We've built an HFile tree with index = 16.
+    Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+    HFileScannerImpl scanner = (HFileScannerImpl) reader.getScanner(true, true, false);
+    HFileBlock block1 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    Assert.assertTrue(block1.getBlockType().isData());
+    Assert.assertFalse(block1 instanceof ExclusiveMemHFileBlock);
+    HFileBlock block2 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    Assert.assertTrue(block2.getBlockType().isData());
+    Assert.assertFalse(block2 instanceof ExclusiveMemHFileBlock);
+    // Wait until flushed to IOEngine;
+    waitBucketCacheFlushed(defaultBC);
+    // One RPC reference path.
+    Assert.assertEquals(block1.refCnt(), 1);
+    Assert.assertEquals(block2.refCnt(), 1);
+
+    // Let the curBlock refer to block2.
+    scanner.seekTo(secondCell);
+    curBlock = scanner.curBlock;
+    Assert.assertFalse(curBlock == block2);
+    Assert.assertEquals(1, block2.refCnt());
+    Assert.assertEquals(2, curBlock.refCnt());
+    prevBlock = scanner.curBlock;
+
+    // Release the block1, no other reference.
+    Assert.assertTrue(block1.release());
+    Assert.assertEquals(0, block1.refCnt());
+    // Release the block2, no other reference.
+    Assert.assertTrue(block2.release());
+    Assert.assertEquals(0, block2.refCnt());
+
+    // Do the seekBefore: the newBlock will be the previous block of curBlock.
+    Assert.assertTrue(scanner.seekBefore(secondCell));
+    Assert.assertEquals(scanner.prevBlocks.size(), 1);
+    Assert.assertTrue(scanner.prevBlocks.get(0) == prevBlock);
+    curBlock = scanner.curBlock;
+    // The curBlock was read from the IOEngine, so it's a different block instance.
+    Assert.assertFalse(curBlock == block1);
+    // Two references for curBlock: 1. scanner; 2. blockCache.
+    Assert.assertEquals(2, curBlock.refCnt());
+    // Reference count of prevBlock must be unchanged because we haven't shipped.
+    Assert.assertEquals(2, prevBlock.refCnt());
+
+    // Do the shipped
+    scanner.shipped();
+    Assert.assertEquals(scanner.prevBlocks.size(), 0);
+    Assert.assertNotNull(scanner.curBlock);
+    Assert.assertEquals(2, curBlock.refCnt());
+    Assert.assertEquals(1, prevBlock.refCnt());
+
+    // Do the close
+    scanner.close();
+    Assert.assertNull(scanner.curBlock);
+    Assert.assertEquals(1, curBlock.refCnt());
+    Assert.assertEquals(1, prevBlock.refCnt());
+
+    Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfilePath.getName()) >= 2);
+    Assert.assertEquals(0, curBlock.refCnt());
+    Assert.assertEquals(0, prevBlock.refCnt());
+
+    // Reload the block1 again.
+    block1 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    // Wait until flushed to IOEngine;
+    waitBucketCacheFlushed(defaultBC);
+    Assert.assertTrue(block1.getBlockType().isData());
+    Assert.assertFalse(block1 instanceof ExclusiveMemHFileBlock);
+    Assert.assertTrue(block1.release());
+    Assert.assertEquals(0, block1.refCnt());
+    // Re-seek to the begin.
+    Assert.assertTrue(scanner.seekTo());
+    curBlock = scanner.curBlock;
+    Assert.assertFalse(curBlock == block1);
+    Assert.assertEquals(2, curBlock.refCnt());
+    // Return false because firstCell <= c[0]
+    Assert.assertFalse(scanner.seekBefore(firstCell));
+    // curBlock shouldn't be released because we haven't called shipped() or close() yet.
+    Assert.assertEquals(2, curBlock.refCnt());
+
+    scanner.close();
+    Assert.assertEquals(1, curBlock.refCnt());
+    Assert.assertTrue(defaultBC.evictBlocksByHfileName(hfilePath.getName()) >= 1);
+    Assert.assertEquals(0, curBlock.refCnt());
   }
 
   @Test
@@ -298,4 +370,56 @@ public class TestHFileScannerImplReferenceCount {
   public void testDataBlockEncodingAndCompression() throws Exception {
     testReleaseBlock(Algorithm.GZ, DataBlockEncoding.ROW_INDEX_V1);
   }
+
+  @Test
+  public void testWithLruBlockCache() throws Exception {
+    HFileBlock curBlock;
+    writeHFile(conf, fs, hfilePath, Algorithm.NONE, DataBlockEncoding.NONE, CELL_COUNT);
+    // Set LruBlockCache
+    conf.set(BUCKET_CACHE_IOENGINE_KEY, "");
+    BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, defaultBC, allocator);
+    Assert.assertNotNull(defaultBC);
+    Assert.assertFalse(cacheConfig.isCombinedBlockCache()); // Must be LruBlockCache.
+    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConfig, true, conf);
+    Assert.assertTrue(reader instanceof HFileReaderImpl);
+    // We've built an HFile tree whose data block index has 16 levels.
+    Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+    HFileScannerImpl scanner = (HFileScannerImpl) reader.getScanner(true, true, false);
+    HFileBlock block1 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    Assert.assertTrue(block1.getBlockType().isData());
+    Assert.assertTrue(block1 instanceof ExclusiveMemHFileBlock);
+    HFileBlock block2 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(secondCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+    Assert.assertTrue(block2.getBlockType().isData());
+    Assert.assertTrue(block2 instanceof ExclusiveMemHFileBlock);
+    // ExclusiveMemHFileBlock is not reference counted, so refCnt stays 0.
+    Assert.assertEquals(block1.refCnt(), 0);
+    Assert.assertEquals(block2.refCnt(), 0);
+
+    scanner.seekTo(firstCell);
+    curBlock = scanner.curBlock;
+    Assert.assertTrue(curBlock == block1);
+    Assert.assertEquals(curBlock.refCnt(), 0);
+    Assert.assertTrue(scanner.prevBlocks.isEmpty());
+
+    // Switch to next block
+    scanner.seekTo(secondCell);
+    curBlock = scanner.curBlock;
+    Assert.assertTrue(curBlock == block2);
+    Assert.assertEquals(curBlock.refCnt(), 0);
+    Assert.assertEquals(curBlock.retain().refCnt(), 0);
+    // Only pooled HFileBlocks are kept in prevBlocks; an ExclusiveMemHFileBlock is never added
+    // to prevBlocks.
+    Assert.assertTrue(scanner.prevBlocks.isEmpty());
+
+    // close the scanner
+    scanner.close();
+    Assert.assertNull(scanner.curBlock);
+    Assert.assertTrue(scanner.prevBlocks.isEmpty());
+  }
 }
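
The two tests above pin down a reference-counting contract: the scanner retains whatever block it currently points at, parks replaced blocks in prevBlocks until the RPC response is shipped, and drops all of its own references on shipped()/close(), after which only the block cache's reference remains. A minimal, self-contained sketch of that contract follows; RefCountedBlock and SketchScanner are hypothetical stand-ins, not HBase's HFileBlock/HFileScannerImpl.

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.concurrent.atomic.AtomicInteger;

final class RefCountedBlock {
  private final AtomicInteger refCnt = new AtomicInteger(1); // the creator (e.g. the cache) owns one reference

  RefCountedBlock retain() {
    refCnt.incrementAndGet();
    return this;
  }

  /** @return true if this call dropped the last reference. */
  boolean release() {
    return refCnt.decrementAndGet() == 0;
  }

  int refCnt() {
    return refCnt.get();
  }
}

final class SketchScanner {
  private RefCountedBlock curBlock;
  private final Deque<RefCountedBlock> prevBlocks = new ArrayDeque<>();

  void updateCurrBlock(RefCountedBlock next) {
    if (curBlock != null) {
      prevBlocks.add(curBlock); // keep it referenced until the RPC response is shipped
    }
    curBlock = next.retain();   // the scanner takes its own reference
  }

  void shipped() {
    while (!prevBlocks.isEmpty()) {
      prevBlocks.poll().release();
    }
  }

  void close() {
    shipped();
    if (curBlock != null) {
      curBlock.release();
      curBlock = null;
    }
  }
}

In this model, a scan that moves from block A to block B leaves A at refCnt 2 (cache + scanner) until shipped() runs and drops it to 1, which is the same shape the assertions above check.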
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
index 9b4d768..af70f3d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java
@@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache.EvictionThread;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.ClassSize;
@@ -820,10 +821,10 @@ public class TestLruBlockCache {
     byte[] byteArr = new byte[length];
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
-    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta, HEAP);
-    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta, HEAP);
+    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf), HFileBlock.FILL_HEADER, -1, 52, -1, meta, HEAP);
+    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf), HFileBlock.FILL_HEADER, -1, -1, -1, meta, HEAP);
 
     LruBlockCache cache = new LruBlockCache(maxSize, blockSize, false,
         (int)Math.ceil(1.2*maxSize/blockSize),
@@ -964,7 +965,8 @@ public class TestLruBlockCache {
     HFileContext meta = new HFileContextBuilder().build();
     BlockCacheKey key = new BlockCacheKey("key1", 0);
     HFileBlock blk = new HFileBlock(BlockType.DATA, size, size, -1,
-        ByteBuffer.wrap(byteArr, 0, size), HFileBlock.FILL_HEADER, -1, 52, -1, meta, HEAP);
+        ByteBuff.wrap(ByteBuffer.wrap(byteArr, 0, size)), HFileBlock.FILL_HEADER, -1, 52, -1, meta,
+        HEAP);
     AtomicBoolean err1 = new AtomicBoolean(false);
     Thread t1 = new Thread(() -> {
       for (int i = 0; i < 10000 && !err1.get(); i++) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index 121e070..4ac7907 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -203,7 +203,7 @@ public class TestBucketCache {
     CacheTestUtils.testHeapSizeChanges(cache, BLOCK_SIZE);
   }
 
-  private void waitUntilFlushedToBucket(BucketCache cache, BlockCacheKey cacheKey)
+  public static void waitUntilFlushedToBucket(BucketCache cache, BlockCacheKey cacheKey)
       throws InterruptedException {
     while (!cache.backingMap.containsKey(cacheKey) || cache.ramCache.containsKey(cacheKey)) {
       Thread.sleep(100);
@@ -211,6 +211,13 @@ public class TestBucketCache {
     Thread.sleep(1000);
   }
 
+  public static void waitUntilAllFlushedToBucket(BucketCache cache) throws InterruptedException {
+    while (!cache.ramCache.isEmpty()) {
+      Thread.sleep(100);
+    }
+    Thread.sleep(1000);
+  }
+
   // BucketCache.cacheBlock is async, it first adds block to ramCache and writeQueue, then writer
   // threads will flush it to the bucket and put reference entry in backingMap.
   private void cacheAndWaitUntilFlushedToBucket(BucketCache cache, BlockCacheKey cacheKey,
@@ -430,10 +437,10 @@ public class TestBucketCache {
     ByteBuffer buf1 = ByteBuffer.allocate(size), buf2 = ByteBuffer.allocate(size);
     HFileContext meta = new HFileContextBuilder().build();
     ByteBuffAllocator allocator = ByteBuffAllocator.HEAP;
-    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf1,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta, allocator);
-    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf2,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta, allocator);
+    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf1), HFileBlock.FILL_HEADER, -1, 52, -1, meta, allocator);
+    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1,
+        ByteBuff.wrap(buf2), HFileBlock.FILL_HEADER, -1, -1, -1, meta, allocator);
 
     BlockCacheKey key = new BlockCacheKey("testCacheBlockNextBlockMetadataMissing", 0);
     ByteBuffer actualBuffer = ByteBuffer.allocate(length);
@@ -492,10 +499,10 @@ public class TestBucketCache {
     RAMCache cache = new RAMCache();
     BlockCacheKey key1 = new BlockCacheKey("file-1", 1);
     BlockCacheKey key2 = new BlockCacheKey("file-2", 2);
-    HFileBlock blk1 = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-        -1, 52, -1, meta, ByteBuffAllocator.HEAP);
-    HFileBlock blk2 = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-        -1, -1, -1, meta, ByteBuffAllocator.HEAP);
+    HFileBlock blk1 = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, ByteBuffAllocator.HEAP);
+    HFileBlock blk2 = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, ByteBuffAllocator.HEAP);
     RAMQueueEntry re1 = new RAMQueueEntry(key1, blk1, 1, false, ByteBuffAllocator.NONE);
     RAMQueueEntry re2 = new RAMQueueEntry(key1, blk2, 1, false, ByteBuffAllocator.NONE);
 
@@ -527,8 +534,8 @@ public class TestBucketCache {
     int length = HConstants.HFILEBLOCK_HEADER_SIZE + size;
     ByteBuffer buf = ByteBuffer.allocate(length);
     HFileContext meta = new HFileContextBuilder().build();
-    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-        offset, 52, -1, meta, ByteBuffAllocator.HEAP);
+    HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(buf),
+        HFileBlock.FILL_HEADER, offset, 52, -1, meta, ByteBuffAllocator.HEAP);
 
     // initialize an mocked ioengine.
     IOEngine ioEngine = Mockito.mock(IOEngine.class);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
index cf356f3..a9f50cd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.WriterThread;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.junit.ClassRule;
@@ -69,7 +70,7 @@ public class TestBucketCacheRefCnt {
   }
 
   private static HFileBlock createBlock(int offset, int size, ByteBuffAllocator alloc) {
-    return new HFileBlock(BlockType.DATA, size, size, -1, ByteBuffer.allocate(size),
+    return new HFileBlock(BlockType.DATA, size, size, -1, ByteBuff.wrap(ByteBuffer.allocate(size)),
         HFileBlock.FILL_HEADER, offset, 52, size, CONTEXT, alloc);
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java
index 5c5dda6..07b4b3d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestRAMCache.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMCache;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.junit.Assert;
@@ -57,9 +58,9 @@ public class TestRAMCache {
         int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer b, boolean fillHeader,
         long offset, int nextBlockOnDiskSize, int onDiskDataSizeWithHeader,
         HFileContext fileContext, ByteBuffAllocator allocator) {
-      super(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, b,
-          fillHeader, offset, nextBlockOnDiskSize, onDiskDataSizeWithHeader, fileContext,
-          allocator);
+      super(blockType, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset,
+          ByteBuff.wrap(b), fillHeader, offset, nextBlockOnDiskSize, onDiskDataSizeWithHeader,
+          fileContext, allocator);
     }
 
     public void setLatch(CountDownLatch latch) {


[hbase] 03/22: HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input. (addendum)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 6aec70991ec05c06a5630c8258b48bba24481e47
Author: huzheng <op...@gmail.com>
AuthorDate: Thu Mar 7 10:19:32 2019 +0800

    HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input. (addendum)
---
 .../src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 5317f0e..dc007f7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -91,7 +91,7 @@ public class ChecksumUtil {
    */
   private static boolean verifyChunkedSums(DataChecksum dataChecksum, ByteBuff data,
       ByteBuff checksums, String pathName) {
-    // Almost all of the HFile Block are about 64KB, so it would be a SingleByteBuff, use the
+    // Almost all HFile blocks are about 64KB, so the data will be a SingleByteBuff; use the
     // Hadoop's verify checksum directly, because it'll use the native checksum, which has no extra
     // byte[] allocation or copying. (HBASE-21917)
     if (data instanceof SingleByteBuff && checksums instanceof SingleByteBuff) {
@@ -108,8 +108,7 @@ public class ChecksumUtil {
       }
     }
 
-    // Only when the dataBlock is larger than 4MB (default buffer size in BucketCache), the block
-    // will be an MultiByteBuff. we use a small byte[] to update the checksum many times for
+    // If the block is a MultiByteBuff, we use a small byte[] to update the checksum many times for
     // reducing GC pressure. it's a rare case.
     int checksumTypeSize = dataChecksum.getChecksumType().size;
     if (checksumTypeSize == 0) {
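
The rewritten comments above distinguish the SingleByteBuff fast path (delegated to Hadoop's native checksum verification) from the MultiByteBuff path, which walks the data in bytesPerChecksum-sized chunks with a small reusable byte[]. A simplified, self-contained illustration of that chunked path follows, using java.util.zip.CRC32 in place of Hadoop's DataChecksum; the class name and the 4-byte-per-chunk checksum layout are assumptions of the sketch.

import java.nio.ByteBuffer;
import java.util.zip.CRC32;

final class ChunkedCrcSketch {
  /**
   * Verify CRC32 checksums stored as one 4-byte value per bytesPerChecksum chunk of data.
   * Returns true only when every chunk matches its stored checksum.
   */
  static boolean verify(ByteBuffer data, ByteBuffer checksums, int bytesPerChecksum) {
    CRC32 crc = new CRC32();
    byte[] chunk = new byte[bytesPerChecksum]; // small scratch buffer, reused for every chunk
    ByteBuffer d = data.duplicate();
    ByteBuffer c = checksums.duplicate();
    while (d.hasRemaining()) {
      int len = Math.min(bytesPerChecksum, d.remaining());
      d.get(chunk, 0, len);
      crc.reset();
      crc.update(chunk, 0, len);
      int expected = c.getInt();
      if ((int) crc.getValue() != expected) {
        return false;
      }
    }
    return true;
  }
}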


[hbase] 15/22: HBASE-22435 Add a UT to address the HFileBlock#heapSize() in TestHeapSize

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit b8e1ad5f684a569facd35f2e51e01c208cdf1bee
Author: huzheng <op...@gmail.com>
AuthorDate: Tue May 21 15:47:19 2019 +0800

    HBASE-22435 Add a UT to address the HFileBlock#heapSize() in TestHeapSize
---
 .../apache/hadoop/hbase/io/hfile/HFileContext.java | 17 ++++++++-------
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 24 +++++++++-------------
 .../org/apache/hadoop/hbase/io/TestHeapSize.java   | 16 +++++++++++++++
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
index b5ccda2..6074f10 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
@@ -34,6 +34,11 @@ import org.apache.yetus.audience.InterfaceAudience;
  */
 @InterfaceAudience.Private
 public class HFileContext implements HeapSize, Cloneable {
+  public static final int FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT +
+      // Algorithm, checksumType, encoding, Encryption.Context, hfileName reference
+      5 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT +
+      // usesHBaseChecksum, includesMvcc, includesTags and compressTags
+      4 * Bytes.SIZEOF_BOOLEAN + Bytes.SIZEOF_LONG);
 
   public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
 
@@ -188,19 +193,13 @@ public class HFileContext implements HeapSize, Cloneable {
   }
 
   /**
-   * HeapSize implementation
-   * NOTE : The heapsize should be altered as and when new state variable are added
+   * HeapSize implementation. NOTE: the heap size must be updated as and when new state variables
+   * are added.
    * @return heap size of the HFileContext
    */
   @Override
   public long heapSize() {
-    long size = ClassSize.align(ClassSize.OBJECT +
-        // Algorithm reference, encodingon, checksumtype, Encryption.Context reference
-        5 * ClassSize.REFERENCE +
-        2 * Bytes.SIZEOF_INT +
-        // usesHBaseChecksum, includesMvcc, includesTags and compressTags
-        4 * Bytes.SIZEOF_BOOLEAN +
-        Bytes.SIZEOF_LONG);
+    long size = FIXED_OVERHEAD;
     if (this.hfileName != null) {
       size += ClassSize.STRING + this.hfileName.length();
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 92dcf44..846460f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -113,6 +113,14 @@ import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 @InterfaceAudience.Private
 public class HFileBlock implements Cacheable {
   private static final Logger LOG = LoggerFactory.getLogger(HFileBlock.class);
+  public static final int FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT +
+     // BlockType, ByteBuff, MemoryType, HFileContext, ByteBuffAllocator
+      5 * ClassSize.REFERENCE +
+      // On-disk size, uncompressed size, and next block's on-disk size
+      // bytePerChecksum and onDiskDataSize
+      4 * Bytes.SIZEOF_INT +
+      // This and previous block offset
+      2 * Bytes.SIZEOF_LONG);
 
   // Block Header fields.
 
@@ -739,24 +747,12 @@ public class HFileBlock implements Cacheable {
 
   @Override
   public long heapSize() {
-    long size = ClassSize.align(
-        ClassSize.OBJECT +
-        // Block type, multi byte buffer, MemoryType and meta references
-        4 * ClassSize.REFERENCE +
-        // On-disk size, uncompressed size, and next block's on-disk size
-        // bytePerChecksum and onDiskDataSize
-        4 * Bytes.SIZEOF_INT +
-        // This and previous block offset
-        2 * Bytes.SIZEOF_LONG +
-        // Heap size of the meta object. meta will be always not null.
-        fileContext.heapSize()
-    );
-
+    long size = FIXED_OVERHEAD;
+    size += fileContext.heapSize();
     if (buf != null) {
       // Deep overhead of the byte buffer. Needs to be aligned separately.
       size += ClassSize.align(buf.capacity() + MULTI_BYTE_BUFFER_HEAP_SIZE);
     }
-
     return ClassSize.align(size);
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
index 993503d..71ffb87 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
@@ -43,6 +43,8 @@ import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.Mutation;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
 import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
 import org.apache.hadoop.hbase.io.hfile.LruCachedBlock;
 import org.apache.hadoop.hbase.regionserver.CSLMImmutableSegment;
@@ -516,6 +518,20 @@ public class TestHeapSize  {
   }
 
   @Test
+  public void testHFileBlockSize() throws IOException {
+    long expected;
+    long actual;
+
+    actual = HFileContext.FIXED_OVERHEAD;
+    expected = ClassSize.estimateBase(HFileContext.class, false);
+    assertEquals(expected, actual);
+
+    actual = HFileBlock.FIXED_OVERHEAD;
+    expected = ClassSize.estimateBase(HFileBlock.class, false);
+    assertEquals(expected, actual);
+  }
+
+  @Test
   public void testMutations(){
     Class<?> cl;
     long expected;
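
The pattern in this commit is to hoist the hand-written heap-size arithmetic into a FIXED_OVERHEAD constant and have a unit test compare it against a reflection-based estimate, so the constant cannot silently drift when fields are added. A rough, self-contained sketch of such an estimator follows; the 16-byte header and 8-byte reference figures are simplifying assumptions of the sketch (and superclass fields are ignored for brevity), whereas HBase's ClassSize uses JVM-aware constants for reference size, alignment and so on.

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;

final class HeapOverheadSketch {
  // Assumed JVM layout for this sketch only: 16-byte object header, 8 bytes per reference,
  // 8-byte alignment.
  static final int OBJECT_HEADER = 16;
  static final int REFERENCE = 8;

  static long estimateBase(Class<?> clazz) {
    long size = OBJECT_HEADER;
    for (Field f : clazz.getDeclaredFields()) {
      if (Modifier.isStatic(f.getModifiers())) {
        continue; // static fields are not part of the instance size
      }
      Class<?> t = f.getType();
      if (!t.isPrimitive()) {
        size += REFERENCE;
      } else if (t == long.class || t == double.class) {
        size += 8;
      } else if (t == int.class || t == float.class) {
        size += 4;
      } else if (t == short.class || t == char.class) {
        size += 2;
      } else {
        size += 1; // byte, boolean
      }
    }
    return align(size);
  }

  static long align(long size) {
    return (size + 7) & ~7L;
  }
}

A class would then declare its own FIXED_OVERHEAD constant, and a test would assert that it equals estimateBase(TheClass.class), mirroring the testHFileBlockSize() assertions added above.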


[hbase] 06/22: HBASE-22127 Ensure that the block cached in the LRUBlockCache offheap is allocated from heap

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 982cb7f804c47681ecdfdfd741bcb4354d8dfae9
Author: huzheng <op...@gmail.com>
AuthorDate: Mon Apr 1 22:23:24 2019 +0800

    HBASE-22127 Ensure that the block cached in the LRUBlockCache offheap is allocated from heap
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  |  20 ++-
 .../apache/hadoop/hbase/io/hfile/CacheConfig.java  |   4 +
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |  86 +++++++-----
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  49 ++++---
 .../hadoop/hbase/io/hfile/LruBlockCache.java       |  32 ++++-
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  76 ++++++++---
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |  24 ++--
 .../apache/hadoop/hbase/io/hfile/TestHFile.java    | 124 ++++++++++++++++--
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      | 145 +++++++++++++++------
 .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java |   2 +-
 .../hadoop/hbase/io/hfile/TestHFileEncryption.java |   2 +-
 .../hadoop/hbase/io/hfile/TestHFileWriterV3.java   |   8 +-
 .../io/hfile/TestLazyDataBlockDecompression.java   |   2 +-
 .../io/hfile/bucket/TestBucketWriterThread.java    |   6 +-
 .../hadoop/hbase/master/AbstractTestDLS.java       |  29 ++---
 15 files changed, 453 insertions(+), 156 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 0020e23..984d46d 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Queue;
 import java.util.concurrent.ConcurrentLinkedQueue;
 import java.util.concurrent.atomic.AtomicInteger;
+import sun.nio.ch.DirectBuffer;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
@@ -34,7 +35,6 @@ import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 
 /**
@@ -191,7 +191,7 @@ public class ByteBuffAllocator {
     }
     // If disabled the reservoir, just allocate it from on-heap.
     if (!isReservoirEnabled() || size == 0) {
-      return new SingleByteBuff(NONE, ByteBuffer.allocate(size));
+      return allocateOnHeap(size);
     }
     int reminder = size % bufSize;
     int len = size / bufSize + (reminder > 0 ? 1 : 0);
@@ -222,6 +222,22 @@ public class ByteBuffAllocator {
     return bb;
   }
 
+  /**
+   * Free all direct buffers if allocated, mainly used for testing.
+   */
+  @VisibleForTesting
+  public void clean() {
+    while (!buffers.isEmpty()) {
+      ByteBuffer b = buffers.poll();
+      if (b instanceof DirectBuffer) {
+        DirectBuffer db = (DirectBuffer) b;
+        if (db.cleaner() != null) {
+          db.cleaner().clean();
+        }
+      }
+    }
+  }
+
   public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
     if (buffers == null || buffers.length == 0) {
       throw new IllegalArgumentException("buffers shouldn't be null or empty");
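
The clean() method added above exists so tests can release pooled direct buffers promptly. For orientation, here is a minimal reservoir sketch with the same shape (a bounded pool of fixed-size direct buffers with a heap fallback when exhausted); it is not ByteBuffAllocator, and its clean() merely drops the references and leaves reclamation to the GC instead of invoking the JDK-internal DirectBuffer cleaner used in the patch.

import java.nio.ByteBuffer;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;

final class BufferReservoirSketch {
  private final int bufSize;
  private final int maxCount;
  private final AtomicInteger created = new AtomicInteger();
  private final Queue<ByteBuffer> free = new ConcurrentLinkedQueue<>();

  BufferReservoirSketch(int bufSize, int maxCount) {
    this.bufSize = bufSize;
    this.maxCount = maxCount;
  }

  /** Serve a pooled direct buffer if possible, otherwise fall back to a heap buffer. */
  ByteBuffer allocate() {
    ByteBuffer b = free.poll();
    if (b != null) {
      return b;
    }
    if (created.incrementAndGet() <= maxCount) {
      return ByteBuffer.allocateDirect(bufSize);
    }
    created.decrementAndGet();
    return ByteBuffer.allocate(bufSize); // reservoir exhausted: plain heap allocation
  }

  /** Return a pooled (direct) buffer; heap fallbacks are simply left to the GC. */
  void release(ByteBuffer b) {
    if (b.isDirect()) {
      b.clear();
      free.offer(b);
    }
  }

  /** Test-only: drop every pooled buffer so the direct memory can eventually be reclaimed. */
  void clean() {
    free.clear();
  }
}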
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
index 53c216f..bb57fbe 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
@@ -367,6 +367,10 @@ public class CacheConfig {
     return Optional.ofNullable(this.blockCache);
   }
 
+  public boolean isCombinedBlockCache() {
+    return blockCache instanceof CombinedBlockCache;
+  }
+
   public ByteBuffAllocator getByteBuffAllocator() {
     return this.byteBuffAllocator;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 22a8295..2fe9255 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -763,6 +763,13 @@ public class HFileBlock implements Cacheable {
   }
 
   /**
+   * @return true to indicate the block is allocated from JVM heap, otherwise from off-heap.
+   */
+  boolean isOnHeap() {
+    return buf.hasArray();
+  }
+
+  /**
    * Unified version 2 {@link HFile} block writer. The intended usage pattern
    * is as follows:
    * <ol>
@@ -1300,16 +1307,29 @@ public class HFileBlock implements Cacheable {
   /** An HFile block reader with iteration ability. */
   interface FSReader {
     /**
-     * Reads the block at the given offset in the file with the given on-disk
-     * size and uncompressed size.
-     *
-     * @param offset
-     * @param onDiskSize the on-disk size of the entire block, including all
-     *          applicable headers, or -1 if unknown
+     * Reads the block at the given offset in the file with the given on-disk size and uncompressed
+     * size.
+     * @param offset the offset in the file to read at
+     * @param onDiskSize the on-disk size of the entire block, including all applicable headers, or
+     *          -1 if unknown
+     * @param pread true to use pread, otherwise use the stream read.
+     * @param updateMetrics whether to update the metrics.
+     * @param intoHeap whether to allocate the block's ByteBuff from the JVM heap instead of the
+     *          {@link ByteBuffAllocator}. A block cached in LRUBlockCache must be a heap block,
+     *          because that cache accounts its memory against the heap; in a
+     *          {@link CombinedBlockCache} the heap LRUBlockCache serves as the L1 cache for small
+     *          blocks such as index or meta blocks for faster access. So this flag lets the caller
+     *          decide up front whether to allocate from the JVM heap, avoiding an extra off-heap
+     *          to heap copy when the block ends up in LRUBlockCache. In most cases we know the
+     *          expected block type before reading, but in some cases (for example
+     *          HFileReaderImpl#readNextDataBlock()) we cannot, so we allocate the block's ByteBuff
+     *          from the {@link ByteBuffAllocator} first, and when caching it in
+     *          {@link LruBlockCache} we check whether the ByteBuff is on heap; if not, we clone it
+     *          to a heap block and cache the clone.
      * @return the newly read block
      */
-    HFileBlock readBlockData(long offset, long onDiskSize, boolean pread, boolean updateMetrics)
-        throws IOException;
+    HFileBlock readBlockData(long offset, long onDiskSize, boolean pread, boolean updateMetrics,
+        boolean intoHeap) throws IOException;
 
     /**
      * Creates a block iterator over the given portion of the {@link HFile}.
@@ -1444,7 +1464,7 @@ public class HFileBlock implements Cacheable {
           if (offset >= endOffset) {
             return null;
           }
-          HFileBlock b = readBlockData(offset, length, false, false);
+          HFileBlock b = readBlockData(offset, length, false, false, true);
           offset += b.getOnDiskSizeWithHeader();
           length = b.getNextBlockOnDiskSize();
           HFileBlock uncompressed = b.unpack(fileContext, owner);
@@ -1526,16 +1546,18 @@ public class HFileBlock implements Cacheable {
     /**
      * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as
      * little memory allocation as possible, using the provided on-disk size.
-     *
      * @param offset the offset in the stream to read at
-     * @param onDiskSizeWithHeaderL the on-disk size of the block, including
-     *          the header, or -1 if unknown; i.e. when iterating over blocks reading
-     *          in the file metadata info.
+     * @param onDiskSizeWithHeaderL the on-disk size of the block, including the header, or -1 if
+     *          unknown; i.e. when iterating over blocks reading in the file metadata info.
      * @param pread whether to use a positional read
+     * @param updateMetrics whether to update the metrics
+     * @param intoHeap allocate ByteBuff of block from heap or off-heap.
+     * @see FSReader#readBlockData(long, long, boolean, boolean, boolean) for more details about
+     *      the intoHeap parameter.
      */
     @Override
     public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL, boolean pread,
-                                    boolean updateMetrics) throws IOException {
+        boolean updateMetrics, boolean intoHeap) throws IOException {
       // Get a copy of the current state of whether to validate
       // hbase checksums or not for this read call. This is not
       // thread-safe but the one constaint is that if we decide
@@ -1544,9 +1566,8 @@ public class HFileBlock implements Cacheable {
       boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum();
       FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum);
 
-      HFileBlock blk = readBlockDataInternal(is, offset,
-                         onDiskSizeWithHeaderL, pread,
-                         doVerificationThruHBaseChecksum, updateMetrics);
+      HFileBlock blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
+        doVerificationThruHBaseChecksum, updateMetrics, intoHeap);
       if (blk == null) {
         HFile.LOG.warn("HBase checksum verification failed for file " +
                        pathName + " at offset " +
@@ -1573,7 +1594,7 @@ public class HFileBlock implements Cacheable {
         is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD);
         doVerificationThruHBaseChecksum = false;
         blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
-                                    doVerificationThruHBaseChecksum, updateMetrics);
+          doVerificationThruHBaseChecksum, updateMetrics, intoHeap);
         if (blk != null) {
           HFile.LOG.warn("HDFS checksum verification succeeded for file " +
                          pathName + " at offset " +
@@ -1669,24 +1690,29 @@ public class HFileBlock implements Cacheable {
       return nextBlockOnDiskSize;
     }
 
+    private ByteBuff allocate(int size, boolean intoHeap) {
+      return intoHeap ? ByteBuffAllocator.HEAP.allocate(size) : allocator.allocate(size);
+    }
+
     /**
      * Reads a version 2 block.
-     *
      * @param offset the offset in the stream to read at.
-     * @param onDiskSizeWithHeaderL the on-disk size of the block, including
-     *          the header and checksums if present or -1 if unknown (as a long). Can be -1
-     *          if we are doing raw iteration of blocks as when loading up file metadata; i.e.
-     *          the first read of a new file. Usually non-null gotten from the file index.
+     * @param onDiskSizeWithHeaderL the on-disk size of the block, including the header and
+     *          checksums if present or -1 if unknown (as a long). Can be -1 if we are doing raw
+     *          iteration of blocks as when loading up file metadata; i.e. the first read of a new
+     *          file. Usually non-null gotten from the file index.
      * @param pread whether to use a positional read
-     * @param verifyChecksum Whether to use HBase checksums.
-     *        If HBase checksum is switched off, then use HDFS checksum. Can also flip on/off
-     *        reading same file if we hit a troublesome patch in an hfile.
+     * @param verifyChecksum Whether to use HBase checksums. If HBase checksum is switched off, then
+     *          use HDFS checksum. Can also flip on/off reading same file if we hit a troublesome
+     *          patch in an hfile.
+     * @param updateMetrics whether to update the metrics.
+     * @param intoHeap whether to allocate the block's ByteBuff from the JVM heap or off-heap.
      * @return the HFileBlock or null if there is a HBase checksum mismatch
      */
     @VisibleForTesting
     protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
-        long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum, boolean updateMetrics)
-     throws IOException {
+        long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum, boolean updateMetrics,
+        boolean intoHeap) throws IOException {
       if (offset < 0) {
         throw new IOException("Invalid offset=" + offset + " trying to read "
             + "block (onDiskSize=" + onDiskSizeWithHeaderL + ")");
@@ -1728,7 +1754,7 @@ public class HFileBlock implements Cacheable {
       // says where to start reading. If we have the header cached, then we don't need to read
       // it again and we can likely read from last place we left off w/o need to backup and reread
       // the header we read last time through here.
-      ByteBuff onDiskBlock = allocator.allocate(onDiskSizeWithHeader + hdrSize);
+      ByteBuff onDiskBlock = this.allocate(onDiskSizeWithHeader + hdrSize, intoHeap);
       boolean initHFileBlockSuccess = false;
       try {
         if (headerBuf != null) {
@@ -2072,7 +2098,7 @@ public class HFileBlock implements Cacheable {
                    " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader;
   }
 
-  public HFileBlock deepClone() {
+  public HFileBlock deepCloneOnHeap() {
     return new HFileBlock(this, true);
   }
 }
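
The intoHeap flag documented in this file's readBlockData() javadoc boils down to choosing the allocator before the read happens. A tiny hypothetical sketch of that dispatch, using plain java.nio rather than HBase's ByteBuffAllocator:

import java.nio.ByteBuffer;

/** Hypothetical reader sketch: the caller decides where the block's memory comes from. */
final class BlockReadSketch {
  interface Allocator {
    ByteBuffer allocate(int size);
  }

  static final Allocator HEAP = ByteBuffer::allocate;          // plain JVM heap
  static final Allocator POOLED = ByteBuffer::allocateDirect;  // stand-in for the off-heap reservoir

  /**
   * intoHeap=true forces a heap buffer (for blocks headed to the heap-accounted L1 cache);
   * intoHeap=false lets the pooled/off-heap allocator serve the read (data blocks on the
   * CombinedBlockCache read path).
   */
  static ByteBuffer allocateBlockBuffer(int size, boolean intoHeap) {
    return intoHeap ? HEAP.allocate(size) : POOLED.allocate(size);
  }
}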
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 5fdb66f..1137961 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -272,8 +272,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
             if (LOG.isTraceEnabled()) {
               LOG.trace("Prefetch start " + getPathOffsetEndStr(path, offset, end));
             }
-            // TODO: Could we use block iterator in here? Would that get stuff into the cache?
-            HFileBlock prevBlock = null;
+            // Don't use BlockIterator here, because it's designed to read the load-on-open section.
+            long onDiskSizeOfNextBlock = -1;
             while (offset < end) {
               if (Thread.interrupted()) {
                 break;
@@ -282,16 +282,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
               // the internal-to-hfileblock thread local which holds the overread that gets the
               // next header, will not have happened...so, pass in the onDiskSize gotten from the
               // cached block. This 'optimization' triggers extremely rarely I'd say.
-              long onDiskSize = prevBlock != null? prevBlock.getNextBlockOnDiskSize(): -1;
-              HFileBlock block = readBlock(offset, onDiskSize, /*cacheBlock=*/true,
-                  /*pread=*/true, false, false, null, null);
-              // Need not update the current block. Ideally here the readBlock won't find the
-              // block in cache. We call this readBlock so that block data is read from FS and
-              // cached in BC. So there is no reference count increment that happens here.
-              // The return will ideally be a noop because the block is not of MemoryType SHARED.
-              returnBlock(block);
-              prevBlock = block;
-              offset += block.getOnDiskSizeWithHeader();
+              HFileBlock block = readBlock(offset, onDiskSizeOfNextBlock, /* cacheBlock= */true,
+                /* pread= */true, false, false, null, null);
+              try {
+                onDiskSizeOfNextBlock = block.getNextBlockOnDiskSize();
+                offset += block.getOnDiskSizeWithHeader();
+              } finally {
+                // Ideally readBlock won't find the block in the cache here; we call it so that
+                // the block data is read from the FS and cached in the block cache. We must call
+                // returnBlock to decrease the block's reference count.
+                returnBlock(block);
+              }
             }
           } catch (IOException e) {
             // IOExceptions are probably due to region closes (relocation, etc.)
@@ -1419,7 +1420,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       // Cache Miss, please load.
 
       HFileBlock compressedBlock =
-          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false);
+          fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true);
       HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
       if (compressedBlock != uncompressedBlock) {
         compressedBlock.release();
@@ -1434,6 +1435,24 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
     }
   }
 
+  /**
+   * If the expected block is a data block, its ByteBuff will be allocated from
+   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator}, which usually means off-heap;
+   * otherwise the ByteBuff is allocated from the JVM heap.
+   * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean,
+   *      boolean, boolean)
+   */
+  private boolean shouldUseHeap(BlockType expectedBlockType) {
+    if (cacheConf.getBlockCache() == null) {
+      return false;
+    } else if (!cacheConf.isCombinedBlockCache()) {
+      // A block cached in LruBlockCache must be a heap one, so allocate the block's memory from
+      // the heap to save an extra off-heap to heap copy.
+      return true;
+    }
+    return expectedBlockType != null && !expectedBlockType.isData();
+  }
+
   @Override
   public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
       final boolean cacheBlock, boolean pread, final boolean isCompaction,
@@ -1505,8 +1524,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
 
         TraceUtil.addTimelineAnnotation("blockCacheMiss");
         // Load block from filesystem.
-        HFileBlock hfileBlock =
-            fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread, !isCompaction);
+        HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread,
+          !isCompaction, shouldUseHeap(expectedBlockType));
         validateBlockType(hfileBlock, expectedBlockType);
         HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader);
         BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory();
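
shouldUseHeap() above encodes a three-way decision. A standalone restatement of it, with hypothetical enum names rather than HBase's CacheConfig/BlockType API, may make the cases easier to scan:

/** Sketch of the heap-vs-off-heap allocation decision; names here are assumptions, not HBase API. */
final class AllocationPolicySketch {
  enum CacheKind { NONE, LRU_ONLY, COMBINED }
  enum Expected { DATA, NON_DATA, UNKNOWN }

  static boolean shouldUseHeap(CacheKind cache, Expected expected) {
    if (cache == CacheKind.NONE) {
      return false; // no block cache: the buffer only lives for the RPC, the pooled allocator is fine
    }
    if (cache == CacheKind.LRU_ONLY) {
      return true;  // everything cached is heap-accounted, so read straight into heap and skip a copy
    }
    // CombinedBlockCache: index/meta blocks go to the heap L1, data blocks stay off-heap.
    // When the type is unknown, read off-heap and clone to heap later only if the L1 cache
    // ends up keeping the block.
    return expected == Expected.NON_DATA;
  }
}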
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index c2f07cd..b01d014 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -354,6 +354,32 @@ public class LruBlockCache implements FirstLevelBlockCache {
     }
   }
 
+  /**
+   * A block cached in LRUBlockCache is always a heap block: on the one hand, heap access is
+   * faster than off-heap access, which benefits the small index and meta blocks cached in
+   * CombinedBlockCache; on the other hand, the LRUBlockCache size is accounted against the total
+   * heap size, so caching an off-heap block would skew that accounting. Here we clone the block
+   * into a heap block if it is off-heap, otherwise we use the original block. The key point is to
+   * maintain the block's refCnt correctly (HBASE-22127): <br>
+   * 1. if we cache the cloned heap block, its refCnt is a brand-new one, which is easy to handle; <br>
+   * 2. if we cache the original heap block, it is certainly not tracked by the ByteBuffAllocator's
+   * reservoir, so once both the RPC path and the LRUBlockCache release it, the JVM can garbage
+   * collect it; hence the retain here.
+   * @param buf the original block
+   * @return a block backed by heap memory.
+   */
+  private Cacheable asReferencedHeapBlock(Cacheable buf) {
+    if (buf instanceof HFileBlock) {
+      HFileBlock blk = ((HFileBlock) buf);
+      if (!blk.isOnHeap()) {
+        return blk.deepCloneOnHeap();
+      }
+    }
+    // The block will be referenced by this LRUBlockCache, so should increase its refCnt here.
+    return buf.retain();
+  }
+
   // BlockCache implementation
 
   /**
@@ -402,8 +428,8 @@ public class LruBlockCache implements FirstLevelBlockCache {
       }
       return;
     }
-    // The block will be referenced by the LRUBlockCache, so should increase the refCnt here.
-    buf.retain();
+    // Ensure that the block is a heap one.
+    buf = asReferencedHeapBlock(buf);
     cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
     long newSize = updateSizeMetrics(cb, false);
     map.put(cacheKey, cb);
@@ -503,7 +529,7 @@ public class LruBlockCache implements FirstLevelBlockCache {
           if (caching) {
             if (result instanceof HFileBlock && ((HFileBlock) result).usesSharedMemory()) {
               Cacheable original = result;
-              result = ((HFileBlock) original).deepClone();
+              result = ((HFileBlock) original).deepCloneOnHeap();
               // deepClone an new one, so need to put the original one back to free it.
               victimHandler.returnBlock(cacheKey, original);
             }
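
asReferencedHeapBlock() above is the clone-or-retain step that keeps LRUBlockCache heap-only while preserving reference counts. A minimal sketch of the same idea with a hypothetical SketchBlock type (a ByteBuffer plus an AtomicInteger refCnt, not HBase's HFileBlock):

import java.nio.ByteBuffer;
import java.util.concurrent.atomic.AtomicInteger;

final class SketchBlock {
  final ByteBuffer buf;
  final AtomicInteger refCnt = new AtomicInteger(1);

  SketchBlock(ByteBuffer buf) {
    this.buf = buf;
  }

  boolean isOnHeap() {
    return buf.hasArray();
  }

  SketchBlock retain() {
    refCnt.incrementAndGet();
    return this;
  }

  /** Deep-copy the payload into a fresh heap buffer that carries a brand-new refCnt. */
  SketchBlock deepCloneOnHeap() {
    ByteBuffer heap = ByteBuffer.allocate(buf.remaining());
    heap.put(buf.duplicate());
    heap.flip();
    return new SketchBlock(heap);
  }
}

final class HeapCachingSketch {
  /** What a heap-only cache should hold: either a heap clone or a retained heap original. */
  static SketchBlock asReferencedHeapBlock(SketchBlock b) {
    if (!b.isOnHeap()) {
      return b.deepCloneOnHeap(); // new refCnt, detached from the off-heap reservoir
    }
    return b.retain();            // the cache now co-owns the existing heap block
  }
}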
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 0f3446e..91f3986 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -50,6 +50,8 @@ import java.util.concurrent.atomic.LongAdder;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Consumer;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.io.HeapSize;
@@ -557,23 +559,54 @@ public class BucketCache implements BlockCache, HeapSize {
     return evictBlock(cacheKey, true);
   }
 
-  private RAMQueueEntry checkRamCache(BlockCacheKey cacheKey) {
-    RAMQueueEntry removedBlock = ramCache.remove(cacheKey);
-    if (removedBlock != null) {
-      this.blockNumber.decrement();
-      this.heapSize.add(-1 * removedBlock.getData().heapSize());
+  // does not check for the ref count. Just tries to evict it if found in the
+  // bucket map
+  private boolean forceEvict(BlockCacheKey cacheKey) {
+    if (!cacheEnabled) {
+      return false;
     }
-    return removedBlock;
+    boolean existed = removeFromRamCache(cacheKey);
+    BucketEntry bucketEntry = backingMap.get(cacheKey);
+    if (bucketEntry == null) {
+      if (existed) {
+        cacheStats.evicted(0, cacheKey.isPrimary());
+        return true;
+      } else {
+        return false;
+      }
+    }
+    ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntry.offset());
+    try {
+      lock.writeLock().lock();
+      if (backingMap.remove(cacheKey, bucketEntry)) {
+        blockEvicted(cacheKey, bucketEntry, !existed);
+      } else {
+        return false;
+      }
+    } finally {
+      lock.writeLock().unlock();
+    }
+    cacheStats.evicted(bucketEntry.getCachedTime(), cacheKey.isPrimary());
+    return true;
+  }
+
+  private boolean removeFromRamCache(BlockCacheKey cacheKey) {
+    return ramCache.remove(cacheKey, re -> {
+      if (re != null) {
+        this.blockNumber.decrement();
+        this.heapSize.add(-1 * re.getData().heapSize());
+      }
+    });
   }
 
   public boolean evictBlock(BlockCacheKey cacheKey, boolean deletedBlock) {
     if (!cacheEnabled) {
       return false;
     }
-    RAMQueueEntry removedBlock = checkRamCache(cacheKey);
+    boolean existed = removeFromRamCache(cacheKey);
     BucketEntry bucketEntry = backingMap.get(cacheKey);
     if (bucketEntry == null) {
-      if (removedBlock != null) {
+      if (existed) {
         cacheStats.evicted(0, cacheKey.isPrimary());
         return true;
       } else {
@@ -586,7 +619,7 @@ public class BucketCache implements BlockCache, HeapSize {
       int refCount = bucketEntry.getRefCount();
       if (refCount == 0) {
         if (backingMap.remove(cacheKey, bucketEntry)) {
-          blockEvicted(cacheKey, bucketEntry, removedBlock == null);
+          blockEvicted(cacheKey, bucketEntry, !existed);
         } else {
           return false;
         }
@@ -1009,10 +1042,12 @@ public class BucketCache implements BlockCache, HeapSize {
           putIntoBackingMap(key, bucketEntries[i]);
         }
         // Always remove from ramCache even if we failed adding it to the block cache above.
-        RAMQueueEntry ramCacheEntry = ramCache.remove(key);
-        if (ramCacheEntry != null) {
-          heapSize.add(-1 * entries.get(i).getData().heapSize());
-        } else if (bucketEntries[i] != null){
+        boolean existed = ramCache.remove(key, re -> {
+          if (re != null) {
+            heapSize.add(-1 * re.getData().heapSize());
+          }
+        });
+        if (!existed && bucketEntries[i] != null) {
           // Block should have already been evicted. Remove it and free space.
           ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntries[i].offset());
           try {
@@ -1737,12 +1772,23 @@ public class BucketCache implements BlockCache, HeapSize {
       return previous;
     }
 
-    public RAMQueueEntry remove(BlockCacheKey key) {
+    public boolean remove(BlockCacheKey key) {
+      return remove(key, re->{});
+    }
+
+    /**
+     * A {@link Consumer} is taken here because once the removed entry releases its reference
+     * count, its ByteBuffers may be recycled and accessing them outside this method would throw
+     * an exception. The consumer can access the entry being removed before its reference count is
+     * released. Note: do not change the entry's reference count inside the {@link Consumer}.
+     */
+    public boolean remove(BlockCacheKey key, Consumer<RAMQueueEntry> action) {
       RAMQueueEntry previous = delegate.remove(key);
+      action.accept(previous);
       if (previous != null) {
         previous.getData().release();
       }
-      return previous;
+      return previous != null;
     }
 
     public boolean isEmpty() {
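
The Consumer-taking remove() above enforces an ordering: observe the entry first, release its reference count second, because release may hand the backing buffers back to the pool. A self-contained sketch of that contract with a hypothetical Entry type (not RAMQueueEntry):

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

final class ReleaseOnRemoveSketch {
  static final class Entry {
    final AtomicInteger refCnt = new AtomicInteger(1);
    final long heapSize;

    Entry(long heapSize) {
      this.heapSize = heapSize;
    }

    void release() {
      refCnt.decrementAndGet(); // at 0 the backing buffers may be recycled by the pool
    }
  }

  private final Map<String, Entry> delegate = new ConcurrentHashMap<>();

  /**
   * The consumer observes the entry (e.g. to subtract its heapSize from a counter) strictly
   * before release(), because afterwards the entry's buffers may already be recycled. It may
   * receive null when the key was absent.
   */
  boolean remove(String key, Consumer<Entry> beforeRelease) {
    Entry previous = delegate.remove(key);
    beforeRelease.accept(previous);
    if (previous != null) {
      previous.release();
    }
    return previous != null;
  }
}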
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index c432fa9..2aebc8c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -100,7 +100,8 @@ public class TestChecksum {
     meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
     HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
         meta, ByteBuffAllocator.HEAP);
-    HFileBlock b = hbr.readBlockData(0, -1, false, false);
+    HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
+    assertTrue(b.isOnHeap());
     assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
   }
 
@@ -146,7 +147,8 @@ public class TestChecksum {
       meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
       HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, (HFileSystem) fs, path,
           meta, ByteBuffAllocator.HEAP);
-      HFileBlock b = hbr.readBlockData(0, -1, false, false);
+      HFileBlock b = hbr.readBlockData(0, -1, false, false, true);
+      assertTrue(b.isOnHeap());
 
       // verify SingleByteBuff checksum.
       verifySBBCheckSum(b.getBufferReadOnly());
@@ -215,7 +217,7 @@ public class TestChecksum {
               .withHBaseCheckSum(true)
               .build();
         HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta);
-        HFileBlock b = hbr.readBlockData(0, -1, pread, false);
+        HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
         b.sanityCheck();
         assertEquals(4936, b.getUncompressedSizeWithoutHeader());
         assertEquals(algo == GZ ? 2173 : 4936,
@@ -236,19 +238,19 @@ public class TestChecksum {
         // requests. Verify that this is correct.
         for (int i = 0; i <
              HFileBlock.CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD + 1; i++) {
-          b = hbr.readBlockData(0, -1, pread, false);
+          b = hbr.readBlockData(0, -1, pread, false, true);
           assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
           assertEquals(0, HFile.getAndResetChecksumFailuresCount());
         }
         // The next read should have hbase checksum verification re-enabled,
         // we verify this by asserting that there was a hbase-checksum failure.
-        b = hbr.readBlockData(0, -1, pread, false);
+        b = hbr.readBlockData(0, -1, pread, false, true);
         assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         assertEquals(1, HFile.getAndResetChecksumFailuresCount());
 
         // Since the above encountered a checksum failure, we switch
         // back to not checking hbase checksums.
-        b = hbr.readBlockData(0, -1, pread, false);
+        b = hbr.readBlockData(0, -1, pread, false, true);
         assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         assertEquals(0, HFile.getAndResetChecksumFailuresCount());
         is.close();
@@ -260,7 +262,7 @@ public class TestChecksum {
         assertEquals(false, newfs.useHBaseChecksum());
         is = new FSDataInputStreamWrapper(newfs, path);
         hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta);
-        b = hbr.readBlockData(0, -1, pread, false);
+        b = hbr.readBlockData(0, -1, pread, false, true);
         is.close();
         b.sanityCheck();
         b = b.unpack(meta, hbr);
@@ -343,7 +345,7 @@ public class TestChecksum {
         HFileBlock.FSReader hbr =
             new HFileBlock.FSReaderImpl(new FSDataInputStreamWrapper(is, nochecksum), totalSize,
                 hfs, path, meta, ByteBuffAllocator.HEAP);
-        HFileBlock b = hbr.readBlockData(0, -1, pread, false);
+        HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
         assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         is.close();
         b.sanityCheck();
@@ -389,13 +391,13 @@ public class TestChecksum {
 
     @Override
     protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset,
-        long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum, boolean updateMetrics)
-        throws IOException {
+        long onDiskSizeWithHeaderL, boolean pread, boolean verifyChecksum, boolean updateMetrics,
+        boolean useHeap) throws IOException {
       if (verifyChecksum) {
         corruptDataStream = true;
       }
       HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, pread,
-          verifyChecksum, updateMetrics);
+        verifyChecksum, updateMetrics, useHeap);
       corruptDataStream = false;
       return b;
     }
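
The loop in the test above relies on the reader's checksum-failure fallback: after an HBase-checksum mismatch it verifies via HDFS checksums for a fixed number of reads, then re-enables HBase checksums. A small sketch of that toggle, with hypothetical names (the real logic lives in HFileBlock.FSReaderImpl around CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD):

/** Sketch only: skip HBase-level verification for N reads after a failure, then re-enable it. */
final class ChecksumFallbackSketch {
  private final int threshold;
  private int readsSinceFailure = -1; // -1 means "no recent failure"

  ChecksumFallbackSketch(int threshold) {
    this.threshold = threshold;
  }

  boolean shouldVerifyWithHBaseChecksum() {
    return readsSinceFailure < 0;
  }

  void onChecksumFailure() {
    readsSinceFailure = 0;            // fall back to HDFS-level checksums for a while
  }

  void onReadCompleted() {
    if (readsSinceFailure >= 0 && ++readsSinceFailure > threshold) {
      readsSinceFailure = -1;         // give HBase checksums another chance
    }
  }
}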
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index f58fe3e..0ed933b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -17,8 +17,12 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_IOENGINE_KEY;
+import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY;
+import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
@@ -104,33 +108,129 @@ public class TestHFile  {
     fs = TEST_UTIL.getTestFileSystem();
   }
 
-  @Test
-  public void testReaderWithoutBlockCache() throws Exception {
-    int bufCount = 32;
+  private ByteBuffAllocator initAllocator(boolean reservoirEnabled, int bufSize, int bufCount,
+      int minAllocSize) {
     Configuration that = HBaseConfiguration.create(conf);
+    that.setInt(BUFFER_SIZE_KEY, bufSize);
     that.setInt(MAX_BUFFER_COUNT_KEY, bufCount);
-    // AllByteBuffers will be allocated from the buffers.
-    that.setInt(MIN_ALLOCATE_SIZE_KEY, 0);
-    ByteBuffAllocator alloc = ByteBuffAllocator.create(that, true);
-    List<ByteBuff> buffs = new ArrayList<>();
+    // All ByteBuffers will be allocated from the buffers.
+    that.setInt(MIN_ALLOCATE_SIZE_KEY, minAllocSize);
+    return ByteBuffAllocator.create(that, reservoirEnabled);
+  }
+
+  private void fillByteBuffAllocator(ByteBuffAllocator alloc, int bufCount) {
     // Fill the allocator with bufCount ByteBuffer
+    List<ByteBuff> buffs = new ArrayList<>();
     for (int i = 0; i < bufCount; i++) {
       buffs.add(alloc.allocateOneBuffer());
+      Assert.assertEquals(alloc.getQueueSize(), 0);
     }
-    Assert.assertEquals(alloc.getQueueSize(), 0);
-    for (ByteBuff buf : buffs) {
-      buf.release();
-    }
+    buffs.forEach(ByteBuff::release);
     Assert.assertEquals(alloc.getQueueSize(), bufCount);
+  }
+
+  @Test
+  public void testReaderWithoutBlockCache() throws Exception {
+    int bufCount = 32;
+    // All ByteBuffers will be allocated from the buffers.
+    ByteBuffAllocator alloc = initAllocator(true, 64 * 1024, bufCount, 0);
+    fillByteBuffAllocator(alloc, bufCount);
     // start write to store file.
     Path path = writeStoreFile();
     try {
-      readStoreFile(path, that, alloc);
+      readStoreFile(path, conf, alloc);
     } catch (Exception e) {
       // fail test
       assertTrue(false);
     }
     Assert.assertEquals(bufCount, alloc.getQueueSize());
+    alloc.clean();
+  }
+
+  /**
+   * Test case for HBASE-22127 in LruBlockCache.
+   */
+  @Test
+  public void testReaderWithLRUBlockCache() throws Exception {
+    int bufCount = 1024, blockSize = 64 * 1024;
+    ByteBuffAllocator alloc = initAllocator(true, blockSize, bufCount, 0);
+    fillByteBuffAllocator(alloc, bufCount);
+    Path storeFilePath = writeStoreFile();
+    // Open the file reader with LRUBlockCache
+    BlockCache lru = new LruBlockCache(1024 * 1024 * 32, blockSize, true, conf);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, lru, alloc);
+    HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, conf);
+    long offset = 0;
+    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
+      BlockCacheKey key = new BlockCacheKey(storeFilePath.getName(), offset);
+      HFileBlock block = reader.readBlock(offset, -1, true, true, false, true, null, null);
+      offset += block.getOnDiskSizeWithHeader();
+      // Ensure the block is a heap one.
+      Cacheable cachedBlock = lru.getBlock(key, false, false, true);
+      Assert.assertNotNull(cachedBlock);
+      Assert.assertTrue(cachedBlock instanceof HFileBlock);
+      Assert.assertTrue(((HFileBlock) cachedBlock).isOnHeap());
+      // Should never allocate an off-heap block from the allocator, because blocks for the LRU cache are read onto the heap.
+      Assert.assertEquals(bufCount, alloc.getQueueSize());
+      block.release(); // return the ByteBuffer back to the allocator.
+    }
+    reader.close();
+    Assert.assertEquals(bufCount, alloc.getQueueSize());
+    alloc.clean();
+    lru.shutdown();
+  }
+
+  private BlockCache initCombinedBlockCache() {
+    Configuration that = HBaseConfiguration.create(conf);
+    that.setFloat(BUCKET_CACHE_SIZE_KEY, 32); // 32MB for bucket cache.
+    that.set(BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    BlockCache bc = BlockCacheFactory.createBlockCache(that);
+    Assert.assertNotNull(bc);
+    Assert.assertTrue(bc instanceof CombinedBlockCache);
+    return bc;
+  }
+
+  /**
+   * Test case for HBASE-22127 in CombinedBlockCache
+   */
+  @Test
+  public void testReaderWithCombinedBlockCache() throws Exception {
+    int bufCount = 1024, blockSize = 64 * 1024;
+    ByteBuffAllocator alloc = initAllocator(true, blockSize, bufCount, 0);
+    fillByteBuffAllocator(alloc, bufCount);
+    Path storeFilePath = writeStoreFile();
+    // Open the file reader with CombinedBlockCache
+    BlockCache combined = initCombinedBlockCache();
+    conf.setBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, true);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, combined, alloc);
+    HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConfig, true, conf);
+    long offset = 0;
+    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
+      BlockCacheKey key = new BlockCacheKey(storeFilePath.getName(), offset);
+      HFileBlock block = reader.readBlock(offset, -1, true, true, false, true, null, null);
+      offset += block.getOnDiskSizeWithHeader();
+      // Read the cached block.
+      Cacheable cachedBlock = combined.getBlock(key, false, false, true);
+      try {
+        Assert.assertNotNull(cachedBlock);
+        Assert.assertTrue(cachedBlock instanceof HFileBlock);
+        HFileBlock hfb = (HFileBlock) cachedBlock;
+        // Data block will be cached in BucketCache, so it should be an off-heap block.
+        if (hfb.getBlockType().isData()) {
+          Assert.assertFalse(hfb.isOnHeap());
+        } else {
+          // Non-data block will be cached in LRUBlockCache, so it must be an on-heap block.
+          Assert.assertTrue(hfb.isOnHeap());
+        }
+      } finally {
+        combined.returnBlock(key, cachedBlock);
+      }
+      block.release(); // return the ByteBuffer back to the allocator.
+    }
+    reader.close();
+    combined.shutdown();
+    Assert.assertEquals(bufCount, alloc.getQueueSize());
+    alloc.clean();
   }
 
   private void readStoreFile(Path storeFilePath, Configuration conf, ByteBuffAllocator alloc)
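
The new TestHFile cases above all exercise the same allocate/use/release contract for pooled ByteBuffs: every buffer taken from the ByteBuffAllocator must be released, otherwise the pool drains and later allocations quietly fall back to on-heap ByteBuffers. A minimal standalone sketch of that contract, using only the ByteBuffAllocator methods visible in the diffs above (the class name AllocatorContractSketch is hypothetical and the pool sizes are arbitrary):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.nio.ByteBuff;

    public class AllocatorContractSketch {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        conf.setInt(ByteBuffAllocator.MAX_BUFFER_COUNT_KEY, 4);    // tiny pool, for illustration only
        conf.setInt(ByteBuffAllocator.BUFFER_SIZE_KEY, 64 * 1024); // 64KB buffers, the common block size
        ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true);

        List<ByteBuff> buffs = new ArrayList<>();
        for (int i = 0; i < 4; i++) {
          buffs.add(alloc.allocateOneBuffer()); // take every pooled buffer
        }
        // Each ByteBuff must be released; otherwise the pool stays empty and further
        // allocations fall back to on-heap ByteBuffers.
        buffs.forEach(ByteBuff::release);
        System.out.println("buffers back in pool: " + alloc.getQueueSize()); // expect 4
        alloc.clean();
      }
    }
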
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index efdae16..2733ca2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -40,6 +40,8 @@ import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorCompletionService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -48,6 +50,7 @@ import org.apache.hadoop.hbase.ArrayBackedTag;
 import org.apache.hadoop.hbase.CellComparatorImpl;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
@@ -68,6 +71,7 @@ import org.apache.hadoop.hbase.util.ChecksumType;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.hadoop.io.compress.Compressor;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.ClassRule;
 import org.junit.Test;
@@ -93,10 +97,12 @@ public class TestHFileBlock {
 
   private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlock.class);
 
-  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, GZ };
+  // TODO: re-enable the GZ algorithm in HBASE-21937; ByteBuff unpack is not supported yet.
+  static final Compression.Algorithm[] COMPRESSION_ALGORITHMS = { NONE, /* GZ */ };
 
   private static final int NUM_TEST_BLOCKS = 1000;
   private static final int NUM_READER_THREADS = 26;
+  private static final int MAX_BUFFER_COUNT = 2048;
 
   // Used to generate KeyValues
   private static int NUM_KEYVALUES = 50;
@@ -108,14 +114,51 @@ public class TestHFileBlock {
 
   private final boolean includesMemstoreTS;
   private final boolean includesTag;
-  public TestHFileBlock(boolean includesMemstoreTS, boolean includesTag) {
+  private final boolean useHeapAllocator;
+  private final ByteBuffAllocator alloc;
+
+  public TestHFileBlock(boolean includesMemstoreTS, boolean includesTag, boolean useHeapAllocator) {
     this.includesMemstoreTS = includesMemstoreTS;
     this.includesTag = includesTag;
+    this.useHeapAllocator = useHeapAllocator;
+    this.alloc = useHeapAllocator ? ByteBuffAllocator.HEAP : createOffHeapAlloc();
+    assertAllocator();
   }
 
   @Parameters
   public static Collection<Object[]> parameters() {
-    return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
+    List<Object[]> params = new ArrayList<>();
+    // Generate boolean triples from 000 to 111
+    for (int i = 0; i < (1 << 3); i++) {
+      Object[] flags = new Boolean[3];
+      for (int k = 0; k < 3; k++) {
+        flags[k] = (i & (1 << k)) != 0;
+      }
+      params.add(flags);
+    }
+    return params;
+  }
+
+  private ByteBuffAllocator createOffHeapAlloc() {
+    Configuration conf = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
+    conf.setInt(ByteBuffAllocator.MAX_BUFFER_COUNT_KEY, MAX_BUFFER_COUNT);
+    conf.setInt(ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY, 0);
+    ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true);
+    // Fill the allocator
+    List<ByteBuff> bufs = new ArrayList<>();
+    for (int i = 0; i < MAX_BUFFER_COUNT; i++) {
+      ByteBuff bb = alloc.allocateOneBuffer();
+      assertTrue(!bb.hasArray());
+      bufs.add(bb);
+    }
+    bufs.forEach(ByteBuff::release);
+    return alloc;
+  }
+
+  private void assertAllocator() {
+    if (!useHeapAllocator) {
+      assertEquals(MAX_BUFFER_COUNT, alloc.getQueueSize());
+    }
   }
 
   @Before
@@ -123,6 +166,12 @@ public class TestHFileBlock {
     fs = HFileSystem.get(TEST_UTIL.getConfiguration());
   }
 
+  @After
+  public void tearDown() throws IOException {
+    assertAllocator();
+    alloc.clean();
+  }
+
   static void writeTestBlockContents(DataOutputStream dos) throws IOException {
     // This compresses really well.
     for (int i = 0; i < 1000; ++i)
@@ -327,9 +376,8 @@ public class TestHFileBlock {
         .withIncludesMvcc(includesMemstoreTS)
         .withIncludesTags(includesTag)
         .withCompression(algo).build();
-        HFileBlock.FSReader hbr =
-            new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
-        HFileBlock b = hbr.readBlockData(0, -1, pread, false);
+        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta, alloc);
+        HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
         is.close();
         assertEquals(0, HFile.getAndResetChecksumFailuresCount());
 
@@ -341,14 +389,14 @@ public class TestHFileBlock {
 
         if (algo == GZ) {
           is = fs.open(path);
-          hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
-          b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE +
-                                b.totalChecksumBytes(), pread, false);
+          hbr = new HFileBlock.FSReaderImpl(is, totalSize, meta, alloc);
+          b = hbr.readBlockData(0,
+            2173 + HConstants.HFILEBLOCK_HEADER_SIZE + b.totalChecksumBytes(), pread, false, true);
           assertEquals(expected, b);
           int wrongCompressedSize = 2172;
           try {
-            b = hbr.readBlockData(0, wrongCompressedSize
-                + HConstants.HFILEBLOCK_HEADER_SIZE, pread, false);
+            hbr.readBlockData(0, wrongCompressedSize + HConstants.HFILEBLOCK_HEADER_SIZE, pread,
+              false, true);
             fail("Exception expected");
           } catch (IOException ex) {
             String expectedPrefix = "Passed in onDiskSizeWithHeader=";
@@ -356,8 +404,10 @@ public class TestHFileBlock {
                 + "'.\nMessage is expected to start with: '" + expectedPrefix
                 + "'", ex.getMessage().startsWith(expectedPrefix));
           }
+          assertTrue(b.release());
           is.close();
         }
+        assertTrue(expected.release());
       }
     }
   }
@@ -428,13 +478,13 @@ public class TestHFileBlock {
                 .withIncludesTags(includesTag)
                 .build();
           HFileBlock.FSReaderImpl hbr =
-              new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
+              new HFileBlock.FSReaderImpl(is, totalSize, meta, alloc);
           hbr.setDataBlockEncoder(dataBlockEncoder);
           hbr.setIncludesMemStoreTS(includesMemstoreTS);
           HFileBlock blockFromHFile, blockUnpacked;
           int pos = 0;
           for (int blockId = 0; blockId < numBlocks; ++blockId) {
-            blockFromHFile = hbr.readBlockData(pos, -1, pread, false);
+            blockFromHFile = hbr.readBlockData(pos, -1, pread, false, true);
             assertEquals(0, HFile.getAndResetChecksumFailuresCount());
             blockFromHFile.sanityCheck();
             pos += blockFromHFile.getOnDiskSizeWithHeader();
@@ -487,6 +537,10 @@ public class TestHFileBlock {
                   blockUnpacked, deserialized.unpack(meta, hbr));
               }
             }
+            assertTrue(blockUnpacked.release());
+            if (blockFromHFile != blockUnpacked) {
+              blockFromHFile.release();
+            }
           }
           is.close();
         }
@@ -557,7 +611,7 @@ public class TestHFileBlock {
                               .withIncludesTags(includesTag)
                               .withCompression(algo).build();
           HFileBlock.FSReader hbr =
-              new HFileBlock.FSReaderImpl(is, totalSize, meta, ByteBuffAllocator.HEAP);
+              new HFileBlock.FSReaderImpl(is, totalSize, meta, alloc);
           long curOffset = 0;
           for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
             if (!pread) {
@@ -569,7 +623,7 @@ public class TestHFileBlock {
             if (detailedLogging) {
               LOG.info("Reading block #" + i + " at offset " + curOffset);
             }
-            HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false);
+            HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, true);
             if (detailedLogging) {
               LOG.info("Block #" + i + ": " + b);
             }
@@ -583,7 +637,8 @@ public class TestHFileBlock {
 
             // Now re-load this block knowing the on-disk size. This tests a
             // different branch in the loader.
-            HFileBlock b2 = hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false);
+            HFileBlock b2 =
+                hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, true);
             b2.sanityCheck();
 
             assertEquals(b.getBlockType(), b2.getBlockType());
@@ -599,6 +654,7 @@ public class TestHFileBlock {
             assertEquals(b.getOnDiskDataSizeWithHeader(),
                          b2.getOnDiskDataSizeWithHeader());
             assertEquals(0, HFile.getAndResetChecksumFailuresCount());
+            assertTrue(b2.release());
 
             curOffset += b.getOnDiskSizeWithHeader();
 
@@ -606,14 +662,14 @@ public class TestHFileBlock {
               // NOTE: cache-on-write testing doesn't actually involve a BlockCache. It simply
               // verifies that the unpacked value read back off disk matches the unpacked value
               // generated before writing to disk.
-              b = b.unpack(meta, hbr);
+              HFileBlock newBlock = b.unpack(meta, hbr);
               // b's buffer has header + data + checksum while
               // expectedContents have header + data only
-              ByteBuff bufRead = b.getBufferReadOnly();
+              ByteBuff bufRead = newBlock.getBufferReadOnly();
               ByteBuffer bufExpected = expectedContents.get(i);
               boolean bytesAreCorrect = Bytes.compareTo(bufRead.array(),
                   bufRead.arrayOffset(),
-                  bufRead.limit() - b.totalChecksumBytes(),
+                  bufRead.limit() - newBlock.totalChecksumBytes(),
                   bufExpected.array(), bufExpected.arrayOffset(),
                   bufExpected.limit()) == 0;
               String wrongBytesMsg = "";
@@ -642,9 +698,12 @@ public class TestHFileBlock {
                 }
               }
               assertTrue(wrongBytesMsg, bytesAreCorrect);
+              assertTrue(newBlock.release());
+              if (newBlock != b) {
+                assertTrue(b.release());
+              }
             }
           }
-
           assertEquals(curOffset, fs.getFileStatus(path).getLen());
           is.close();
         }
@@ -687,29 +746,37 @@ public class TestHFileBlock {
         boolean pread = true;
         boolean withOnDiskSize = rand.nextBoolean();
         long expectedSize =
-          (blockId == NUM_TEST_BLOCKS - 1 ? fileSize
-              : offsets.get(blockId + 1)) - offset;
-
-        HFileBlock b;
+            (blockId == NUM_TEST_BLOCKS - 1 ? fileSize : offsets.get(blockId + 1)) - offset;
+        HFileBlock b = null;
         try {
           long onDiskSizeArg = withOnDiskSize ? expectedSize : -1;
-          b = hbr.readBlockData(offset, onDiskSizeArg, pread, false);
+          b = hbr.readBlockData(offset, onDiskSizeArg, pread, false, false);
+          if (useHeapAllocator) {
+            assertTrue(b.isOnHeap());
+          } else {
+            assertTrue(!b.getBlockType().isData() || !b.isOnHeap());
+          }
+          assertEquals(types.get(blockId), b.getBlockType());
+          assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
+          assertEquals(offset, b.getOffset());
         } catch (IOException ex) {
-          LOG.error("Error in client " + clientId + " trying to read block at "
-              + offset + ", pread=" + pread + ", withOnDiskSize=" +
-              withOnDiskSize, ex);
+          LOG.error("Error in client " + clientId + " trying to read block at " + offset
+              + ", pread=" + pread + ", withOnDiskSize=" + withOnDiskSize,
+            ex);
           return false;
+        } finally {
+          if (b != null) {
+            b.release();
+          }
         }
-
-        assertEquals(types.get(blockId), b.getBlockType());
-        assertEquals(expectedSize, b.getOnDiskSizeWithHeader());
-        assertEquals(offset, b.getOffset());
-
         ++numBlocksRead;
-        if (pread)
+        if (pread) {
           ++numPositionalRead;
-        if (withOnDiskSize)
+        }
+
+        if (withOnDiskSize) {
           ++numWithOnDiskSize;
+        }
       }
       LOG.info("Client " + clientId + " successfully read " + numBlocksRead +
         " blocks (with pread: " + numPositionalRead + ", with onDiskSize " +
@@ -717,7 +784,6 @@ public class TestHFileBlock {
 
       return true;
     }
-
   }
 
   @Test
@@ -742,7 +808,7 @@ public class TestHFileBlock {
                           .withCompression(compressAlgo)
                           .build();
       HFileBlock.FSReader hbr =
-          new HFileBlock.FSReaderImpl(is, fileSize, meta, ByteBuffAllocator.HEAP);
+          new HFileBlock.FSReaderImpl(is, fileSize, meta, alloc);
 
       Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
       ExecutorCompletionService<Boolean> ecs = new ExecutorCompletionService<>(exec);
@@ -761,7 +827,6 @@ public class TestHFileBlock {
             + ")");
         }
       }
-
       is.close();
     }
   }
@@ -874,9 +939,9 @@ public class TestHFileBlock {
     ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
     HFileContext meta = new HFileContextBuilder().build();
     HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, 52, -1, meta, ByteBuffAllocator.HEAP);
+        HFileBlock.FILL_HEADER, -1, 52, -1, meta, alloc);
     HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
-        HFileBlock.FILL_HEADER, -1, -1, -1, meta, ByteBuffAllocator.HEAP);
+        HFileBlock.FILL_HEADER, -1, -1, -1, meta, alloc);
     ByteBuffer buff1 = ByteBuffer.allocate(length);
     ByteBuffer buff2 = ByteBuffer.allocate(length);
     blockWithNextBlockMetadata.serialize(buff1, true);
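
Most of the TestHFileBlock changes above converge on one idiom: read a block, assert on it, then release it in a finally clause so the backing ByteBuff returns to the allocator even when an assertion or I/O error fires. A minimal sketch of that idiom, assuming the five-argument readBlockData signature introduced in this series (the class and method names are hypothetical; the final boolean is the useHeap flag added by the patch):

    package org.apache.hadoop.hbase.io.hfile;

    import java.io.IOException;

    // Placed in the same package as the tests so HFileBlock.FSReader is visible.
    public class BlockReleaseIdiom {
      // Read one block, inspect it, and always hand its ByteBuff back to the allocator.
      static BlockType readTypeAndRelease(HFileBlock.FSReader hbr, long offset) throws IOException {
        HFileBlock block = null;
        try {
          // Arguments mirror the calls above: offset, onDiskSizeWithHeader (-1 = unknown),
          // pread, updateMetrics, useHeap.
          block = hbr.readBlockData(offset, -1, true, false, true);
          return block.getBlockType();
        } finally {
          if (block != null) {
            block.release(); // returns the backing ByteBuff to the pool
          }
        }
      }
    }
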
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
index 73f1c24..6f8d0b0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
@@ -192,7 +192,7 @@ public class TestHFileBlockIndex {
       }
 
       missCount += 1;
-      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false);
+      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
       prevOffset = offset;
       prevOnDiskSize = onDiskSize;
       prevPread = pread;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
index 1222d07..508b1fe 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileEncryption.java
@@ -109,7 +109,7 @@ public class TestHFileEncryption {
 
   private long readAndVerifyBlock(long pos, HFileContext ctx, HFileBlock.FSReaderImpl hbr, int size)
       throws IOException {
-    HFileBlock b = hbr.readBlockData(pos, -1, false, false);
+    HFileBlock b = hbr.readBlockData(pos, -1, false, false, true);
     assertEquals(0, HFile.getAndResetChecksumFailuresCount());
     b.sanityCheck();
     assertFalse(b.isUnpacked());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
index b92f7c6..f8da706 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java
@@ -224,8 +224,8 @@ public class TestHFileWriterV3 {
     fsdis.seek(0);
     long curBlockPos = 0;
     while (curBlockPos <= trailer.getLastDataBlockOffset()) {
-      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
-        .unpack(context, blockReader);
+      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true)
+          .unpack(context, blockReader);
       assertEquals(BlockType.DATA, block.getBlockType());
       ByteBuff buf = block.getBufferWithoutHeader();
       int keyLen = -1;
@@ -285,8 +285,8 @@ public class TestHFileWriterV3 {
     while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) {
       LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " +
           trailer.getLoadOnOpenDataOffset());
-      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false)
-        .unpack(context, blockReader);
+      HFileBlock block = blockReader.readBlockData(curBlockPos, -1, false, false, true)
+          .unpack(context, blockReader);
       assertEquals(BlockType.META, block.getBlockType());
       Text t = new Text();
       ByteBuff buf = block.getBufferWithoutHeader();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLazyDataBlockDecompression.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLazyDataBlockDecompression.java
index 5935f91..f1a12a2 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLazyDataBlockDecompression.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLazyDataBlockDecompression.java
@@ -160,7 +160,7 @@ public class TestLazyDataBlockDecompression {
     CacheConfig cc = new CacheConfig(lazyCompressDisabled,
         new LruBlockCache(maxSize, HConstants.DEFAULT_BLOCKSIZE, false, lazyCompressDisabled));
     assertFalse(cc.shouldCacheDataCompressed());
-    assertTrue(cc.getBlockCache().get() instanceof LruBlockCache);
+    assertFalse(cc.isCombinedBlockCache());
     LruBlockCache disabledBlockCache = (LruBlockCache) cc.getBlockCache().get();
     LOG.info("disabledBlockCache=" + disabledBlockCache);
     assertEquals("test inconsistency detected.", maxSize, disabledBlockCache.getMaxSize());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
index 4e7291d..746cf8d 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
@@ -59,10 +59,10 @@ public class TestBucketWriterThread {
   private static class MockBucketCache extends BucketCache {
 
     public MockBucketCache(String ioEngineName, long capacity, int blockSize, int[] bucketSizes,
-      int writerThreadNum, int writerQLen, String persistencePath, int ioErrorsTolerationDuration)
-      throws FileNotFoundException, IOException {
+        int writerThreadNum, int writerQLen, String persistencePath, int ioErrorsTolerationDuration)
+        throws IOException {
       super(ioEngineName, capacity, blockSize, bucketSizes, writerThreadNum, writerQLen,
-        persistencePath, ioErrorsTolerationDuration, HBaseConfiguration.create());
+          persistencePath, ioErrorsTolerationDuration, HBaseConfiguration.create());
     }
 
     @Override
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
index 3348386..db15ca6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/AbstractTestDLS.java
@@ -83,7 +83,6 @@ import org.apache.hadoop.hbase.wal.WALFactory;
 import org.apache.hadoop.hbase.wal.WALKeyImpl;
 import org.apache.hadoop.hbase.wal.WALSplitUtil;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
-import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -192,9 +191,7 @@ public abstract class AbstractTestDLS {
     Path rootdir = FSUtils.getRootDir(conf);
 
     int numRegions = 50;
-    try (ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
-        Table t = installTable(zkw, numRegions)) {
-      TableName table = t.getName();
+    try (Table t = installTable(numRegions)) {
       List<RegionInfo> regions = null;
       HRegionServer hrs = null;
       for (int i = 0; i < NUM_RS; i++) {
@@ -224,7 +221,6 @@ public abstract class AbstractTestDLS {
 
       int count = 0;
       for (RegionInfo hri : regions) {
-        Path tdir = FSUtils.getWALTableDir(conf, table);
         @SuppressWarnings("deprecation")
         Path editsdir = WALSplitUtil
             .getRegionDirRecoveredEditsDir(FSUtils.getWALRegionDir(conf,
@@ -266,8 +262,7 @@ public abstract class AbstractTestDLS {
     // they will consume recovered.edits
     master.balanceSwitch(false);
 
-    try (ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
-        Table ht = installTable(zkw, numRegionsToCreate)) {
+    try (Table ht = installTable(numRegionsToCreate)) {
       HRegionServer hrs = findRSToKill(false);
       List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
       makeWAL(hrs, regions, numLogLines, 100);
@@ -329,8 +324,7 @@ public abstract class AbstractTestDLS {
     final Path logDir = new Path(rootdir,
         AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
 
-    try (ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
-        Table t = installTable(zkw, 40)) {
+    try (Table t = installTable(40)) {
       makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), numLogLines, 100);
 
       new Thread() {
@@ -380,8 +374,7 @@ public abstract class AbstractTestDLS {
 
     startCluster(NUM_RS); // NUM_RS=6.
 
-    try (ZKWatcher zkw = new ZKWatcher(conf, "distributed log splitting test", null);
-        Table table = installTable(zkw, numRegionsToCreate)) {
+    try (Table table = installTable(numRegionsToCreate)) {
       populateDataInTable(numRowsPerRegion);
 
       List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
@@ -482,11 +475,11 @@ public abstract class AbstractTestDLS {
     }
   }
 
-  private Table installTable(ZKWatcher zkw, int nrs) throws Exception {
-    return installTable(zkw, nrs, 0);
+  private Table installTable(int nrs) throws Exception {
+    return installTable(nrs, 0);
   }
 
-  private Table installTable(ZKWatcher zkw, int nrs, int existingRegions) throws Exception {
+  private Table installTable(int nrs, int existingRegions) throws Exception {
     // Create a table with regions
     byte[] family = Bytes.toBytes("family");
     LOG.info("Creating table with " + nrs + " regions");
@@ -497,14 +490,14 @@ public abstract class AbstractTestDLS {
     }
     assertEquals(nrs, numRegions);
     LOG.info("Waiting for no more RIT\n");
-    blockUntilNoRIT(zkw, master);
+    blockUntilNoRIT();
     // disable-enable cycle to get rid of table's dead regions left behind
     // by createMultiRegions
     assertTrue(TEST_UTIL.getAdmin().isTableEnabled(tableName));
     LOG.debug("Disabling table\n");
     TEST_UTIL.getAdmin().disableTable(tableName);
     LOG.debug("Waiting for no more RIT\n");
-    blockUntilNoRIT(zkw, master);
+    blockUntilNoRIT();
     NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
     LOG.debug("Verifying only catalog region is assigned\n");
     if (regions.size() != 1) {
@@ -515,7 +508,7 @@ public abstract class AbstractTestDLS {
     LOG.debug("Enabling table\n");
     TEST_UTIL.getAdmin().enableTable(tableName);
     LOG.debug("Waiting for no more RIT\n");
-    blockUntilNoRIT(zkw, master);
+    blockUntilNoRIT();
     LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
     regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
     assertEquals(numRegions + 1 + existingRegions, regions.size());
@@ -651,7 +644,7 @@ public abstract class AbstractTestDLS {
     return count;
   }
 
-  private void blockUntilNoRIT(ZKWatcher zkw, HMaster master) throws Exception {
+  private void blockUntilNoRIT() throws Exception {
     TEST_UTIL.waitUntilNoRegionsInTransition(60000);
   }
 


[hbase] 02/22: HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input.

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 53c783f660df9f8b9dfc7dba507c4f18db57babc
Author: huzheng <op...@gmail.com>
AuthorDate: Mon Feb 18 17:12:23 2019 +0800

    HBASE-21917 Make the HFileBlock#validateChecksum can accept ByteBuff as an input.
---
 .../apache/hadoop/hbase/io/hfile/ChecksumUtil.java | 153 ++++++++++++++-------
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |  14 +-
 .../apache/hadoop/hbase/io/hfile/TestChecksum.java |  64 ++++++---
 3 files changed, 151 insertions(+), 80 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
index 5eb1826..5317f0e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java
@@ -17,11 +17,12 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.fs.ChecksumException;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -35,8 +36,7 @@ import org.apache.hadoop.util.DataChecksum;
 public class ChecksumUtil {
   public static final Logger LOG = LoggerFactory.getLogger(ChecksumUtil.class);
 
-  /** This is used to reserve space in a byte buffer */
-  private static byte[] DUMMY_VALUE = new byte[128 * HFileBlock.CHECKSUM_SIZE];
+  public static final int CHECKSUM_BUF_SIZE = 256;
 
   /**
    * This is used by unit tests to make checksum failures throw an
@@ -78,50 +78,118 @@ public class ChecksumUtil {
   }
 
   /**
+   * Like Hadoop's {@link DataChecksum#verifyChunkedSums(ByteBuffer, ByteBuffer, String, long)},
+   * this method verifies the checksum of each chunk in the data. The difference is that this method
+   * accepts {@link ByteBuff} as arguments; we cannot add it to hadoop-common, so it is defined here.
+   * @param dataChecksum the checksum implementation used to recompute each chunk's checksum.
+   * @param data the data to verify.
+   * @param checksums the stored checksums to compare against.
+   * @param pathName the path of the file the data was read from; only used for logging.
+   * @return true if every chunk's checksum matches, false otherwise.
+   * @see org.apache.hadoop.util.DataChecksum#verifyChunkedSums(ByteBuffer, ByteBuffer, String,
+   *      long)
+   */
+  private static boolean verifyChunkedSums(DataChecksum dataChecksum, ByteBuff data,
+      ByteBuff checksums, String pathName) {
+    // Almost all HFile blocks are about 64KB, so the data will usually be a SingleByteBuff; use
+    // Hadoop's checksum verification directly, because it uses the native checksum and avoids any
+    // extra byte[] allocation or copying. (HBASE-21917)
+    if (data instanceof SingleByteBuff && checksums instanceof SingleByteBuff) {
+      // The checksums ByteBuff must also be a SingleByteBuff because it's duplicated from data.
+      ByteBuffer dataBB = (ByteBuffer) (data.nioByteBuffers()[0]).duplicate()
+          .position(data.position()).limit(data.limit());
+      ByteBuffer checksumBB = (ByteBuffer) (checksums.nioByteBuffers()[0]).duplicate()
+          .position(checksums.position()).limit(checksums.limit());
+      try {
+        dataChecksum.verifyChunkedSums(dataBB, checksumBB, pathName, 0);
+        return true;
+      } catch (ChecksumException e) {
+        return false;
+      }
+    }
+
+    // Only when the data block is larger than 4MB (the default buffer size in BucketCache) will
+    // the block be a MultiByteBuff. We use a small byte[] to update the checksum incrementally and
+    // keep GC pressure low; it's a rare case.
+    int checksumTypeSize = dataChecksum.getChecksumType().size;
+    if (checksumTypeSize == 0) {
+      return true;
+    }
+    // We have 5 checksum types now: NULL, DEFAULT, MIXED, CRC32 and CRC32C. The former three need
+    // 0 bytes, and the other two need 4 bytes.
+    assert checksumTypeSize == 4;
+
+    int bytesPerChecksum = dataChecksum.getBytesPerChecksum();
+    int startDataPos = data.position();
+    data.mark();
+    checksums.mark();
+    try {
+      // Allocate a small buffer to reduce young GC (HBASE-21917), and copy 256 bytes from the
+      // ByteBuff to update the checksum each time. Once we upgrade to a future JDK and Hadoop
+      // version that support DataChecksum#update(ByteBuffer), we won't need to update the checksum
+      // in multiple passes.
+      byte[] buf = new byte[CHECKSUM_BUF_SIZE];
+      byte[] sum = new byte[checksumTypeSize];
+      while (data.remaining() > 0) {
+        int n = Math.min(data.remaining(), bytesPerChecksum);
+        checksums.get(sum);
+        dataChecksum.reset();
+        for (int remain = n, len; remain > 0; remain -= len) {
+          // Copy up to 256 bytes from the ByteBuff to update the checksum each time; if fewer
+          // than 256 bytes remain, update only what is left.
+          len = Math.min(CHECKSUM_BUF_SIZE, remain);
+          data.get(buf, 0, len);
+          dataChecksum.update(buf, 0, len);
+        }
+        int calculated = (int) dataChecksum.getValue();
+        int stored = (sum[0] << 24 & 0xff000000) | (sum[1] << 16 & 0xff0000)
+            | (sum[2] << 8 & 0xff00) | (sum[3] & 0xff);
+        if (calculated != stored) {
+          if (LOG.isTraceEnabled()) {
+            long errPos = data.position() - startDataPos - n;
+            LOG.trace("Checksum error: {} at {} expected: {} got: {}", pathName, errPos, stored,
+              calculated);
+          }
+          return false;
+        }
+      }
+    } finally {
+      data.reset();
+      checksums.reset();
+    }
+    return true;
+  }
+
+  /**
    * Validates that the data in the specified HFileBlock matches the checksum. Generates the
    * checksums for the data and then validate that it matches those stored in the end of the data.
-   * @param buffer Contains the data in following order: HFileBlock header, data, checksums.
+   * @param buf Contains the data in the following order: HFileBlock header, data, checksums.
    * @param pathName Path of the HFile to which the {@code data} belongs. Only used for logging.
    * @param offset offset of the data being validated. Only used for logging.
    * @param hdrSize Size of the block header in {@code data}. Only used for logging.
    * @return True if checksum matches, else false.
    */
-  static boolean validateChecksum(ByteBuffer buffer, String pathName, long offset, int hdrSize)
-      throws IOException {
-    // A ChecksumType.NULL indicates that the caller is not interested in validating checksums,
-    // so we always return true.
-    ChecksumType cktype =
-        ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
-    if (cktype == ChecksumType.NULL) {
-      return true; // No checksum validations needed for this block.
+  static boolean validateChecksum(ByteBuff buf, String pathName, long offset, int hdrSize) {
+    ChecksumType ctype = ChecksumType.codeToType(buf.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX));
+    if (ctype == ChecksumType.NULL) {
+      return true; // No checksum validations needed for this block.
     }
 
     // read in the stored value of the checksum size from the header.
-    int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
-
-    DataChecksum dataChecksum = DataChecksum.newDataChecksum(
-        cktype.getDataChecksumType(), bytesPerChecksum);
+    int bytesPerChecksum = buf.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX);
+    DataChecksum dataChecksum =
+        DataChecksum.newDataChecksum(ctype.getDataChecksumType(), bytesPerChecksum);
     assert dataChecksum != null;
     int onDiskDataSizeWithHeader =
-        buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
+        buf.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);
     if (LOG.isTraceEnabled()) {
-      LOG.info("dataLength=" + buffer.capacity()
-          + ", sizeWithHeader=" + onDiskDataSizeWithHeader
-          + ", checksumType=" + cktype.getName()
-          + ", file=" + pathName
-          + ", offset=" + offset
-          + ", headerSize=" + hdrSize
-          + ", bytesPerChecksum=" + bytesPerChecksum);
-    }
-    try {
-      ByteBuffer data = (ByteBuffer) buffer.duplicate().position(0).limit(onDiskDataSizeWithHeader);
-      ByteBuffer checksums = (ByteBuffer) buffer.duplicate().position(onDiskDataSizeWithHeader)
-          .limit(buffer.capacity());
-      dataChecksum.verifyChunkedSums(data, checksums, pathName, 0);
-    } catch (ChecksumException e) {
-      return false;
+      LOG.info("dataLength=" + buf.capacity() + ", sizeWithHeader=" + onDiskDataSizeWithHeader
+          + ", checksumType=" + ctype.getName() + ", file=" + pathName + ", offset=" + offset
+          + ", headerSize=" + hdrSize + ", bytesPerChecksum=" + bytesPerChecksum);
     }
-    return true;  // checksum is valid
+    ByteBuff data = buf.duplicate().position(0).limit(onDiskDataSizeWithHeader);
+    ByteBuff checksums = buf.duplicate().position(onDiskDataSizeWithHeader).limit(buf.limit());
+    return verifyChunkedSums(dataChecksum, data, checksums, pathName);
   }
 
   /**
@@ -151,25 +219,6 @@ public class ChecksumUtil {
   }
 
   /**
-   * Write dummy checksums to the end of the specified bytes array
-   * to reserve space for writing checksums later
-   * @param baos OutputStream to write dummy checkum values
-   * @param numBytes Number of bytes of data for which dummy checksums
-   *                 need to be generated
-   * @param bytesPerChecksum Number of bytes per checksum value
-   */
-  static void reserveSpaceForChecksums(ByteArrayOutputStream baos,
-    int numBytes, int bytesPerChecksum) throws IOException {
-    long numChunks = numChunks(numBytes, bytesPerChecksum);
-    long bytesLeft = numChunks * HFileBlock.CHECKSUM_SIZE;
-    while (bytesLeft > 0) {
-      long count = Math.min(bytesLeft, DUMMY_VALUE.length);
-      baos.write(DUMMY_VALUE, 0, (int)count);
-      bytesLeft -= count;
-    }
-  }
-
-  /**
    * Mechanism to throw an exception in case of hbase checksum
    * failure. This is used by unit tests only.
    * @param value Setting this to true will cause hbase checksum
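
The MultiByteBuff fallback in verifyChunkedSums above recomputes each chunk's checksum through a small reusable byte[] instead of materializing the whole block on heap. A self-contained sketch of that chunked loop, substituting plain java.util.zip.CRC32 for Hadoop's DataChecksum (the class, method, and the generator in main are all hypothetical):

    import java.nio.ByteBuffer;
    import java.util.zip.CRC32;

    public class ChunkedCrcSketch {
      static final int CHECKSUM_BUF_SIZE = 256; // mirrors ChecksumUtil.CHECKSUM_BUF_SIZE

      // Verify one stored 4-byte big-endian CRC32 per bytesPerChecksum-sized chunk of 'data'.
      static boolean verify(ByteBuffer data, ByteBuffer checksums, int bytesPerChecksum) {
        byte[] buf = new byte[CHECKSUM_BUF_SIZE];
        CRC32 crc = new CRC32();
        while (data.remaining() > 0) {
          int n = Math.min(data.remaining(), bytesPerChecksum);
          int stored = checksums.getInt(); // ByteBuffer reads big-endian by default
          crc.reset();
          for (int remain = n; remain > 0; ) {
            int len = Math.min(buf.length, remain); // feed at most 256 bytes per update
            data.get(buf, 0, len);
            crc.update(buf, 0, len);
            remain -= len;
          }
          if ((int) crc.getValue() != stored) {
            return false; // checksum mismatch in this chunk
          }
        }
        return true;
      }

      public static void main(String[] args) {
        byte[] payload = new byte[1000];
        for (int i = 0; i < payload.length; i++) {
          payload[i] = (byte) i;
        }
        int bytesPerChecksum = 512;
        int chunks = (payload.length + bytesPerChecksum - 1) / bytesPerChecksum;
        ByteBuffer sums = ByteBuffer.allocate(4 * chunks);
        CRC32 crc = new CRC32();
        for (int off = 0; off < payload.length; off += bytesPerChecksum) {
          int len = Math.min(bytesPerChecksum, payload.length - off);
          crc.reset();
          crc.update(payload, off, len);
          sums.putInt((int) crc.getValue());
        }
        sums.flip();
        System.out.println(verify(ByteBuffer.wrap(payload), sums, bytesPerChecksum)); // prints true
      }
    }
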
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 968a87e..91e63fd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -1784,10 +1784,10 @@ public class HFileBlock implements Cacheable {
       // Do a few checks before we go instantiate HFileBlock.
       assert onDiskSizeWithHeader > this.hdrSize;
       verifyOnDiskSizeMatchesHeader(onDiskSizeWithHeader, headerBuf, offset, checksumSupport);
-      ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader);
+      ByteBuff onDiskBlockByteBuff =
+          new SingleByteBuff(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader));
       // Verify checksum of the data before using it for building HFileBlock.
-      if (verifyChecksum &&
-          !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) {
+      if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuff, hdrSize)) {
         return null;
       }
       long duration = System.currentTimeMillis() - startTime;
@@ -1797,9 +1797,8 @@ public class HFileBlock implements Cacheable {
       // The onDiskBlock will become the headerAndDataBuffer for this block.
       // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already
       // contains the header of next block, so no need to set next block's header in it.
-      HFileBlock hFileBlock =
-          new HFileBlock(new SingleByteBuff(onDiskBlockByteBuffer), checksumSupport,
-              MemoryType.EXCLUSIVE, offset, nextBlockOnDiskSize, fileContext);
+      HFileBlock hFileBlock = new HFileBlock(onDiskBlockByteBuff, checksumSupport,
+          MemoryType.EXCLUSIVE, offset, nextBlockOnDiskSize, fileContext);
       // Run check on uncompressed sizings.
       if (!fileContext.isCompressedOrEncrypted()) {
         hFileBlock.sanityCheckUncompressed();
@@ -1838,8 +1837,7 @@ public class HFileBlock implements Cacheable {
      * If the block doesn't uses checksum, returns false.
      * @return True if checksum matches, else false.
      */
-    private boolean validateChecksum(long offset, ByteBuffer data, int hdrSize)
-        throws IOException {
+    private boolean validateChecksum(long offset, ByteBuff data, int hdrSize) {
       // If this is an older version of the block that does not have checksums, then return false
       // indicating that checksum verification did not succeed. Actually, this method should never
       // be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
index de28422..e93b61e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java
@@ -21,16 +21,15 @@ import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ;
 import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.nio.BufferUnderflowException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
+import java.nio.ByteBuffer;
+
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -42,6 +41,8 @@ import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.MultiByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.ChecksumType;
@@ -102,22 +103,35 @@ public class TestChecksum {
     assertEquals(b.getChecksumType(), ChecksumType.getDefaultChecksumType().getCode());
   }
 
-  /**
-   * Test all checksum types by writing and reading back blocks.
-   */
+  private void verifyMBBCheckSum(ByteBuff buf) throws IOException {
+    int size = buf.remaining() / 2 + 1;
+    ByteBuff mbb = new MultiByteBuff(ByteBuffer.allocate(size), ByteBuffer.allocate(size))
+          .position(0).limit(buf.remaining());
+    for (int i = buf.position(); i < buf.limit(); i++) {
+      mbb.put(buf.get(i));
+    }
+    mbb.position(0).limit(buf.remaining());
+    assertEquals(mbb.remaining(), buf.remaining());
+    assertTrue(mbb.remaining() > size);
+    ChecksumUtil.validateChecksum(mbb, "test", 0, HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM);
+  }
+
+  private void verifySBBCheckSum(ByteBuff buf) throws IOException {
+    ChecksumUtil.validateChecksum(buf, "test", 0, HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM);
+  }
+
   @Test
-  public void testAllChecksumTypes() throws IOException {
-    List<ChecksumType> cktypes = new ArrayList<>(Arrays.asList(ChecksumType.values()));
-    for (Iterator<ChecksumType> itr = cktypes.iterator(); itr.hasNext(); ) {
-      ChecksumType cktype = itr.next();
-      Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + cktype.getName());
+  public void testVerifyCheckSum() throws IOException {
+    int intCount = 10000;
+    for (ChecksumType ckt : ChecksumType.values()) {
+      Path path = new Path(TEST_UTIL.getDataTestDir(), "checksum" + ckt.getName());
       FSDataOutputStream os = fs.create(path);
       HFileContext meta = new HFileContextBuilder()
-          .withChecksumType(cktype)
-          .build();
+            .withChecksumType(ckt)
+            .build();
       HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
       DataOutputStream dos = hbw.startWriting(BlockType.DATA);
-      for (int i = 0; i < 1000; ++i) {
+      for (int i = 0; i < intCount; ++i) {
         dos.writeInt(i);
       }
       hbw.writeHeaderAndData(os);
@@ -130,19 +144,25 @@ public class TestChecksum {
       FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
       meta = new HFileContextBuilder().withHBaseCheckSum(true).build();
       HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(
-          is, totalSize, (HFileSystem) fs, path, meta);
+            is, totalSize, (HFileSystem) fs, path, meta);
       HFileBlock b = hbr.readBlockData(0, -1, false, false);
+
+      // verify SingleByteBuff checksum.
+      verifySBBCheckSum(b.getBufferReadOnly());
+
+      // verify MultiByteBuff checksum.
+      verifyMBBCheckSum(b.getBufferReadOnly());
+
       ByteBuff data = b.getBufferWithoutHeader();
-      for (int i = 0; i < 1000; i++) {
+      for (int i = 0; i < intCount; i++) {
         assertEquals(i, data.getInt());
       }
-      boolean exception_thrown = false;
       try {
         data.getInt();
+        fail();
       } catch (BufferUnderflowException e) {
-        exception_thrown = true;
+        // expected failure
       }
-      assertTrue(exception_thrown);
       assertEquals(0, HFile.getAndResetChecksumFailuresCount());
     }
   }
@@ -216,16 +236,19 @@ public class TestChecksum {
         for (int i = 0; i <
              HFileBlock.CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD + 1; i++) {
           b = hbr.readBlockData(0, -1, pread, false);
+          assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
           assertEquals(0, HFile.getAndResetChecksumFailuresCount());
         }
         // The next read should have hbase checksum verification re-enabled;
         // we verify this by asserting that there was a hbase-checksum failure.
         b = hbr.readBlockData(0, -1, pread, false);
+        assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         assertEquals(1, HFile.getAndResetChecksumFailuresCount());
 
         // Since the above encountered a checksum failure, we switch
         // back to not checking hbase checksums.
         b = hbr.readBlockData(0, -1, pread, false);
+        assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         assertEquals(0, HFile.getAndResetChecksumFailuresCount());
         is.close();
 
@@ -319,6 +342,7 @@ public class TestChecksum {
         HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(new FSDataInputStreamWrapper(
             is, nochecksum), totalSize, hfs, path, meta);
         HFileBlock b = hbr.readBlockData(0, -1, pread, false);
+        assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
         is.close();
         b.sanityCheck();
         assertEquals(dataSize, b.getUncompressedSizeWithoutHeader());


[hbase] 14/22: HBASE-22412 Improve the metrics in ByteBuffAllocator

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 69eeb597b6908f341a19107ba2a0d92041a0926a
Author: huzheng <op...@gmail.com>
AuthorDate: Tue May 14 16:19:05 2019 +0800

    HBASE-22412 Improve the metrics in ByteBuffAllocator
---
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  | 50 ++++++++++++++--------
 .../hadoop/hbase/io/TestByteBuffAllocator.java     | 49 +++++++++++++--------
 .../regionserver/MetricsRegionServerSource.java    | 20 ++++-----
 .../regionserver/MetricsRegionServerWrapper.java   |  6 +--
 .../MetricsRegionServerSourceImpl.java             | 22 +++++-----
 .../tmpl/regionserver/ServerMetricsTmpl.jamon      | 16 ++++---
 .../MetricsRegionServerWrapperImpl.java            | 12 +++---
 .../MetricsRegionServerWrapperStub.java            |  6 +--
 8 files changed, 103 insertions(+), 78 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
index 5939d4a..75a4699 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -97,12 +97,12 @@ public class ByteBuffAllocator {
 
   private final Queue<ByteBuffer> buffers = new ConcurrentLinkedQueue<>();
 
-  // Metrics to track the pool allocation number and heap allocation number. If heap allocation
-  // number is increasing so much, then we may need to increase the max.buffer.count .
-  private final LongAdder poolAllocationNum = new LongAdder();
-  private final LongAdder heapAllocationNum = new LongAdder();
-  private long lastPoolAllocationNum = 0;
-  private long lastHeapAllocationNum = 0;
+  // Metrics to track the pool allocation bytes and heap allocation bytes. If the heap allocation
+  // bytes keep increasing, then we may need to increase the max.buffer.count.
+  private final LongAdder poolAllocationBytes = new LongAdder();
+  private final LongAdder heapAllocationBytes = new LongAdder();
+  private long lastPoolAllocationBytes = 0;
+  private long lastHeapAllocationBytes = 0;
 
   /**
    * Initialize an {@link ByteBuffAllocator} which will try to allocate ByteBuffers from off-heap if
@@ -161,14 +161,26 @@ public class ByteBuffAllocator {
     return reservoirEnabled;
   }
 
-  public long getHeapAllocationNum() {
-    return heapAllocationNum.sum();
+  public long getHeapAllocationBytes() {
+    return heapAllocationBytes.sum();
   }
 
-  public long getPoolAllocationNum() {
-    return poolAllocationNum.sum();
+  public long getPoolAllocationBytes() {
+    return poolAllocationBytes.sum();
   }
 
+  public int getBufferSize() {
+    return this.bufSize;
+  }
+
+  public int getUsedBufferCount() {
+    return this.usedBufCount.intValue();
+  }
+
+  /**
+   * {@link ConcurrentLinkedQueue#size()} has O(N) complexity and is time-consuming, so DO NOT use
+   * this method except in unit tests.
+   */
   @VisibleForTesting
   public int getFreeBufferCount() {
     return this.buffers.size();
@@ -179,15 +191,15 @@ public class ByteBuffAllocator {
   }
 
   public double getHeapAllocationRatio() {
-    long heapAllocNum = heapAllocationNum.sum(), poolAllocNum = poolAllocationNum.sum();
-    double heapDelta = heapAllocNum - lastHeapAllocationNum;
-    double poolDelta = poolAllocNum - lastPoolAllocationNum;
-    lastHeapAllocationNum = heapAllocNum;
-    lastPoolAllocationNum = poolAllocNum;
+    long heapAllocBytes = heapAllocationBytes.sum(), poolAllocBytes = poolAllocationBytes.sum();
+    double heapDelta = heapAllocBytes - lastHeapAllocationBytes;
+    double poolDelta = poolAllocBytes - lastPoolAllocationBytes;
+    lastHeapAllocationBytes = heapAllocBytes;
+    lastPoolAllocationBytes = poolAllocBytes;
     if (Math.abs(heapDelta + poolDelta) < 1e-3) {
       return 0.0;
     }
-    return heapDelta / (heapDelta + poolDelta) * 100;
+    return heapDelta / (heapDelta + poolDelta);
   }
 
   /**
@@ -208,7 +220,7 @@ public class ByteBuffAllocator {
   }
 
   private ByteBuffer allocateOnHeap(int size) {
-    heapAllocationNum.increment();
+    heapAllocationBytes.add(size);
     return ByteBuffer.allocate(size);
   }
 
@@ -282,7 +294,7 @@ public class ByteBuffAllocator {
     if (bb != null) {
       // To reset the limit to capacity and position to 0, must clear here.
       bb.clear();
-      poolAllocationNum.increment();
+      poolAllocationBytes.add(bufSize);
       return bb;
     }
     while (true) {
@@ -299,7 +311,7 @@ public class ByteBuffAllocator {
       if (!this.usedBufCount.compareAndSet(c, c + 1)) {
         continue;
       }
-      poolAllocationNum.increment();
+      poolAllocationBytes.add(bufSize);
       return ByteBuffer.allocateDirect(bufSize);
     }
   }
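
The ratio exposed to the metrics system above is computed from deltas since the previous poll, so it reflects recent allocation behaviour rather than lifetime totals. A small sketch that restates that delta computation outside the diff context (the class name AllocationRatioSketch is hypothetical):

    import java.util.concurrent.atomic.LongAdder;

    public class AllocationRatioSketch {
      private final LongAdder poolAllocationBytes = new LongAdder();
      private final LongAdder heapAllocationBytes = new LongAdder();
      private long lastPoolAllocationBytes = 0;
      private long lastHeapAllocationBytes = 0;

      void onPoolAllocation(int size) { poolAllocationBytes.add(size); }
      void onHeapAllocation(int size) { heapAllocationBytes.add(size); }

      // Fraction of bytes served from heap since the previous call; 0.0 if nothing was allocated.
      double heapAllocationRatio() {
        long heap = heapAllocationBytes.sum(), pool = poolAllocationBytes.sum();
        double heapDelta = heap - lastHeapAllocationBytes;
        double poolDelta = pool - lastPoolAllocationBytes;
        lastHeapAllocationBytes = heap;
        lastPoolAllocationBytes = pool;
        if (Math.abs(heapDelta + poolDelta) < 1e-3) {
          return 0.0;
        }
        return heapDelta / (heapDelta + poolDelta);
      }

      public static void main(String[] args) {
        AllocationRatioSketch m = new AllocationRatioSketch();
        m.onPoolAllocation(64 * 1024);
        m.onPoolAllocation(64 * 1024);
        m.onHeapAllocation(64 * 1024);
        System.out.println(m.heapAllocationRatio()); // one heap chunk out of three, ~0.333
      }
    }
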
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
index 0d0da80..9186be4 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
@@ -47,9 +47,12 @@ public class TestByteBuffAllocator {
     int maxBuffersInPool = 10;
     int bufSize = 6 * 1024;
     ByteBuffAllocator alloc = new ByteBuffAllocator(true, maxBuffersInPool, bufSize, bufSize / 6);
+    assertEquals(0, alloc.getUsedBufferCount());
+
     ByteBuff buff = alloc.allocate(10 * bufSize);
-    assertEquals(10, alloc.getPoolAllocationNum());
-    assertEquals(0, alloc.getHeapAllocationNum());
+    assertEquals(61440, alloc.getPoolAllocationBytes());
+    assertEquals(0, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();
     // When the request size is less than 1/6th of the pool buffer size. We should use on demand
     // created on heap Buffer
@@ -57,15 +60,17 @@ public class TestByteBuffAllocator {
     assertTrue(buff.hasArray());
     assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
     assertEquals(maxBuffersInPool, alloc.getTotalBufferCount());
-    assertEquals(10, alloc.getPoolAllocationNum());
-    assertEquals(1, alloc.getHeapAllocationNum());
+    assertEquals(61440, alloc.getPoolAllocationBytes());
+    assertEquals(200, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();
     // When the request size is > 1/6th of the pool buffer size.
     buff = alloc.allocate(1024);
     assertFalse(buff.hasArray());
     assertEquals(maxBuffersInPool - 1, alloc.getFreeBufferCount());
-    assertEquals(11, alloc.getPoolAllocationNum());
-    assertEquals(1, alloc.getHeapAllocationNum());
+    assertEquals(67584, alloc.getPoolAllocationBytes());
+    assertEquals(200, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();// ByteBuff Recycler#free should put back the BB to pool.
     assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
     // Request size> pool buffer size
@@ -79,8 +84,9 @@ public class TestByteBuffAllocator {
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(1024, bbs[1].limit());
     assertEquals(maxBuffersInPool - 2, alloc.getFreeBufferCount());
-    assertEquals(13, alloc.getPoolAllocationNum());
-    assertEquals(1, alloc.getHeapAllocationNum());
+    assertEquals(79872, alloc.getPoolAllocationBytes());
+    assertEquals(200, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();
     assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
 
@@ -94,14 +100,16 @@ public class TestByteBuffAllocator {
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(200, bbs[1].limit());
     assertEquals(maxBuffersInPool - 1, alloc.getFreeBufferCount());
-    assertEquals(14, alloc.getPoolAllocationNum());
-    assertEquals(2, alloc.getHeapAllocationNum());
+    assertEquals(86016, alloc.getPoolAllocationBytes());
+    assertEquals(400, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();
     assertEquals(maxBuffersInPool, alloc.getFreeBufferCount());
 
     alloc.allocate(bufSize * (maxBuffersInPool - 1));
-    assertEquals(23, alloc.getPoolAllocationNum());
-    assertEquals(2, alloc.getHeapAllocationNum());
+    assertEquals(141312, alloc.getPoolAllocationBytes());
+    assertEquals(400, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
 
     buff = alloc.allocate(20 * 1024);
     assertFalse(buff.hasArray());
@@ -113,21 +121,24 @@ public class TestByteBuffAllocator {
     assertEquals(6 * 1024, bbs[0].limit());
     assertEquals(14 * 1024, bbs[1].limit());
     assertEquals(0, alloc.getFreeBufferCount());
-    assertEquals(24, alloc.getPoolAllocationNum());
-    assertEquals(3, alloc.getHeapAllocationNum());
+    assertEquals(147456, alloc.getPoolAllocationBytes());
+    assertEquals(14736, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
 
     buff.release();
     assertEquals(1, alloc.getFreeBufferCount());
     alloc.allocateOneBuffer();
-    assertEquals(25, alloc.getPoolAllocationNum());
-    assertEquals(3, alloc.getHeapAllocationNum());
+    assertEquals(153600, alloc.getPoolAllocationBytes());
+    assertEquals(14736, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
 
     buff = alloc.allocate(7 * 1024);
     assertTrue(buff.hasArray());
     assertTrue(buff instanceof SingleByteBuff);
     assertEquals(7 * 1024, buff.nioByteBuffers()[0].limit());
-    assertEquals(25, alloc.getPoolAllocationNum());
-    assertEquals(4, alloc.getHeapAllocationNum());
+    assertEquals(153600, alloc.getPoolAllocationBytes());
+    assertEquals(21904, alloc.getHeapAllocationBytes());
+    assertEquals(10, alloc.getUsedBufferCount());
     buff.release();
   }
 
@@ -142,7 +153,7 @@ public class TestByteBuffAllocator {
       // expected exception
     }
     ByteBuff bb = allocator.allocate(0);
-    assertEquals(1, allocator.getHeapAllocationNum());
+    assertEquals(0, allocator.getHeapAllocationBytes());
     bb.release();
   }
 
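The rewritten assertions above check byte totals rather than allocation counts; with a pool buffer size of 6 * 1024 = 6144 bytes, each expected value is a plain multiple of the buffer size plus the on-demand heap allocations. A quick, purely illustrative check of that arithmetic:

public class ExpectedByteTotals {
  public static void main(String[] args) {
    int bufSize = 6 * 1024;                      // 6144, the pool buffer size used in the test
    System.out.println(10 * bufSize);            // 61440  after the first 10 pooled buffers
    System.out.println(11 * bufSize);            // 67584
    System.out.println(13 * bufSize);            // 79872
    System.out.println(14 * bufSize);            // 86016
    System.out.println(23 * bufSize);            // 141312
    System.out.println(25 * bufSize);            // 153600
    System.out.println(200 + 200 + 14 * 1024);   // 14736  heap bytes: 200 + 200 + the 14 KB tail
    System.out.println(14736 + 7 * 1024);        // 21904  plus the final 7 KB heap buffer
  }
}
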
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
index 68548c8..b7fad25 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java
@@ -560,17 +560,17 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo
       "Average region size over the RegionServer including memstore and storefile sizes.";
 
   /** Metrics for {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} **/
-  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM = "ByteBuffAllocatorHeapAllocationNum";
-  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM_DESC =
-      "Number of heap allocation from ByteBuffAllocator";
-  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM = "ByteBuffAllocatorPoolAllocationNum";
-  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM_DESC =
-      "Number of pool allocation from ByteBuffAllocator";
-  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO = "ByteBuffAllocatorHeapAllocationRatio";
-  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO_DESC =
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_BYTES = "ByteBuffAllocatorHeapAllocationBytes";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_BYTES_DESC =
+      "Bytes of heap allocation from ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_BYTES = "ByteBuffAllocatorPoolAllocationBytes";
+  String BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_BYTES_DESC =
+      "Bytes of pool allocation from ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_RATIO = "ByteBuffAllocatorHeapAllocationRatio";
+  String BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_RATIO_DESC =
       "Ratio of heap allocation from ByteBuffAllocator, means heapAllocation/totalAllocation";
   String BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT = "ByteBuffAllocatorTotalBufferCount";
   String BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT_DESC = "Total buffer count in ByteBuffAllocator";
-  String BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT = "ByteBuffAllocatorFreeBufferCount";
-  String BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT_DESC = "Free buffer count in ByteBuffAllocator";
+  String BYTE_BUFF_ALLOCATOR_USED_BUFFER_COUNT = "ByteBuffAllocatorUsedBufferCount";
+  String BYTE_BUFF_ALLOCATOR_USED_BUFFER_COUNT_DESC = "Used buffer count in ByteBuffAllocator";
 }
diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
index c196cda..8f360d3 100644
--- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
+++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java
@@ -524,13 +524,13 @@ public interface MetricsRegionServerWrapper {
 
   long getTotalRowActionRequestCount();
 
-  long getByteBuffAllocatorHeapAllocationNum();
+  long getByteBuffAllocatorHeapAllocationBytes();
 
-  long getByteBuffAllocatorPoolAllocationNum();
+  long getByteBuffAllocatorPoolAllocationBytes();
 
   double getByteBuffAllocatorHeapAllocRatio();
 
   long getByteBuffAllocatorTotalBufferCount();
 
-  long getByteBuffAllocatorFreeBufferCount();
+  long getByteBuffAllocatorUsedBufferCount();
 }
diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
index e259022..d7c62d5 100644
--- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
+++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java
@@ -554,21 +554,21 @@ public class MetricsRegionServerSourceImpl
                     rsWrap.getReadRequestsRatePerSecond())
             .addGauge(Interns.info(WRITE_REQUEST_RATE_PER_SECOND, WRITE_REQUEST_RATE_DESC),
                     rsWrap.getWriteRequestsRatePerSecond())
-            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM,
-                BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_NUM_DESC),
-                rsWrap.getByteBuffAllocatorHeapAllocationNum())
-            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM,
-                BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_NUM_DESC),
-                rsWrap.getByteBuffAllocatorPoolAllocationNum())
-            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO,
-                BYTE_BUFF_ALLOCATOR_HEAP_ALLOACTION_RATIO_DESC),
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_BYTES,
+                  BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_BYTES_DESC),
+                rsWrap.getByteBuffAllocatorHeapAllocationBytes())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_BYTES,
+                  BYTE_BUFF_ALLOCATOR_POOL_ALLOCATION_BYTES_DESC),
+                rsWrap.getByteBuffAllocatorPoolAllocationBytes())
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_RATIO,
+                  BYTE_BUFF_ALLOCATOR_HEAP_ALLOCATION_RATIO_DESC),
                 rsWrap.getByteBuffAllocatorHeapAllocRatio())
             .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT,
                 BYTE_BUFF_ALLOCATOR_TOTAL_BUFFER_COUNT_DESC),
                 rsWrap.getByteBuffAllocatorTotalBufferCount())
-            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT,
-                BYTE_BUFF_ALLOCATOR_FREE_BUFFER_COUNT_DESC),
-                rsWrap.getByteBuffAllocatorFreeBufferCount());
+            .addGauge(Interns.info(BYTE_BUFF_ALLOCATOR_USED_BUFFER_COUNT,
+                BYTE_BUFF_ALLOCATOR_USED_BUFFER_COUNT_DESC),
+                rsWrap.getByteBuffAllocatorUsedBufferCount());
   }
 
   @Override
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
index 5ace343..4528620 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/ServerMetricsTmpl.jamon
@@ -238,18 +238,20 @@ ByteBuffAllocator bbAllocator;
 </%args>
 <table class="table table-striped">
 <tr>
-    <th>Number of Heap Allocation</th>
-    <th>Number of Pool Allocation</th>
+    <th>Total Heap Allocation(Bytes)</th>
+    <th>Total Pool Allocation(Bytes)</th>
     <th>Heap Allocation Ratio</th>
     <th>Total Buffer Count</th>
-    <th>Free Buffer Count</th>
+    <th>Used Buffer Count</th>
+    <th>Buffer Size(Bytes)</th>
 </tr>
 <tr>
-    <td><% bbAllocator.getHeapAllocationNum() %></td>
-    <td><% bbAllocator.getPoolAllocationNum() %></td>
-    <td><% bbAllocator.getHeapAllocationRatio() %>%</td>
+    <td><% bbAllocator.getHeapAllocationBytes() %></td>
+    <td><% bbAllocator.getPoolAllocationBytes() %></td>
+    <td><% String.format("%.3f", bbAllocator.getHeapAllocationRatio() * 100) %><% "%" %></td>
     <td><% bbAllocator.getTotalBufferCount() %></td>
-    <td><% bbAllocator.getFreeBufferCount() %></td>
+    <td><% bbAllocator.getUsedBufferCount() %></td>
+    <td><% bbAllocator.getBufferSize() %></td>
 </tr>
 </table>
 </%def>
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
index db72d11..717634c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java
@@ -1010,13 +1010,13 @@ class MetricsRegionServerWrapperImpl
   }
 
   @Override
-  public long getByteBuffAllocatorHeapAllocationNum() {
-    return this.allocator.getHeapAllocationNum();
+  public long getByteBuffAllocatorHeapAllocationBytes() {
+    return this.allocator.getHeapAllocationBytes();
   }
 
   @Override
-  public long getByteBuffAllocatorPoolAllocationNum() {
-    return this.allocator.getPoolAllocationNum();
+  public long getByteBuffAllocatorPoolAllocationBytes() {
+    return this.allocator.getPoolAllocationBytes();
   }
 
   @Override
@@ -1030,7 +1030,7 @@ class MetricsRegionServerWrapperImpl
   }
 
   @Override
-  public long getByteBuffAllocatorFreeBufferCount() {
-    return this.allocator.getFreeBufferCount();
+  public long getByteBuffAllocatorUsedBufferCount() {
+    return this.allocator.getUsedBufferCount();
   }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
index 035167a..f3cb930 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java
@@ -116,12 +116,12 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
   }
 
   @Override
-  public long getByteBuffAllocatorHeapAllocationNum() {
+  public long getByteBuffAllocatorHeapAllocationBytes() {
     return 0;
   }
 
   @Override
-  public long getByteBuffAllocatorPoolAllocationNum() {
+  public long getByteBuffAllocatorPoolAllocationBytes() {
     return 0;
   }
 
@@ -136,7 +136,7 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe
   }
 
   @Override
-  public long getByteBuffAllocatorFreeBufferCount() {
+  public long getByteBuffAllocatorUsedBufferCount() {
     return 0;
   }
 


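Two display-facing details of the allocator change above are easy to miss: getHeapAllocationRatio now returns a fraction in [0, 1] computed from the deltas since the previous call (the old code multiplied by 100 inside the method), and the region server UI multiplies by 100 and formats the value when rendering. The following is a minimal standalone sketch of that delta-based gauge with simplified names; it makes no claim to match the real class, and the synchronized keyword is only there so the self-contained sketch is safe under concurrent calls.

import java.util.concurrent.atomic.LongAdder;

public class HeapRatioGaugeSketch {
  private final LongAdder heapAllocationBytes = new LongAdder();
  private final LongAdder poolAllocationBytes = new LongAdder();
  private long lastHeapAllocationBytes = 0;
  private long lastPoolAllocationBytes = 0;

  /** Fraction of bytes served from heap since the previous call, in [0, 1]. */
  public synchronized double getHeapAllocationRatio() {
    long heap = heapAllocationBytes.sum(), pool = poolAllocationBytes.sum();
    double heapDelta = heap - lastHeapAllocationBytes;
    double poolDelta = pool - lastPoolAllocationBytes;
    lastHeapAllocationBytes = heap;
    lastPoolAllocationBytes = pool;
    if (Math.abs(heapDelta + poolDelta) < 1e-3) {
      return 0.0;                                // nothing allocated since the last sample
    }
    return heapDelta / (heapDelta + poolDelta);
  }

  public static void main(String[] args) {
    HeapRatioGaugeSketch gauge = new HeapRatioGaugeSketch();
    gauge.heapAllocationBytes.add(200);          // illustrative numbers only
    gauge.poolAllocationBytes.add(61440);
    double ratio = gauge.getHeapAllocationRatio();
    // The UI renders the fraction as a percentage, e.g. "0.324%".
    System.out.println(String.format("%.3f", ratio * 100) + "%");
  }
}
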
[hbase] 21/22: HBASE-22531 The HFileReaderImpl#shouldUseHeap return the incorrect true when disabled BlockCache (#304)

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 5572cf4b02bfa46dd3a58a370aac3f1e374a0385
Author: openinx <op...@gmail.com>
AuthorDate: Fri Jun 14 09:41:11 2019 +0800

    HBASE-22531 The HFileReaderImpl#shouldUseHeap return the incorrect true when disabled BlockCache (#304)
---
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  2 +-
 .../hfile/TestHFileScannerImplReferenceCount.java  | 25 ++++++++++++++++++++++
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 0dae13c..9cef9c0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -1419,7 +1419,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
    *      boolean, boolean)
    */
   private boolean shouldUseHeap(BlockType expectedBlockType) {
-    if (cacheConf.getBlockCache() == null) {
+    if (!cacheConf.getBlockCache().isPresent()) {
       return false;
     } else if (!cacheConf.isCombinedBlockCache()) {
       // Block to cache in LruBlockCache must be an heap one. So just allocate block memory from
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
index 87dd29e..dd9a1c8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileScannerImplReferenceCount.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.io.hfile;
 
 import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_IOENGINE_KEY;
 import static org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_SIZE_KEY;
+import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
 import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY;
@@ -422,4 +423,28 @@ public class TestHFileScannerImplReferenceCount {
     Assert.assertNull(scanner.curBlock);
     Assert.assertTrue(scanner.prevBlocks.isEmpty());
   }
+
+  @Test
+  public void testDisabledBlockCache() throws Exception {
+    writeHFile(conf, fs, hfilePath, Algorithm.NONE, DataBlockEncoding.NONE, CELL_COUNT);
+    // Set LruBlockCache
+    conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+    BlockCache defaultBC = BlockCacheFactory.createBlockCache(conf);
+    Assert.assertNull(defaultBC);
+    CacheConfig cacheConfig = new CacheConfig(conf, null, defaultBC, allocator);
+    Assert.assertFalse(cacheConfig.isCombinedBlockCache()); // Must be LruBlockCache.
+    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConfig, true, conf);
+    Assert.assertTrue(reader instanceof HFileReaderImpl);
+    // We've build a HFile tree with index = 16.
+    Assert.assertEquals(16, reader.getTrailer().getNumDataIndexLevels());
+
+    HFileBlock block1 = reader.getDataBlockIndexReader()
+        .loadDataBlockWithScanInfo(firstCell, null, true, true, false, DataBlockEncoding.NONE)
+        .getHFileBlock();
+
+    Assert.assertTrue(block1.isSharedMem());
+    Assert.assertTrue(block1 instanceof SharedMemHFileBlock);
+    Assert.assertEquals(1, block1.refCnt());
+    Assert.assertTrue(block1.release());
+  }
 }


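The heart of HBASE-22531 is the one-line change in shouldUseHeap above: CacheConfig#getBlockCache returns an Optional on this branch (hence the isPresent() in the fix), so the old == null test could never be true, and the method kept answering "allocate on heap" even with the block cache disabled. Below is a self-contained, purely illustrative version of that pitfall and its fix; the names are placeholders, not the real HFileReaderImpl code.

import java.util.Optional;

public class OptionalNullCheckPitfall {
  interface BlockCache { }

  // Stand-in for CacheConfig#getBlockCache: empty when the cache is disabled, never null.
  static Optional<BlockCache> getBlockCache(boolean cacheEnabled) {
    return cacheEnabled ? Optional.of(new BlockCache() { }) : Optional.empty();
  }

  /** Buggy check: Optional.empty() is not null, so this branch never fires. */
  static boolean shouldUseHeapBuggy(boolean cacheEnabled) {
    if (getBlockCache(cacheEnabled) == null) {
      return false;
    }
    return true;  // cache disabled, yet we still answer "use heap"
  }

  /** Fixed check: test presence of the Optional instead of nullness. */
  static boolean shouldUseHeapFixed(boolean cacheEnabled) {
    if (!getBlockCache(cacheEnabled).isPresent()) {
      return false;
    }
    return true;
  }

  public static void main(String[] args) {
    System.out.println(shouldUseHeapBuggy(false));  // true  -- the bug
    System.out.println(shouldUseHeapFixed(false));  // false -- after the fix
  }
}
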
[hbase] 08/22: HBASE-21957 Unify refCount of BucketEntry and refCount of hbase.nio.ByteBuff into one

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit c323fc78a40658196a0c4f4572862eef90d815b4
Author: huzheng <op...@gmail.com>
AuthorDate: Wed Apr 10 16:33:30 2019 +0800

    HBASE-21957 Unify refCount of BucketEntry and refCount of hbase.nio.ByteBuff into one
---
 .../java/org/apache/hadoop/hbase/nio/ByteBuff.java |  73 ++--
 .../{RefCnt.java => HBaseReferenceCounted.java}    |  25 +-
 .../org/apache/hadoop/hbase/nio/MultiByteBuff.java |   2 +-
 .../java/org/apache/hadoop/hbase/nio/RefCnt.java   |  18 +-
 .../apache/hadoop/hbase/nio/SingleByteBuff.java    |   2 +-
 .../apache/hadoop/hbase/util/ByteBufferArray.java  |  14 +-
 .../hadoop/hbase/util/TestByteBufferArray.java     |   4 +-
 .../hadoop/hbase/io/hfile/BlockCacheUtil.java      |   2 +-
 .../apache/hadoop/hbase/io/hfile/Cacheable.java    |  23 +-
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |   5 +-
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   |   4 +-
 .../hbase/io/hfile/bucket/BucketAllocator.java     |   1 -
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  | 453 +++++----------------
 .../hadoop/hbase/io/hfile/bucket/BucketEntry.java  | 239 +++++++++++
 .../hbase/io/hfile/bucket/BucketProtoUtils.java    |  12 +-
 .../hbase/io/hfile/bucket/ByteBufferIOEngine.java  |   8 +-
 .../hbase/io/hfile/bucket/CachedEntryQueue.java    |  23 +-
 .../hfile/bucket/ExclusiveMemoryMmapIOEngine.java  |  17 +-
 .../hadoop/hbase/io/hfile/bucket/FileIOEngine.java |  29 +-
 .../hbase/io/hfile/bucket/FileMmapIOEngine.java    |   4 +-
 .../hadoop/hbase/io/hfile/bucket/IOEngine.java     |  14 +-
 .../io/hfile/bucket/SharedMemoryMmapIOEngine.java  |  10 +-
 .../bucket/UnsafeSharedMemoryBucketEntry.java      |  81 ----
 .../hbase/client/TestBlockEvictionFromClient.java  |  51 ++-
 .../hadoop/hbase/io/hfile/CacheTestUtils.java      |  69 +---
 .../hbase/io/hfile/bucket/TestBucketCache.java     |  46 ++-
 .../io/hfile/bucket/TestBucketCacheRefCnt.java     | 266 ++++++++++++
 .../io/hfile/bucket/TestBucketWriterThread.java    |   1 -
 .../io/hfile/bucket/TestByteBufferIOEngine.java    |  61 ++-
 .../bucket/TestExclusiveMemoryMmapEngine.java      |  10 +-
 .../hbase/io/hfile/bucket/TestFileIOEngine.java    |  22 +-
 31 files changed, 876 insertions(+), 713 deletions(-)

diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
index 9339f43..c04c3f5 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
@@ -22,14 +22,12 @@ import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 import java.util.List;
 
-import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ObjectIntPair;
 import org.apache.yetus.audience.InterfaceAudience;
 
-import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
 import org.apache.hbase.thirdparty.io.netty.util.internal.ObjectUtil;
 
 
@@ -39,7 +37,7 @@ import org.apache.hbase.thirdparty.io.netty.util.internal.ObjectUtil;
  * provides APIs similar to the ones provided in java's nio ByteBuffers and allows you to do
  * positional reads/writes and relative reads and writes on the underlying BB. In addition to it, we
  * have some additional APIs which helps us in the read path. <br/>
- * The ByteBuff implement {@link ReferenceCounted} interface which mean need to maintains a
+ * The ByteBuff implement {@link HBaseReferenceCounted} interface which mean need to maintains a
  * {@link RefCnt} inside, if ensure that the ByteBuff won't be used any more, we must do a
  * {@link ByteBuff#release()} to recycle its NIO ByteBuffers. when considering the
  * {@link ByteBuff#duplicate()} or {@link ByteBuff#slice()}, releasing either the duplicated one or
@@ -59,7 +57,7 @@ import org.apache.hbase.thirdparty.io.netty.util.internal.ObjectUtil;
  * </pre>
  */
 @InterfaceAudience.Private
-public abstract class ByteBuff implements ReferenceCounted {
+public abstract class ByteBuff implements HBaseReferenceCounted {
   private static final String REFERENCE_COUNT_NAME = "ReferenceCount";
   private static final int NIO_BUFFER_LIMIT = 64 * 1024; // should not be more than 64KB.
 
@@ -80,26 +78,6 @@ public abstract class ByteBuff implements ReferenceCounted {
     return refCnt.release();
   }
 
-  @Override
-  public final ByteBuff retain(int increment) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public final boolean release(int increment) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public final ByteBuff touch() {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public final ByteBuff touch(Object hint) {
-    throw new UnsupportedOperationException();
-  }
-
   /******************************* Methods for ByteBuff **************************************/
 
   /**
@@ -563,31 +541,56 @@ public abstract class ByteBuff implements ReferenceCounted {
 
   /********************************* ByteBuff wrapper methods ***********************************/
 
-  public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
+  /**
+   * In theory, the upstream should never construct an ByteBuff by passing an given refCnt, so
+   * please don't use this public method in other place. Make the method public here because the
+   * BucketEntry#wrapAsCacheable in hbase-server module will use its own refCnt and ByteBuffers from
+   * IOEngine to composite an HFileBlock's ByteBuff, we didn't find a better way so keep the public
+   * way here.
+   */
+  public static ByteBuff wrap(ByteBuffer[] buffers, RefCnt refCnt) {
     if (buffers == null || buffers.length == 0) {
       throw new IllegalArgumentException("buffers shouldn't be null or empty");
     }
-    return buffers.length == 1 ? new SingleByteBuff(recycler, buffers[0])
-        : new MultiByteBuff(recycler, buffers);
+    return buffers.length == 1 ? new SingleByteBuff(refCnt, buffers[0])
+        : new MultiByteBuff(refCnt, buffers);
+  }
+
+  public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
+    return wrap(buffers, RefCnt.create(recycler));
   }
 
   public static ByteBuff wrap(ByteBuffer[] buffers) {
-    return wrap(buffers, ByteBuffAllocator.NONE);
+    return wrap(buffers, RefCnt.create());
   }
 
   public static ByteBuff wrap(List<ByteBuffer> buffers, Recycler recycler) {
-    if (buffers == null || buffers.size() == 0) {
-      throw new IllegalArgumentException("buffers shouldn't be null or empty");
-    }
-    return buffers.size() == 1 ? new SingleByteBuff(recycler, buffers.get(0))
-        : new MultiByteBuff(recycler, buffers.toArray(new ByteBuffer[0]));
+    return wrap(buffers, RefCnt.create(recycler));
   }
 
   public static ByteBuff wrap(List<ByteBuffer> buffers) {
-    return wrap(buffers, ByteBuffAllocator.NONE);
+    return wrap(buffers, RefCnt.create());
   }
 
   public static ByteBuff wrap(ByteBuffer buffer) {
-    return new SingleByteBuff(ByteBuffAllocator.NONE, buffer);
+    return wrap(buffer, RefCnt.create());
+  }
+
+  /**
+   * Make this private because we don't want to expose the refCnt related wrap method to upstream.
+   */
+  private static ByteBuff wrap(List<ByteBuffer> buffers, RefCnt refCnt) {
+    if (buffers == null || buffers.size() == 0) {
+      throw new IllegalArgumentException("buffers shouldn't be null or empty");
+    }
+    return buffers.size() == 1 ? new SingleByteBuff(refCnt, buffers.get(0))
+        : new MultiByteBuff(refCnt, buffers.toArray(new ByteBuffer[0]));
+  }
+
+  /**
+   * Make this private because we don't want to expose the refCnt related wrap method to upstream.
+   */
+  private static ByteBuff wrap(ByteBuffer buffer, RefCnt refCnt) {
+    return new SingleByteBuff(refCnt, buffer);
   }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/HBaseReferenceCounted.java
similarity index 63%
copy from hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
copy to hbase-common/src/main/java/org/apache/hadoop/hbase/nio/HBaseReferenceCounted.java
index 80172b2..b7a88be 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/HBaseReferenceCounted.java
@@ -17,33 +17,34 @@
  */
 package org.apache.hadoop.hbase.nio;
 
-import org.apache.hadoop.hbase.io.ByteBuffAllocator;
-import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.yetus.audience.InterfaceAudience;
 
-import org.apache.hbase.thirdparty.io.netty.util.AbstractReferenceCounted;
 import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
 
 /**
- * Maintain an reference count integer inside to track life cycle of {@link ByteBuff}, if the
- * reference count become 0, it'll call {@link Recycler#free()} once.
+ * The HBaseReferenceCounted disabled several methods in Netty's {@link ReferenceCounted}, because
+ * those methods are unlikely to be used.
  */
 @InterfaceAudience.Private
-class RefCnt extends AbstractReferenceCounted {
+public interface HBaseReferenceCounted extends ReferenceCounted {
 
-  private Recycler recycler = ByteBuffAllocator.NONE;
+  @Override
+  default HBaseReferenceCounted retain(int increment) {
+    throw new UnsupportedOperationException();
+  }
 
-  RefCnt(Recycler recycler) {
-    this.recycler = recycler;
+  @Override
+  default boolean release(int increment) {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  protected final void deallocate() {
-    this.recycler.free();
+  default HBaseReferenceCounted touch() {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public final ReferenceCounted touch(Object hint) {
+  default HBaseReferenceCounted touch(Object hint) {
     throw new UnsupportedOperationException();
   }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
index e9eadc7..186d9ba 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
@@ -61,7 +61,7 @@ public class MultiByteBuff extends ByteBuff {
     this(new RefCnt(recycler), items);
   }
 
-  private MultiByteBuff(RefCnt refCnt, ByteBuffer... items) {
+  MultiByteBuff(RefCnt refCnt, ByteBuffer... items) {
     this.refCnt = refCnt;
     assert items != null;
     assert items.length > 0;
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
index 80172b2..91c6ee7 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
@@ -29,11 +29,25 @@ import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
  * reference count become 0, it'll call {@link Recycler#free()} once.
  */
 @InterfaceAudience.Private
-class RefCnt extends AbstractReferenceCounted {
+public class RefCnt extends AbstractReferenceCounted {
 
   private Recycler recycler = ByteBuffAllocator.NONE;
 
-  RefCnt(Recycler recycler) {
+  /**
+   * Create an {@link RefCnt} with an initial reference count = 1. If the reference count become
+   * zero, the recycler will do nothing. Usually, an Heap {@link ByteBuff} will use this kind of
+   * refCnt to track its life cycle, it help to abstract the code path although it's meaningless to
+   * use an refCnt for heap ByteBuff.
+   */
+  public static RefCnt create() {
+    return new RefCnt(ByteBuffAllocator.NONE);
+  }
+
+  public static RefCnt create(Recycler recycler) {
+    return new RefCnt(recycler);
+  }
+
+  public RefCnt(Recycler recycler) {
     this.recycler = recycler;
   }
 
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
index 7205251..36a83a0 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
@@ -57,7 +57,7 @@ public class SingleByteBuff extends ByteBuff {
     this(new RefCnt(recycler), buf);
   }
 
-  private SingleByteBuff(RefCnt refCnt, ByteBuffer buf) {
+  SingleByteBuff(RefCnt refCnt, ByteBuffer buf) {
     this.refCnt = refCnt;
     this.buf = buf;
     if (buf.hasArray()) {
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
index e5a0b13..42d1bf4 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
@@ -192,15 +192,17 @@ public class ByteBufferArray {
   }
 
   /**
-   * Creates a ByteBuff from a given array of ByteBuffers from the given offset to the length
+   * Creates a sub-array from a given array of ByteBuffers from the given offset to the length
    * specified. For eg, if there are 4 buffers forming an array each with length 10 and if we call
-   * asSubBuffer(5, 10) then we will create an MBB consisting of two BBs and the first one be a BB
-   * from 'position' 5 to a 'length' 5 and the 2nd BB will be from 'position' 0 to 'length' 5.
+   * asSubByteBuffers(5, 10) then we will create an sub-array consisting of two BBs and the first
+   * one be a BB from 'position' 5 to a 'length' 5 and the 2nd BB will be from 'position' 0 to
+   * 'length' 5.
    * @param offset the position in the whole array which is composited by multiple byte buffers.
    * @param len the length of bytes
-   * @return a ByteBuff formed from the underlying ByteBuffers
+   * @return the underlying ByteBuffers, each ByteBuffer is a slice from the backend and will have a
+   *         zero position.
    */
-  public ByteBuff asSubByteBuff(long offset, final int len) {
+  public ByteBuffer[] asSubByteBuffers(long offset, final int len) {
     BufferIterator it = new BufferIterator(offset, len);
     ByteBuffer[] mbb = new ByteBuffer[it.getBufferCount()];
     for (int i = 0; i < mbb.length; i++) {
@@ -208,7 +210,7 @@ public class ByteBufferArray {
       mbb[i] = it.next();
     }
     assert it.getSum() == len;
-    return ByteBuff.wrap(mbb);
+    return mbb;
   }
 
   /**
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
index 0534924..40f8b65 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferArray.java
@@ -51,7 +51,7 @@ public class TestByteBufferArray {
   public void testAsSubBufferWhenEndOffsetLandInLastBuffer() throws Exception {
     int capacity = 4 * 1024 * 1024;
     ByteBufferArray array = new ByteBufferArray(capacity, ALLOC);
-    ByteBuff subBuf = array.asSubByteBuff(0, capacity);
+    ByteBuff subBuf = ByteBuff.wrap(array.asSubByteBuffers(0, capacity));
     subBuf.position(capacity - 1);// Position to the last byte
     assertTrue(subBuf.hasRemaining());
     // Read last byte
@@ -179,7 +179,7 @@ public class TestByteBufferArray {
   }
 
   private void testAsSubByteBuff(ByteBufferArray array, int off, int len, boolean isMulti) {
-    ByteBuff ret = array.asSubByteBuff(off, len);
+    ByteBuff ret = ByteBuff.wrap(array.asSubByteBuffers(off, len));
     if (isMulti) {
       assertTrue(ret instanceof MultiByteBuff);
     } else {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
index 02c7b17..bf3a279 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheUtil.java
@@ -228,8 +228,8 @@ public class BlockCacheUtil {
    */
   public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
       BlockCacheKey cacheKey, Cacheable newBlock) {
+    // NOTICE: The getBlock has retained the existingBlock inside.
     Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
-    existingBlock.retain();
     try {
       int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
       if (comparison < 0) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
index 93b520e..9b4b38f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
@@ -21,10 +21,9 @@ package org.apache.hadoop.hbase.io.hfile;
 
 import java.nio.ByteBuffer;
 
-import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.HeapSize;
-
-import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
+import org.apache.hadoop.hbase.nio.HBaseReferenceCounted;
+import org.apache.yetus.audience.InterfaceAudience;
 
 /**
  * Cacheable is an interface that allows for an object to be cached. If using an
@@ -36,7 +35,7 @@ import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
  *
  */
 @InterfaceAudience.Private
-public interface Cacheable extends HeapSize, ReferenceCounted {
+public interface Cacheable extends HeapSize, HBaseReferenceCounted {
   /**
    * Returns the length of the ByteBuffer required to serialized the object. If the
    * object cannot be serialized, it should return 0.
@@ -87,10 +86,6 @@ public interface Cacheable extends HeapSize, ReferenceCounted {
     return this;
   }
 
-  default Cacheable retain(int increment) {
-    throw new UnsupportedOperationException();
-  }
-
   /**
    * Reference count of this Cacheable.
    */
@@ -106,16 +101,4 @@ public interface Cacheable extends HeapSize, ReferenceCounted {
   default boolean release() {
     return false;
   }
-
-  default boolean release(int increment) {
-    throw new UnsupportedOperationException();
-  }
-
-  default ReferenceCounted touch() {
-    throw new UnsupportedOperationException();
-  }
-
-  default ReferenceCounted touch(Object hint) {
-    throw new UnsupportedOperationException();
-  }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index e5e57f5..cb01540 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -386,9 +386,10 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
   }
 
   @VisibleForTesting
-  public int getRefCount(BlockCacheKey cacheKey) {
+  public int getRpcRefCount(BlockCacheKey cacheKey) {
     return (this.l2Cache instanceof BucketCache)
-        ? ((BucketCache) this.l2Cache).getRefCount(cacheKey) : 0;
+        ? ((BucketCache) this.l2Cache).getRpcRefCount(cacheKey)
+        : 0;
   }
 
   public FirstLevelBlockCache getFirstLevelCache() {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 2fe9255..2c8fa4d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -274,7 +274,7 @@ public class HFileBlock implements Cacheable {
         newByteBuff = buf.slice();
       } else {
         int len = buf.limit();
-        newByteBuff = new SingleByteBuff(ByteBuffer.allocate(len));
+        newByteBuff = ByteBuff.wrap(ByteBuffer.allocate(len));
         newByteBuff.put(0, buf, buf.position(), len);
       }
       // Read out the BLOCK_METADATA_SPACE content and shove into our HFileBlock.
@@ -323,7 +323,7 @@ public class HFileBlock implements Cacheable {
       that.prevBlockOffset, that.offset, that.onDiskDataSizeWithHeader, that.nextBlockOnDiskSize,
       that.fileContext, that.allocator);
     if (bufCopy) {
-      this.buf = new SingleByteBuff(ByteBuffer.wrap(that.buf.toBytes(0, that.buf.limit())));
+      this.buf = ByteBuff.wrap(ByteBuffer.wrap(that.buf.toBytes(0, that.buf.limit())));
     } else {
       this.buf = that.buf.duplicate();
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
index 09d5750..2883ff2 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
@@ -28,7 +28,6 @@ import java.util.Set;
 import java.util.concurrent.atomic.LongAdder;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
-import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 91f3986..a99af7c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -22,10 +22,8 @@ package org.apache.hadoop.hbase.io.hfile.bucket;
 
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Comparator;
@@ -44,7 +42,6 @@ import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.LongAdder;
 import java.util.concurrent.locks.Lock;
@@ -54,6 +51,9 @@ import java.util.function.Consumer;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
@@ -62,18 +62,15 @@ import org.apache.hadoop.hbase.io.hfile.BlockPriority;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheStats;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.io.hfile.CachedBlock;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
 import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.RefCnt;
 import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.HasThread;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
 import org.apache.hadoop.hbase.util.IdReadWriteLock.ReferenceType;
-import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -82,6 +79,7 @@ import org.slf4j.LoggerFactory;
 import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
 import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
+
 import org.apache.hadoop.hbase.shaded.protobuf.generated.BucketCacheProtos;
 
 /**
@@ -204,23 +202,13 @@ public class BucketCache implements BlockCache, HeapSize {
   @VisibleForTesting
   transient final IdReadWriteLock<Long> offsetLock = new IdReadWriteLock<>(ReferenceType.SOFT);
 
-  private final NavigableSet<BlockCacheKey> blocksByHFile =
-      new ConcurrentSkipListSet<>(new Comparator<BlockCacheKey>() {
-        @Override
-        public int compare(BlockCacheKey a, BlockCacheKey b) {
-          int nameComparison = a.getHfileName().compareTo(b.getHfileName());
-          if (nameComparison != 0) {
-            return nameComparison;
-          }
-
-          if (a.getOffset() == b.getOffset()) {
-            return 0;
-          } else if (a.getOffset() < b.getOffset()) {
-            return -1;
-          }
-          return 1;
-        }
-      });
+  private final NavigableSet<BlockCacheKey> blocksByHFile = new ConcurrentSkipListSet<>((a, b) -> {
+    int nameComparison = a.getHfileName().compareTo(b.getHfileName());
+    if (nameComparison != 0) {
+      return nameComparison;
+    }
+    return Long.compare(a.getOffset(), b.getOffset());
+  });
 
   /** Statistics thread schedule pool (for heavy debugging, could remove) */
   private transient final ScheduledExecutorService scheduleThreadPool =
@@ -249,16 +237,14 @@ public class BucketCache implements BlockCache, HeapSize {
   private float memoryFactor;
 
   public BucketCache(String ioEngineName, long capacity, int blockSize, int[] bucketSizes,
-      int writerThreadNum, int writerQLen, String persistencePath) throws FileNotFoundException,
-      IOException {
+      int writerThreadNum, int writerQLen, String persistencePath) throws IOException {
     this(ioEngineName, capacity, blockSize, bucketSizes, writerThreadNum, writerQLen,
-      persistencePath, DEFAULT_ERROR_TOLERATION_DURATION, HBaseConfiguration.create());
+        persistencePath, DEFAULT_ERROR_TOLERATION_DURATION, HBaseConfiguration.create());
   }
 
   public BucketCache(String ioEngineName, long capacity, int blockSize, int[] bucketSizes,
-                     int writerThreadNum, int writerQLen, String persistencePath, int ioErrorsTolerationDuration,
-                     Configuration conf)
-      throws FileNotFoundException, IOException {
+      int writerThreadNum, int writerQLen, String persistencePath, int ioErrorsTolerationDuration,
+      Configuration conf) throws IOException {
     this.ioEngine = getIOEngineFromName(ioEngineName, capacity, persistencePath);
     this.writerThreads = new WriterThread[writerThreadNum];
     long blockNumCapacity = capacity / blockSize;
@@ -444,7 +430,8 @@ public class BucketCache implements BlockCache, HeapSize {
     LOG.trace("Caching key={}, item={}", cacheKey, cachedItem);
     // Stuff the entry into the RAM cache so it can get drained to the persistent store
     RAMQueueEntry re =
-        new RAMQueueEntry(cacheKey, cachedItem, accessCount.incrementAndGet(), inMemory);
+        new RAMQueueEntry(cacheKey, cachedItem, accessCount.incrementAndGet(), inMemory,
+              createRecycler(cacheKey));
     /**
      * Don't use ramCache.put(cacheKey, re) here. because there may be a existing entry with same
      * key in ramCache, the heap size of bucket cache need to update if replacing entry from
@@ -509,21 +496,16 @@ public class BucketCache implements BlockCache, HeapSize {
         // maybe changed. If we lock BlockCacheKey instead of offset, then we can only check
         // existence here.
         if (bucketEntry.equals(backingMap.get(key))) {
-          // TODO : change this area - should be removed after server cells and
-          // 12295 are available
-          int len = bucketEntry.getLength();
-          if (LOG.isTraceEnabled()) {
-            LOG.trace("Read offset=" + bucketEntry.offset() + ", len=" + len);
-          }
-          Cacheable cachedBlock = ioEngine.read(bucketEntry.offset(), len,
-              bucketEntry.deserializerReference());
-          long timeTaken = System.nanoTime() - start;
+          // Read the block from IOEngine based on the bucketEntry's offset and length, NOTICE: the
+          // block will use the refCnt of bucketEntry, which means if two HFileBlock mapping to
+          // the same BucketEntry, then all of the three will share the same refCnt.
+          Cacheable cachedBlock = ioEngine.read(bucketEntry);
+          // RPC start to reference, so retain here.
+          cachedBlock.retain();
+          // Update the cache statistics.
           if (updateCacheMetrics) {
             cacheStats.hit(caching, key.isPrimary(), key.getBlockType());
-            cacheStats.ioHit(timeTaken);
-          }
-          if (cachedBlock.getMemoryType() == MemoryType.SHARED) {
-            bucketEntry.incrementRefCountAndGet();
+            cacheStats.ioHit(System.nanoTime() - start);
           }
           bucketEntry.access(accessCount.incrementAndGet());
           if (this.ioErrorStartTime > 0) {
@@ -554,40 +536,58 @@ public class BucketCache implements BlockCache, HeapSize {
     }
   }
 
+  /**
+   * Try to evict the block from {@link BlockCache} by force. We'll call this in few cases:<br>
+   * 1. Close an HFile, and clear all cached blocks. <br>
+   * 2. Call {@link Admin#clearBlockCache(TableName)} to clear all blocks for a given table.<br>
+   * <p>
+   * Firstly, we'll try to remove the block from RAMCache. If it doesn't exist in RAMCache, then try
+   * to evict from backingMap. Here we only need to free the reference from bucket cache by calling
+   * {@link BucketEntry#markedAsEvicted}. If there're still some RPC referring this block, block can
+   * only be de-allocated when all of them release the block.
+   * <p>
+   * NOTICE: we need to grab the write offset lock firstly before releasing the reference from
+   * bucket cache. if we don't, we may read an {@link BucketEntry} with refCnt = 0 when
+   * {@link BucketCache#getBlock(BlockCacheKey, boolean, boolean, boolean)}, it's a memory leak.
+   * @param cacheKey Block to evict
+   * @return true to indicate whether we've evicted successfully or not.
+   */
   @Override
   public boolean evictBlock(BlockCacheKey cacheKey) {
-    return evictBlock(cacheKey, true);
-  }
-
-  // does not check for the ref count. Just tries to evict it if found in the
-  // bucket map
-  private boolean forceEvict(BlockCacheKey cacheKey) {
     if (!cacheEnabled) {
       return false;
     }
     boolean existed = removeFromRamCache(cacheKey);
-    BucketEntry bucketEntry = backingMap.get(cacheKey);
-    if (bucketEntry == null) {
+    BucketEntry be = backingMap.get(cacheKey);
+    if (be == null) {
       if (existed) {
         cacheStats.evicted(0, cacheKey.isPrimary());
-        return true;
-      } else {
-        return false;
       }
+      return existed;
+    } else {
+      return be.withWriteLock(offsetLock, be::markAsEvicted);
     }
-    ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntry.offset());
-    try {
-      lock.writeLock().lock();
-      if (backingMap.remove(cacheKey, bucketEntry)) {
-        blockEvicted(cacheKey, bucketEntry, !existed);
-      } else {
-        return false;
+  }
+
+  private Recycler createRecycler(BlockCacheKey cacheKey) {
+    return () -> {
+      if (!cacheEnabled) {
+        return;
       }
-    } finally {
-      lock.writeLock().unlock();
-    }
-    cacheStats.evicted(bucketEntry.getCachedTime(), cacheKey.isPrimary());
-    return true;
+      boolean existed = removeFromRamCache(cacheKey);
+      BucketEntry be = backingMap.get(cacheKey);
+      if (be == null && existed) {
+        cacheStats.evicted(0, cacheKey.isPrimary());
+      } else if (be != null) {
+        be.withWriteLock(offsetLock, () -> {
+          if (backingMap.remove(cacheKey, be)) {
+            blockEvicted(cacheKey, be, !existed);
+            cacheStats.evicted(be.getCachedTime(), cacheKey.isPrimary());
+          }
+          return null;
+        });
+      }
+    };
   }
 
   private boolean removeFromRamCache(BlockCacheKey cacheKey) {
@@ -599,53 +599,6 @@ public class BucketCache implements BlockCache, HeapSize {
     });
   }
 
-  public boolean evictBlock(BlockCacheKey cacheKey, boolean deletedBlock) {
-    if (!cacheEnabled) {
-      return false;
-    }
-    boolean existed = removeFromRamCache(cacheKey);
-    BucketEntry bucketEntry = backingMap.get(cacheKey);
-    if (bucketEntry == null) {
-      if (existed) {
-        cacheStats.evicted(0, cacheKey.isPrimary());
-        return true;
-      } else {
-        return false;
-      }
-    }
-    ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntry.offset());
-    try {
-      lock.writeLock().lock();
-      int refCount = bucketEntry.getRefCount();
-      if (refCount == 0) {
-        if (backingMap.remove(cacheKey, bucketEntry)) {
-          blockEvicted(cacheKey, bucketEntry, !existed);
-        } else {
-          return false;
-        }
-      } else {
-        if(!deletedBlock) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("This block " + cacheKey + " is still referred by " + refCount
-                + " readers. Can not be freed now");
-          }
-          return false;
-        } else {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("This block " + cacheKey + " is still referred by " + refCount
-                + " readers. Can not be freed now. Hence will mark this"
-                + " for evicting at a later point");
-          }
-          bucketEntry.markForEvict();
-        }
-      }
-    } finally {
-      lock.writeLock().unlock();
-    }
-    cacheStats.evicted(bucketEntry.getCachedTime(), cacheKey.isPrimary());
-    return true;
-  }
-
   /*
    * Statistics thread.  Periodically output cache statistics to the log.
    */
@@ -732,19 +685,17 @@ public class BucketCache implements BlockCache, HeapSize {
     if (completelyFreeBucketsNeeded != 0) {
       // First we will build a set where the offsets are reference counted, usually
       // this set is small around O(Handler Count) unless something else is wrong
-      Set<Integer> inUseBuckets = new HashSet<Integer>();
-      for (BucketEntry entry : backingMap.values()) {
-        if (entry.getRefCount() != 0) {
-          inUseBuckets.add(bucketAllocator.getBucketIndex(entry.offset()));
+      Set<Integer> inUseBuckets = new HashSet<>();
+      backingMap.forEach((k, be) -> {
+        if (be.isRpcRef()) {
+          inUseBuckets.add(bucketAllocator.getBucketIndex(be.offset()));
         }
-      }
-
-      Set<Integer> candidateBuckets = bucketAllocator.getLeastFilledBuckets(
-          inUseBuckets, completelyFreeBucketsNeeded);
+      });
+      Set<Integer> candidateBuckets =
+          bucketAllocator.getLeastFilledBuckets(inUseBuckets, completelyFreeBucketsNeeded);
       for (Map.Entry<BlockCacheKey, BucketEntry> entry : backingMap.entrySet()) {
-        if (candidateBuckets.contains(bucketAllocator
-            .getBucketIndex(entry.getValue().offset()))) {
-          evictBlock(entry.getKey(), false);
+        if (candidateBuckets.contains(bucketAllocator.getBucketIndex(entry.getValue().offset()))) {
+          entry.getValue().withWriteLock(offsetLock, entry.getValue()::markStaleAsEvicted);
         }
       }
     }
@@ -921,7 +872,9 @@ public class BucketCache implements BlockCache, HeapSize {
               // Blocks
               entries = getRAMQueueEntries(inputQueue, entries);
             } catch (InterruptedException ie) {
-              if (!cacheEnabled) break;
+              if (!cacheEnabled || !writerEnabled) {
+                break;
+              }
             }
             doDrain(entries);
           } catch (Exception ioe) {
@@ -949,13 +902,10 @@ public class BucketCache implements BlockCache, HeapSize {
     private void putIntoBackingMap(BlockCacheKey key, BucketEntry bucketEntry) {
       BucketEntry previousEntry = backingMap.put(key, bucketEntry);
       if (previousEntry != null && previousEntry != bucketEntry) {
-        ReentrantReadWriteLock lock = offsetLock.getLock(previousEntry.offset());
-        lock.writeLock().lock();
-        try {
+        previousEntry.withWriteLock(offsetLock, () -> {
           blockEvicted(key, previousEntry, false);
-        } finally {
-          lock.writeLock().unlock();
-        }
+          return null;
+        });
       }
     }
 
@@ -1049,22 +999,13 @@ public class BucketCache implements BlockCache, HeapSize {
         });
         if (!existed && bucketEntries[i] != null) {
           // Block should have already been evicted. Remove it and free space.
-          ReentrantReadWriteLock lock = offsetLock.getLock(bucketEntries[i].offset());
-          try {
-            lock.writeLock().lock();
-            int refCount = bucketEntries[i].getRefCount();
-            if (refCount == 0) {
-              if (backingMap.remove(key, bucketEntries[i])) {
-                blockEvicted(key, bucketEntries[i], false);
-              } else {
-                bucketEntries[i].markForEvict();
-              }
-            } else {
-              bucketEntries[i].markForEvict();
+          final BucketEntry bucketEntry = bucketEntries[i];
+          bucketEntry.withWriteLock(offsetLock, () -> {
+            if (backingMap.remove(key, bucketEntry)) {
+              blockEvicted(key, bucketEntry, false);
             }
-          } finally {
-            lock.writeLock().unlock();
-          }
+            return null;
+          });
         }
       }
 
@@ -1077,17 +1018,16 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   /**
-   * Blocks until elements available in {@code q} then tries to grab as many as possible
-   * before returning.
-   * @param receptacle Where to stash the elements taken from queue. We clear before we use it
-   *     just in case.
+   * Blocks until elements are available in {@code q}, then tries to grab as many as possible
+   * before returning.
+   * @param receptacle Where to stash the elements taken from the queue. We clear it before use,
+   *          just in case.
    * @param q The queue to take from.
    * @return {@code receptacle} laden with elements taken from the queue or empty if none found.
    */
   @VisibleForTesting
-  static List<RAMQueueEntry> getRAMQueueEntries(final BlockingQueue<RAMQueueEntry> q,
-      final List<RAMQueueEntry> receptacle)
-  throws InterruptedException {
+  static List<RAMQueueEntry> getRAMQueueEntries(BlockingQueue<RAMQueueEntry> q,
+      List<RAMQueueEntry> receptacle) throws InterruptedException {
     // Clear sets all entries to null and sets size to 0. We retain allocations. Presume it
     // ok even if list grew to accommodate thousands.
     receptacle.clear();
@@ -1314,155 +1254,6 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   /**
-   * Item in cache. We expect this to be where most memory goes. Java uses 8
-   * bytes just for object headers; after this, we want to use as little as
-   * possible - so we only use 8 bytes, but in order to do so we end up messing
-   * around with all this Java casting stuff. Offset stored as 5 bytes that make
-   * up the long. Doubt we'll see devices this big for ages. Offsets are divided
-   * by 256. So 5 bytes gives us 256TB or so.
-   */
-  static class BucketEntry implements Serializable {
-    private static final long serialVersionUID = -6741504807982257534L;
-
-    // access counter comparator, descending order
-    static final Comparator<BucketEntry> COMPARATOR = Comparator
-        .comparingLong(BucketEntry::getAccessCounter).reversed();
-
-    private int offsetBase;
-    private int length;
-    private byte offset1;
-
-    /**
-     * The index of the deserializer that can deserialize this BucketEntry content.
-     * See {@link CacheableDeserializerIdManager} for hosting of index to serializers.
-     */
-    byte deserialiserIndex;
-
-    private volatile long accessCounter;
-    private BlockPriority priority;
-
-    /**
-     * Time this block was cached.  Presumes we are created just before we are added to the cache.
-     */
-    private final long cachedTime = System.nanoTime();
-
-    BucketEntry(long offset, int length, long accessCounter, boolean inMemory) {
-      setOffset(offset);
-      this.length = length;
-      this.accessCounter = accessCounter;
-      if (inMemory) {
-        this.priority = BlockPriority.MEMORY;
-      } else {
-        this.priority = BlockPriority.SINGLE;
-      }
-    }
-
-    long offset() { // Java has no unsigned numbers
-      long o = ((long) offsetBase) & 0xFFFFFFFFL; //This needs the L cast otherwise it will be sign extended as a negative number.
-      o += (((long) (offset1)) & 0xFF) << 32; //The 0xFF here does not need the L cast because it is treated as a positive int.
-      return o << 8;
-    }
-
-    private void setOffset(long value) {
-      assert (value & 0xFF) == 0;
-      value >>= 8;
-      offsetBase = (int) value;
-      offset1 = (byte) (value >> 32);
-    }
-
-    public int getLength() {
-      return length;
-    }
-
-    protected CacheableDeserializer<Cacheable> deserializerReference() {
-      return CacheableDeserializerIdManager.getDeserializer(deserialiserIndex);
-    }
-
-    protected void setDeserialiserReference(CacheableDeserializer<Cacheable> deserializer) {
-      this.deserialiserIndex = (byte) deserializer.getDeserialiserIdentifier();
-    }
-
-    public long getAccessCounter() {
-      return accessCounter;
-    }
-
-    /**
-     * Block has been accessed. Update its local access counter.
-     */
-    public void access(long accessCounter) {
-      this.accessCounter = accessCounter;
-      if (this.priority == BlockPriority.SINGLE) {
-        this.priority = BlockPriority.MULTI;
-      }
-    }
-
-    public BlockPriority getPriority() {
-      return this.priority;
-    }
-
-    public long getCachedTime() {
-      return cachedTime;
-    }
-
-    protected int getRefCount() {
-      return 0;
-    }
-
-    protected int incrementRefCountAndGet() {
-      return 0;
-    }
-
-    protected int decrementRefCountAndGet() {
-      return 0;
-    }
-
-    protected boolean isMarkedForEvict() {
-      return false;
-    }
-
-    protected void markForEvict() {
-      // noop;
-    }
-  }
-
-  static class SharedMemoryBucketEntry extends BucketEntry {
-    private static final long serialVersionUID = -2187147283772338481L;
-
-    // Set this when we were not able to forcefully evict the block
-    private volatile boolean markedForEvict;
-    private AtomicInteger refCount = new AtomicInteger(0);
-
-    SharedMemoryBucketEntry(long offset, int length, long accessCounter, boolean inMemory) {
-      super(offset, length, accessCounter, inMemory);
-    }
-
-    @Override
-    protected int getRefCount() {
-      return this.refCount.get();
-    }
-
-    @Override
-    protected int incrementRefCountAndGet() {
-      return this.refCount.incrementAndGet();
-    }
-
-    @Override
-    protected int decrementRefCountAndGet() {
-      return this.refCount.decrementAndGet();
-    }
-
-    @Override
-    protected boolean isMarkedForEvict() {
-      return this.markedForEvict;
-    }
-
-    @Override
-    protected void markForEvict() {
-      this.markedForEvict = true;
-    }
-  }
-
-  /**
    * Used to group bucket entries into priority buckets. There will be a
    * BucketEntryGroup for each priority (single, multi, memory). Once bucketed,
    * the eviction algorithm takes the appropriate number of elements out of each
@@ -1491,8 +1282,9 @@ public class BucketCache implements BlockCache, HeapSize {
       // TODO avoid a cycling situation. We find no block which is not in use and so no way to free
       // What to do then? Caching attempt fail? Need some changes in cacheBlock API?
       while ((entry = queue.pollLast()) != null) {
-        if (evictBlock(entry.getKey(), false)) {
-          freedBytes += entry.getValue().getLength();
+        BucketEntry be = entry.getValue();
+        if (be.withWriteLock(offsetLock, be::markStaleAsEvicted)) {
+          freedBytes += be.getLength();
         }
         if (freedBytes >= toFree) {
           return freedBytes;
@@ -1515,17 +1307,19 @@ public class BucketCache implements BlockCache, HeapSize {
    */
   @VisibleForTesting
   static class RAMQueueEntry {
-    private BlockCacheKey key;
-    private Cacheable data;
+    private final BlockCacheKey key;
+    private final Cacheable data;
     private long accessCounter;
     private boolean inMemory;
+    private final Recycler recycler;
 
-    public RAMQueueEntry(BlockCacheKey bck, Cacheable data, long accessCounter,
-        boolean inMemory) {
+    RAMQueueEntry(BlockCacheKey bck, Cacheable data, long accessCounter, boolean inMemory,
+        Recycler recycler) {
       this.key = bck;
       this.data = data;
       this.accessCounter = accessCounter;
       this.inMemory = inMemory;
+      this.recycler = recycler;
     }
 
     public Cacheable getData() {
@@ -1540,30 +1334,19 @@ public class BucketCache implements BlockCache, HeapSize {
       this.accessCounter = accessCounter;
     }
 
-    private BucketEntry getBucketEntry(IOEngine ioEngine, long offset, int len) {
-      if (ioEngine.usesSharedMemory()) {
-        if (UnsafeAvailChecker.isAvailable()) {
-          return new UnsafeSharedMemoryBucketEntry(offset, len, accessCounter, inMemory);
-        } else {
-          return new SharedMemoryBucketEntry(offset, len, accessCounter, inMemory);
-        }
-      } else {
-        return new BucketEntry(offset, len, accessCounter, inMemory);
-      }
-    }
-
-    public BucketEntry writeToCache(final IOEngine ioEngine, final BucketAllocator bucketAllocator,
+    public BucketEntry writeToCache(final IOEngine ioEngine, final BucketAllocator alloc,
         final LongAdder realCacheSize) throws IOException {
       int len = data.getSerializedLength();
       // This cacheable thing can't be serialized
       if (len == 0) {
         return null;
       }
-      long offset = bucketAllocator.allocateBlock(len);
+      long offset = alloc.allocateBlock(len);
       boolean succ = false;
-      BucketEntry bucketEntry;
+      BucketEntry bucketEntry = null;
       try {
-        bucketEntry = getBucketEntry(ioEngine, offset, len);
+        bucketEntry =
+            new BucketEntry(offset, len, accessCounter, inMemory, RefCnt.create(recycler));
         bucketEntry.setDeserialiserReference(data.getDeserializer());
         if (data instanceof HFileBlock) {
           // If an instance of HFileBlock, save on some allocations.
@@ -1581,7 +1364,7 @@ public class BucketCache implements BlockCache, HeapSize {
         succ = true;
       } finally {
         if (!succ) {
-          bucketAllocator.freeBlock(offset);
+          alloc.freeBlock(offset);
         }
       }
       realCacheSize.add(len);
@@ -1697,25 +1480,11 @@ public class BucketCache implements BlockCache, HeapSize {
     return null;
   }
 
-  @Override
-  public void returnBlock(BlockCacheKey cacheKey, Cacheable block) {
-    block.release();
-    if (block.getMemoryType() == MemoryType.SHARED) {
-      BucketEntry bucketEntry = backingMap.get(cacheKey);
-      if (bucketEntry != null) {
-        int refCount = bucketEntry.decrementRefCountAndGet();
-        if (refCount == 0 && bucketEntry.isMarkedForEvict()) {
-          evictBlock(cacheKey);
-        }
-      }
-    }
-  }
-
   @VisibleForTesting
-  public int getRefCount(BlockCacheKey cacheKey) {
+  public int getRpcRefCount(BlockCacheKey cacheKey) {
     BucketEntry bucketEntry = backingMap.get(cacheKey);
     if (bucketEntry != null) {
-      return bucketEntry.getRefCount();
+      return bucketEntry.refCnt() - (bucketEntry.markedAsEvicted.get() ? 0 : 1);
     }
     return 0;
   }
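
To make the renamed getRpcRefCount above easier to follow, here is the arithmetic spelled out as a
hedged sketch (the refCnt values are hypothetical):

    // The backingMap itself holds one reference to a live entry until the entry is marked as
    // evicted, so the RPC-only count is the total refCnt minus that map reference.
    //   refCnt() == 3, markedAsEvicted == false  ->  rpcRefCount == 2 (two RPC readers)
    //   refCnt() == 1, markedAsEvicted == false  ->  rpcRefCount == 0 (only the map reference)
    //   refCnt() == 1, markedAsEvicted == true   ->  rpcRefCount == 1 (one straggling RPC reader)
    int rpcRefCount = bucketEntry.refCnt() - (bucketEntry.markedAsEvicted.get() ? 0 : 1);
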
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
new file mode 100644
index 0000000..b6e83d5
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
@@ -0,0 +1,239 @@
+/**
+ * Copyright The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile.bucket;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Comparator;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.hadoop.hbase.io.hfile.BlockPriority;
+import org.apache.hadoop.hbase.io.hfile.Cacheable;
+import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
+import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
+import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.HBaseReferenceCounted;
+import org.apache.hadoop.hbase.nio.RefCnt;
+import org.apache.hadoop.hbase.util.IdReadWriteLock;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Item in cache. We expect this to be where most memory goes. Java uses 8 bytes just for object
+ * headers; after this, we want to use as little as possible - so we only use 8 bytes, but in order
+ * to do so we end up messing around with all this Java casting stuff. Offset stored as 5 bytes that
+ * make up the long. Doubt we'll see devices this big for ages. Offsets are divided by 256. So 5
+ * bytes gives us 256TB or so.
+ */
+@InterfaceAudience.Private
+class BucketEntry implements HBaseReferenceCounted {
+  // access counter comparator, descending order
+  static final Comparator<BucketEntry> COMPARATOR =
+      Comparator.comparingLong(BucketEntry::getAccessCounter).reversed();
+
+  private int offsetBase;
+  private int length;
+  private byte offset1;
+
+  /**
+   * The index of the deserializer that can deserialize this BucketEntry content. See
+   * {@link CacheableDeserializerIdManager} for hosting of index to serializers.
+   */
+  byte deserialiserIndex;
+
+  private volatile long accessCounter;
+  private BlockPriority priority;
+
+  /**
+   * The RefCnt tracks how many paths are referring to this {@link BucketEntry}: each RPC read
+   * path counts as one path, and the {@link BucketCache#backingMap} reference is also counted as
+   * a path. NOTICE that if two read RPCs hit the same {@link BucketEntry}, the HFileBlocks the
+   * two RPCs refer to will share the same refCnt instance with the BucketEntry. The refCnt
+   * increases or decreases as follows: <br>
+   * 1. When the writer thread flushes the block into the IOEngine and adds the bucketEntry into
+   * the backingMap, refCnt++; <br>
+   * 2. If the BucketCache evicts the block and removes the bucketEntry from the backingMap,
+   * refCnt--; this usually happens when an HFile is closing or someone forcibly clears the bucket
+   * cache. <br>
+   * 3. When a read RPC path starts to refer to the block backed by the memory area in the
+   * bucketEntry, refCnt++; <br>
+   * 4. When the read RPC path has shipped the response and releases the block, refCnt--. <br>
+   * Once the refCnt decreases to zero, the {@link BucketAllocator} will free the block area.
+   */
+  private final RefCnt refCnt;
+  final AtomicBoolean markedAsEvicted;
+
+  /**
+   * Time this block was cached. Presumes we are created just before we are added to the cache.
+   */
+  private final long cachedTime = System.nanoTime();
+
+  BucketEntry(long offset, int length, long accessCounter, boolean inMemory) {
+    this(offset, length, accessCounter, inMemory, RefCnt.create());
+  }
+
+  BucketEntry(long offset, int length, long accessCounter, boolean inMemory, RefCnt refCnt) {
+    setOffset(offset);
+    this.length = length;
+    this.accessCounter = accessCounter;
+    this.priority = inMemory ? BlockPriority.MEMORY : BlockPriority.MULTI;
+    this.refCnt = refCnt;
+    this.markedAsEvicted = new AtomicBoolean(false);
+  }
+
+  long offset() {
+    // Java has no unsigned numbers, so this needs the L cast otherwise it will be sign extended
+    // as a negative number.
+    long o = ((long) offsetBase) & 0xFFFFFFFFL;
+    // The 0xFF here does not need the L cast because it is treated as a positive int.
+    o += (((long) (offset1)) & 0xFF) << 32;
+    return o << 8;
+  }
+
+  private void setOffset(long value) {
+    assert (value & 0xFF) == 0;
+    value >>= 8;
+    offsetBase = (int) value;
+    offset1 = (byte) (value >> 32);
+  }
+
+  public int getLength() {
+    return length;
+  }
+
+  CacheableDeserializer<Cacheable> deserializerReference() {
+    return CacheableDeserializerIdManager.getDeserializer(deserialiserIndex);
+  }
+
+  void setDeserialiserReference(CacheableDeserializer<Cacheable> deserializer) {
+    this.deserialiserIndex = (byte) deserializer.getDeserialiserIdentifier();
+  }
+
+  long getAccessCounter() {
+    return accessCounter;
+  }
+
+  /**
+   * Block has been accessed. Update its local access counter.
+   */
+  void access(long accessCounter) {
+    this.accessCounter = accessCounter;
+    if (this.priority == BlockPriority.SINGLE) {
+      this.priority = BlockPriority.MULTI;
+    }
+  }
+
+  public BlockPriority getPriority() {
+    return this.priority;
+  }
+
+  long getCachedTime() {
+    return cachedTime;
+  }
+
+  /**
+   * The {@link BucketCache} may try to release its reference to this BucketEntry several times,
+   * so we must make this method idempotent; otherwise we would decrease the RPC's reference count
+   * too early and an RPC memory leak could happen.
+   * @return true if we deallocate this entry successfully.
+   */
+  boolean markAsEvicted() {
+    if (markedAsEvicted.compareAndSet(false, true)) {
+      return this.release();
+    }
+    return false;
+  }
+
+  /**
+   * Mark as evicted only when there are NO RPC references. Mainly used for eviction when the
+   * cache size exceeds the maximum acceptable size.
+   * @return true if we deallocate this entry successfully.
+   */
+  boolean markStaleAsEvicted() {
+    if (!markedAsEvicted.get() && this.refCnt() == 1) {
+      // The only reference was coming from backingMap, now release the stale entry.
+      return this.markAsEvicted();
+    }
+    return false;
+  }
+
+  /**
+   * Check whether some RPC path is referring to this block. There are two cases: <br>
+   * 1. If the current refCnt is greater than 1, there must be at least one referring RPC path; <br>
+   * 2. If the current refCnt is equal to 1 and markedAsEvicted is true, the backingMap has
+   * released its reference, so the remaining reference can only come from an RPC path. <br>
+   * We use this check to decide whether we can free the block area: when the cached size exceeds
+   * the acceptable size, our eviction policy will choose stale blocks without any RPC reference,
+   * and blocks referred to by RPCs will be excluded.
+   * @return true to indicate some RPC is referring to the block.
+   */
+  boolean isRpcRef() {
+    boolean evicted = markedAsEvicted.get();
+    return this.refCnt() > 1 || (evicted && refCnt() == 1);
+  }
+
+  Cacheable wrapAsCacheable(ByteBuffer[] buffers, MemoryType memoryType) throws IOException {
+    ByteBuff buf = ByteBuff.wrap(buffers, this.refCnt);
+    return this.deserializerReference().deserialize(buf, true, memoryType);
+  }
+
+  interface BucketEntryHandler<T> {
+    T handle();
+  }
+
+  <T> T withWriteLock(IdReadWriteLock<Long> offsetLock, BucketEntryHandler<T> handler) {
+    ReentrantReadWriteLock lock = offsetLock.getLock(this.offset());
+    try {
+      lock.writeLock().lock();
+      return handler.handle();
+    } finally {
+      lock.writeLock().unlock();
+    }
+  }
+
+  @Override
+  public int refCnt() {
+    return this.refCnt.refCnt();
+  }
+
+  @Override
+  public BucketEntry retain() {
+    refCnt.retain();
+    return this;
+  }
+
+  /**
+   * There are three cases that release the refCnt: <br>
+   * 1. BucketCache#evictBlock: it releases the backingMap's reference by force because we're
+   * closing a file, clearing the bucket cache, or some corruption happened. Once all RPC
+   * references are gone, the area in the bucketAllocator is freed. <br>
+   * 2. BucketCache#returnBlock: when an RPC has shipped, we release the block; only when the
+   * backingMap has also released its refCnt (case 1 does this) and no other RPC reference remains
+   * will the area in the bucketAllocator be freed. <br>
+   * 3. Evicting blocks without any RPC reference when the cache size is exceeded: we only free
+   * those blocks with a zero RPC reference count, as {@link BucketEntry#markStaleAsEvicted()} does.
+   * @return true to indicate we've decreased the count to zero and done the de-allocation.
+   */
+  @Override
+  public boolean release() {
+    return refCnt.release();
+  }
+}
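
To tie the numbered cases in the RefCnt javadoc above together, here is a minimal, conceptual
walk-through of the intended lifecycle. It is not part of the patch: it assumes code living in the
same package (BucketEntry is package-private), a bucket-aligned offset, and a Recycler that frees
the corresponding area in the BucketAllocator.

    BucketEntry entry = new BucketEntry(1024L, 4096, /* accessCounter */ 1L, /* inMemory */ false,
        RefCnt.create(recycler));   // recycler is assumed to free the bucket area at offset 1024
    // Case 1: the writer thread flushed the block and put the entry into backingMap: refCnt == 1.
    // Case 3: a read RPC starts referring to the block backed by this entry's memory area:
    entry.retain();                 // refCnt == 2, isRpcRef() == true
    // Case 2: the cache evicts the entry (HFile closed, cache cleared, ...): the backingMap
    // reference is dropped, but nothing is freed yet because the RPC still holds a reference.
    entry.markAsEvicted();          // returns false; refCnt == 1, markedAsEvicted == true
    // Case 4: the RPC ships its response and releases the block: refCnt drops to zero and the
    // recycler frees the area in the BucketAllocator.
    entry.release();                // returns true: deallocated
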
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
index 35daff7..72765de 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
@@ -50,9 +50,9 @@ final class BucketProtoUtils {
   }
 
   private static BucketCacheProtos.BackingMap toPB(
-      Map<BlockCacheKey, BucketCache.BucketEntry> backingMap) {
+      Map<BlockCacheKey, BucketEntry> backingMap) {
     BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
-    for (Map.Entry<BlockCacheKey, BucketCache.BucketEntry> entry : backingMap.entrySet()) {
+    for (Map.Entry<BlockCacheKey, BucketEntry> entry : backingMap.entrySet()) {
       builder.addEntry(BucketCacheProtos.BackingMapEntry.newBuilder()
           .setKey(toPB(entry.getKey()))
           .setValue(toPB(entry.getValue()))
@@ -101,7 +101,7 @@ final class BucketProtoUtils {
     }
   }
 
-  private static BucketCacheProtos.BucketEntry toPB(BucketCache.BucketEntry entry) {
+  private static BucketCacheProtos.BucketEntry toPB(BucketEntry entry) {
     return BucketCacheProtos.BucketEntry.newBuilder()
         .setOffset(entry.offset())
         .setLength(entry.getLength())
@@ -124,16 +124,16 @@ final class BucketProtoUtils {
     }
   }
 
-  static ConcurrentHashMap<BlockCacheKey, BucketCache.BucketEntry> fromPB(
+  static ConcurrentHashMap<BlockCacheKey, BucketEntry> fromPB(
       Map<Integer, String> deserializers, BucketCacheProtos.BackingMap backingMap)
       throws IOException {
-    ConcurrentHashMap<BlockCacheKey, BucketCache.BucketEntry> result = new ConcurrentHashMap<>();
+    ConcurrentHashMap<BlockCacheKey, BucketEntry> result = new ConcurrentHashMap<>();
     for (BucketCacheProtos.BackingMapEntry entry : backingMap.getEntryList()) {
       BucketCacheProtos.BlockCacheKey protoKey = entry.getKey();
       BlockCacheKey key = new BlockCacheKey(protoKey.getHfilename(), protoKey.getOffset(),
           protoKey.getPrimaryReplicaBlock(), fromPb(protoKey.getBlockType()));
       BucketCacheProtos.BucketEntry protoValue = entry.getValue();
-      BucketCache.BucketEntry value = new BucketCache.BucketEntry(
+      BucketEntry value = new BucketEntry(
           protoValue.getOffset(),
           protoValue.getLength(),
           protoValue.getAccessCounter(),
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
index fa8b184..4e1b913 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
 
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.util.ByteBufferAllocator;
@@ -100,16 +99,15 @@ public class ByteBufferIOEngine implements IOEngine {
   }
 
   @Override
-  public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
-      throws IOException {
-    ByteBuff dstBuffer = bufferArray.asSubByteBuff(offset, length);
+  public Cacheable read(BucketEntry be) throws IOException {
+    ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength());
     // Here the buffer that is created directly refers to the buffer in the actual buckets.
     // When any cell is referring to the blocks created out of these buckets then it means that
     // those cells are referring to a shared memory area which if evicted by the BucketCache would
     // lead to corruption of results. Hence we set the type of the buffer as SHARED_MEMORY
     // so that the readers using this block are aware of this fact and do the necessary action
     // to prevent eviction till the results are either consumed or copied
-    return deserializer.deserialize(dstBuffer, true, MemoryType.SHARED);
+    return be.wrapAsCacheable(buffers, MemoryType.SHARED);
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/CachedEntryQueue.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/CachedEntryQueue.java
index 29721ab..d8c677c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/CachedEntryQueue.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/CachedEntryQueue.java
@@ -21,11 +21,9 @@ package org.apache.hadoop.hbase.io.hfile.bucket;
 
 import java.util.Comparator;
 import java.util.Map;
-import java.util.Map.Entry;
 
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
-import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;
 
 import org.apache.hbase.thirdparty.com.google.common.collect.MinMaxPriorityQueue;
 
@@ -43,6 +41,9 @@ import org.apache.hbase.thirdparty.com.google.common.collect.MinMaxPriorityQueue
 @InterfaceAudience.Private
 public class CachedEntryQueue {
 
+  private static final Comparator<Map.Entry<BlockCacheKey, BucketEntry>> COMPARATOR =
+    (a, b) -> BucketEntry.COMPARATOR.compare(a.getValue(), b.getValue());
+
   private MinMaxPriorityQueue<Map.Entry<BlockCacheKey, BucketEntry>> queue;
 
   private long cacheSize;
@@ -57,15 +58,7 @@ public class CachedEntryQueue {
     if (initialSize == 0) {
       initialSize++;
     }
-    queue = MinMaxPriorityQueue.orderedBy(new Comparator<Map.Entry<BlockCacheKey, BucketEntry>>() {
-
-      @Override
-      public int compare(Entry<BlockCacheKey, BucketEntry> entry1,
-          Entry<BlockCacheKey, BucketEntry> entry2) {
-        return BucketEntry.COMPARATOR.compare(entry1.getValue(), entry2.getValue());
-      }
-
-    }).expectedSize(initialSize).create();
+    queue = MinMaxPriorityQueue.orderedBy(COMPARATOR).expectedSize(initialSize).create();
     cacheSize = 0;
     this.maxSize = maxSize;
   }
@@ -112,12 +105,4 @@ public class CachedEntryQueue {
   public Map.Entry<BlockCacheKey, BucketEntry> pollLast() {
     return queue.pollLast();
   }
-
-  /**
-   * Total size of all elements in this queue.
-   * @return size of all elements currently in queue, in bytes
-   */
-  public long cacheSize() {
-    return cacheSize;
-  }
 }
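
A side note on the comparator refactor above: because COMPARATOR sorts by access counter in
descending order, pollLast() in the eviction loop (the BucketEntryGroup#free hunk earlier) hands
back the coldest entry first. A toy sketch with Guava's MinMaxPriorityQueue, the same structure
this class uses, and hypothetical access-counter values:

    MinMaxPriorityQueue<Long> demo = MinMaxPriorityQueue
        .orderedBy(Comparator.<Long>naturalOrder().reversed())   // same "descending" trick
        .expectedSize(3).create();
    demo.add(42L);
    demo.add(17L);
    demo.add(99L);
    assert demo.peekFirst().longValue() == 99L;   // head of the queue is the "hottest" value
    assert demo.pollLast().longValue() == 17L;    // the tail yields the "coldest" value first
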
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
index b8e29c6..af749d7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ExclusiveMemoryMmapIOEngine.java
@@ -16,19 +16,16 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
-import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
-
 import java.io.IOException;
+import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
- * IO engine that stores data to a file on the local block device using memory mapping
- * mechanism
+ * IO engine that stores data to a file on the local block device using a memory mapping mechanism
  */
 @InterfaceAudience.Private
 public class ExclusiveMemoryMmapIOEngine extends FileMmapIOEngine {
@@ -38,10 +35,10 @@ public class ExclusiveMemoryMmapIOEngine extends FileMmapIOEngine {
   }
 
   @Override
-  public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
-      throws IOException {
-    ByteBuff dst = HEAP.allocate(length);
-    bufferArray.read(offset, dst);
-    return deserializer.deserialize(dst.position(0).limit(length), true, MemoryType.EXCLUSIVE);
+  public Cacheable read(BucketEntry be) throws IOException {
+    ByteBuff dst = ByteBuff.wrap(ByteBuffer.allocate(be.getLength()));
+    bufferArray.read(be.offset(), dst);
+    dst.position(0).limit(be.getLength());
+    return be.wrapAsCacheable(dst.nioByteBuffers(), MemoryType.EXCLUSIVE);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
index f6e49cf..f5ab309 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
@@ -27,11 +27,11 @@ import java.nio.channels.ClosedChannelException;
 import java.nio.channels.FileChannel;
 import java.util.Arrays;
 import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.hadoop.hbase.exceptions.IllegalArgumentIOException;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -121,30 +121,29 @@ public class FileIOEngine implements IOEngine {
 
   /**
    * Transfers data from file to the given byte buffer
-   * @param offset The offset in the file where the first byte to be read
-   * @param length The length of buffer that should be allocated for reading
-   *               from the file channel
-   * @return number of bytes read
-   * @throws IOException
+   * @param be a {@link BucketEntry} which maintains an (offset, len, refCnt)
+   * @return the {@link Cacheable} with the block data inside.
+   * @throws IOException if any IO error happens.
    */
   @Override
-  public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
-      throws IOException {
+  public Cacheable read(BucketEntry be) throws IOException {
+    long offset = be.offset();
+    int length = be.getLength();
     Preconditions.checkArgument(length >= 0, "Length of read can not be less than 0.");
     ByteBuffer dstBuffer = ByteBuffer.allocate(length);
     if (length != 0) {
       accessFile(readAccessor, dstBuffer, offset);
       // The buffer created out of the fileChannel is formed by copying the data from the file
-      // Hence in this case there is no shared memory that we point to. Even if the BucketCache evicts
-      // this buffer from the file the data is already copied and there is no need to ensure that
-      // the results are not corrupted before consuming them.
+      // Hence in this case there is no shared memory that we point to. Even if the BucketCache
+      // evicts this buffer from the file the data is already copied and there is no need to
+      // ensure that the results are not corrupted before consuming them.
       if (dstBuffer.limit() != length) {
-        throw new RuntimeException("Only " + dstBuffer.limit() + " bytes read, " + length
-            + " expected");
+        throw new IllegalArgumentIOException(
+            "Only " + dstBuffer.limit() + " bytes read, " + length + " expected");
       }
     }
     dstBuffer.rewind();
-    return deserializer.deserialize(new SingleByteBuff(dstBuffer), true, MemoryType.EXCLUSIVE);
+    return be.wrapAsCacheable(new ByteBuffer[] { dstBuffer }, MemoryType.EXCLUSIVE);
   }
 
   @VisibleForTesting
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
index bd17fd5..ee37e91 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileMmapIOEngine.java
@@ -24,7 +24,6 @@ import java.nio.channels.FileChannel;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.util.ByteBufferAllocator;
 import org.apache.hadoop.hbase.util.ByteBufferArray;
@@ -101,8 +100,7 @@ public abstract class FileMmapIOEngine implements IOEngine {
   }
 
   @Override
-  public abstract Cacheable read(long offset, int length,
-      CacheableDeserializer<Cacheable> deserializer) throws IOException;
+  public abstract Cacheable read(BucketEntry be) throws IOException;
 
   /**
    * Transfers data from the given byte buffer to file
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
index 87f71a5..3ffb57e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
@@ -23,7 +23,6 @@ import java.nio.ByteBuffer;
 
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 
 /**
@@ -48,15 +47,12 @@ public interface IOEngine {
 
   /**
    * Transfers data from IOEngine to a Cacheable object.
-   * @param length How many bytes to be read from the offset
-   * @param offset The offset in the IO engine where the first byte to be read
-   * @param deserializer The deserializer to be used to make a Cacheable from the data.
-   * @return Cacheable
-   * @throws IOException
-   * @throws RuntimeException when the length of the ByteBuff read is less than 'len'
+   * @param be maintains an (offset,len,refCnt) inside.
+   * @return Cacheable which will wrap the NIO ByteBuffers from IOEngine.
+   * @throws IOException when any IO error happens
+   * @throws IllegalArgumentException when the length of the ByteBuff read is less than 'len'
    */
-  Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
-      throws IOException;
+  Cacheable read(BucketEntry be) throws IOException;
 
   /**
    * Transfers data from the given byte buffer to IOEngine
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
index b6a7a57..bd83dd4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/SharedMemoryMmapIOEngine.java
@@ -18,11 +18,10 @@
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
 import java.io.IOException;
+import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
-import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
-import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.yetus.audience.InterfaceAudience;
 
 /**
@@ -50,15 +49,14 @@ public class SharedMemoryMmapIOEngine extends FileMmapIOEngine {
   }
 
   @Override
-  public Cacheable read(long offset, int length, CacheableDeserializer<Cacheable> deserializer)
-      throws IOException {
-    ByteBuff dstBuffer = bufferArray.asSubByteBuff(offset, length);
+  public Cacheable read(BucketEntry be) throws IOException {
+    ByteBuffer[] buffers = bufferArray.asSubByteBuffers(be.offset(), be.getLength());
     // Here the buffer that is created directly refers to the buffer in the actual buckets.
     // When any cell is referring to the blocks created out of these buckets then it means that
     // those cells are referring to a shared memory area which if evicted by the BucketCache would
     // lead to corruption of results. Hence we set the type of the buffer as SHARED_MEMORY
     // so that the readers using this block are aware of this fact and do the necessary action
     // to prevent eviction till the results are either consumed or copied
-    return deserializer.deserialize(dstBuffer, true, MemoryType.SHARED);
+    return be.wrapAsCacheable(buffers, MemoryType.SHARED);
   }
 }
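
The engines above all follow the same shape under the new read(BucketEntry) contract, so a hedged
sketch of the pattern may help; sliceSharedBuffers and readIntoCopy are hypothetical stand-ins for
the engine-specific reads, while everything else comes from the interfaces shown in this patch.

    @Override
    public Cacheable read(BucketEntry be) throws IOException {
      if (usesSharedMemory()) {
        // The buffers point straight into the bucket area; wrapping them via the entry ties the
        // resulting block to the entry's refCnt, so the area cannot be freed while it is in use.
        ByteBuffer[] shared = sliceSharedBuffers(be.offset(), be.getLength());
        return be.wrapAsCacheable(shared, MemoryType.SHARED);
      }
      // The bytes are copied out of the engine, so eviction cannot corrupt them: EXCLUSIVE.
      ByteBuffer copy = ByteBuffer.allocate(be.getLength());
      readIntoCopy(be.offset(), copy);
      copy.rewind();
      return be.wrapAsCacheable(new ByteBuffer[] { copy }, MemoryType.EXCLUSIVE);
    }
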
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/UnsafeSharedMemoryBucketEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/UnsafeSharedMemoryBucketEntry.java
deleted file mode 100644
index 5d93e97..0000000
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/UnsafeSharedMemoryBucketEntry.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io.hfile.bucket;
-
-import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;
-import org.apache.hadoop.hbase.util.UnsafeAccess;
-import org.apache.yetus.audience.InterfaceAudience;
-
-import sun.misc.Unsafe;
-
-@InterfaceAudience.Private
-public class UnsafeSharedMemoryBucketEntry extends BucketEntry {
-  private static final long serialVersionUID = 707544024564058801L;
-
-  // We are just doing what AtomicInteger doing for the Atomic incrementAndGet/decrementAndGet.
-  // We are avoiding the need to have a field of AtomicIneger type and have it as just int type.
-  // We would like to reduce the head overhead per object of this type as much as possible.
-  // Doing this direct Unsafe usage save us 16 bytes per Object.
-  // ie Just using 4 bytes for int type than 20 bytes requirement for an AtomicInteger (16 bytes)
-  // and 4 bytes reference to it.
-  private static final Unsafe unsafe = UnsafeAccess.theUnsafe;
-  private static final long refCountOffset;
-
-  static {
-    try {
-      refCountOffset = unsafe
-          .objectFieldOffset(UnsafeSharedMemoryBucketEntry.class.getDeclaredField("refCount"));
-    } catch (Exception ex) {
-      throw new Error(ex);
-    }
-  }
-
-  // Set this when we were not able to forcefully evict the block
-  private volatile boolean markedForEvict;
-  private volatile int refCount = 0;
-
-  public UnsafeSharedMemoryBucketEntry(long offset, int length, long accessCounter,
-      boolean inMemory) {
-    super(offset, length, accessCounter, inMemory);
-  }
-
-  @Override
-  protected int getRefCount() {
-    return this.refCount;
-  }
-
-  @Override
-  protected int incrementRefCountAndGet() {
-    return unsafe.getAndAddInt(this, refCountOffset, 1) + 1;
-  }
-
-  @Override
-  protected int decrementRefCountAndGet() {
-    return unsafe.getAndAddInt(this, refCountOffset, -1) - 1;
-  }
-
-  @Override
-  protected boolean isMarkedForEvict() {
-    return this.markedForEvict;
-  }
-
-  @Override
-  protected void markForEvict() {
-    this.markedForEvict = true;
-  }
-}
\ No newline at end of file
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java
index 12ef7e9..cbded23 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java
@@ -30,7 +30,6 @@ import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.Cell;
@@ -441,9 +440,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -536,9 +535,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -670,9 +669,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -758,9 +757,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -925,9 +924,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -952,9 +951,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -1043,9 +1042,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -1079,9 +1078,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -1160,9 +1159,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -1186,9 +1185,9 @@ public class TestBlockEvictionFromClient {
         CachedBlock next = iterator.next();
         BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
         if (cache instanceof BucketCache) {
-          refCount = ((BucketCache) cache).getRefCount(cacheKey);
+          refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
         } else if (cache instanceof CombinedBlockCache) {
-          refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+          refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
         } else {
           continue;
         }
@@ -1214,9 +1213,9 @@ public class TestBlockEvictionFromClient {
       CachedBlock next = iterator.next();
       BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
       if (cache instanceof BucketCache) {
-        refCount = ((BucketCache) cache).getRefCount(cacheKey);
+        refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
       } else if (cache instanceof CombinedBlockCache) {
-        refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+        refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
       } else {
         continue;
       }
@@ -1293,9 +1292,9 @@ public class TestBlockEvictionFromClient {
       CachedBlock next = iterator.next();
       BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
       if (cache instanceof BucketCache) {
-        refCount = ((BucketCache) cache).getRefCount(cacheKey);
+        refCount = ((BucketCache) cache).getRpcRefCount(cacheKey);
       } else if (cache instanceof CombinedBlockCache) {
-        refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+        refCount = ((CombinedBlockCache) cache).getRpcRefCount(cacheKey);
       } else {
         continue;
       }
@@ -1562,8 +1561,6 @@ public class TestBlockEvictionFromClient {
   }
 
   public static class CustomInnerRegionObserver implements RegionCoprocessor, RegionObserver {
-    static final AtomicLong sleepTime = new AtomicLong(0);
-    static final AtomicBoolean slowDownNext = new AtomicBoolean(false);
     static final AtomicInteger countOfNext = new AtomicInteger(0);
     static final AtomicInteger countOfGets = new AtomicInteger(0);
     static final AtomicBoolean waitForGets = new AtomicBoolean(false);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index 6d6f2a7..97003e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -170,16 +170,15 @@ public class CacheTestUtils {
 
   }
 
-  public static void hammerSingleKey(final BlockCache toBeTested,
-      int BlockSize, int numThreads, int numQueries) throws Exception {
+  public static void hammerSingleKey(final BlockCache toBeTested, int numThreads, int numQueries)
+      throws Exception {
     final BlockCacheKey key = new BlockCacheKey("key", 0);
     final byte[] buf = new byte[5 * 1024];
     Arrays.fill(buf, (byte) 5);
 
     final ByteArrayCacheable bac = new ByteArrayCacheable(buf);
     Configuration conf = new Configuration();
-    MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(
-        conf);
+    MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(conf);
 
     final AtomicInteger totalQueries = new AtomicInteger();
     toBeTested.cacheBlock(key, bac);
@@ -188,8 +187,8 @@ public class CacheTestUtils {
       TestThread t = new MultithreadedTestUtil.RepeatingTestThread(ctx) {
         @Override
         public void doAnAction() throws Exception {
-          ByteArrayCacheable returned = (ByteArrayCacheable) toBeTested
-              .getBlock(key, false, false, true);
+          ByteArrayCacheable returned =
+              (ByteArrayCacheable) toBeTested.getBlock(key, false, false, true);
           if (returned != null) {
             assertArrayEquals(buf, returned.buf);
           } else {
@@ -223,52 +222,6 @@ public class CacheTestUtils {
     ctx.stop();
   }
 
-  public static void hammerEviction(final BlockCache toBeTested, int BlockSize,
-      int numThreads, int numQueries) throws Exception {
-
-    Configuration conf = new Configuration();
-    MultithreadedTestUtil.TestContext ctx = new MultithreadedTestUtil.TestContext(
-        conf);
-
-    final AtomicInteger totalQueries = new AtomicInteger();
-
-    for (int i = 0; i < numThreads; i++) {
-      final int finalI = i;
-
-      final byte[] buf = new byte[5 * 1024];
-      TestThread t = new MultithreadedTestUtil.RepeatingTestThread(ctx) {
-        @Override
-        public void doAnAction() throws Exception {
-          for (int j = 0; j < 100; j++) {
-            BlockCacheKey key = new BlockCacheKey("key_" + finalI + "_" + j, 0);
-            Arrays.fill(buf, (byte) (finalI * j));
-            final ByteArrayCacheable bac = new ByteArrayCacheable(buf);
-
-            ByteArrayCacheable gotBack = (ByteArrayCacheable) toBeTested
-                .getBlock(key, true, false, true);
-            if (gotBack != null) {
-              assertArrayEquals(gotBack.buf, bac.buf);
-            } else {
-              toBeTested.cacheBlock(key, bac);
-            }
-          }
-          totalQueries.incrementAndGet();
-        }
-      };
-
-      t.setDaemon(true);
-      ctx.addThread(t);
-    }
-
-    ctx.startThreads();
-    while (totalQueries.get() < numQueries && ctx.shouldRun()) {
-      Thread.sleep(10);
-    }
-    ctx.stop();
-
-    assertTrue(toBeTested.getStats().getEvictedCount() > 0);
-  }
-
   public static class ByteArrayCacheable implements Cacheable {
 
     static final CacheableDeserializer<Cacheable> blockDeserializer =
@@ -405,8 +358,14 @@ public class CacheTestUtils {
     destBuffer.clear();
     cache.cacheBlock(key, blockToCache);
     Cacheable actualBlock = cache.getBlock(key, false, false, false);
-    actualBlock.serialize(destBuffer, true);
-    assertEquals(expectedBuffer, destBuffer);
-    cache.returnBlock(key, actualBlock);
+    try {
+      actualBlock.serialize(destBuffer, true);
+      assertEquals(expectedBuffer, destBuffer);
+    } finally {
+      // Release the reference count increased by getBlock.
+      if (actualBlock != null) {
+        actualBlock.release();
+      }
+    }
   }
 }
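
The try/finally added above reflects the caller contract that comes with ref-counted blocks: every
successful getBlock() must be paired with a release() once the caller is done, otherwise the
backing bucket area can never be freed. A minimal sketch (cache and key are assumed names):

    Cacheable block = cache.getBlock(key, false, false, false);
    if (block != null) {
      try {
        // ... use the block (serialize it, read its cells, ...)
      } finally {
        block.release();   // drop the reference taken by getBlock
      }
    }
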
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index 1029a77..121e070 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -194,7 +194,7 @@ public class TestBucketCache {
 
   @Test
   public void testCacheMultiThreadedSingleKey() throws Exception {
-    CacheTestUtils.hammerSingleKey(cache, BLOCK_SIZE, 2 * NUM_THREADS, 2 * NUM_QUERIES);
+    CacheTestUtils.hammerSingleKey(cache, 2 * NUM_THREADS, 2 * NUM_QUERIES);
   }
 
   @Test
@@ -208,6 +208,7 @@ public class TestBucketCache {
     while (!cache.backingMap.containsKey(cacheKey) || cache.ramCache.containsKey(cacheKey)) {
       Thread.sleep(100);
     }
+    Thread.sleep(1000);
   }
 
   // BucketCache.cacheBlock is async, it first adds block to ramCache and writeQueue, then writer
@@ -221,29 +222,28 @@ public class TestBucketCache {
   @Test
   public void testMemoryLeak() throws Exception {
     final BlockCacheKey cacheKey = new BlockCacheKey("dummy", 1L);
-    cacheAndWaitUntilFlushedToBucket(cache, cacheKey, new CacheTestUtils.ByteArrayCacheable(
-        new byte[10]));
+    cacheAndWaitUntilFlushedToBucket(cache, cacheKey,
+      new CacheTestUtils.ByteArrayCacheable(new byte[10]));
     long lockId = cache.backingMap.get(cacheKey).offset();
     ReentrantReadWriteLock lock = cache.offsetLock.getLock(lockId);
     lock.writeLock().lock();
     Thread evictThread = new Thread("evict-block") {
-
       @Override
       public void run() {
         cache.evictBlock(cacheKey);
       }
-
     };
     evictThread.start();
     cache.offsetLock.waitForWaiters(lockId, 1);
     cache.blockEvicted(cacheKey, cache.backingMap.remove(cacheKey), true);
-    cacheAndWaitUntilFlushedToBucket(cache, cacheKey, new CacheTestUtils.ByteArrayCacheable(
-        new byte[10]));
+    assertEquals(0, cache.getBlockCount());
+    cacheAndWaitUntilFlushedToBucket(cache, cacheKey,
+      new CacheTestUtils.ByteArrayCacheable(new byte[10]));
+    assertEquals(1, cache.getBlockCount());
     lock.writeLock().unlock();
     evictThread.join();
-    assertEquals(1L, cache.getBlockCount());
-    assertTrue(cache.getCurrentSize() > 0L);
-    assertTrue("We should have a block!", cache.iterator().hasNext());
+    assertEquals(0, cache.getBlockCount());
+    assertEquals(cache.getCurrentSize(), 0L);
   }
 
   @Test
@@ -416,10 +416,10 @@ public class TestBucketCache {
 
   @Test
   public void testOffsetProducesPositiveOutput() {
-    //This number is picked because it produces negative output if the values isn't ensured to be positive.
-    //See HBASE-18757 for more information.
+    // This number is picked because it produces negative output if the value isn't ensured to be
+    // positive. See HBASE-18757 for more information.
     long testValue = 549888460800L;
-    BucketCache.BucketEntry bucketEntry = new BucketCache.BucketEntry(testValue, 10, 10L, true);
+    BucketEntry bucketEntry = new BucketEntry(testValue, 10, 10L, true);
     assertEquals(testValue, bucketEntry.offset());
   }
 
@@ -427,16 +427,15 @@ public class TestBucketCache {
   public void testCacheBlockNextBlockMetadataMissing() throws Exception {
     int size = 100;
     int length = HConstants.HFILEBLOCK_HEADER_SIZE + size;
-    byte[] byteArr = new byte[length];
-    ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
+    ByteBuffer buf1 = ByteBuffer.allocate(size), buf2 = ByteBuffer.allocate(size);
     HFileContext meta = new HFileContextBuilder().build();
     ByteBuffAllocator allocator = ByteBuffAllocator.HEAP;
-    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+    HFileBlock blockWithNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf1,
         HFileBlock.FILL_HEADER, -1, 52, -1, meta, allocator);
-    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf,
+    HFileBlock blockWithoutNextBlockMetadata = new HFileBlock(BlockType.DATA, size, size, -1, buf2,
         HFileBlock.FILL_HEADER, -1, -1, -1, meta, allocator);
 
-    BlockCacheKey key = new BlockCacheKey("key1", 0);
+    BlockCacheKey key = new BlockCacheKey("testCacheBlockNextBlockMetadataMissing", 0);
     ByteBuffer actualBuffer = ByteBuffer.allocate(length);
     ByteBuffer block1Buffer = ByteBuffer.allocate(length);
     ByteBuffer block2Buffer = ByteBuffer.allocate(length);
@@ -448,6 +447,8 @@ public class TestBucketCache {
       block1Buffer);
 
     waitUntilFlushedToBucket(cache, key);
+    assertNotNull(cache.backingMap.get(key));
+    assertEquals(1, cache.backingMap.get(key).refCnt());
     assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
     assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
 
@@ -456,9 +457,10 @@ public class TestBucketCache {
       block1Buffer);
     assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
     assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
+    assertEquals(1, cache.backingMap.get(key).refCnt());
 
     // Clear and add blockWithoutNextBlockMetadata
-    cache.evictBlock(key);
+    assertTrue(cache.evictBlock(key));
     assertEquals(1, blockWithNextBlockMetadata.getBufferReadOnly().refCnt());
     assertEquals(1, blockWithoutNextBlockMetadata.getBufferReadOnly().refCnt());
 
@@ -494,8 +496,8 @@ public class TestBucketCache {
         -1, 52, -1, meta, ByteBuffAllocator.HEAP);
     HFileBlock blk2 = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
         -1, -1, -1, meta, ByteBuffAllocator.HEAP);
-    RAMQueueEntry re1 = new RAMQueueEntry(key1, blk1, 1, false);
-    RAMQueueEntry re2 = new RAMQueueEntry(key1, blk2, 1, false);
+    RAMQueueEntry re1 = new RAMQueueEntry(key1, blk1, 1, false, ByteBuffAllocator.NONE);
+    RAMQueueEntry re2 = new RAMQueueEntry(key1, blk2, 1, false, ByteBuffAllocator.NONE);
 
     assertFalse(cache.containsKey(key1));
     assertNull(cache.putIfAbsent(key1, re1));
@@ -542,7 +544,7 @@ public class TestBucketCache {
     BucketAllocator allocator = new BucketAllocator(availableSpace, null);
 
     BlockCacheKey key = new BlockCacheKey("dummy", 1L);
-    RAMQueueEntry re = new RAMQueueEntry(key, block, 1, true);
+    RAMQueueEntry re = new RAMQueueEntry(key, block, 1, true, ByteBuffAllocator.NONE);
 
     Assert.assertEquals(0, allocator.getUsedSize());
     try {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
new file mode 100644
index 0000000..1dcd2a2
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile.bucket;
+
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.io.hfile.Cacheable;
+import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileContext;
+import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.WriterThread;
+import org.apache.hadoop.hbase.testclassification.IOTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ IOTests.class, MediumTests.class })
+public class TestBucketCacheRefCnt {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestBucketCacheRefCnt.class);
+
+  private static final String IO_ENGINE = "offheap";
+  private static final long CAPACITY_SIZE = 32 * 1024 * 1024;
+  private static final int BLOCK_SIZE = 1024;
+  private static final int[] BLOCK_SIZE_ARRAY =
+      new int[] { 64, 128, 256, 512, 1024, 2048, 4096, 8192 };
+  private static final String PERSISTENCE_PATH = null;
+  private static final HFileContext CONTEXT = new HFileContextBuilder().build();
+
+  private BucketCache cache;
+
+  private static BucketCache create(int writerSize, int queueSize) throws IOException {
+    return new BucketCache(IO_ENGINE, CAPACITY_SIZE, BLOCK_SIZE, BLOCK_SIZE_ARRAY, writerSize,
+        queueSize, PERSISTENCE_PATH);
+  }
+
+  private static HFileBlock createBlock(int offset, int size) {
+    return new HFileBlock(BlockType.DATA, size, size, -1, ByteBuffer.allocate(size),
+        HFileBlock.FILL_HEADER, offset, 52, size, CONTEXT, HEAP);
+  }
+
+  private static BlockCacheKey createKey(String hfileName, long offset) {
+    return new BlockCacheKey(hfileName, offset);
+  }
+
+  private void disableWriter() {
+    if (cache != null) {
+      for (WriterThread wt : cache.writerThreads) {
+        wt.disableWriter();
+        wt.interrupt();
+      }
+    }
+  }
+
+  @Test
+  public void testBlockInRAMCache() throws IOException {
+    cache = create(1, 1000);
+    disableWriter();
+    try {
+      for (int i = 0; i < 10; i++) {
+        HFileBlock blk = createBlock(i, 1020);
+        BlockCacheKey key = createKey("testHFile-00", i);
+        assertEquals(1, blk.refCnt());
+        cache.cacheBlock(key, blk);
+        assertEquals(i + 1, cache.getBlockCount());
+        assertEquals(2, blk.refCnt());
+
+        Cacheable block = cache.getBlock(key, false, false, false);
+        try {
+          assertEquals(3, blk.refCnt());
+          assertEquals(3, block.refCnt());
+          assertEquals(blk, block);
+        } finally {
+          block.release();
+        }
+        assertEquals(2, blk.refCnt());
+        assertEquals(2, block.refCnt());
+      }
+
+      for (int i = 0; i < 10; i++) {
+        BlockCacheKey key = createKey("testHFile-00", i);
+        Cacheable blk = cache.getBlock(key, false, false, false);
+        assertEquals(3, blk.refCnt());
+        assertFalse(blk.release());
+        assertEquals(2, blk.refCnt());
+
+        assertTrue(cache.evictBlock(key));
+        assertEquals(1, blk.refCnt());
+        assertTrue(blk.release());
+        assertEquals(0, blk.refCnt());
+      }
+    } finally {
+      cache.shutdown();
+    }
+  }
+
+  private void waitUntilFlushedToCache(BlockCacheKey key) throws InterruptedException {
+    while (!cache.backingMap.containsKey(key) || cache.ramCache.containsKey(key)) {
+      Thread.sleep(100);
+    }
+    Thread.sleep(1000);
+  }
+
+  @Test
+  public void testBlockInBackingMap() throws Exception {
+    cache = create(1, 1000);
+    try {
+      HFileBlock blk = createBlock(200, 1020);
+      BlockCacheKey key = createKey("testHFile-00", 200);
+      cache.cacheBlock(key, blk);
+      waitUntilFlushedToCache(key);
+      assertEquals(1, blk.refCnt());
+
+      Cacheable block = cache.getBlock(key, false, false, false);
+      assertTrue(block.getMemoryType() == MemoryType.SHARED);
+      assertTrue(block instanceof HFileBlock);
+      assertEquals(2, block.refCnt());
+
+      block.retain();
+      assertEquals(3, block.refCnt());
+
+      Cacheable newBlock = cache.getBlock(key, false, false, false);
+      assertTrue(newBlock.getMemoryType() == MemoryType.SHARED);
+      assertTrue(newBlock instanceof HFileBlock);
+      assertEquals(4, newBlock.refCnt());
+
+      // release the newBlock
+      assertFalse(newBlock.release());
+      assertEquals(3, newBlock.refCnt());
+      assertEquals(3, block.refCnt());
+
+      // Evict the key
+      cache.evictBlock(key);
+      assertEquals(2, block.refCnt());
+
+      // Evict again, shouldn't change the refCnt.
+      cache.evictBlock(key);
+      assertEquals(2, block.refCnt());
+
+      assertFalse(block.release());
+      assertEquals(1, block.refCnt());
+
+      newBlock = cache.getBlock(key, false, false, false);
+      assertEquals(2, block.refCnt());
+      assertEquals(2, newBlock.refCnt());
+
+      // Release the block
+      assertFalse(block.release());
+      assertEquals(1, block.refCnt());
+
+      // Release the newBlock;
+      assertTrue(newBlock.release());
+      assertEquals(0, newBlock.refCnt());
+    } finally {
+      cache.shutdown();
+    }
+  }
+
+  @Test
+  public void testInBucketCache() throws IOException {
+    cache = create(1, 1000);
+    try {
+      HFileBlock blk = createBlock(200, 1020);
+      BlockCacheKey key = createKey("testHFile-00", 200);
+      cache.cacheBlock(key, blk);
+      assertTrue(blk.refCnt() == 1 || blk.refCnt() == 2);
+
+      Cacheable block1 = cache.getBlock(key, false, false, false);
+      assertTrue(block1.refCnt() >= 2);
+
+      Cacheable block2 = cache.getBlock(key, false, false, false);
+      assertTrue(block2.refCnt() >= 3);
+
+      cache.evictBlock(key);
+      assertTrue(blk.refCnt() >= 1);
+      assertTrue(block1.refCnt() >= 2);
+      assertTrue(block2.refCnt() >= 2);
+
+      // Get key again
+      Cacheable block3 = cache.getBlock(key, false, false, false);
+      if (block3 != null) {
+        assertTrue(block3.refCnt() >= 3);
+        assertFalse(block3.release());
+      }
+
+      blk.release();
+      boolean ret1 = block1.release();
+      boolean ret2 = block2.release();
+      assertTrue(ret1 || ret2);
+      assertEquals(0, blk.refCnt());
+      assertEquals(0, block1.refCnt());
+      assertEquals(0, block2.refCnt());
+    } finally {
+      cache.shutdown();
+    }
+  }
+
+  @Test
+  public void testMarkStaleAsEvicted() throws Exception {
+    cache = create(1, 1000);
+    try {
+      HFileBlock blk = createBlock(200, 1020);
+      BlockCacheKey key = createKey("testMarkStaleAsEvicted", 200);
+      cache.cacheBlock(key, blk);
+      waitUntilFlushedToCache(key);
+      assertEquals(1, blk.refCnt());
+      assertNotNull(cache.backingMap.get(key));
+      assertEquals(1, cache.backingMap.get(key).refCnt());
+
+      // An RPC references this cached block.
+      Cacheable block1 = cache.getBlock(key, false, false, false);
+      assertEquals(2, block1.refCnt());
+      BucketEntry be1 = cache.backingMap.get(key);
+      assertNotNull(be1);
+      assertEquals(2, be1.refCnt());
+
+      // We still have an RPC reference, so the eviction won't have any effect.
+      assertFalse(be1.markStaleAsEvicted());
+      assertEquals(2, block1.refCnt());
+      assertEquals(2, cache.backingMap.get(key).refCnt());
+
+      // Release the RPC reference.
+      block1.release();
+      assertEquals(1, block1.refCnt());
+      assertEquals(1, cache.backingMap.get(key).refCnt());
+
+      // Mark the stale entry as evicted again; this time it will do the de-allocation.
+      assertTrue(be1.markStaleAsEvicted());
+      assertEquals(0, block1.refCnt());
+      assertNull(cache.backingMap.get(key));
+      assertEquals(0, cache.size());
+    } finally {
+      cache.shutdown();
+    }
+  }
+}
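
As a quick reference, the reference-count contract that the tests above exercise can be summarized with the following sketch. It is illustrative only and not part of the patch; it assumes it runs inside TestBucketCacheRefCnt, so the cache field and the createBlock/createKey/waitUntilFlushedToCache helpers defined above are in scope, and that the block has already been flushed from the ramCache to the backingMap:

    HFileBlock blk = createBlock(200, 1020);                     // refCnt = 1 (creator reference)
    BlockCacheKey key = createKey("testHFile-00", 200);
    cache.cacheBlock(key, blk);
    waitUntilFlushedToCache(key);                                // backingMap now owns its own copy; blk is back to refCnt = 1

    Cacheable block = cache.getBlock(key, false, false, false);  // RPC reference added: refCnt = 2
    block.retain();                                              // extra reference: refCnt = 3
    block.release();                                             // drop the extra reference: refCnt = 2

    cache.evictBlock(key);                                       // cache drops its reference: refCnt = 1
    block.release();                                             // last reference gone: refCnt = 0, buffers recycled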
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
index 746cf8d..d6a0077 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketWriterThread.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
-import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.BucketEntry;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
index a06d86d..2f8c838 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
@@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
+import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -34,13 +35,46 @@ import org.junit.experimental.categories.Category;
 /**
  * Basic test for {@link ByteBufferIOEngine}
  */
-@Category({IOTests.class, SmallTests.class})
+@Category({ IOTests.class, SmallTests.class })
 public class TestByteBufferIOEngine {
 
   @ClassRule
   public static final HBaseClassTestRule CLASS_RULE =
       HBaseClassTestRule.forClass(TestByteBufferIOEngine.class);
 
+  /**
+   * Override the {@link BucketEntry} so that we can set an arbitrary offset.
+   */
+  private static class MockBucketEntry extends BucketEntry {
+    private long off;
+
+    MockBucketEntry(long offset, int length) {
+      super(offset & 0xFF00, length, 0, false);
+      this.off = offset;
+    }
+
+    @Override
+    long offset() {
+      return this.off;
+    }
+  }
+
+  private static BufferGrabbingDeserializer DESERIALIZER = new BufferGrabbingDeserializer();
+  static {
+    int id = CacheableDeserializerIdManager.registerDeserializer(DESERIALIZER);
+    DESERIALIZER.setIdentifier(id);
+  }
+
+  static BucketEntry createBucketEntry(long offset, int len) {
+    BucketEntry be = new MockBucketEntry(offset, len);
+    be.setDeserialiserReference(DESERIALIZER);
+    return be;
+  }
+
+  static ByteBuff getByteBuff(BucketEntry be) {
+    return ((BufferGrabbingDeserializer) be.deserializerReference()).buf;
+  }
+
   @Test
   public void testByteBufferIOEngine() throws Exception {
     int capacity = 32 * 1024 * 1024; // 32 MB
@@ -71,9 +105,9 @@ public class TestByteBufferIOEngine {
       ioEngine.write(src, offset);
       src.position(pos).limit(lim);
 
-      BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-      ioEngine.read(offset, blockSize, deserializer);
-      ByteBuff dst = deserializer.buf;
+      BucketEntry be = createBucketEntry(offset, blockSize);
+      ioEngine.read(be);
+      ByteBuff dst = getByteBuff(be);
       Assert.assertEquals(src.remaining(), blockSize);
       Assert.assertEquals(dst.remaining(), blockSize);
       Assert.assertEquals(0, ByteBuff.compareTo(src, src.position(), src.remaining(), dst,
@@ -85,10 +119,11 @@ public class TestByteBufferIOEngine {
 
   /**
    * A CacheableDeserializer implementation which just store reference to the {@link ByteBuff} to be
-   * deserialized. Use {@link #getDeserializedByteBuff()} to get this reference.
+   * deserialized.
    */
   static class BufferGrabbingDeserializer implements CacheableDeserializer<Cacheable> {
     private ByteBuff buf;
+    private int identifier;
 
     @Override
     public Cacheable deserialize(ByteBuff b) throws IOException {
@@ -102,13 +137,13 @@ public class TestByteBufferIOEngine {
       return null;
     }
 
-    @Override
-    public int getDeserialiserIdentifier() {
-      return 0;
+    public void setIdentifier(int identifier) {
+      this.identifier = identifier;
     }
 
-    public ByteBuff getDeserializedByteBuff() {
-      return this.buf;
+    @Override
+    public int getDeserialiserIdentifier() {
+      return identifier;
     }
   }
 
@@ -151,9 +186,9 @@ public class TestByteBufferIOEngine {
       ioEngine.write(src, offset);
       src.position(pos).limit(lim);
 
-      BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-      ioEngine.read(offset, blockSize, deserializer);
-      ByteBuff dst = deserializer.buf;
+      BucketEntry be = createBucketEntry(offset, blockSize);
+      ioEngine.read(be);
+      ByteBuff dst = getByteBuff(be);
       Assert.assertEquals(src.remaining(), blockSize);
       Assert.assertEquals(dst.remaining(), blockSize);
       Assert.assertEquals(0, ByteBuff.compareTo(src, src.position(), src.remaining(), dst,
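
The same BucketEntry-based read-back pattern is reused by the mmap and file IOEngine tests below. Roughly, and only as a sketch built from the createBucketEntry/getByteBuff helpers defined above (offset and len are whatever the surrounding test loop supplies):

    ByteBuffer src = ByteBuffer.allocate(len);       // test data previously written at 'offset'
    ioEngine.write(src, offset);

    BucketEntry be = createBucketEntry(offset, len); // mock entry carrying offset/len plus the grabbing deserializer
    ioEngine.read(be);                               // the engine hands the bytes to BufferGrabbingDeserializer
    ByteBuff dst = getByteBuff(be);                  // the ByteBuff captured during the read
    Assert.assertEquals(len, dst.remaining());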
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
index 79d58f0..9b51b65 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestExclusiveMemoryMmapEngine.java
@@ -17,10 +17,12 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
+import static org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.createBucketEntry;
+import static org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.getByteBuff;
+
 import java.io.File;
 import java.io.IOException;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.BufferGrabbingDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -57,9 +59,9 @@ public class TestExclusiveMemoryMmapEngine {
         src.position(pos).limit(lim);
 
         // read
-        BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-        fileMmapEngine.read(offset, len, deserializer);
-        ByteBuff dst = deserializer.getDeserializedByteBuff();
+        BucketEntry be = createBucketEntry(offset, len);
+        fileMmapEngine.read(be);
+        ByteBuff dst = getByteBuff(be);
 
         Assert.assertEquals(src.remaining(), len);
         Assert.assertEquals(dst.remaining(), len);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
index 6b0d603..6bd91d0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
+import static org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.createBucketEntry;
+import static org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.getByteBuff;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
@@ -29,7 +31,6 @@ import java.nio.channels.FileChannel;
 import java.util.ArrayList;
 import java.util.List;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.io.hfile.bucket.TestByteBufferIOEngine.BufferGrabbingDeserializer;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
@@ -110,9 +111,10 @@ public class TestFileIOEngine {
         data1[j] = (byte) (Math.random() * 255);
       }
       fileIOEngine.write(ByteBuffer.wrap(data1), offset);
-      BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-      fileIOEngine.read(offset, len, deserializer);
-      ByteBuff data2 = deserializer.getDeserializedByteBuff();
+
+      BucketEntry be = createBucketEntry(offset, len);
+      fileIOEngine.read(be);
+      ByteBuff data2 = getByteBuff(be);
       assertArrayEquals(data1, data2.array());
     }
   }
@@ -122,9 +124,9 @@ public class TestFileIOEngine {
     byte[] data1 = new byte[0];
 
     fileIOEngine.write(ByteBuffer.wrap(data1), 0);
-    BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-    fileIOEngine.read(0, 0, deserializer);
-    ByteBuff data2 = deserializer.getDeserializedByteBuff();
+    BucketEntry be = createBucketEntry(0, 0);
+    fileIOEngine.read(be);
+    ByteBuff data2 = getByteBuff(be);
     assertArrayEquals(data1, data2.array());
   }
 
@@ -140,9 +142,9 @@ public class TestFileIOEngine {
       fileIOEngine.write(src, offset);
       src.position(pos).limit(lim);
 
-      BufferGrabbingDeserializer deserializer = new BufferGrabbingDeserializer();
-      fileIOEngine.read(offset, len, deserializer);
-      ByteBuff dst = deserializer.getDeserializedByteBuff();
+      BucketEntry be = createBucketEntry(offset, len);
+      fileIOEngine.read(be);
+      ByteBuff dst = getByteBuff(be);
 
       Assert.assertEquals(src.remaining(), len);
       Assert.assertEquals(dst.remaining(), len);


[hbase] 01/22: HBASE-21916 Abstract an ByteBuffAllocator to allocate/free ByteBuffer in ByteBufferPool

Posted by op...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 568d12933b4503a91c5d3cd08b4aef4b7f9c0824
Author: huzheng <op...@gmail.com>
AuthorDate: Sat Feb 16 17:16:09 2019 +0800

    HBASE-21916 Abstract an ByteBuffAllocator to allocate/free ByteBuffer in ByteBufferPool
---
 .../apache/hadoop/hbase/ipc/CellBlockBuilder.java  |   9 +-
 hbase-common/pom.xml                               |   4 +
 .../apache/hadoop/hbase/io/ByteBuffAllocator.java  | 282 +++++++++++++++++++
 .../hbase/io/ByteBufferListOutputStream.java       |  40 ++-
 .../org/apache/hadoop/hbase/io/ByteBufferPool.java | 155 -----------
 .../hbase/io/encoding/CopyKeyDataBlockEncoder.java |   2 +-
 .../hbase/io/encoding/DiffKeyDeltaEncoder.java     |   2 +-
 .../hbase/io/encoding/FastDiffDeltaEncoder.java    |   2 +-
 .../hbase/io/encoding/PrefixKeyDeltaEncoder.java   |   2 +-
 .../hadoop/hbase/io/encoding/RowIndexSeekerV1.java |   2 +-
 .../java/org/apache/hadoop/hbase/nio/ByteBuff.java | 146 +++++-----
 .../org/apache/hadoop/hbase/nio/MultiByteBuff.java |  98 +++++--
 .../java/org/apache/hadoop/hbase/nio/RefCnt.java   |  49 ++++
 .../apache/hadoop/hbase/nio/SingleByteBuff.java    |  92 ++++--
 .../apache/hadoop/hbase/util/ByteBufferArray.java  |  10 +-
 .../apache/hadoop/hbase/util/ByteBufferUtils.java  |  31 ++-
 .../hadoop/hbase/io/TestByteBuffAllocator.java     | 309 +++++++++++++++++++++
 .../hbase/io/TestByteBufferListOutputStream.java   |  18 +-
 .../apache/hadoop/hbase/io/TestByteBufferPool.java |  67 -----
 .../apache/hadoop/hbase/nio/TestMultiByteBuff.java |   4 +-
 .../apache/hadoop/hbase/io/hfile/Cacheable.java    |   7 +-
 .../hadoop/hbase/ipc/NettyRpcFrameDecoder.java     |   2 +-
 .../apache/hadoop/hbase/ipc/NettyRpcServer.java    |   2 +-
 .../apache/hadoop/hbase/ipc/NettyServerCall.java   |  12 +-
 .../hadoop/hbase/ipc/NettyServerRpcConnection.java |   9 +-
 .../org/apache/hadoop/hbase/ipc/RpcServer.java     |  96 +------
 .../org/apache/hadoop/hbase/ipc/ServerCall.java    |  18 +-
 .../apache/hadoop/hbase/ipc/SimpleRpcServer.java   |   2 +-
 .../apache/hadoop/hbase/ipc/SimpleServerCall.java  |  15 +-
 .../hbase/ipc/SimpleServerRpcConnection.java       |  26 +-
 .../client/TestAsyncTableGetMultiThreaded.java     |   4 +-
 .../hbase/client/TestServerLoadDurability.java     |   8 +-
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      |   2 +-
 .../org/apache/hadoop/hbase/ipc/TestRpcServer.java | 144 ----------
 34 files changed, 974 insertions(+), 697 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java
index 8d68e87..111f768 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/CellBlockBuilder.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CellScanner;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -41,7 +42,6 @@ import org.apache.hadoop.hbase.io.ByteBuffInputStream;
 import org.apache.hadoop.hbase.io.ByteBufferInputStream;
 import org.apache.hadoop.hbase.io.ByteBufferListOutputStream;
 import org.apache.hadoop.hbase.io.ByteBufferOutputStream;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.util.ClassSize;
@@ -208,7 +208,7 @@ class CellBlockBuilder {
    * @param codec to use for encoding
    * @param compressor to use for encoding
    * @param cellScanner to encode
-   * @param pool Pool of ByteBuffers to make use of.
+   * @param allocator to allocate the {@link ByteBuff}.
    * @return Null or byte buffer filled with a cellblock filled with passed-in Cells encoded using
    *         passed in <code>codec</code> and/or <code>compressor</code>; the returned buffer has
    *         been flipped and is ready for reading. Use limit to find total size. If
@@ -217,15 +217,14 @@ class CellBlockBuilder {
    * @throws IOException if encoding the cells fail
    */
   public ByteBufferListOutputStream buildCellBlockStream(Codec codec, CompressionCodec compressor,
-      CellScanner cellScanner, ByteBufferPool pool) throws IOException {
+      CellScanner cellScanner, ByteBuffAllocator allocator) throws IOException {
     if (cellScanner == null) {
       return null;
     }
     if (codec == null) {
       throw new CellScannerButNoCodecException();
     }
-    assert pool != null;
-    ByteBufferListOutputStream bbos = new ByteBufferListOutputStream(pool);
+    ByteBufferListOutputStream bbos = new ByteBufferListOutputStream(allocator);
     encodeCellsTo(bbos, cellScanner, codec, compressor);
     if (bbos.size() == 0) {
       bbos.releaseResources();
diff --git a/hbase-common/pom.xml b/hbase-common/pom.xml
index 7d7dea2..c23b9d4 100644
--- a/hbase-common/pom.xml
+++ b/hbase-common/pom.xml
@@ -151,6 +151,10 @@
       <artifactId>hbase-shaded-miscellaneous</artifactId>
     </dependency>
     <dependency>
+      <groupId>org.apache.hbase.thirdparty</groupId>
+      <artifactId>hbase-shaded-netty</artifactId>
+    </dependency>
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
     </dependency>
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
new file mode 100644
index 0000000..1833462
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBuffAllocator.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.MultiByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+
+/**
+ * ByteBuffAllocator is used for allocating/freeing ByteBuffers from/to the NIO ByteBuffer pool, and
+ * it provides high-level interfaces for upstream callers. When allocating a desired memory size, it
+ * returns a {@link ByteBuff}; once we are sure those ByteBuffers have reached the end of their life
+ * cycle, we must call {@link ByteBuff#release()} to return the buffers to the pool, otherwise the
+ * ByteBuffers will leak and the NIO ByteBuffer pool may be exhausted. It's possible that the
+ * desired memory size is larger than what the ByteBufferPool has; in that case we downgrade to
+ * allocating ByteBuffers from the heap, which means the GC pressure may increase again. Of course,
+ * a better way is to increase the ByteBufferPool size if we detect this case. <br/>
+ * <br/>
+ * On the other hand, for better memory utilization, we have set a lower bound named
+ * minSizeForReservoirUse in this allocator; if the desired size is less than
+ * minSizeForReservoirUse, the allocator will just allocate the ByteBuffer from the heap and let the
+ * JVM free its memory, because it's too wasteful to allocate a single fixed-size ByteBuffer for
+ * small objects. <br/>
+ * <br/>
+ * We recommend using this class to allocate/free {@link ByteBuff} in the RPC layer or the entire
+ * read/write path, because it hides the details of memory management and its APIs are friendlier to
+ * the upper layer.
+ */
+@InterfaceAudience.Private
+public class ByteBuffAllocator {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ByteBuffAllocator.class);
+
+  public static final String MAX_BUFFER_COUNT_KEY = "hbase.ipc.server.allocator.max.buffer.count";
+
+  public static final String BUFFER_SIZE_KEY = "hbase.ipc.server.allocator.buffer.size";
+  // 64 KB. Make it the same as the chunk size that we will write/read to/from the socket channel.
+  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;
+
+  public static final String MIN_ALLOCATE_SIZE_KEY =
+      "hbase.ipc.server.reservoir.minimal.allocating.size";
+
+  public static final Recycler NONE = () -> {
+  };
+
+  public interface Recycler {
+    void free();
+  }
+
+  private final boolean reservoirEnabled;
+  private final int bufSize;
+  private final int maxBufCount;
+  private final AtomicInteger usedBufCount = new AtomicInteger(0);
+
+  private boolean maxPoolSizeInfoLevelLogged = false;
+
+  // If the desired size is at least this size, it'll be allocated from the ByteBufferPool, otherwise
+  // it'll be allocated from the heap for better utilization. We make this 1/6th of the pool buffer size.
+  private final int minSizeForReservoirUse;
+
+  private final Queue<ByteBuffer> buffers = new ConcurrentLinkedQueue<>();
+
+  /**
+   * Initialize a {@link ByteBuffAllocator} which will try to allocate ByteBuffers from off-heap if
+   * the reservoir is enabled and has enough buffers, otherwise the allocator will just allocate the
+   * insufficient buffers from on-heap to meet the requirement.
+   * @param conf configuration from which to get the arguments to initialize the allocator.
+   * @param reservoirEnabled indicates whether the reservoir is enabled or disabled.
+   * @return a ByteBuffAllocator to manage the byte buffers.
+   */
+  public static ByteBuffAllocator create(Configuration conf, boolean reservoirEnabled) {
+    int poolBufSize = conf.getInt(BUFFER_SIZE_KEY, DEFAULT_BUFFER_SIZE);
+    if (reservoirEnabled) {
+      // The max number of buffers to be pooled in the ByteBufferPool. The default value has been
+      // selected based on the number of handlers configured. For a read request, 2 MB is the max
+      // size at which we will send back one RPC response, so at most we need 2 MB for creating the
+      // response cell block. (It might be much less than this because the 2 MB size calculation
+      // also includes the heap size overhead of each cell.) Considering 2 MB, we will need
+      // (2 * 1024 * 1024) / poolBufSize buffers to make the response cell block. The pool buffer
+      // size is 64 KB by default.
+      // For a read request, at the end of the handler processing, we build the response cell block
+      // and add the Call to the connection's response queue, and a single Responder thread takes
+      // connections and responses from that queue one by one and does the socket write. So there is
+      // a chance that by the time a handler-originated response is actually done writing to the
+      // socket (and so has released the BBs it used), the handler has already processed one more
+      // read request. On average we consider 2x, and use that for the max buffers to pool.
+      int bufsForTwoMB = (2 * 1024 * 1024) / poolBufSize;
+      int maxBuffCount =
+          conf.getInt(MAX_BUFFER_COUNT_KEY, conf.getInt(HConstants.REGION_SERVER_HANDLER_COUNT,
+            HConstants.DEFAULT_REGION_SERVER_HANDLER_COUNT) * bufsForTwoMB * 2);
+      int minSizeForReservoirUse = conf.getInt(MIN_ALLOCATE_SIZE_KEY, poolBufSize / 6);
+      return new ByteBuffAllocator(true, maxBuffCount, poolBufSize, minSizeForReservoirUse);
+    } else {
+      return new ByteBuffAllocator(false, 0, poolBufSize, Integer.MAX_VALUE);
+    }
+  }
+
+  /**
+   * Initialize a {@link ByteBuffAllocator} which only allocates ByteBuffers from on-heap; it's
+   * designed for testing purposes or for the disabled-reservoir case.
+   * @return an allocator which allocates on-heap ByteBuffers.
+   */
+  public static ByteBuffAllocator createOnHeap() {
+    return new ByteBuffAllocator(false, 0, DEFAULT_BUFFER_SIZE, Integer.MAX_VALUE);
+  }
+
+  @VisibleForTesting
+  ByteBuffAllocator(boolean reservoirEnabled, int maxBufCount, int bufSize,
+      int minSizeForReservoirUse) {
+    this.reservoirEnabled = reservoirEnabled;
+    this.maxBufCount = maxBufCount;
+    this.bufSize = bufSize;
+    this.minSizeForReservoirUse = minSizeForReservoirUse;
+  }
+
+  public boolean isReservoirEnabled() {
+    return reservoirEnabled;
+  }
+
+  @VisibleForTesting
+  public int getQueueSize() {
+    return this.buffers.size();
+  }
+
+  /**
+   * Allocate a buffer of the pool's buffer size from the ByteBuffAllocator. Remember to call
+   * {@link ByteBuff#release()} once it's no longer needed, otherwise memory will leak in the NIO
+   * ByteBuffer pool.
+   * @return a ByteBuff of the pool's buffer size.
+   */
+  public SingleByteBuff allocateOneBuffer() {
+    if (isReservoirEnabled()) {
+      ByteBuffer bb = getBuffer();
+      if (bb != null) {
+        return new SingleByteBuff(() -> putbackBuffer(bb), bb);
+      }
+    }
+    // Allocated from heap, let the JVM free its memory.
+    return new SingleByteBuff(NONE, ByteBuffer.allocate(this.bufSize));
+  }
+
+  /**
+   * Allocate size bytes from the ByteBuffAllocator. Remember to call {@link ByteBuff#release()}
+   * once it's no longer needed, otherwise memory will leak in the NIO ByteBuffer pool.
+   * @param size number of bytes to allocate
+   * @return a ByteBuff of the desired size.
+   */
+  public ByteBuff allocate(int size) {
+    if (size < 0) {
+      throw new IllegalArgumentException("size to allocate should >=0");
+    }
+    // If the reservoir is disabled, just allocate from on-heap.
+    if (!isReservoirEnabled() || size == 0) {
+      return new SingleByteBuff(NONE, ByteBuffer.allocate(size));
+    }
+    int reminder = size % bufSize;
+    int len = size / bufSize + (reminder > 0 ? 1 : 0);
+    List<ByteBuffer> bbs = new ArrayList<>(len);
+    // Allocate from ByteBufferPool until the remaining is less than minSizeForReservoirUse or
+    // reservoir is exhausted.
+    int remain = size;
+    while (remain >= minSizeForReservoirUse) {
+      ByteBuffer bb = this.getBuffer();
+      if (bb == null) {
+        break;
+      }
+      bbs.add(bb);
+      remain -= bufSize;
+    }
+    int lenFromReservoir = bbs.size();
+    if (remain > 0) {
+      // If the last ByteBuffer is too small or the reservoir can not provide more ByteBuffers, we
+      // just allocate the ByteBuffer from on-heap.
+      bbs.add(ByteBuffer.allocate(remain));
+    }
+    ByteBuff bb = wrap(bbs, () -> {
+      for (int i = 0; i < lenFromReservoir; i++) {
+        this.putbackBuffer(bbs.get(i));
+      }
+    });
+    bb.limit(size);
+    return bb;
+  }
+
+  public static ByteBuff wrap(ByteBuffer[] buffers, Recycler recycler) {
+    if (buffers == null || buffers.length == 0) {
+      throw new IllegalArgumentException("buffers shouldn't be null or empty");
+    }
+    return buffers.length == 1 ? new SingleByteBuff(recycler, buffers[0])
+        : new MultiByteBuff(recycler, buffers);
+  }
+
+  public static ByteBuff wrap(ByteBuffer[] buffers) {
+    return wrap(buffers, NONE);
+  }
+
+  public static ByteBuff wrap(List<ByteBuffer> buffers, Recycler recycler) {
+    if (buffers == null || buffers.size() == 0) {
+      throw new IllegalArgumentException("buffers shouldn't be null or empty");
+    }
+    return buffers.size() == 1 ? new SingleByteBuff(recycler, buffers.get(0))
+        : new MultiByteBuff(recycler, buffers.toArray(new ByteBuffer[0]));
+  }
+
+  public static ByteBuff wrap(List<ByteBuffer> buffers) {
+    return wrap(buffers, NONE);
+  }
+
+  /**
+   * @return One free DirectByteBuffer from the pool. If there is no free ByteBuffer and we have not
+   *         reached the maximum pool size, it will create a new one and return it. If the max pool
+   *         size has also been reached, it will return null. When the pool returns a ByteBuffer,
+   *         make sure to return it back to the pool after use.
+   */
+  private ByteBuffer getBuffer() {
+    ByteBuffer bb = buffers.poll();
+    if (bb != null) {
+      // To reset the limit to capacity and position to 0, must clear here.
+      bb.clear();
+      return bb;
+    }
+    while (true) {
+      int c = this.usedBufCount.intValue();
+      if (c >= this.maxBufCount) {
+        if (!maxPoolSizeInfoLevelLogged) {
+          LOG.info("Pool already reached its max capacity : {} and no free buffers now. Consider "
+              + "increasing the value for '{}' ?",
+            maxBufCount, MAX_BUFFER_COUNT_KEY);
+          maxPoolSizeInfoLevelLogged = true;
+        }
+        return null;
+      }
+      if (!this.usedBufCount.compareAndSet(c, c + 1)) {
+        continue;
+      }
+      return ByteBuffer.allocateDirect(bufSize);
+    }
+  }
+
+  /**
+   * Return a ByteBuffer to the pool after its use. Don't read/write the ByteBuffer after returning it.
+   * @param buf ByteBuffer to return.
+   */
+  private void putbackBuffer(ByteBuffer buf) {
+    if (buf.capacity() != bufSize || (reservoirEnabled ^ buf.isDirect())) {
+      LOG.warn("Trying to put a buffer, not created by this pool! Will be just ignored");
+      return;
+    }
+    buffers.offer(buf);
+  }
+}
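
To make the allocate/release contract described in the class javadoc concrete, here is a minimal usage sketch. It is illustrative only and not part of the patch; the sizes are arbitrary examples and ExampleAllocatorUsage is a hypothetical class name:

    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.nio.ByteBuff;

    public class ExampleAllocatorUsage {
      public static void main(String[] args) {
        // Heap-only allocator, as used for tests or when the reservoir is disabled.
        ByteBuffAllocator alloc = ByteBuffAllocator.createOnHeap();

        // Ask for 150 KB; with the default 64 KB buffer size this may be backed by several
        // ByteBuffers, so the result can be a MultiByteBuff under the hood.
        ByteBuff buf = alloc.allocate(150 * 1024);
        try {
          buf.put((byte) 42);   // write through the ByteBuff API
        } finally {
          // Mandatory: release() returns pooled buffers to the reservoir (a no-op for pure
          // heap buffers); forgetting it would eventually exhaust a reservoir-backed pool.
          buf.release();
        }
      }
    }

A reservoir-backed allocator is obtained through ByteBuffAllocator.create(conf, true) instead, and follows exactly the same allocate/release discipline.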
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferListOutputStream.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferListOutputStream.java
index 0b97abb..e8bd322 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferListOutputStream.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferListOutputStream.java
@@ -23,6 +23,8 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -39,18 +41,17 @@ import org.slf4j.LoggerFactory;
 public class ByteBufferListOutputStream extends ByteBufferOutputStream {
   private static final Logger LOG = LoggerFactory.getLogger(ByteBufferListOutputStream.class);
 
-  private ByteBufferPool pool;
+  private ByteBuffAllocator allocator;
   // Keep track of the BBs where bytes written to. We will first try to get a BB from the pool. If
   // it is not available will make a new one our own and keep writing to that. We keep track of all
   // the BBs that we got from pool, separately so that on closeAndPutbackBuffers, we can make sure
   // to return back all of them to pool
-  protected List<ByteBuffer> allBufs = new ArrayList<>();
-  protected List<ByteBuffer> bufsFromPool = new ArrayList<>();
+  protected List<SingleByteBuff> allBufs = new ArrayList<>();
 
   private boolean lastBufFlipped = false;// Indicate whether the curBuf/lastBuf is flipped already
 
-  public ByteBufferListOutputStream(ByteBufferPool pool) {
-    this.pool = pool;
+  public ByteBufferListOutputStream(ByteBuffAllocator allocator) {
+    this.allocator = allocator;
     allocateNewBuffer();
   }
 
@@ -58,18 +59,10 @@ public class ByteBufferListOutputStream extends ByteBufferOutputStream {
     if (this.curBuf != null) {
       this.curBuf.flip();// On the current buf set limit = pos and pos = 0.
     }
-    // Get an initial BB to work with from the pool
-    this.curBuf = this.pool.getBuffer();
-    if (this.curBuf == null) {
-      // No free BB at this moment. Make a new one. The pool returns off heap BBs. Don't make off
-      // heap BB on demand. It is difficult to account for all such and so proper sizing of Max
-      // direct heap size. See HBASE-15525 also for more details.
-      // Make BB with same size of pool's buffer size.
-      this.curBuf = ByteBuffer.allocate(this.pool.getBufferSize());
-    } else {
-      this.bufsFromPool.add(this.curBuf);
-    }
-    this.allBufs.add(this.curBuf);
+    // Get an initial ByteBuffer from the allocator.
+    SingleByteBuff sbb = allocator.allocateOneBuffer();
+    this.curBuf = sbb.nioByteBuffers()[0];
+    this.allBufs.add(sbb);
   }
 
   @Override
@@ -118,11 +111,8 @@ public class ByteBufferListOutputStream extends ByteBufferOutputStream {
       LOG.debug(e.toString(), e);
     }
     // Return back all the BBs to pool
-    if (this.bufsFromPool != null) {
-      for (int i = 0; i < this.bufsFromPool.size(); i++) {
-        this.pool.putbackBuffer(this.bufsFromPool.get(i));
-      }
-      this.bufsFromPool = null;
+    for (ByteBuff buf : this.allBufs) {
+      buf.release();
     }
     this.allBufs = null;
     this.curBuf = null;
@@ -144,7 +134,11 @@ public class ByteBufferListOutputStream extends ByteBufferOutputStream {
       // All the other BBs are already flipped while moving to the new BB.
       curBuf.flip();
     }
-    return this.allBufs;
+    List<ByteBuffer> bbs = new ArrayList<>(this.allBufs.size());
+    for (SingleByteBuff bb : this.allBufs) {
+      bbs.add(bb.nioByteBuffers()[0]);
+    }
+    return bbs;
   }
 
   @Override
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferPool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferPool.java
deleted file mode 100644
index caca20b..0000000
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferPool.java
+++ /dev/null
@@ -1,155 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io;
-
-import java.nio.ByteBuffer;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.yetus.audience.InterfaceAudience;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
-
-/**
- * Like Hadoops' ByteBufferPool only you do not specify desired size when getting a ByteBuffer. This
- * pool keeps an upper bound on the count of ByteBuffers in the pool and a fixed size of ByteBuffer
- * that it will create. When requested, if a free ByteBuffer is already present, it will return
- * that. And when no free ByteBuffer available and we are below the max count, it will create a new
- * one and return that.
- *
- * <p>
- * Note: This pool returns off heap ByteBuffers by default. If on heap ByteBuffers to be pooled,
- * pass 'directByteBuffer' as false while construction of the pool.
- * <p>
- * This class is thread safe.
- *
- * @see ByteBufferListOutputStream
- */
-@InterfaceAudience.Private
-public class ByteBufferPool {
-  private static final Logger LOG = LoggerFactory.getLogger(ByteBufferPool.class);
-  // TODO better config names?
-  // hbase.ipc.server.reservoir.initial.max -> hbase.ipc.server.reservoir.max.buffer.count
-  // hbase.ipc.server.reservoir.initial.buffer.size -> hbase.ipc.server.reservoir.buffer.size
-  public static final String MAX_POOL_SIZE_KEY = "hbase.ipc.server.reservoir.initial.max";
-  public static final String BUFFER_SIZE_KEY = "hbase.ipc.server.reservoir.initial.buffer.size";
-  public static final int DEFAULT_BUFFER_SIZE = 64 * 1024;// 64 KB. Making it same as the chunk size
-                                                          // what we will write/read to/from the
-                                                          // socket channel.
-  private final Queue<ByteBuffer> buffers = new ConcurrentLinkedQueue<>();
-
-  private final int bufferSize;
-  private final int maxPoolSize;
-  private AtomicInteger count; // Count of the BBs created already for this pool.
-  private final boolean directByteBuffer; //Whether this pool should return DirectByteBuffers
-  private boolean maxPoolSizeInfoLevelLogged = false;
-
-  /**
-   * @param bufferSize Size of each buffer created by this pool.
-   * @param maxPoolSize Max number of buffers to keep in this pool.
-   */
-  public ByteBufferPool(int bufferSize, int maxPoolSize) {
-    this(bufferSize, maxPoolSize, true);
-  }
-
-  /**
-   * @param bufferSize Size of each buffer created by this pool.
-   * @param maxPoolSize Max number of buffers to keep in this pool.
-   * @param directByteBuffer Whether to create direct ByteBuffer or on heap ByteBuffer.
-   */
-  public ByteBufferPool(int bufferSize, int maxPoolSize, boolean directByteBuffer) {
-    this.bufferSize = bufferSize;
-    this.maxPoolSize = maxPoolSize;
-    this.directByteBuffer = directByteBuffer;
-    // TODO can add initialPoolSize config also and make those many BBs ready for use.
-    LOG.info("Created with bufferSize={} and maxPoolSize={}",
-        org.apache.hadoop.util.StringUtils.byteDesc(bufferSize),
-        org.apache.hadoop.util.StringUtils.byteDesc(maxPoolSize));
-    this.count = new AtomicInteger(0);
-  }
-
-  /**
-   * @return One free ByteBuffer from the pool. If no free ByteBuffer and we have not reached the
-   *         maximum pool size, it will create a new one and return. In case of max pool size also
-   *         reached, will return null. When pool returned a ByteBuffer, make sure to return it back
-   *         to pool after use.
-   * @see #putbackBuffer(ByteBuffer)
-   */
-  public ByteBuffer getBuffer() {
-    ByteBuffer bb = buffers.poll();
-    if (bb != null) {
-      // Clear sets limit == capacity. Position == 0.
-      bb.clear();
-      return bb;
-    }
-    while (true) {
-      int c = this.count.intValue();
-      if (c >= this.maxPoolSize) {
-        if (maxPoolSizeInfoLevelLogged) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Pool already reached its max capacity : " + this.maxPoolSize
-                + " and no free buffers now. Consider increasing the value for '"
-                + MAX_POOL_SIZE_KEY + "' ?");
-          }
-        } else {
-          LOG.info("Pool already reached its max capacity : " + this.maxPoolSize
-              + " and no free buffers now. Consider increasing the value for '" + MAX_POOL_SIZE_KEY
-              + "' ?");
-          maxPoolSizeInfoLevelLogged = true;
-        }
-        return null;
-      }
-      if (!this.count.compareAndSet(c, c + 1)) {
-        continue;
-      }
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Creating a new offheap ByteBuffer of size: " + this.bufferSize);
-      }
-      return this.directByteBuffer ? ByteBuffer.allocateDirect(this.bufferSize)
-          : ByteBuffer.allocate(this.bufferSize);
-    }
-  }
-
-  /**
-   * Return back a ByteBuffer after its use. Do not try to return put back a ByteBuffer, not
-   * obtained from this pool.
-   * @param buf ByteBuffer to return.
-   */
-  public void putbackBuffer(ByteBuffer buf) {
-    if (buf.capacity() != this.bufferSize || (this.directByteBuffer ^ buf.isDirect())) {
-      LOG.warn("Trying to put a buffer, not created by this pool! Will be just ignored");
-      return;
-    }
-    buffers.offer(buf);
-  }
-
-  public int getBufferSize() {
-    return this.bufferSize;
-  }
-
-  /**
-   * @return Number of free buffers
-   */
-  @VisibleForTesting
-  public int getQueueSize() {
-    return buffers.size();
-  }
-}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
index 8bc7974..d7ab009 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
@@ -98,7 +98,7 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
           currentBuffer.skip(current.tagsLength);
         }
         if (includesMvcc()) {
-          current.memstoreTS = ByteBuff.readVLong(currentBuffer);
+          current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
         } else {
           current.memstoreTS = 0;
         }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
index 01f0a9d..ab93d19 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
@@ -477,7 +477,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
           decodeTags();
         }
         if (includesMvcc()) {
-          current.memstoreTS = ByteBuff.readVLong(currentBuffer);
+          current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
         } else {
           current.memstoreTS = 0;
         }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
index baa1856..aa9a436 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
@@ -501,7 +501,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
           decodeTags();
         }
         if (includesMvcc()) {
-          current.memstoreTS = ByteBuff.readVLong(currentBuffer);
+          current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
         } else {
           current.memstoreTS = 0;
         }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
index 63da7e7..176bea3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
@@ -213,7 +213,7 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
           decodeTags();
         }
         if (includesMvcc()) {
-          current.memstoreTS = ByteBuff.readVLong(currentBuffer);
+          current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
         } else {
           current.memstoreTS = 0;
         }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
index 14d847c..9c0532e 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
@@ -282,7 +282,7 @@ public class RowIndexSeekerV1 extends AbstractEncodedSeeker {
       decodeTags();
     }
     if (includesMvcc()) {
-      current.memstoreTS = ByteBuff.readVLong(currentBuffer);
+      current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
     } else {
       current.memstoreTS = 0;
     }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
index 68cf56e..1ee3607 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/ByteBuff.java
@@ -24,22 +24,81 @@ import java.nio.channels.ReadableByteChannel;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ObjectIntPair;
-import org.apache.hadoop.io.WritableUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 
+import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
+import org.apache.hbase.thirdparty.io.netty.util.internal.ObjectUtil;
+
+
 /**
- * An abstract class that abstracts out as to how the byte buffers are used,
- * either single or multiple. We have this interface because the java's ByteBuffers
- * cannot be sub-classed. This class provides APIs similar to the ones provided
- * in java's nio ByteBuffers and allows you to do positional reads/writes and relative
- * reads and writes on the underlying BB. In addition to it, we have some additional APIs which
- * helps us in the read path.
+ * An abstract class that abstracts out as to how the byte buffers are used, either single or
+ * multiple. We have this interface because the java's ByteBuffers cannot be sub-classed. This class
+ * provides APIs similar to the ones provided in java's nio ByteBuffers and allows you to do
+ * positional reads/writes and relative reads and writes on the underlying BB. In addition to it, we
+ * have some additional APIs which help us in the read path. <br/>
+ * The ByteBuff implements the {@link ReferenceCounted} interface, which means it needs to maintain
+ * a {@link RefCnt} inside. Once we are sure that the ByteBuff won't be used any more, we must call
+ * {@link ByteBuff#release()} to recycle its NIO ByteBuffers. When considering
+ * {@link ByteBuff#duplicate()} or {@link ByteBuff#slice()}, releasing either the duplicated one or
+ * the original one will free its memory, because they share the same NIO ByteBuffers. When you want
+ * to retain the NIO ByteBuffers even after the original one has called {@link ByteBuff#release()},
+ * you can do it like this:
+ *
+ * <pre>
+ *   ByteBuff original = ...;
+ *   ByteBuff dup = original.duplicate();
+ *   dup.retain();
+ *   original.release();
+ *   // The NIO buffers can still be accessed until you release the duplicated one
+ *   dup.get(...);
+ *   dup.release();
+ *   // Neither the original nor the dup can access the NIO buffers any more.
+ * </pre>
  */
 @InterfaceAudience.Private
-// TODO to have another name. This can easily get confused with netty's ByteBuf
-public abstract class ByteBuff {
+public abstract class ByteBuff implements ReferenceCounted {
+  private static final String REFERENCE_COUNT_NAME = "ReferenceCount";
   private static final int NIO_BUFFER_LIMIT = 64 * 1024; // should not be more than 64KB.
 
+  protected RefCnt refCnt;
+
+  /*************************** Methods for reference count **********************************/
+
+  protected void checkRefCount() {
+    ObjectUtil.checkPositive(refCnt(), REFERENCE_COUNT_NAME);
+  }
+
+  public int refCnt() {
+    return refCnt.refCnt();
+  }
+
+  @Override
+  public boolean release() {
+    return refCnt.release();
+  }
+
+  @Override
+  public final ByteBuff retain(int increment) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public final boolean release(int increment) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public final ByteBuff touch() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public final ByteBuff touch(Object hint) {
+    throw new UnsupportedOperationException();
+  }
+
+  /******************************* Methods for ByteBuff **************************************/
+
   /**
    * @return this ByteBuff's current position
    */
@@ -491,78 +550,11 @@ public abstract class ByteBuff {
     return tmpLength;
   }
 
-  /**
-   * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a
-   * {@link ByteBuff}.
-   */
-  public static long readVLong(ByteBuff in) {
-    byte firstByte = in.get();
-    int len = WritableUtils.decodeVIntSize(firstByte);
-    if (len == 1) {
-      return firstByte;
-    }
-    long i = 0;
-    for (int idx = 0; idx < len-1; idx++) {
-      byte b = in.get();
-      i = i << 8;
-      i = i | (b & 0xFF);
-    }
-    return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
-  }
-
-  /**
-   * Search sorted array "a" for byte "key".
-   * 
-   * @param a Array to search. Entries must be sorted and unique.
-   * @param fromIndex First index inclusive of "a" to include in the search.
-   * @param toIndex Last index exclusive of "a" to include in the search.
-   * @param key The byte to search for.
-   * @return The index of key if found. If not found, return -(index + 1), where
-   *         negative indicates "not found" and the "index + 1" handles the "-0"
-   *         case.
-   */
-  public static int unsignedBinarySearch(ByteBuff a, int fromIndex, int toIndex, byte key) {
-    int unsignedKey = key & 0xff;
-    int low = fromIndex;
-    int high = toIndex - 1;
-
-    while (low <= high) {
-      int mid = low + ((high - low) >> 1);
-      int midVal = a.get(mid) & 0xff;
-
-      if (midVal < unsignedKey) {
-        low = mid + 1;
-      } else if (midVal > unsignedKey) {
-        high = mid - 1;
-      } else {
-        return mid; // key found
-      }
-    }
-    return -(low + 1); // key not found.
-  }
+  public abstract ByteBuffer[] nioByteBuffers();
 
   @Override
   public String toString() {
     return this.getClass().getSimpleName() + "[pos=" + position() + ", lim=" + limit() +
         ", cap= " + capacity() + "]";
   }
-
-  public static String toStringBinary(final ByteBuff b, int off, int len) {
-    StringBuilder result = new StringBuilder();
-    // Just in case we are passed a 'len' that is > buffer length...
-    if (off >= b.capacity())
-      return result.toString();
-    if (off + len > b.capacity())
-      len = b.capacity() - off;
-    for (int i = off; i < off + len; ++i) {
-      int ch = b.get(i) & 0xFF;
-      if ((ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')
-          || " `~!@#$%^&*()-_=+[]{}|;:'\",.<>/?".indexOf(ch) >= 0) {
-        result.append((char) ch);
-      } else {
-        result.append(String.format("\\x%02X", ch));
-      }
-    }
-    return result.toString();
-  }
 }
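
For reference, a minimal sketch of the reference-counting contract described in the javadoc above, using the public SingleByteBuff constructor added in this patch and a plain on-heap ByteBuffer (so the NONE recycler applies); the class and variable names are illustrative only:

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hbase.nio.ByteBuff;
    import org.apache.hadoop.hbase.nio.SingleByteBuff;

    public class RefCountSketch {
      public static void main(String[] args) {
        // Wrapping a ByteBuffer creates an internal RefCnt of 1.
        ByteBuff buf = new SingleByteBuff(ByteBuffer.allocate(16));
        ByteBuff dup = buf.duplicate(); // shares the same RefCnt and NIO buffer
        dup.retain();                   // refCnt: 1 -> 2, keeps the memory alive
        buf.release();                  // refCnt: 2 -> 1, dup is still usable
        dup.putLong(42L);               // legal, checkRefCount() still passes
        dup.release();                  // refCnt: 1 -> 0, any further access will fail
      }
    }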
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
index 97f5141..e9eadc7 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/MultiByteBuff.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.nio;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.NONE;
+
 import java.io.IOException;
 import java.nio.BufferOverflowException;
 import java.nio.BufferUnderflowException;
@@ -24,13 +26,12 @@ import java.nio.ByteBuffer;
 import java.nio.InvalidMarkException;
 import java.nio.channels.ReadableByteChannel;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ObjectIntPair;
 import org.apache.yetus.audience.InterfaceAudience;
 
-import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
-
 /**
  * Provides a unified view of all the underlying ByteBuffers and will look as if a bigger
  * sequential buffer. This class provides similar APIs as in {@link ByteBuffer} to put/get int,
@@ -53,6 +54,15 @@ public class MultiByteBuff extends ByteBuff {
   private final int[] itemBeginPos;
 
   public MultiByteBuff(ByteBuffer... items) {
+    this(NONE, items);
+  }
+
+  public MultiByteBuff(Recycler recycler, ByteBuffer... items) {
+    this(new RefCnt(recycler), items);
+  }
+
+  private MultiByteBuff(RefCnt refCnt, ByteBuffer... items) {
+    this.refCnt = refCnt;
     assert items != null;
     assert items.length > 0;
     this.items = items;
@@ -75,8 +85,9 @@ public class MultiByteBuff extends ByteBuff {
     this.limitedItemIndex = this.items.length - 1;
   }
 
-  private MultiByteBuff(ByteBuffer[] items, int[] itemBeginPos, int limit, int limitedIndex,
-      int curItemIndex, int markedIndex) {
+  private MultiByteBuff(RefCnt refCnt, ByteBuffer[] items, int[] itemBeginPos, int limit,
+      int limitedIndex, int curItemIndex, int markedIndex) {
+    this.refCnt = refCnt;
     this.items = items;
     this.curItemIndex = curItemIndex;
     this.curItem = this.items[this.curItemIndex];
@@ -117,6 +128,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public int capacity() {
+    checkRefCount();
     int c = 0;
     for (ByteBuffer item : this.items) {
       c += item.capacity();
@@ -131,12 +143,14 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public byte get(int index) {
+    checkRefCount();
     int itemIndex = getItemIndex(index);
     return ByteBufferUtils.toByte(this.items[itemIndex], index - this.itemBeginPos[itemIndex]);
   }
 
   @Override
   public byte getByteAfterPosition(int offset) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int index = offset + this.position();
     int itemIndex = getItemIndexFromCurItemIndex(index);
@@ -179,6 +193,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public int getInt(int index) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int itemIndex;
     if (this.itemBeginPos[this.curItemIndex] <= index
@@ -192,6 +207,7 @@ public class MultiByteBuff extends ByteBuff {
 
   @Override
   public int getIntAfterPosition(int offset) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int index = offset + this.position();
     int itemIndex;
@@ -210,6 +226,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public short getShort(int index) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int itemIndex;
     if (this.itemBeginPos[this.curItemIndex] <= index
@@ -238,6 +255,7 @@ public class MultiByteBuff extends ByteBuff {
 
   @Override
   public short getShortAfterPosition(int offset) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int index = offset + this.position();
     int itemIndex;
@@ -319,6 +337,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public long getLong(int index) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int itemIndex;
     if (this.itemBeginPos[this.curItemIndex] <= index
@@ -332,6 +351,7 @@ public class MultiByteBuff extends ByteBuff {
 
   @Override
   public long getLongAfterPosition(int offset) {
+    checkRefCount();
     // Mostly the index specified will land within this current item. Short circuit for that
     int index = offset + this.position();
     int itemIndex;
@@ -348,6 +368,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public int position() {
+    checkRefCount();
     return itemBeginPos[this.curItemIndex] + this.curItem.position();
   }
 
@@ -358,6 +379,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff position(int position) {
+    checkRefCount();
     // Short circuit for positioning within the cur item. Mostly that is the case.
     if (this.itemBeginPos[this.curItemIndex] <= position
         && this.itemBeginPos[this.curItemIndex + 1] > position) {
@@ -385,6 +407,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff rewind() {
+    checkRefCount();
     for (int i = 0; i < this.items.length; i++) {
       this.items[i].rewind();
     }
@@ -400,6 +423,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff mark() {
+    checkRefCount();
     this.markedItemIndex = this.curItemIndex;
     this.curItem.mark();
     return this;
@@ -412,6 +436,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff reset() {
+    checkRefCount();
     // when the buffer is moved to the next one.. the reset should happen on the previous marked
     // item and the new one should be taken as the base
     if (this.markedItemIndex < 0) throw new InvalidMarkException();
@@ -433,6 +458,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public int remaining() {
+    checkRefCount();
     int remain = 0;
     for (int i = curItemIndex; i < items.length; i++) {
       remain += items[i].remaining();
@@ -446,6 +472,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public final boolean hasRemaining() {
+    checkRefCount();
     return this.curItem.hasRemaining() || (this.curItemIndex < this.limitedItemIndex
         && this.items[this.curItemIndex + 1].hasRemaining());
   }
@@ -457,6 +484,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public byte get() {
+    checkRefCount();
     if (this.curItem.remaining() == 0) {
       if (items.length - 1 == this.curItemIndex) {
         // means cur item is the last one and we wont be able to read a long. Throw exception
@@ -476,6 +504,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public short getShort() {
+    checkRefCount();
     int remaining = this.curItem.remaining();
     if (remaining >= Bytes.SIZEOF_SHORT) {
       return this.curItem.getShort();
@@ -494,6 +523,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public int getInt() {
+    checkRefCount();
     int remaining = this.curItem.remaining();
     if (remaining >= Bytes.SIZEOF_INT) {
       return this.curItem.getInt();
@@ -514,6 +544,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public long getLong() {
+    checkRefCount();
     int remaining = this.curItem.remaining();
     if (remaining >= Bytes.SIZEOF_LONG) {
       return this.curItem.getLong();
@@ -545,6 +576,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public void get(byte[] dst, int offset, int length) {
+    checkRefCount();
     while (length > 0) {
       int toRead = Math.min(length, this.curItem.remaining());
       ByteBufferUtils.copyFromBufferToArray(dst, this.curItem, this.curItem.position(), offset,
@@ -560,6 +592,7 @@ public class MultiByteBuff extends ByteBuff {
 
   @Override
   public void get(int sourceOffset, byte[] dst, int offset, int length) {
+    checkRefCount();
     int itemIndex = getItemIndex(sourceOffset);
     ByteBuffer item = this.items[itemIndex];
     sourceOffset = sourceOffset - this.itemBeginPos[itemIndex];
@@ -583,6 +616,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff limit(int limit) {
+    checkRefCount();
     this.limit = limit;
     // Normally the limit will try to limit within the last BB item
     int limitedIndexBegin = this.itemBeginPos[this.limitedItemIndex];
@@ -622,29 +656,30 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff slice() {
+    checkRefCount();
     ByteBuffer[] copy = new ByteBuffer[this.limitedItemIndex - this.curItemIndex + 1];
     for (int i = curItemIndex, j = 0; i <= this.limitedItemIndex; i++, j++) {
       copy[j] = this.items[i].slice();
     }
-    return new MultiByteBuff(copy);
+    return new MultiByteBuff(refCnt, copy);
   }
 
   /**
-   * Returns an MBB which is a duplicate version of this MBB. The position, limit and mark
-   * of the new MBB will be independent than that of the original MBB.
-   * The content of the new MBB will start at this MBB's current position
-   * The position, limit and mark of the new MBB would be identical to this MBB in terms of
-   * values.
-   * @return a sliced MBB
+   * Returns an MBB which is a duplicate version of this MBB. The position, limit and mark of the
+   * new MBB will be independent of those of the original MBB. The content of the new MBB will
+   * start at this MBB's current position. The position, limit and mark of the new MBB would be
+   * identical to this MBB in terms of values.
+   * @return a duplicated MBB
    */
   @Override
   public MultiByteBuff duplicate() {
+    checkRefCount();
     ByteBuffer[] itemsCopy = new ByteBuffer[this.items.length];
     for (int i = 0; i < this.items.length; i++) {
       itemsCopy[i] = items[i].duplicate();
     }
-    return new MultiByteBuff(itemsCopy, this.itemBeginPos, this.limit, this.limitedItemIndex,
-        this.curItemIndex, this.markedItemIndex);
+    return new MultiByteBuff(refCnt, itemsCopy, this.itemBeginPos, this.limit,
+        this.limitedItemIndex, this.curItemIndex, this.markedItemIndex);
   }
 
   /**
@@ -654,6 +689,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff put(byte b) {
+    checkRefCount();
     if (this.curItem.remaining() == 0) {
       if (this.curItemIndex == this.items.length - 1) {
         throw new BufferOverflowException();
@@ -673,6 +709,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff put(int index, byte b) {
+    checkRefCount();
     int itemIndex = getItemIndex(limit);
     ByteBuffer item = items[itemIndex];
     item.put(index - itemBeginPos[itemIndex], b);
@@ -688,6 +725,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff put(int offset, ByteBuff src, int srcOffset, int length) {
+    checkRefCount();
     int destItemIndex = getItemIndex(offset);
     int srcItemIndex = getItemIndex(srcOffset);
     ByteBuffer destItem = this.items[destItemIndex];
@@ -723,7 +761,7 @@ public class MultiByteBuff extends ByteBuff {
   }
 
   private static ByteBuffer getItemByteBuffer(ByteBuff buf, int index) {
-    return (buf instanceof SingleByteBuff) ? ((SingleByteBuff) buf).getEnclosingByteBuffer()
+    return (buf instanceof SingleByteBuff) ? buf.nioByteBuffers()[0]
         : ((MultiByteBuff) buf).items[index];
   }
 
@@ -734,6 +772,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff putInt(int val) {
+    checkRefCount();
     if (this.curItem.remaining() >= Bytes.SIZEOF_INT) {
       this.curItem.putInt(val);
       return this;
@@ -784,6 +823,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff put(byte[] src, int offset, int length) {
+    checkRefCount();
     if (this.curItem.remaining() >= length) {
       ByteBufferUtils.copyFromArrayToBuffer(this.curItem, src, offset, length);
       return this;
@@ -803,6 +843,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff putLong(long val) {
+    checkRefCount();
     if (this.curItem.remaining() >= Bytes.SIZEOF_LONG) {
       this.curItem.putLong(val);
       return this;
@@ -860,6 +901,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff skip(int length) {
+    checkRefCount();
     // Get available bytes from this item and remaining from next
     int jump = 0;
     while (true) {
@@ -882,6 +924,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public MultiByteBuff moveBack(int length) {
+    checkRefCount();
     while (length != 0) {
       if (length > curItem.position()) {
         length -= curItem.position();
@@ -909,6 +952,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public ByteBuffer asSubByteBuffer(int length) {
+    checkRefCount();
     if (this.curItem.remaining() >= length) {
       return this.curItem;
     }
@@ -918,8 +962,8 @@ public class MultiByteBuff extends ByteBuff {
     ByteBuffer locCurItem = curItem;
     while (length > 0) {
       int toRead = Math.min(length, locCurItem.remaining());
-      ByteBufferUtils
-          .copyFromBufferToArray(dupB, locCurItem, locCurItem.position(), offset, toRead);
+      ByteBufferUtils.copyFromBufferToArray(dupB, locCurItem, locCurItem.position(), offset,
+        toRead);
       length -= toRead;
       if (length == 0) break;
       locCurItemIndex++;
@@ -945,6 +989,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public void asSubByteBuffer(int offset, int length, ObjectIntPair<ByteBuffer> pair) {
+    checkRefCount();
     if (this.itemBeginPos[this.curItemIndex] <= offset) {
       int relOffsetInCurItem = offset - this.itemBeginPos[this.curItemIndex];
       if (this.curItem.limit() - relOffsetInCurItem >= length) {
@@ -988,6 +1033,7 @@ public class MultiByteBuff extends ByteBuff {
   @Override
   public void get(ByteBuffer out, int sourceOffset,
       int length) {
+    checkRefCount();
       // Not used from real read path actually. So not going with
       // optimization
     for (int i = 0; i < length; ++i) {
@@ -1007,6 +1053,7 @@ public class MultiByteBuff extends ByteBuff {
    */
   @Override
   public byte[] toBytes(int offset, int length) {
+    checkRefCount();
     byte[] output = new byte[length];
     this.get(offset, output, 0, length);
     return output;
@@ -1014,6 +1061,7 @@ public class MultiByteBuff extends ByteBuff {
 
   @Override
   public int read(ReadableByteChannel channel) throws IOException {
+    checkRefCount();
     int total = 0;
     while (true) {
       // Read max possible into the current BB
@@ -1034,13 +1082,19 @@ public class MultiByteBuff extends ByteBuff {
   }
 
   @Override
+  public ByteBuffer[] nioByteBuffers() {
+    checkRefCount();
+    return this.items;
+  }
+
+  @Override
   public boolean equals(Object obj) {
     if (!(obj instanceof MultiByteBuff)) return false;
     if (this == obj) return true;
     MultiByteBuff that = (MultiByteBuff) obj;
     if (this.capacity() != that.capacity()) return false;
     if (ByteBuff.compareTo(this, this.position(), this.limit(), that, that.position(),
-        that.limit()) == 0) {
+      that.limit()) == 0) {
       return true;
     }
     return false;
@@ -1055,11 +1109,9 @@ public class MultiByteBuff extends ByteBuff {
     return hash;
   }
 
-  /**
-   * @return the ByteBuffers which this wraps.
-   */
-  @VisibleForTesting
-  public ByteBuffer[] getEnclosingByteBuffers() {
-    return this.items;
+  @Override
+  public MultiByteBuff retain() {
+    refCnt.retain();
+    return this;
   }
 }
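
As an illustration of the unified view MultiByteBuff keeps presenting after this change (a sketch, not part of the patch; the fragment sizes and values are arbitrary):

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hbase.nio.MultiByteBuff;

    public class MultiByteBuffSketch {
      public static void main(String[] args) {
        // Two 8-byte fragments exposed as one 16-byte sequential buffer.
        MultiByteBuff mbb = new MultiByteBuff(ByteBuffer.allocate(8), ByteBuffer.allocate(8));
        mbb.putInt(1).putLong(2L).putInt(3); // the long is split across both fragments
        mbb.rewind();
        assert mbb.getInt() == 1;
        assert mbb.getLong() == 2L;          // stitched back together on read
        assert mbb.getInt() == 3;
        mbb.release();                       // refCnt 1 -> 0; NONE recycler, so nothing is pooled
      }
    }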
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
new file mode 100644
index 0000000..80172b2
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/RefCnt.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.nio;
+
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
+import org.apache.yetus.audience.InterfaceAudience;
+
+import org.apache.hbase.thirdparty.io.netty.util.AbstractReferenceCounted;
+import org.apache.hbase.thirdparty.io.netty.util.ReferenceCounted;
+
+/**
+ * Maintains a reference count internally to track the life cycle of a {@link ByteBuff}. Once the
+ * reference count becomes 0, it will call {@link Recycler#free()} exactly once.
+ */
+@InterfaceAudience.Private
+class RefCnt extends AbstractReferenceCounted {
+
+  private Recycler recycler = ByteBuffAllocator.NONE;
+
+  RefCnt(Recycler recycler) {
+    this.recycler = recycler;
+  }
+
+  @Override
+  protected final void deallocate() {
+    this.recycler.free();
+  }
+
+  @Override
+  public final ReferenceCounted touch(Object hint) {
+    throw new UnsupportedOperationException();
+  }
+}
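
A small sketch of how the Recycler hook gets wired in, assuming ByteBuffAllocator.Recycler is the single-method callback whose free() is invoked by RefCnt#deallocate(); the queue below stands in for a real pool and is illustrative only:

    import java.nio.ByteBuffer;
    import java.util.concurrent.ConcurrentLinkedQueue;
    import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
    import org.apache.hadoop.hbase.nio.ByteBuff;
    import org.apache.hadoop.hbase.nio.SingleByteBuff;

    public class RecyclerSketch {
      public static void main(String[] args) {
        ConcurrentLinkedQueue<ByteBuffer> pool = new ConcurrentLinkedQueue<>();
        ByteBuffer nio = ByteBuffer.allocateDirect(64);
        Recycler recycler = () -> pool.offer(nio); // runs exactly once when refCnt reaches 0
        ByteBuff buf = new SingleByteBuff(recycler, nio);
        ByteBuff slice = buf.slice();              // shares the same RefCnt
        buf.release();                             // refCnt 1 -> 0, recycler.free() fires
        assert pool.size() == 1;                   // the NIO buffer went back to our "pool"
        assert slice.refCnt() == 0;                // the slice is unusable now as well
      }
    }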
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
index 6d64d7b..7205251 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/nio/SingleByteBuff.java
@@ -17,22 +17,24 @@
  */
 package org.apache.hadoop.hbase.nio;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.NONE;
+
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
 import org.apache.hadoop.hbase.util.ObjectIntPair;
 import org.apache.hadoop.hbase.util.UnsafeAccess;
 import org.apache.hadoop.hbase.util.UnsafeAvailChecker;
 import org.apache.yetus.audience.InterfaceAudience;
-import sun.nio.ch.DirectBuffer;
 
-import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
+import sun.nio.ch.DirectBuffer;
 
 /**
- * An implementation of ByteBuff where a single BB backs the BBI. This just acts
- * as a wrapper over a normal BB - offheap or onheap
+ * An implementation of ByteBuff where a single BB backs the BBI. This just acts as a wrapper over a
+ * normal BB - offheap or onheap
  */
 @InterfaceAudience.Private
 public class SingleByteBuff extends ByteBuff {
@@ -48,6 +50,15 @@ public class SingleByteBuff extends ByteBuff {
   private Object unsafeRef = null;
 
   public SingleByteBuff(ByteBuffer buf) {
+    this(NONE, buf);
+  }
+
+  public SingleByteBuff(Recycler recycler, ByteBuffer buf) {
+    this(new RefCnt(recycler), buf);
+  }
+
+  private SingleByteBuff(RefCnt refCnt, ByteBuffer buf) {
+    this.refCnt = refCnt;
     this.buf = buf;
     if (buf.hasArray()) {
       this.unsafeOffset = UnsafeAccess.BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset();
@@ -59,63 +70,74 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public int position() {
+    checkRefCount();
     return this.buf.position();
   }
 
   @Override
   public SingleByteBuff position(int position) {
+    checkRefCount();
     this.buf.position(position);
     return this;
   }
 
   @Override
   public SingleByteBuff skip(int len) {
+    checkRefCount();
     this.buf.position(this.buf.position() + len);
     return this;
   }
 
   @Override
   public SingleByteBuff moveBack(int len) {
+    checkRefCount();
     this.buf.position(this.buf.position() - len);
     return this;
   }
 
   @Override
   public int capacity() {
+    checkRefCount();
     return this.buf.capacity();
   }
 
   @Override
   public int limit() {
+    checkRefCount();
     return this.buf.limit();
   }
 
   @Override
   public SingleByteBuff limit(int limit) {
+    checkRefCount();
     this.buf.limit(limit);
     return this;
   }
 
   @Override
   public SingleByteBuff rewind() {
+    checkRefCount();
     this.buf.rewind();
     return this;
   }
 
   @Override
   public SingleByteBuff mark() {
+    checkRefCount();
     this.buf.mark();
     return this;
   }
 
   @Override
   public ByteBuffer asSubByteBuffer(int length) {
+    checkRefCount();
     // Just return the single BB that is available
     return this.buf;
   }
 
   @Override
   public void asSubByteBuffer(int offset, int length, ObjectIntPair<ByteBuffer> pair) {
+    checkRefCount();
     // Just return the single BB that is available
     pair.setFirst(this.buf);
     pair.setSecond(offset);
@@ -123,37 +145,44 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public int remaining() {
+    checkRefCount();
     return this.buf.remaining();
   }
 
   @Override
   public boolean hasRemaining() {
+    checkRefCount();
     return buf.hasRemaining();
   }
 
   @Override
   public SingleByteBuff reset() {
+    checkRefCount();
     this.buf.reset();
     return this;
   }
 
   @Override
   public SingleByteBuff slice() {
-    return new SingleByteBuff(this.buf.slice());
+    checkRefCount();
+    return new SingleByteBuff(this.refCnt, this.buf.slice());
   }
 
   @Override
   public SingleByteBuff duplicate() {
-    return new SingleByteBuff(this.buf.duplicate());
+    checkRefCount();
+    return new SingleByteBuff(this.refCnt, this.buf.duplicate());
   }
 
   @Override
   public byte get() {
+    checkRefCount();
     return buf.get();
   }
 
   @Override
   public byte get(int index) {
+    checkRefCount();
     if (UNSAFE_AVAIL) {
       return UnsafeAccess.toByte(this.unsafeRef, this.unsafeOffset + index);
     }
@@ -162,29 +191,34 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public byte getByteAfterPosition(int offset) {
+    checkRefCount();
     return get(this.buf.position() + offset);
   }
 
   @Override
   public SingleByteBuff put(byte b) {
+    checkRefCount();
     this.buf.put(b);
     return this;
   }
 
   @Override
   public SingleByteBuff put(int index, byte b) {
+    checkRefCount();
     buf.put(index, b);
     return this;
   }
 
   @Override
   public void get(byte[] dst, int offset, int length) {
+    checkRefCount();
     ByteBufferUtils.copyFromBufferToArray(dst, buf, buf.position(), offset, length);
     buf.position(buf.position() + length);
   }
 
   @Override
   public void get(int sourceOffset, byte[] dst, int offset, int length) {
+    checkRefCount();
     ByteBufferUtils.copyFromBufferToArray(dst, buf, sourceOffset, offset, length);
   }
 
@@ -195,9 +229,10 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public SingleByteBuff put(int offset, ByteBuff src, int srcOffset, int length) {
+    checkRefCount();
     if (src instanceof SingleByteBuff) {
       ByteBufferUtils.copyFromBufferToBuffer(((SingleByteBuff) src).buf, this.buf, srcOffset,
-          offset, length);
+        offset, length);
     } else {
       // TODO we can do some optimization here? Call to asSubByteBuffer might
       // create a copy.
@@ -205,7 +240,7 @@ public class SingleByteBuff extends ByteBuff {
       src.asSubByteBuffer(srcOffset, length, pair);
       if (pair.getFirst() != null) {
         ByteBufferUtils.copyFromBufferToBuffer(pair.getFirst(), this.buf, pair.getSecond(), offset,
-            length);
+          length);
       }
     }
     return this;
@@ -213,37 +248,44 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public SingleByteBuff put(byte[] src, int offset, int length) {
+    checkRefCount();
     ByteBufferUtils.copyFromArrayToBuffer(this.buf, src, offset, length);
     return this;
   }
 
   @Override
   public SingleByteBuff put(byte[] src) {
+    checkRefCount();
     return put(src, 0, src.length);
   }
 
   @Override
   public boolean hasArray() {
+    checkRefCount();
     return this.buf.hasArray();
   }
 
   @Override
   public byte[] array() {
+    checkRefCount();
     return this.buf.array();
   }
 
   @Override
   public int arrayOffset() {
+    checkRefCount();
     return this.buf.arrayOffset();
   }
 
   @Override
   public short getShort() {
+    checkRefCount();
     return this.buf.getShort();
   }
 
   @Override
   public short getShort(int index) {
+    checkRefCount();
     if (UNSAFE_UNALIGNED) {
       return UnsafeAccess.toShort(unsafeRef, unsafeOffset + index);
     }
@@ -252,22 +294,26 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public short getShortAfterPosition(int offset) {
+    checkRefCount();
     return getShort(this.buf.position() + offset);
   }
 
   @Override
   public int getInt() {
+    checkRefCount();
     return this.buf.getInt();
   }
 
   @Override
   public SingleByteBuff putInt(int value) {
+    checkRefCount();
     ByteBufferUtils.putInt(this.buf, value);
     return this;
   }
 
   @Override
   public int getInt(int index) {
+    checkRefCount();
     if (UNSAFE_UNALIGNED) {
       return UnsafeAccess.toInt(unsafeRef, unsafeOffset + index);
     }
@@ -276,22 +322,26 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public int getIntAfterPosition(int offset) {
+    checkRefCount();
     return getInt(this.buf.position() + offset);
   }
 
   @Override
   public long getLong() {
+    checkRefCount();
     return this.buf.getLong();
   }
 
   @Override
   public SingleByteBuff putLong(long value) {
+    checkRefCount();
     ByteBufferUtils.putLong(this.buf, value);
     return this;
   }
 
   @Override
   public long getLong(int index) {
+    checkRefCount();
     if (UNSAFE_UNALIGNED) {
       return UnsafeAccess.toLong(unsafeRef, unsafeOffset + index);
     }
@@ -300,11 +350,13 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public long getLongAfterPosition(int offset) {
+    checkRefCount();
     return getLong(this.buf.position() + offset);
   }
 
   @Override
   public byte[] toBytes(int offset, int length) {
+    checkRefCount();
     byte[] output = new byte[length];
     ByteBufferUtils.copyFromBufferToArray(output, buf, offset, 0, length);
     return output;
@@ -312,18 +364,28 @@ public class SingleByteBuff extends ByteBuff {
 
   @Override
   public void get(ByteBuffer out, int sourceOffset, int length) {
+    checkRefCount();
     ByteBufferUtils.copyFromBufferToBuffer(buf, out, sourceOffset, length);
   }
 
   @Override
   public int read(ReadableByteChannel channel) throws IOException {
+    checkRefCount();
     return channelRead(channel, buf);
   }
 
   @Override
+  public ByteBuffer[] nioByteBuffers() {
+    checkRefCount();
+    return new ByteBuffer[] { this.buf };
+  }
+
+  @Override
   public boolean equals(Object obj) {
-    if(!(obj instanceof SingleByteBuff)) return false;
-    return this.buf.equals(((SingleByteBuff)obj).buf);
+    if (!(obj instanceof SingleByteBuff)) {
+      return false;
+    }
+    return this.buf.equals(((SingleByteBuff) obj).buf);
   }
 
   @Override
@@ -331,11 +393,9 @@ public class SingleByteBuff extends ByteBuff {
     return this.buf.hashCode();
   }
 
-  /**
-   * @return the ByteBuffer which this wraps.
-   */
-  @VisibleForTesting
-  public ByteBuffer getEnclosingByteBuffer() {
-    return this.buf;
+  @Override
+  public SingleByteBuff retain() {
+    refCnt.retain();
+    return this;
   }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
index 2e14b13..d023339 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferArray.java
@@ -27,9 +27,9 @@ import java.util.concurrent.Future;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.MultiByteBuff;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -311,10 +311,6 @@ public class ByteBufferArray {
       srcIndex += cnt;
     }
     assert srcIndex == len;
-    if (mbb.length > 1) {
-      return new MultiByteBuff(mbb);
-    } else {
-      return new SingleByteBuff(mbb[0]);
-    }
+    return ByteBuffAllocator.wrap(mbb);
   }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
index 3ea0a5c..98bc88a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
@@ -30,6 +30,7 @@ import java.util.Arrays;
 
 import org.apache.hadoop.hbase.io.ByteBufferWriter;
 import org.apache.hadoop.hbase.io.util.StreamUtils;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.WritableUtils;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -348,25 +349,39 @@ public final class ByteBufferUtils {
     }
   }
 
-  /**
-   * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a
-   * {@link ByteBuffer}.
-   */
-  public static long readVLong(ByteBuffer in) {
-    byte firstByte = in.get();
+  private interface ByteVisitor {
+    byte get();
+  }
+
+  private static long readVLong(ByteVisitor visitor) {
+    byte firstByte = visitor.get();
     int len = WritableUtils.decodeVIntSize(firstByte);
     if (len == 1) {
       return firstByte;
     }
     long i = 0;
-    for (int idx = 0; idx < len-1; idx++) {
-      byte b = in.get();
+    for (int idx = 0; idx < len - 1; idx++) {
+      byte b = visitor.get();
       i = i << 8;
       i = i | (b & 0xFF);
     }
     return (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
   }
 
+  /**
+   * Similar to {@link WritableUtils#readVLong(DataInput)} but reads from a {@link ByteBuffer}.
+   */
+  public static long readVLong(ByteBuffer in) {
+    return readVLong(in::get);
+  }
+
+  /**
+   * Similar to {@link WritableUtils#readVLong(java.io.DataInput)} but reads from a
+   * {@link ByteBuff}.
+   */
+  public static long readVLong(ByteBuff in) {
+    return readVLong(in::get);
+  }
 
   /**
    * Put in buffer integer using 7 bit encoding. For each written byte:
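
The two public readVLong overloads now share one decoding loop through the private ByteVisitor. A quick sketch of exercising both against the same encoded bytes (the round trip through WritableUtils is only for illustration):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.nio.ByteBuffer;
    import org.apache.hadoop.hbase.nio.SingleByteBuff;
    import org.apache.hadoop.hbase.util.ByteBufferUtils;
    import org.apache.hadoop.io.WritableUtils;

    public class ReadVLongSketch {
      public static void main(String[] args) throws IOException {
        // Encode a vlong with the Hadoop writer that the decoder mirrors.
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        WritableUtils.writeVLong(new DataOutputStream(bos), -987654321L);
        byte[] encoded = bos.toByteArray();

        // The same bytes, decoded through either overload.
        long fromByteBuffer = ByteBufferUtils.readVLong(ByteBuffer.wrap(encoded));
        long fromByteBuff = ByteBufferUtils.readVLong(new SingleByteBuff(ByteBuffer.wrap(encoded)));
        assert fromByteBuffer == -987654321L && fromByteBuff == -987654321L;
      }
    }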
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
new file mode 100644
index 0000000..0976c11
--- /dev/null
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBuffAllocator.java
@@ -0,0 +1,309 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.nio.ByteBuff;
+import org.apache.hadoop.hbase.nio.MultiByteBuff;
+import org.apache.hadoop.hbase.nio.SingleByteBuff;
+import org.apache.hadoop.hbase.testclassification.RPCTests;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ RPCTests.class, SmallTests.class })
+public class TestByteBuffAllocator {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestByteBuffAllocator.class);
+
+  @Test
+  public void testAllocateByteBuffToReadInto() {
+    int maxBuffersInPool = 10;
+    int bufSize = 6 * 1024;
+    ByteBuffAllocator alloc = new ByteBuffAllocator(true, maxBuffersInPool, bufSize, bufSize / 6);
+    ByteBuff buff = alloc.allocate(10 * bufSize);
+    buff.release();
+    // When the request size is less than 1/6th of the pool buffer size, we should use an
+    // on-demand created on-heap buffer.
+    buff = alloc.allocate(200);
+    assertTrue(buff.hasArray());
+    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    buff.release();
+    // When the request size is > 1/6th of the pool buffer size.
+    buff = alloc.allocate(1024);
+    assertFalse(buff.hasArray());
+    assertEquals(maxBuffersInPool - 1, alloc.getQueueSize());
+    buff.release(); // Recycler#free should put the BB back into the pool.
+    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+    // Request size > pool buffer size
+    buff = alloc.allocate(7 * 1024);
+    assertFalse(buff.hasArray());
+    assertTrue(buff instanceof MultiByteBuff);
+    ByteBuffer[] bbs = buff.nioByteBuffers();
+    assertEquals(2, bbs.length);
+    assertTrue(bbs[0].isDirect());
+    assertTrue(bbs[1].isDirect());
+    assertEquals(6 * 1024, bbs[0].limit());
+    assertEquals(1024, bbs[1].limit());
+    assertEquals(maxBuffersInPool - 2, alloc.getQueueSize());
+    buff.release();
+    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+
+    buff = alloc.allocate(6 * 1024 + 200);
+    assertFalse(buff.hasArray());
+    assertTrue(buff instanceof MultiByteBuff);
+    bbs = buff.nioByteBuffers();
+    assertEquals(2, bbs.length);
+    assertTrue(bbs[0].isDirect());
+    assertFalse(bbs[1].isDirect());
+    assertEquals(6 * 1024, bbs[0].limit());
+    assertEquals(200, bbs[1].limit());
+    assertEquals(maxBuffersInPool - 1, alloc.getQueueSize());
+    buff.release();
+    assertEquals(maxBuffersInPool, alloc.getQueueSize());
+
+    alloc.allocate(bufSize * (maxBuffersInPool - 1));
+    buff = alloc.allocate(20 * 1024);
+    assertFalse(buff.hasArray());
+    assertTrue(buff instanceof MultiByteBuff);
+    bbs = buff.nioByteBuffers();
+    assertEquals(2, bbs.length);
+    assertTrue(bbs[0].isDirect());
+    assertFalse(bbs[1].isDirect());
+    assertEquals(6 * 1024, bbs[0].limit());
+    assertEquals(14 * 1024, bbs[1].limit());
+    assertEquals(0, alloc.getQueueSize());
+    buff.release();
+    assertEquals(1, alloc.getQueueSize());
+    alloc.allocateOneBuffer();
+
+    buff = alloc.allocate(7 * 1024);
+    assertTrue(buff.hasArray());
+    assertTrue(buff instanceof SingleByteBuff);
+    assertEquals(7 * 1024, buff.nioByteBuffers()[0].limit());
+    buff.release();
+  }
+
+  @Test
+  public void testNegativeAllocatedSize() {
+    int maxBuffersInPool = 10;
+    ByteBuffAllocator allocator =
+        new ByteBuffAllocator(true, maxBuffersInPool, 6 * 1024, 1024);
+    try {
+      allocator.allocate(-1);
+      fail("Should throw exception when size < 0");
+    } catch (IllegalArgumentException e) {
+      // expected exception
+    }
+    ByteBuff bb = allocator.allocate(0);
+    bb.release();
+  }
+
+  @Test
+  public void testAllocateOneBuffer() {
+    // Allocate from on-heap
+    ByteBuffAllocator allocator = ByteBuffAllocator.createOnHeap();
+    ByteBuff buf = allocator.allocateOneBuffer();
+    assertTrue(buf.hasArray());
+    assertEquals(ByteBuffAllocator.DEFAULT_BUFFER_SIZE, buf.remaining());
+    buf.release();
+
+    // Allocate from off-heap
+    int bufSize = 10;
+    allocator = new ByteBuffAllocator(true, 1, 10, 3);
+    buf = allocator.allocateOneBuffer();
+    assertFalse(buf.hasArray());
+    assertEquals(buf.remaining(), bufSize);
+    // Another one will be allocated from on-heap because the pool has only one ByteBuffer,
+    // which has not been released yet.
+    ByteBuff buf2 = allocator.allocateOneBuffer();
+    assertTrue(buf2.hasArray());
+    assertEquals(buf2.remaining(), bufSize);
+    // free the first one
+    buf.release();
+    // The next one will be off-heap again.
+    buf = allocator.allocateOneBuffer();
+    assertFalse(buf.hasArray());
+    assertEquals(buf.remaining(), bufSize);
+    buf.release();
+  }
+
+  @Test
+  public void testReferenceCount() {
+    int bufSize = 64;
+    ByteBuffAllocator alloc = new ByteBuffAllocator(true, 2, bufSize, 3);
+    ByteBuff buf1 = alloc.allocate(bufSize * 2);
+    assertFalse(buf1.hasArray());
+    // The next one will be allocated from heap
+    ByteBuff buf2 = alloc.allocateOneBuffer();
+    assertTrue(buf2.hasArray());
+
+    // duplicate buf2; if the dup is released, buf2 will also be released (SingleByteBuff)
+    ByteBuff dup2 = buf2.duplicate();
+    dup2.release();
+    assertEquals(0, buf2.refCnt());
+    assertEquals(0, dup2.refCnt());
+    assertEquals(0, alloc.getQueueSize());
+    assertException(dup2::position);
+    assertException(buf2::position);
+
+    // duplicate buf1; if dup1 is released, buf1 will also be released (MultiByteBuff)
+    ByteBuff dup1 = buf1.duplicate();
+    dup1.release();
+    assertEquals(0, buf1.refCnt());
+    assertEquals(0, dup1.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+    assertException(dup1::position);
+    assertException(buf1::position);
+
+    // slice buf3; if slice3 is released, buf3 will also be released (SingleByteBuff)
+    ByteBuff buf3 = alloc.allocateOneBuffer();
+    assertFalse(buf3.hasArray());
+    ByteBuff slice3 = buf3.slice();
+    slice3.release();
+    assertEquals(0, buf3.refCnt());
+    assertEquals(0, slice3.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // slice buf4; if slice4 is released, buf4 will also be released (MultiByteBuff)
+    ByteBuff buf4 = alloc.allocate(bufSize * 2);
+    assertFalse(buf4.hasArray());
+    ByteBuff slice4 = buf4.slice();
+    slice4.release();
+    assertEquals(0, buf4.refCnt());
+    assertEquals(0, slice4.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // Test multiple references for the same ByteBuff (SingleByteBuff)
+    ByteBuff buf5 = alloc.allocateOneBuffer();
+    ByteBuff slice5 = buf5.duplicate().duplicate().duplicate().slice().slice();
+    slice5.release();
+    assertEquals(0, buf5.refCnt());
+    assertEquals(0, slice5.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+    assertException(slice5::position);
+    assertException(buf5::position);
+
+    // Test multiple references for the same ByteBuff (SingleByteBuff)
+    ByteBuff buf6 = alloc.allocate(bufSize >> 2);
+    ByteBuff slice6 = buf6.duplicate().duplicate().duplicate().slice().slice();
+    slice6.release();
+    assertEquals(0, buf6.refCnt());
+    assertEquals(0, slice6.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // Test retain the parent SingleByteBuff (duplicate)
+    ByteBuff parent = alloc.allocateOneBuffer();
+    ByteBuff child = parent.duplicate();
+    child.retain();
+    parent.release();
+    assertEquals(1, child.refCnt());
+    assertEquals(1, parent.refCnt());
+    assertEquals(1, alloc.getQueueSize());
+    parent.release();
+    assertEquals(0, child.refCnt());
+    assertEquals(0, parent.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // Test retain parent MultiByteBuff (duplicate)
+    parent = alloc.allocate(bufSize << 1);
+    child = parent.duplicate();
+    child.retain();
+    parent.release();
+    assertEquals(1, child.refCnt());
+    assertEquals(1, parent.refCnt());
+    assertEquals(0, alloc.getQueueSize());
+    parent.release();
+    assertEquals(0, child.refCnt());
+    assertEquals(0, parent.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // Test retain the parent SingleByteBuff (slice)
+    parent = alloc.allocateOneBuffer();
+    child = parent.slice();
+    child.retain();
+    parent.release();
+    assertEquals(1, child.refCnt());
+    assertEquals(1, parent.refCnt());
+    assertEquals(1, alloc.getQueueSize());
+    parent.release();
+    assertEquals(0, child.refCnt());
+    assertEquals(0, parent.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+
+    // Test retain parent MultiByteBuff (slice)
+    parent = alloc.allocate(bufSize << 1);
+    child = parent.slice();
+    child.retain();
+    parent.release();
+    assertEquals(1, child.refCnt());
+    assertEquals(1, parent.refCnt());
+    assertEquals(0, alloc.getQueueSize());
+    parent.release();
+    assertEquals(0, child.refCnt());
+    assertEquals(0, parent.refCnt());
+    assertEquals(2, alloc.getQueueSize());
+  }
+
+  @Test
+  public void testReverseRef() {
+    int bufSize = 64;
+    ByteBuffAllocator alloc = new ByteBuffAllocator(true, 1, bufSize, 3);
+    ByteBuff buf1 = alloc.allocate(bufSize);
+    ByteBuff dup1 = buf1.duplicate();
+    assertEquals(1, buf1.refCnt());
+    assertEquals(1, dup1.refCnt());
+    buf1.release();
+    assertEquals(0, buf1.refCnt());
+    assertEquals(0, dup1.refCnt());
+    assertEquals(1, alloc.getQueueSize());
+    assertException(buf1::position);
+    assertException(dup1::position);
+  }
+
+  @Test
+  public void testByteBuffUnsupportedMethods() {
+    int bufSize = 64;
+    ByteBuffAllocator alloc = new ByteBuffAllocator(true, 1, bufSize, 3);
+    ByteBuff buf = alloc.allocate(bufSize);
+    assertException(() -> buf.retain(2));
+    assertException(() -> buf.release(2));
+    assertException(() -> buf.touch());
+    assertException(() -> buf.touch(new Object()));
+  }
+
+  private void assertException(Runnable r) {
+    try {
+      r.run();
+      fail();
+    } catch (Exception e) {
+      // expected exception.
+    }
+  }
+}
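
For context, the caller pattern the test above exercises, using only entry points visible in this patch (createOnHeap(), allocate(), release()); the size is arbitrary and the snippet assumes the on-heap factory is accessible to the caller:

    import org.apache.hadoop.hbase.io.ByteBuffAllocator;
    import org.apache.hadoop.hbase.nio.ByteBuff;

    public class AllocatorUsageSketch {
      public static void main(String[] args) {
        // On-heap allocator: every allocation is a heap ByteBuffer, nothing is pooled.
        ByteBuffAllocator alloc = ByteBuffAllocator.createOnHeap();
        ByteBuff buff = alloc.allocate(128 * 1024);
        try {
          buff.putInt(42); // fill or read the block as usual
        } finally {
          buff.release();  // always release, so pooled allocators can reclaim their buffers
        }
      }
    }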
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
index 2f7a869..3ac7a75 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferListOutputStream.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
 import java.nio.ByteBuffer;
 import java.util.List;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.ByteBufferUtils;
@@ -40,29 +41,30 @@ public class TestByteBufferListOutputStream {
 
   @Test
   public void testWrites() throws Exception {
-    ByteBufferPool pool = new ByteBufferPool(10, 3);
-    ByteBufferListOutputStream bbos = new ByteBufferListOutputStream(pool);
+    ByteBuffAllocator alloc = new ByteBuffAllocator(true, 3, 10, 10 / 6);
+    ByteBufferListOutputStream bbos = new ByteBufferListOutputStream(alloc);
     bbos.write(2);// Write a byte
     bbos.writeInt(100);// Write an int
     byte[] b = Bytes.toBytes("row123");// 6 bytes
     bbos.write(b);
+    assertEquals(2, bbos.allBufs.size());
     // Just use the 3rd BB from pool so that pabos, on request, wont get one
-    ByteBuffer bb1 = pool.getBuffer();
+    ByteBuff bb1 = alloc.allocateOneBuffer();
     ByteBuffer bb = ByteBuffer.wrap(Bytes.toBytes("row123_cf1_q1"));// 13 bytes
     bbos.write(bb, 0, bb.capacity());
-    pool.putbackBuffer(bb1);
+    bb1.release();
     bbos.writeInt(123);
     bbos.writeInt(124);
-    assertEquals(0, pool.getQueueSize());
+    assertEquals(0, alloc.getQueueSize());
     List<ByteBuffer> allBufs = bbos.getByteBuffers();
     assertEquals(4, allBufs.size());
-    assertEquals(3, bbos.bufsFromPool.size());
+    assertEquals(4, bbos.allBufs.size());
     ByteBuffer b1 = allBufs.get(0);
     assertEquals(10, b1.remaining());
     assertEquals(2, b1.get());
     assertEquals(100, b1.getInt());
     byte[] bActual = new byte[b.length];
-    b1.get(bActual, 0, 5);//5 bytes in 1st BB
+    b1.get(bActual, 0, 5);// 5 bytes in 1st BB
     ByteBuffer b2 = allBufs.get(1);
     assertEquals(10, b2.remaining());
     b2.get(bActual, 5, 1);// Remaining 1 byte in 2nd BB
@@ -78,6 +80,6 @@ public class TestByteBufferListOutputStream {
     assertEquals(4, b4.remaining());
     assertEquals(124, b4.getInt());
     bbos.releaseResources();
-    assertEquals(3, pool.getQueueSize());
+    assertEquals(3, alloc.getQueueSize());
   }
 }
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferPool.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferPool.java
deleted file mode 100644
index 44d2f45..0000000
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferPool.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.io;
-
-import static org.junit.Assert.assertEquals;
-
-import java.nio.ByteBuffer;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.testclassification.IOTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-@Category({ IOTests.class, SmallTests.class })
-public class TestByteBufferPool {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestByteBufferPool.class);
-
-  @Test
-  public void testOffheapBBPool() throws Exception {
-    boolean directByteBuffer = true;
-    testBBPool(10, 100, directByteBuffer);
-  }
-
-  @Test
-  public void testOnheapBBPool() throws Exception {
-    boolean directByteBuffer = false;
-    testBBPool(10, 100, directByteBuffer);
-  }
-
-  private void testBBPool(int maxPoolSize, int bufferSize, boolean directByteBuffer) {
-    ByteBufferPool pool = new ByteBufferPool(bufferSize, maxPoolSize, directByteBuffer);
-    for (int i = 0; i < maxPoolSize; i++) {
-      ByteBuffer buffer = pool.getBuffer();
-      assertEquals(0, buffer.position());
-      assertEquals(bufferSize, buffer.limit());
-      assertEquals(directByteBuffer, buffer.isDirect());
-    }
-    assertEquals(0, pool.getQueueSize());
-    ByteBuffer bb = directByteBuffer ? ByteBuffer.allocate(bufferSize)
-        : ByteBuffer.allocateDirect(bufferSize);
-    pool.putbackBuffer(bb);
-    assertEquals(0, pool.getQueueSize());
-    bb = directByteBuffer ? ByteBuffer.allocateDirect(bufferSize + 1)
-        : ByteBuffer.allocate(bufferSize + 1);
-    pool.putbackBuffer(bb);
-    assertEquals(0, pool.getQueueSize());
-  }
-}
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
index 84cf7a4..fcfb77a 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/nio/TestMultiByteBuff.java
@@ -286,12 +286,12 @@ public class TestMultiByteBuff {
     multi.putInt(45);
     multi.position(1);
     multi.limit(multi.position() + (2 * Bytes.SIZEOF_LONG));
-    MultiByteBuff sliced = multi.slice();
+    ByteBuff sliced = multi.slice();
     assertEquals(0, sliced.position());
     assertEquals((2 * Bytes.SIZEOF_LONG), sliced.limit());
     assertEquals(l1, sliced.getLong());
     assertEquals(l2, sliced.getLong());
-    MultiByteBuff dup = multi.duplicate();
+    ByteBuff dup = multi.duplicate();
     assertEquals(1, dup.position());
     assertEquals(dup.position() + (2 * Bytes.SIZEOF_LONG), dup.limit());
     assertEquals(l1, dup.getLong());
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
index 0224dea..a842967 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/Cacheable.java
@@ -69,11 +69,10 @@ public interface Cacheable extends HeapSize {
 
   /**
    * SHARED means when this Cacheable is read back from cache it refers to the same memory area as
-   * used by the cache for caching it.
-   * EXCLUSIVE means when this Cacheable is read back from cache, the data was copied to an
-   * exclusive memory area of this Cacheable.
+   * used by the cache for caching it. EXCLUSIVE means when this Cacheable is read back from cache,
+   * the data was copied to an exclusive memory area of this Cacheable.
    */
-  public static enum MemoryType {
+  enum MemoryType {
     SHARED, EXCLUSIVE
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcFrameDecoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcFrameDecoder.java
index 80b1288..5ed3d2e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcFrameDecoder.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcFrameDecoder.java
@@ -127,7 +127,7 @@ public class NettyRpcFrameDecoder extends ByteToMessageDecoder {
     NettyServerCall reqTooBig =
       new NettyServerCall(header.getCallId(), connection.service, null, null, null, null,
         connection, 0, connection.addr, System.currentTimeMillis(), 0,
-        connection.rpcServer.reservoir, connection.rpcServer.cellBlockBuilder, null);
+        connection.rpcServer.bbAllocator, connection.rpcServer.cellBlockBuilder, null);
 
     connection.rpcServer.metrics.exception(SimpleRpcServer.REQUEST_TOO_BIG_EXCEPTION);
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java
index 742a728..bba1bed 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyRpcServer.java
@@ -187,7 +187,7 @@ public class NettyRpcServer extends RpcServer {
       Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status,
       long startTime, int timeout) throws IOException {
     NettyServerCall fakeCall = new NettyServerCall(-1, service, md, null, param, cellScanner, null,
-        -1, null, receiveTime, timeout, reservoir, cellBlockBuilder, null);
+        -1, null, receiveTime, timeout, bbAllocator, cellBlockBuilder, null);
     return call(fakeCall, status);
   }
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerCall.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerCall.java
index 2fae311..8dc08c9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerCall.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerCall.java
@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.net.InetAddress;
 
 import org.apache.hadoop.hbase.CellScanner;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup;
+import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hbase.thirdparty.com.google.protobuf.BlockingService;
 import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
 import org.apache.hbase.thirdparty.com.google.protobuf.Message;
@@ -39,10 +39,10 @@ class NettyServerCall extends ServerCall<NettyServerRpcConnection> {
 
   NettyServerCall(int id, BlockingService service, MethodDescriptor md, RequestHeader header,
       Message param, CellScanner cellScanner, NettyServerRpcConnection connection, long size,
-      InetAddress remoteAddress, long receiveTime, int timeout,
-      ByteBufferPool reservoir, CellBlockBuilder cellBlockBuilder, CallCleanup reqCleanup) {
-    super(id, service, md, header, param, cellScanner, connection, size, remoteAddress,
-        receiveTime, timeout, reservoir, cellBlockBuilder, reqCleanup);
+      InetAddress remoteAddress, long receiveTime, int timeout, ByteBuffAllocator bbAllocator,
+      CellBlockBuilder cellBlockBuilder, CallCleanup reqCleanup) {
+    super(id, service, md, header, param, cellScanner, connection, size, remoteAddress, receiveTime,
+        timeout, bbAllocator, cellBlockBuilder, reqCleanup);
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerRpcConnection.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerRpcConnection.java
index ffa16bf..2f97f53 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerRpcConnection.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/NettyServerRpcConnection.java
@@ -59,12 +59,7 @@ class NettyServerRpcConnection extends ServerRpcConnection {
 
   void process(final ByteBuf buf) throws IOException, InterruptedException {
     if (connectionHeaderRead) {
-      this.callCleanup = new RpcServer.CallCleanup() {
-        @Override
-        public void run() {
-          buf.release();
-        }
-      };
+      this.callCleanup = buf::release;
       process(new SingleByteBuff(buf.nioBuffer()));
     } else {
       ByteBuffer connectionHeader = ByteBuffer.allocate(buf.readableBytes());
@@ -121,7 +116,7 @@ class NettyServerRpcConnection extends ServerRpcConnection {
       long size, final InetAddress remoteAddress, int timeout,
       CallCleanup reqCleanup) {
     return new NettyServerCall(id, service, md, header, param, cellScanner, this, size,
-        remoteAddress, System.currentTimeMillis(), timeout, this.rpcServer.reservoir,
+        remoteAddress, System.currentTimeMillis(), timeout, this.rpcServer.bbAllocator,
         this.rpcServer.cellBlockBuilder, reqCleanup);
   }
 
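
The anonymous RpcServer.CallCleanup above collapses to the method reference buf::release. CallCleanup is a protected nested interface, so the stand-alone sketch below uses hypothetical stand-ins (Cleanup, PooledBuffer) to show the same shape, including the detail that a boolean-returning release() still satisfies a void run():

// Hypothetical stand-ins for RpcServer.CallCleanup and the Netty ByteBuf.
@FunctionalInterface
interface Cleanup {
  void run();
}

final class PooledBuffer {
  boolean release() {      // the boolean result is simply discarded by the reference
    return true;
  }
}

final class MethodRefSketch {
  static Cleanup cleanupFor(PooledBuffer buf) {
    return buf::release;   // equivalent to: () -> { buf.release(); }
  }
}
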
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
index 3ab63dd..ac8c26c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
@@ -26,7 +26,6 @@ import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
 import java.nio.channels.WritableByteChannel;
-import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
@@ -38,16 +37,12 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CallQueueTooBigException;
 import org.apache.hadoop.hbase.CellScanner;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
-import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
 import org.apache.hadoop.hbase.exceptions.RequestTooBigException;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
 import org.apache.hadoop.hbase.monitoring.TaskMonitor;
-import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.MultiByteBuff;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.security.SaslUtil;
 import org.apache.hadoop.hbase.security.SaslUtil.QualityOfProtection;
@@ -210,11 +205,7 @@ public abstract class RpcServer implements RpcServerInterface,
 
   protected UserProvider userProvider;
 
-  protected final ByteBufferPool reservoir;
-  // The requests and response will use buffers from ByteBufferPool, when the size of the
-  // request/response is at least this size.
-  // We make this to be 1/6th of the pool buffer size.
-  protected final int minSizeForReservoirUse;
+  protected final ByteBuffAllocator bbAllocator;
 
   protected volatile boolean allowFallbackToSimpleAuth;
 
@@ -225,7 +216,7 @@ public abstract class RpcServer implements RpcServerInterface,
   private RSRpcServices rsRpcServices;
 
   @FunctionalInterface
-  protected static interface CallCleanup {
+  protected interface CallCleanup {
     void run();
   }
 
@@ -266,32 +257,7 @@ public abstract class RpcServer implements RpcServerInterface,
       final List<BlockingServiceAndInterface> services,
       final InetSocketAddress bindAddress, Configuration conf,
       RpcScheduler scheduler, boolean reservoirEnabled) throws IOException {
-    if (reservoirEnabled) {
-      int poolBufSize = conf.getInt(ByteBufferPool.BUFFER_SIZE_KEY,
-          ByteBufferPool.DEFAULT_BUFFER_SIZE);
-      // The max number of buffers to be pooled in the ByteBufferPool. The default value been
-      // selected based on the #handlers configured. When it is read request, 2 MB is the max size
-      // at which we will send back one RPC request. Means max we need 2 MB for creating the
-      // response cell block. (Well it might be much lesser than this because in 2 MB size calc, we
-      // include the heap size overhead of each cells also.) Considering 2 MB, we will need
-      // (2 * 1024 * 1024) / poolBufSize buffers to make the response cell block. Pool buffer size
-      // is by default 64 KB.
-      // In case of read request, at the end of the handler process, we will make the response
-      // cellblock and add the Call to connection's response Q and a single Responder thread takes
-      // connections and responses from that one by one and do the socket write. So there is chances
-      // that by the time a handler originated response is actually done writing to socket and so
-      // released the BBs it used, the handler might have processed one more read req. On an avg 2x
-      // we consider and consider that also for the max buffers to pool
-      int bufsForTwoMB = (2 * 1024 * 1024) / poolBufSize;
-      int maxPoolSize = conf.getInt(ByteBufferPool.MAX_POOL_SIZE_KEY,
-          conf.getInt(HConstants.REGION_SERVER_HANDLER_COUNT,
-              HConstants.DEFAULT_REGION_SERVER_HANDLER_COUNT) * bufsForTwoMB * 2);
-      this.reservoir = new ByteBufferPool(poolBufSize, maxPoolSize);
-      this.minSizeForReservoirUse = getMinSizeForReservoirUse(this.reservoir);
-    } else {
-      reservoir = null;
-      this.minSizeForReservoirUse = Integer.MAX_VALUE;// reservoir itself not in place.
-    }
+    this.bbAllocator = ByteBuffAllocator.create(conf, reservoirEnabled);
     this.server = server;
     this.services = services;
     this.bindAddress = bindAddress;
@@ -325,11 +291,6 @@ public abstract class RpcServer implements RpcServerInterface,
     this.scheduler = scheduler;
   }
 
-  @VisibleForTesting
-  static int getMinSizeForReservoirUse(ByteBufferPool pool) {
-    return pool.getBufferSize() / 6;
-  }
-
   @Override
   public void onConfigurationChange(Configuration newConf) {
     initReconfigurable(newConf);
@@ -652,55 +613,6 @@ public abstract class RpcServer implements RpcServerInterface,
   }
 
   /**
-   * This is extracted to a static method for better unit testing. We try to get buffer(s) from pool
-   * as much as possible.
-   *
-   * @param pool The ByteBufferPool to use
-   * @param minSizeForPoolUse Only for buffer size above this, we will try to use pool. Any buffer
-   *           need of size below this, create on heap ByteBuffer.
-   * @param reqLen Bytes count in request
-   */
-  @VisibleForTesting
-  static Pair<ByteBuff, CallCleanup> allocateByteBuffToReadInto(ByteBufferPool pool,
-      int minSizeForPoolUse, int reqLen) {
-    ByteBuff resultBuf;
-    List<ByteBuffer> bbs = new ArrayList<>((reqLen / pool.getBufferSize()) + 1);
-    int remain = reqLen;
-    ByteBuffer buf = null;
-    while (remain >= minSizeForPoolUse && (buf = pool.getBuffer()) != null) {
-      bbs.add(buf);
-      remain -= pool.getBufferSize();
-    }
-    ByteBuffer[] bufsFromPool = null;
-    if (bbs.size() > 0) {
-      bufsFromPool = new ByteBuffer[bbs.size()];
-      bbs.toArray(bufsFromPool);
-    }
-    if (remain > 0) {
-      bbs.add(ByteBuffer.allocate(remain));
-    }
-    if (bbs.size() > 1) {
-      ByteBuffer[] items = new ByteBuffer[bbs.size()];
-      bbs.toArray(items);
-      resultBuf = new MultiByteBuff(items);
-    } else {
-      // We are backed by single BB
-      resultBuf = new SingleByteBuff(bbs.get(0));
-    }
-    resultBuf.limit(reqLen);
-    if (bufsFromPool != null) {
-      final ByteBuffer[] bufsFromPoolFinal = bufsFromPool;
-      return new Pair<>(resultBuf, () -> {
-        // Return back all the BBs to pool
-        for (int i = 0; i < bufsFromPoolFinal.length; i++) {
-          pool.putbackBuffer(bufsFromPoolFinal[i]);
-        }
-      });
-    }
-    return new Pair<>(resultBuf, null);
-  }
-
-  /**
    * Needed for features such as delayed calls.  We need to be able to store the current call
    * so that we can complete it later or ask questions of what is supported by the current ongoing
    * call.
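
All of the hand-rolled pool sizing and the allocateByteBuffToReadInto helper above disappear; the server now asks a single ByteBuffAllocator for its request buffers. A minimal sketch of that path, using only the calls visible in this series (create, allocate, release); the request size is an illustrative value:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.nio.ByteBuff;

public class AllocatorSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // reservoirEnabled = true: pooled buffers may be handed out; the sizing policy
    // now lives inside the allocator instead of RpcServer.
    ByteBuffAllocator alloc = ByteBuffAllocator.create(conf, true);
    ByteBuff request = alloc.allocate(128 * 1024);  // illustrative request size
    try {
      // ... read the RPC request body into 'request' ...
    } finally {
      request.release();  // hands any pooled segments back to the reservoir
    }
  }
}
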
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/ServerCall.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/ServerCall.java
index cf1cf9a..f93f3a1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/ServerCall.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/ServerCall.java
@@ -26,10 +26,10 @@ import java.util.Optional;
 
 import org.apache.hadoop.hbase.CellScanner;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.RegionMovedException;
 import org.apache.hadoop.hbase.io.ByteBufferListOutputStream;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hbase.thirdparty.com.google.protobuf.BlockingService;
@@ -67,7 +67,7 @@ abstract class ServerCall<T extends ServerRpcConnection> implements RpcCall, Rpc
   protected long startTime;
   protected final long deadline;// the deadline to handle this call, if exceed we can drop it.
 
-  protected final ByteBufferPool reservoir;
+  protected final ByteBuffAllocator bbAllocator;
 
   protected final CellBlockBuilder cellBlockBuilder;
 
@@ -91,11 +91,11 @@ abstract class ServerCall<T extends ServerRpcConnection> implements RpcCall, Rpc
   private long exceptionSize = 0;
   private final boolean retryImmediatelySupported;
 
-  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
-      justification="Can't figure why this complaint is happening... see below")
+  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH",
+      justification = "Can't figure why this complaint is happening... see below")
   ServerCall(int id, BlockingService service, MethodDescriptor md, RequestHeader header,
-      Message param, CellScanner cellScanner, T connection, long size,
-      InetAddress remoteAddress, long receiveTime, int timeout, ByteBufferPool reservoir,
+      Message param, CellScanner cellScanner, T connection, long size, InetAddress remoteAddress,
+      long receiveTime, int timeout, ByteBuffAllocator byteBuffAllocator,
       CellBlockBuilder cellBlockBuilder, CallCleanup reqCleanup) {
     this.id = id;
     this.service = service;
@@ -118,7 +118,7 @@ abstract class ServerCall<T extends ServerRpcConnection> implements RpcCall, Rpc
     this.remoteAddress = remoteAddress;
     this.timeout = timeout;
     this.deadline = this.timeout > 0 ? this.receiveTime + this.timeout : Long.MAX_VALUE;
-    this.reservoir = reservoir;
+    this.bbAllocator = byteBuffAllocator;
     this.cellBlockBuilder = cellBlockBuilder;
     this.reqCleanup = reqCleanup;
   }
@@ -199,9 +199,9 @@ abstract class ServerCall<T extends ServerRpcConnection> implements RpcCall, Rpc
       // high when we can avoid a big buffer allocation on each rpc.
       List<ByteBuffer> cellBlock = null;
       int cellBlockSize = 0;
-      if (this.reservoir != null) {
+      if (bbAllocator.isReservoirEnabled()) {
         this.cellBlockStream = this.cellBlockBuilder.buildCellBlockStream(this.connection.codec,
-          this.connection.compressionCodec, cells, this.reservoir);
+          this.connection.compressionCodec, cells, bbAllocator);
         if (this.cellBlockStream != null) {
           cellBlock = this.cellBlockStream.getByteBuffers();
           cellBlockSize = this.cellBlockStream.size();
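
With the allocator always present, the old "reservoir != null" test becomes a capability check. A tiny hypothetical helper mirroring the branch above:

import org.apache.hadoop.hbase.io.ByteBuffAllocator;

// Hypothetical helper: decides the response path the way ServerCall now does.
final class CellBlockPathSketch {
  static boolean usePooledCellBlock(ByteBuffAllocator alloc) {
    return alloc.isReservoirEnabled();  // pooled cell block stream vs. plain on-heap block
  }
}
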
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleRpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleRpcServer.java
index 2a8cfbe..f3f7807 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleRpcServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleRpcServer.java
@@ -488,7 +488,7 @@ public class SimpleRpcServer extends RpcServer {
       Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status,
       long startTime, int timeout) throws IOException {
     SimpleServerCall fakeCall = new SimpleServerCall(-1, service, md, null, param, cellScanner,
-        null, -1, null, receiveTime, timeout, reservoir, cellBlockBuilder, null, null);
+        null, -1, null, receiveTime, timeout, bbAllocator, cellBlockBuilder, null, null);
     return call(fakeCall, status);
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerCall.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerCall.java
index 6084138..311b4c7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerCall.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerCall.java
@@ -21,9 +21,9 @@ import java.io.IOException;
 import java.net.InetAddress;
 
 import org.apache.hadoop.hbase.CellScanner;
-import org.apache.yetus.audience.InterfaceAudience;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup;
+import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hbase.thirdparty.com.google.protobuf.BlockingService;
 import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
 import org.apache.hbase.thirdparty.com.google.protobuf.Message;
@@ -42,11 +42,12 @@ class SimpleServerCall extends ServerCall<SimpleServerRpcConnection> {
       justification = "Can't figure why this complaint is happening... see below")
   SimpleServerCall(int id, final BlockingService service, final MethodDescriptor md,
       RequestHeader header, Message param, CellScanner cellScanner,
-      SimpleServerRpcConnection connection, long size,
-      final InetAddress remoteAddress, long receiveTime, int timeout, ByteBufferPool reservoir,
-      CellBlockBuilder cellBlockBuilder, CallCleanup reqCleanup, SimpleRpcServerResponder responder) {
-    super(id, service, md, header, param, cellScanner, connection, size, remoteAddress,
-        receiveTime, timeout, reservoir, cellBlockBuilder, reqCleanup);
+      SimpleServerRpcConnection connection, long size, final InetAddress remoteAddress,
+      long receiveTime, int timeout, ByteBuffAllocator bbAllocator,
+      CellBlockBuilder cellBlockBuilder, CallCleanup reqCleanup,
+      SimpleRpcServerResponder responder) {
+    super(id, service, md, header, param, cellScanner, connection, size, remoteAddress, receiveTime,
+        timeout, bbAllocator, cellBlockBuilder, reqCleanup);
     this.responder = responder;
   }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerRpcConnection.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerRpcConnection.java
index b4b5f33..01127cc 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerRpcConnection.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/SimpleServerRpcConnection.java
@@ -36,14 +36,12 @@ import org.apache.hadoop.hbase.client.VersionInfoUtil;
 import org.apache.hadoop.hbase.exceptions.RequestTooBigException;
 import org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup;
 import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
 import org.apache.hbase.thirdparty.com.google.protobuf.BlockingService;
 import org.apache.hbase.thirdparty.com.google.protobuf.CodedInputStream;
 import org.apache.hbase.thirdparty.com.google.protobuf.Descriptors.MethodDescriptor;
 import org.apache.hbase.thirdparty.com.google.protobuf.Message;
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.RPCProtos.RequestHeader;
-import org.apache.hadoop.hbase.util.Pair;
 
 /** Reads calls from a connection and queues them for handling. */
 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "VO_VOLATILE_INCREMENT",
@@ -212,7 +210,7 @@ class SimpleServerRpcConnection extends ServerRpcConnection {
           // Notify the client about the offending request
           SimpleServerCall reqTooBig = new SimpleServerCall(header.getCallId(), this.service, null,
               null, null, null, this, 0, this.addr, System.currentTimeMillis(), 0,
-              this.rpcServer.reservoir, this.rpcServer.cellBlockBuilder, null, responder);
+              this.rpcServer.bbAllocator, this.rpcServer.cellBlockBuilder, null, responder);
           this.rpcServer.metrics.exception(SimpleRpcServer.REQUEST_TOO_BIG_EXCEPTION);
           // Make sure the client recognizes the underlying exception
           // Otherwise, throw a DoNotRetryIOException.
@@ -255,24 +253,8 @@ class SimpleServerRpcConnection extends ServerRpcConnection {
 
   // It creates the ByteBuff and CallCleanup and assign to Connection instance.
   private void initByteBuffToReadInto(int length) {
-    // We create random on heap buffers are read into those when
-    // 1. ByteBufferPool is not there.
-    // 2. When the size of the req is very small. Using a large sized (64 KB) buffer from pool is
-    // waste then. Also if all the reqs are of this size, we will be creating larger sized
-    // buffers and pool them permanently. This include Scan/Get request and DDL kind of reqs like
-    // RegionOpen.
-    // 3. If it is an initial handshake signal or initial connection request. Any way then
-    // condition 2 itself will match
-    // 4. When SASL use is ON.
-    if (this.rpcServer.reservoir == null || skipInitialSaslHandshake || !connectionHeaderRead ||
-        useSasl || length < this.rpcServer.minSizeForReservoirUse) {
-      this.data = new SingleByteBuff(ByteBuffer.allocate(length));
-    } else {
-      Pair<ByteBuff, CallCleanup> pair = RpcServer.allocateByteBuffToReadInto(
-        this.rpcServer.reservoir, this.rpcServer.minSizeForReservoirUse, length);
-      this.data = pair.getFirst();
-      this.callCleanup = pair.getSecond();
-    }
+    this.data = rpcServer.bbAllocator.allocate(length);
+    this.callCleanup = data::release;
   }
 
   protected int channelDataRead(ReadableByteChannel channel, ByteBuff buf) throws IOException {
@@ -345,7 +327,7 @@ class SimpleServerRpcConnection extends ServerRpcConnection {
       RequestHeader header, Message param, CellScanner cellScanner, long size,
       InetAddress remoteAddress, int timeout, CallCleanup reqCleanup) {
     return new SimpleServerCall(id, service, md, header, param, cellScanner, this, size,
-        remoteAddress, System.currentTimeMillis(), timeout, this.rpcServer.reservoir,
+        remoteAddress, System.currentTimeMillis(), timeout, this.rpcServer.bbAllocator,
         this.rpcServer.cellBlockBuilder, reqCleanup, this.responder);
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
index 75800ba..8a993b8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncTableGetMultiThreaded.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase.client;
 
 import static org.apache.hadoop.hbase.HConstants.HBASE_CLIENT_META_OPERATION_TIMEOUT;
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;
 import static org.apache.hadoop.hbase.master.LoadBalancer.TABLES_ON_MASTER;
 import static org.junit.Assert.assertEquals;
 
@@ -42,7 +43,6 @@ import org.apache.hadoop.hbase.MemoryCompactionPolicy;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.regionserver.CompactingMemStore;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.testclassification.ClientTests;
@@ -94,7 +94,7 @@ public class TestAsyncTableGetMultiThreaded {
   protected static void setUp(MemoryCompactionPolicy memoryCompaction) throws Exception {
     TEST_UTIL.getConfiguration().set(TABLES_ON_MASTER, "none");
     TEST_UTIL.getConfiguration().setLong(HBASE_CLIENT_META_OPERATION_TIMEOUT, 60000L);
-    TEST_UTIL.getConfiguration().setInt(ByteBufferPool.MAX_POOL_SIZE_KEY, 100);
+    TEST_UTIL.getConfiguration().setInt(MAX_BUFFER_COUNT_KEY, 100);
     TEST_UTIL.getConfiguration().set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
       String.valueOf(memoryCompaction));
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestServerLoadDurability.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestServerLoadDurability.java
index 267e9e8..abf20dd 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestServerLoadDurability.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestServerLoadDurability.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hbase.client;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
+
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -26,7 +28,6 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.TableName;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
 import org.apache.hadoop.hbase.ipc.NettyRpcServer;
 import org.apache.hadoop.hbase.ipc.RpcServerFactory;
 import org.apache.hadoop.hbase.ipc.SimpleRpcServer;
@@ -71,9 +72,8 @@ public class TestServerLoadDurability {
 
   private static Configuration createConfigurationForSimpleRpcServer() {
     Configuration conf = HBaseConfiguration.create();
-    conf.set(RpcServerFactory.CUSTOM_RPC_SERVER_IMPL_CONF_KEY,
-        SimpleRpcServer.class.getName());
-    conf.setInt(ByteBufferPool.BUFFER_SIZE_KEY, 20);
+    conf.set(RpcServerFactory.CUSTOM_RPC_SERVER_IMPL_CONF_KEY, SimpleRpcServer.class.getName());
+    conf.setInt(BUFFER_SIZE_KEY, 20);
     return conf;
   }
 
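
Both tests above now tune the allocator through ByteBuffAllocator's own keys instead of ByteBufferPool's. A minimal sketch setting the two keys touched here; the buffer size is an illustrative value and the count mirrors TestAsyncTableGetMultiThreaded:

import static org.apache.hadoop.hbase.io.ByteBuffAllocator.BUFFER_SIZE_KEY;
import static org.apache.hadoop.hbase.io.ByteBuffAllocator.MAX_BUFFER_COUNT_KEY;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class AllocatorConfigSketch {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    conf.setInt(BUFFER_SIZE_KEY, 64 * 1024);   // size of each pooled buffer (illustrative)
    conf.setInt(MAX_BUFFER_COUNT_KEY, 100);    // cap on pooled buffers, as in the test above
    System.out.println(conf.getInt(BUFFER_SIZE_KEY, -1));
  }
}
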
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 48080b2..32160a1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -832,7 +832,7 @@ public class TestHFileBlock {
     if (ClassSize.is32BitJVM()) {
       assertEquals(64, HFileBlock.MULTI_BYTE_BUFFER_HEAP_SIZE);
     } else {
-      assertEquals(72, HFileBlock.MULTI_BYTE_BUFFER_HEAP_SIZE);
+      assertEquals(80, HFileBlock.MULTI_BYTE_BUFFER_HEAP_SIZE);
     }
 
     for (int size : new int[] { 100, 256, 12345 }) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcServer.java
deleted file mode 100644
index 560190b..0000000
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcServer.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hbase.ipc;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.nio.ByteBuffer;
-import org.apache.hadoop.hbase.HBaseClassTestRule;
-import org.apache.hadoop.hbase.io.ByteBufferPool;
-import org.apache.hadoop.hbase.ipc.RpcServer.CallCleanup;
-import org.apache.hadoop.hbase.nio.ByteBuff;
-import org.apache.hadoop.hbase.nio.MultiByteBuff;
-import org.apache.hadoop.hbase.nio.SingleByteBuff;
-import org.apache.hadoop.hbase.testclassification.RPCTests;
-import org.apache.hadoop.hbase.testclassification.SmallTests;
-import org.apache.hadoop.hbase.util.Pair;
-import org.junit.ClassRule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-
-@Category({ RPCTests.class, SmallTests.class })
-public class TestRpcServer {
-
-  @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE =
-      HBaseClassTestRule.forClass(TestRpcServer.class);
-
-  @Test
-  public void testAllocateByteBuffToReadInto() throws Exception {
-    int maxBuffersInPool = 10;
-    ByteBufferPool pool = new ByteBufferPool(6 * 1024, maxBuffersInPool);
-    initPoolWithAllBuffers(pool, maxBuffersInPool);
-    ByteBuff buff = null;
-    Pair<ByteBuff, CallCleanup> pair;
-    // When the request size is less than 1/6th of the pool buffer size. We should use on demand
-    // created on heap Buffer
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        200);
-    buff = pair.getFirst();
-    assertTrue(buff.hasArray());
-    assertEquals(maxBuffersInPool, pool.getQueueSize());
-    assertNull(pair.getSecond());
-    // When the request size is > 1/6th of the pool buffer size.
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        1024);
-    buff = pair.getFirst();
-    assertFalse(buff.hasArray());
-    assertEquals(maxBuffersInPool - 1, pool.getQueueSize());
-    assertNotNull(pair.getSecond());
-    pair.getSecond().run();// CallCleanup#run should put back the BB to pool.
-    assertEquals(maxBuffersInPool, pool.getQueueSize());
-    // Request size> pool buffer size
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        7 * 1024);
-    buff = pair.getFirst();
-    assertFalse(buff.hasArray());
-    assertTrue(buff instanceof MultiByteBuff);
-    ByteBuffer[] bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
-    assertEquals(2, bbs.length);
-    assertTrue(bbs[0].isDirect());
-    assertTrue(bbs[1].isDirect());
-    assertEquals(6 * 1024, bbs[0].limit());
-    assertEquals(1024, bbs[1].limit());
-    assertEquals(maxBuffersInPool - 2, pool.getQueueSize());
-    assertNotNull(pair.getSecond());
-    pair.getSecond().run();// CallCleanup#run should put back the BB to pool.
-    assertEquals(maxBuffersInPool, pool.getQueueSize());
-
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        6 * 1024 + 200);
-    buff = pair.getFirst();
-    assertFalse(buff.hasArray());
-    assertTrue(buff instanceof MultiByteBuff);
-    bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
-    assertEquals(2, bbs.length);
-    assertTrue(bbs[0].isDirect());
-    assertFalse(bbs[1].isDirect());
-    assertEquals(6 * 1024, bbs[0].limit());
-    assertEquals(200, bbs[1].limit());
-    assertEquals(maxBuffersInPool - 1, pool.getQueueSize());
-    assertNotNull(pair.getSecond());
-    pair.getSecond().run();// CallCleanup#run should put back the BB to pool.
-    assertEquals(maxBuffersInPool, pool.getQueueSize());
-
-    ByteBuffer[] buffers = new ByteBuffer[maxBuffersInPool - 1];
-    for (int i = 0; i < maxBuffersInPool - 1; i++) {
-      buffers[i] = pool.getBuffer();
-    }
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        20 * 1024);
-    buff = pair.getFirst();
-    assertFalse(buff.hasArray());
-    assertTrue(buff instanceof MultiByteBuff);
-    bbs = ((MultiByteBuff) buff).getEnclosingByteBuffers();
-    assertEquals(2, bbs.length);
-    assertTrue(bbs[0].isDirect());
-    assertFalse(bbs[1].isDirect());
-    assertEquals(6 * 1024, bbs[0].limit());
-    assertEquals(14 * 1024, bbs[1].limit());
-    assertEquals(0, pool.getQueueSize());
-    assertNotNull(pair.getSecond());
-    pair.getSecond().run();// CallCleanup#run should put back the BB to pool.
-    assertEquals(1, pool.getQueueSize());
-    pool.getBuffer();
-    pair = RpcServer.allocateByteBuffToReadInto(pool, RpcServer.getMinSizeForReservoirUse(pool),
-        7 * 1024);
-    buff = pair.getFirst();
-    assertTrue(buff.hasArray());
-    assertTrue(buff instanceof SingleByteBuff);
-    assertEquals(7 * 1024, ((SingleByteBuff) buff).getEnclosingByteBuffer().limit());
-    assertNull(pair.getSecond());
-  }
-
-  private void initPoolWithAllBuffers(ByteBufferPool pool, int maxBuffersInPool) {
-    ByteBuffer[] buffers = new ByteBuffer[maxBuffersInPool];
-    // Just call getBuffer() on pool 'maxBuffersInPool' so as to init all buffers and then put back
-    // all. Makes pool with max #buffers.
-    for (int i = 0; i < maxBuffersInPool; i++) {
-      buffers[i] = pool.getBuffer();
-    }
-    for (ByteBuffer buf : buffers) {
-      pool.putbackBuffer(buf);
-    }
-  }
-}


[hbase] 13/22: HBASE-22090 The HFileBlock#CacheableDeserializer should pass ByteBuffAllocator to the newly created HFileBlock


openinx pushed a commit to branch HBASE-21879
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 56008736f62713d6cda1c8805990da5157a9756a
Author: huzheng <op...@gmail.com>
AuthorDate: Thu Apr 25 19:41:04 2019 +0800

    HBASE-22090 The HFileBlock#CacheableDeserializer should pass ByteBuffAllocator to the newly created HFileBlock
---
 .../hadoop/hbase/io/hfile/MemcachedBlockCache.java |  5 +--
 .../hbase/io/hfile/CacheableDeserializer.java      | 27 ++++++----------
 .../apache/hadoop/hbase/io/hfile/HFileBlock.java   | 27 +++++-----------
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  | 14 ++++++--
 .../hadoop/hbase/io/hfile/bucket/BucketEntry.java  | 18 +++++++----
 .../hbase/io/hfile/bucket/BucketProtoUtils.java    |  6 ++--
 .../hadoop/hbase/io/hfile/CacheTestUtils.java      | 37 +++++++++-------------
 .../hadoop/hbase/io/hfile/TestCacheConfig.java     | 11 ++-----
 .../hadoop/hbase/io/hfile/TestHFileBlock.java      | 17 +++++-----
 .../io/hfile/bucket/TestBucketCacheRefCnt.java     | 21 +++++++++---
 .../io/hfile/bucket/TestByteBufferIOEngine.java    | 12 +++----
 11 files changed, 93 insertions(+), 102 deletions(-)

diff --git a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
index 51fe754..22abd2c 100644
--- a/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
+++ b/hbase-external-blockcache/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
@@ -36,6 +36,7 @@ import net.spy.memcached.transcoders.Transcoder;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 import org.apache.hadoop.hbase.nio.SingleByteBuff;
@@ -271,10 +272,10 @@ public class MemcachedBlockCache implements BlockCache {
     public HFileBlock decode(CachedData d) {
       try {
         ByteBuff buf = new SingleByteBuff(ByteBuffer.wrap(d.getData()));
-        return (HFileBlock) HFileBlock.BLOCK_DESERIALIZER.deserialize(buf, true,
+        return (HFileBlock) HFileBlock.BLOCK_DESERIALIZER.deserialize(buf, ByteBuffAllocator.HEAP,
           MemoryType.EXCLUSIVE);
       } catch (IOException e) {
-        LOG.warn("Error deserializing data from memcached",e);
+        LOG.warn("Failed to deserialize data from memcached", e);
       }
       return null;
     }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
index fe855d7..0205097 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheableDeserializer.java
@@ -19,37 +19,30 @@ package org.apache.hadoop.hbase.io.hfile;
 
 import java.io.IOException;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.nio.ByteBuff;
 
 /**
- * Interface for a deserializer. Throws an IOException if the serialized data is
- * incomplete or wrong.
- * */
+ * Interface for a deserializer. Throws an IOException if the serialized data is incomplete or
+ * wrong.
+ */
 @InterfaceAudience.Private
 public interface CacheableDeserializer<T extends Cacheable> {
   /**
-   * Returns the deserialized object.
-   *
-   * @return T the deserialized object.
-   */
-  T deserialize(ByteBuff b) throws IOException;
-
-  /**
-   * @param b
-   * @param reuse true if Cacheable object can use the given buffer as its
-   *          content
+   * @param b ByteBuff to deserialize the Cacheable.
+   * @param allocator to manage NIO ByteBuffers for future allocation or de-allocation.
    * @param memType the {@link MemoryType} of the buffer
    * @return T the deserialized object.
    * @throws IOException
    */
-  T deserialize(ByteBuff b, boolean reuse, MemoryType memType) throws IOException;
+  T deserialize(ByteBuff b, ByteBuffAllocator allocator, MemoryType memType) throws IOException;
 
   /**
-   * Get the identifier of this deserialiser. Identifier is unique for each
-   * deserializer and generated by {@link CacheableDeserializerIdManager}
+   * Get the identifier of this deserializer. Identifier is unique for each deserializer and
+   * generated by {@link CacheableDeserializerIdManager}
    * @return identifier number of this cacheable deserializer
    */
-  int getDeserialiserIdentifier();
+  int getDeserializerIdentifier();
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index a3738d6..92dcf44 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -264,42 +264,27 @@ public class HFileBlock implements Cacheable {
     }
 
     @Override
-    public HFileBlock deserialize(ByteBuff buf, boolean reuse, MemoryType memType)
+    public HFileBlock deserialize(ByteBuff buf, ByteBuffAllocator alloc, MemoryType memType)
         throws IOException {
       // The buf has the file block followed by block metadata.
       // Set limit to just before the BLOCK_METADATA_SPACE then rewind.
       buf.limit(buf.limit() - BLOCK_METADATA_SPACE).rewind();
       // Get a new buffer to pass the HFileBlock for it to 'own'.
-      ByteBuff newByteBuff;
-      if (reuse) {
-        newByteBuff = buf.slice();
-      } else {
-        int len = buf.limit();
-        newByteBuff = ByteBuff.wrap(ByteBuffer.allocate(len));
-        newByteBuff.put(0, buf, buf.position(), len);
-      }
+      ByteBuff newByteBuff = buf.slice();
       // Read out the BLOCK_METADATA_SPACE content and shove into our HFileBlock.
       buf.position(buf.limit());
       buf.limit(buf.limit() + HFileBlock.BLOCK_METADATA_SPACE);
       boolean usesChecksum = buf.get() == (byte) 1;
       long offset = buf.getLong();
       int nextBlockOnDiskSize = buf.getInt();
-      // TODO make the newly created HFileBlock use the off-heap allocator, Need change the
-      // deserializer or change the deserialize interface.
       return new HFileBlock(newByteBuff, usesChecksum, memType, offset, nextBlockOnDiskSize, null,
-          ByteBuffAllocator.HEAP);
+          alloc);
     }
 
     @Override
-    public int getDeserialiserIdentifier() {
+    public int getDeserializerIdentifier() {
       return DESERIALIZER_IDENTIFIER;
     }
-
-    @Override
-    public HFileBlock deserialize(ByteBuff b) throws IOException {
-      // Used only in tests
-      return deserialize(b, false, MemoryType.EXCLUSIVE);
-    }
   }
 
   private static final int DESERIALIZER_IDENTIFIER;
@@ -563,6 +548,10 @@ public class HFileBlock implements Cacheable {
     return dup;
   }
 
+  public ByteBuffAllocator getByteBuffAllocator() {
+    return this.allocator;
+  }
+
   @VisibleForTesting
   private void sanityCheckAssertion(long valueFromBuf, long valueFromField,
       String fieldName) throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index a99af7c..bb0b79c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.ByteBuffAllocator.Recycler;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.BlockCache;
@@ -1334,6 +1335,13 @@ public class BucketCache implements BlockCache, HeapSize {
       this.accessCounter = accessCounter;
     }
 
+    private ByteBuffAllocator getByteBuffAllocator() {
+      if (data instanceof HFileBlock) {
+        return ((HFileBlock) data).getByteBuffAllocator();
+      }
+      return ByteBuffAllocator.HEAP;
+    }
+
     public BucketEntry writeToCache(final IOEngine ioEngine, final BucketAllocator alloc,
         final LongAdder realCacheSize) throws IOException {
       int len = data.getSerializedLength();
@@ -1345,9 +1353,9 @@ public class BucketCache implements BlockCache, HeapSize {
       boolean succ = false;
       BucketEntry bucketEntry = null;
       try {
-        bucketEntry =
-            new BucketEntry(offset, len, accessCounter, inMemory, RefCnt.create(recycler));
-        bucketEntry.setDeserialiserReference(data.getDeserializer());
+        bucketEntry = new BucketEntry(offset, len, accessCounter, inMemory, RefCnt.create(recycler),
+            getByteBuffAllocator());
+        bucketEntry.setDeserializerReference(data.getDeserializer());
         if (data instanceof HFileBlock) {
           // If an instance of HFileBlock, save on some allocations.
           HFileBlock block = (HFileBlock) data;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
index b6e83d5..a533793 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketEntry.java
@@ -26,6 +26,7 @@ import java.util.Comparator;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockPriority;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
@@ -58,7 +59,7 @@ class BucketEntry implements HBaseReferenceCounted {
    * The index of the deserializer that can deserialize this BucketEntry content. See
    * {@link CacheableDeserializerIdManager} for hosting of index to serializers.
    */
-  byte deserialiserIndex;
+  byte deserializerIndex;
 
   private volatile long accessCounter;
   private BlockPriority priority;
@@ -80,6 +81,7 @@ class BucketEntry implements HBaseReferenceCounted {
    */
   private final RefCnt refCnt;
   final AtomicBoolean markedAsEvicted;
+  private final ByteBuffAllocator allocator;
 
   /**
    * Time this block was cached. Presumes we are created just before we are added to the cache.
@@ -87,16 +89,18 @@ class BucketEntry implements HBaseReferenceCounted {
   private final long cachedTime = System.nanoTime();
 
   BucketEntry(long offset, int length, long accessCounter, boolean inMemory) {
-    this(offset, length, accessCounter, inMemory, RefCnt.create());
+    this(offset, length, accessCounter, inMemory, RefCnt.create(), ByteBuffAllocator.HEAP);
   }
 
-  BucketEntry(long offset, int length, long accessCounter, boolean inMemory, RefCnt refCnt) {
+  BucketEntry(long offset, int length, long accessCounter, boolean inMemory, RefCnt refCnt,
+      ByteBuffAllocator allocator) {
     setOffset(offset);
     this.length = length;
     this.accessCounter = accessCounter;
     this.priority = inMemory ? BlockPriority.MEMORY : BlockPriority.MULTI;
     this.refCnt = refCnt;
     this.markedAsEvicted = new AtomicBoolean(false);
+    this.allocator = allocator;
   }
 
   long offset() {
@@ -120,11 +124,11 @@ class BucketEntry implements HBaseReferenceCounted {
   }
 
   CacheableDeserializer<Cacheable> deserializerReference() {
-    return CacheableDeserializerIdManager.getDeserializer(deserialiserIndex);
+    return CacheableDeserializerIdManager.getDeserializer(deserializerIndex);
   }
 
-  void setDeserialiserReference(CacheableDeserializer<Cacheable> deserializer) {
-    this.deserialiserIndex = (byte) deserializer.getDeserialiserIdentifier();
+  void setDeserializerReference(CacheableDeserializer<Cacheable> deserializer) {
+    this.deserializerIndex = (byte) deserializer.getDeserializerIdentifier();
   }
 
   long getAccessCounter() {
@@ -192,7 +196,7 @@ class BucketEntry implements HBaseReferenceCounted {
 
   Cacheable wrapAsCacheable(ByteBuffer[] buffers, MemoryType memoryType) throws IOException {
     ByteBuff buf = ByteBuff.wrap(buffers, this.refCnt);
-    return this.deserializerReference().deserialize(buf, true, memoryType);
+    return this.deserializerReference().deserialize(buf, allocator, memoryType);
   }
 
   interface BucketEntryHandler<T> {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
index 72765de..69b8370 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
@@ -105,7 +105,7 @@ final class BucketProtoUtils {
     return BucketCacheProtos.BucketEntry.newBuilder()
         .setOffset(entry.offset())
         .setLength(entry.getLength())
-        .setDeserialiserIndex(entry.deserialiserIndex)
+        .setDeserialiserIndex(entry.deserializerIndex)
         .setAccessCounter(entry.getAccessCounter())
         .setPriority(toPB(entry.getPriority()))
         .build();
@@ -146,8 +146,8 @@ final class BucketProtoUtils {
       }
       // Convert it to the identifier for the deserializer that we have in this runtime
       if (deserializerClass.equals(HFileBlock.BlockDeserializer.class.getName())) {
-        int actualIndex = HFileBlock.BLOCK_DESERIALIZER.getDeserialiserIdentifier();
-        value.deserialiserIndex = (byte) actualIndex;
+        int actualIndex = HFileBlock.BLOCK_DESERIALIZER.getDeserializerIdentifier();
+        value.deserializerIndex = (byte) actualIndex;
       } else {
         // We could make this more plugable, but right now HFileBlock is the only implementation
         // of Cacheable outside of tests, so this might not ever matter.
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
index 97003e0..5f1f617 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java
@@ -225,29 +225,22 @@ public class CacheTestUtils {
   public static class ByteArrayCacheable implements Cacheable {
 
     static final CacheableDeserializer<Cacheable> blockDeserializer =
-      new CacheableDeserializer<Cacheable>() {
-
-      @Override
-      public Cacheable deserialize(ByteBuff b) throws IOException {
-        int len = b.getInt();
-        Thread.yield();
-        byte buf[] = new byte[len];
-        b.get(buf);
-        return new ByteArrayCacheable(buf);
-      }
-
-      @Override
-      public int getDeserialiserIdentifier() {
-        return deserializerIdentifier;
-      }
-
+        new CacheableDeserializer<Cacheable>() {
+          @Override
+          public int getDeserializerIdentifier() {
+            return deserializerIdentifier;
+          }
 
-      @Override
-      public Cacheable deserialize(ByteBuff b, boolean reuse, MemoryType memType)
-          throws IOException {
-        return deserialize(b);
-      }
-    };
+          @Override
+          public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
+              throws IOException {
+            int len = b.getInt();
+            Thread.yield();
+            byte buf[] = new byte[len];
+            b.get(buf);
+            return new ByteArrayCacheable(buf);
+          }
+        };
 
     final byte[] buf;
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
index 3dae278..eda54f7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheConfig.java
@@ -77,18 +77,13 @@ public class TestCacheConfig {
     }
 
     @Override
-    public int getDeserialiserIdentifier() {
+    public int getDeserializerIdentifier() {
       return deserializedIdentifier;
     }
 
     @Override
-    public Cacheable deserialize(ByteBuff b, boolean reuse, MemoryType memType) throws IOException {
-      LOG.info("Deserialized " + b + ", reuse=" + reuse);
-      return cacheable;
-    }
-
-    @Override
-    public Cacheable deserialize(ByteBuff b) throws IOException {
+    public Cacheable deserialize(ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
+        throws IOException {
       LOG.info("Deserialized " + b);
       return cacheable;
     }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
index 5fdd7a4..538a5a6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
+import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
 import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ;
 import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE;
 import static org.junit.Assert.*;
@@ -120,7 +121,7 @@ public class TestHFileBlock {
     this.includesMemstoreTS = includesMemstoreTS;
     this.includesTag = includesTag;
     this.useHeapAllocator = useHeapAllocator;
-    this.alloc = useHeapAllocator ? ByteBuffAllocator.HEAP : createOffHeapAlloc();
+    this.alloc = useHeapAllocator ? HEAP : createOffHeapAlloc();
     assertAllocator();
   }
 
@@ -524,16 +525,14 @@ public class TestHFileBlock {
             for (boolean reuseBuffer : new boolean[] { false, true }) {
               ByteBuffer serialized = ByteBuffer.allocate(blockFromHFile.getSerializedLength());
               blockFromHFile.serialize(serialized, true);
-              HFileBlock deserialized =
-                  (HFileBlock) blockFromHFile.getDeserializer().deserialize(
-                    new SingleByteBuff(serialized), reuseBuffer, MemoryType.EXCLUSIVE);
-              assertEquals(
-                "Serialization did not preserve block state. reuseBuffer=" + reuseBuffer,
+              HFileBlock deserialized = (HFileBlock) blockFromHFile.getDeserializer()
+                  .deserialize(new SingleByteBuff(serialized), HEAP, MemoryType.EXCLUSIVE);
+              assertEquals("Serialization did not preserve block state. reuseBuffer=" + reuseBuffer,
                 blockFromHFile, deserialized);
               // intentional reference comparison
               if (blockFromHFile != blockUnpacked) {
-                assertEquals("Deserializaed block cannot be unpacked correctly.",
-                  blockUnpacked, deserialized.unpack(meta, hbr));
+                assertEquals("Deserialized block cannot be unpacked correctly.", blockUnpacked,
+                  deserialized.unpack(meta, hbr));
               }
             }
             assertTrue(blockUnpacked.release());
@@ -916,7 +915,7 @@ public class TestHFileBlock {
                           .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
                           .withChecksumType(ChecksumType.NULL).build();
       HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, HFileBlock.FILL_HEADER,
-          -1, 0, -1, meta, ByteBuffAllocator.HEAP);
+          -1, 0, -1, meta, HEAP);
       long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(
           new MultiByteBuff(buf).getClass(), true)
           + HConstants.HFILEBLOCK_HEADER_SIZE + size);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
index 1dcd2a2..6015706 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCacheRefCnt.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hbase.io.hfile.bucket;
 
-import static org.apache.hadoop.hbase.io.ByteBuffAllocator.HEAP;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
@@ -28,6 +27,8 @@ import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
@@ -65,8 +66,12 @@ public class TestBucketCacheRefCnt {
   }
 
   private static HFileBlock createBlock(int offset, int size) {
+    return createBlock(offset, size, ByteBuffAllocator.HEAP);
+  }
+
+  private static HFileBlock createBlock(int offset, int size, ByteBuffAllocator alloc) {
     return new HFileBlock(BlockType.DATA, size, size, -1, ByteBuffer.allocate(size),
-        HFileBlock.FILL_HEADER, offset, 52, size, CONTEXT, HEAP);
+        HFileBlock.FILL_HEADER, offset, 52, size, CONTEXT, alloc);
   }
 
   private static BlockCacheKey createKey(String hfileName, long offset) {
@@ -133,9 +138,10 @@ public class TestBucketCacheRefCnt {
 
   @Test
   public void testBlockInBackingMap() throws Exception {
+    ByteBuffAllocator alloc = ByteBuffAllocator.create(HBaseConfiguration.create(), true);
     cache = create(1, 1000);
     try {
-      HFileBlock blk = createBlock(200, 1020);
+      HFileBlock blk = createBlock(200, 1020, alloc);
       BlockCacheKey key = createKey("testHFile-00", 200);
       cache.cacheBlock(key, blk);
       waitUntilFlushedToCache(key);
@@ -144,6 +150,7 @@ public class TestBucketCacheRefCnt {
       Cacheable block = cache.getBlock(key, false, false, false);
       assertTrue(block.getMemoryType() == MemoryType.SHARED);
       assertTrue(block instanceof HFileBlock);
+      assertTrue(((HFileBlock) block).getByteBuffAllocator() == alloc);
       assertEquals(2, block.refCnt());
 
       block.retain();
@@ -152,6 +159,7 @@ public class TestBucketCacheRefCnt {
       Cacheable newBlock = cache.getBlock(key, false, false, false);
       assertTrue(newBlock.getMemoryType() == MemoryType.SHARED);
       assertTrue(newBlock instanceof HFileBlock);
+      assertTrue(((HFileBlock) newBlock).getByteBuffAllocator() == alloc);
       assertEquals(4, newBlock.refCnt());
 
       // release the newBlock
@@ -173,6 +181,7 @@ public class TestBucketCacheRefCnt {
       newBlock = cache.getBlock(key, false, false, false);
       assertEquals(2, block.refCnt());
       assertEquals(2, newBlock.refCnt());
+      assertTrue(((HFileBlock) newBlock).getByteBuffAllocator() == alloc);
 
       // Release the block
       assertFalse(block.release());
@@ -188,17 +197,20 @@ public class TestBucketCacheRefCnt {
 
   @Test
   public void testInBucketCache() throws IOException {
+    ByteBuffAllocator alloc = ByteBuffAllocator.create(HBaseConfiguration.create(), true);
     cache = create(1, 1000);
     try {
-      HFileBlock blk = createBlock(200, 1020);
+      HFileBlock blk = createBlock(200, 1020, alloc);
       BlockCacheKey key = createKey("testHFile-00", 200);
       cache.cacheBlock(key, blk);
       assertTrue(blk.refCnt() == 1 || blk.refCnt() == 2);
 
       Cacheable block1 = cache.getBlock(key, false, false, false);
       assertTrue(block1.refCnt() >= 2);
+      assertTrue(((HFileBlock) block1).getByteBuffAllocator() == alloc);
 
       Cacheable block2 = cache.getBlock(key, false, false, false);
+      assertTrue(((HFileBlock) block2).getByteBuffAllocator() == alloc);
       assertTrue(block2.refCnt() >= 3);
 
       cache.evictBlock(key);
@@ -209,6 +221,7 @@ public class TestBucketCacheRefCnt {
       // Get key again
       Cacheable block3 = cache.getBlock(key, false, false, false);
       if (block3 != null) {
+        assertTrue(((HFileBlock) block3).getByteBuffAllocator() == alloc);
         assertTrue(block3.refCnt() >= 3);
         assertFalse(block3.release());
       }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
index 2f8c838..1a8964f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.io.hfile.bucket;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.io.ByteBuffAllocator;
 import org.apache.hadoop.hbase.io.hfile.Cacheable;
 import org.apache.hadoop.hbase.io.hfile.Cacheable.MemoryType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
@@ -67,7 +68,7 @@ public class TestByteBufferIOEngine {
 
   static BucketEntry createBucketEntry(long offset, int len) {
     BucketEntry be = new MockBucketEntry(offset, len);
-    be.setDeserialiserReference(DESERIALIZER);
+    be.setDeserializerReference(DESERIALIZER);
     return be;
   }
 
@@ -126,12 +127,7 @@ public class TestByteBufferIOEngine {
     private int identifier;
 
     @Override
-    public Cacheable deserialize(ByteBuff b) throws IOException {
-      return null;
-    }
-
-    @Override
-    public Cacheable deserialize(final ByteBuff b, boolean reuse, MemoryType memType)
+    public Cacheable deserialize(final ByteBuff b, ByteBuffAllocator alloc, MemoryType memType)
         throws IOException {
       this.buf = b;
       return null;
@@ -142,7 +138,7 @@ public class TestByteBufferIOEngine {
     }
 
     @Override
-    public int getDeserialiserIdentifier() {
+    public int getDeserializerIdentifier() {
       return identifier;
     }
   }