Posted to commits@ozone.apache.org by pr...@apache.org on 2021/02/04 18:21:00 UTC

[ozone] branch HDDS-3698-upgrade updated: HDDS-4587. Merge remote-tracking branch 'upstream/master' into HDDS-3698 (#1822)

This is an automated email from the ASF dual-hosted git repository.

prashantpogde pushed a commit to branch HDDS-3698-upgrade
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/HDDS-3698-upgrade by this push:
     new 731ef53  HDDS-4587. Merge remote-tracking branch 'upstream/master' into HDDS-3698 (#1822)
731ef53 is described below

commit 731ef5327d3cd7cf335ee8fe2cd9d5442b608ab3
Author: prashantpogde <pr...@gmail.com>
AuthorDate: Thu Feb 4 10:20:40 2021 -0800

    HDDS-4587. Merge remote-tracking branch 'upstream/master' into HDDS-3698 (#1822)
    
    * HDDS-4587. Merge remote-tracking branch 'upstream/master' into HDDS-3698.
    
    * HDDS-4587. Addressing CI failure.
    
    * HDDS-4562. Old bucket needs to be accessible after the cluster was upgraded to the Quota version. (#1677)
    
    Cherry picked from master to fix acceptance test failure in upgrade test. Merging again from this point would have introduced 52 new conflicts.
    
    * HDDS-4770. Upgrade Ratis Thirdparty to 0.6.0 (#1868)
    
    Cherry picked from master because 0.6.0-SNAPSHOT is no longer in the repos
    
    Co-authored-by: micah zhao <mi...@tencent.com>
    Co-authored-by: Doroszlai, Attila <64...@users.noreply.github.com>
---
 .github/workflows/post-commit.yml                  |  10 +-
 HISTORY.md                                         |   4 +-
 README.md                                          |   1 +
 SECURITY.md                                        |  23 +
 hadoop-hdds/client/pom.xml                         |   5 +
 .../apache/hadoop/hdds/scm/OzoneClientConfig.java  |  18 +
 .../apache/hadoop/hdds/scm/XceiverClientRatis.java |   2 +-
 .../hadoop/hdds/scm/storage/BlockInputStream.java  | 135 +++-
 .../hadoop/hdds/scm/storage/BlockOutputStream.java |   7 +-
 .../hadoop/hdds/scm/storage/ChunkInputStream.java  |  67 +-
 .../hdds/scm/storage/DummyBlockInputStream.java    |  31 +-
 .../storage/DummyBlockInputStreamWithRetry.java    |   4 +-
 .../hdds/scm/storage/DummyChunkInputStream.java    |  24 +-
 .../hdds/scm/storage/TestBlockInputStream.java     | 189 +++++-
 .../hdds/scm/storage/TestChunkInputStream.java     |  62 +-
 .../client/src/test/resources/log4j.properties     |  23 +
 .../org/apache/hadoop/hdds/client/OzoneQuota.java  | 135 ++--
 .../org/apache/hadoop/hdds/client/QuotaList.java   |  67 ++
 .../hadoop/hdds/conf/OzoneConfiguration.java       |  17 +
 .../hadoop/hdds/protocol/DatanodeDetails.java      | 119 +++-
 .../java/org/apache/hadoop/hdds/scm/ScmConfig.java |   2 +-
 .../org/apache/hadoop/hdds/scm/ScmConfigKeys.java  |   9 +-
 .../apache/hadoop/hdds/scm/client/ScmClient.java   |  45 +-
 .../protocol/StorageContainerLocationProtocol.java |  18 +-
 .../java/org/apache/hadoop/ozone/OzoneConsts.java  |  17 +-
 .../apache/hadoop/ozone/lease/LeaseManager.java    |  38 +-
 .../common/src/main/resources/ozone-default.xml    |  36 +-
 .../hadoop/hdds/protocol/MockDatanodeDetails.java  |   4 +
 .../hadoop/hdds/conf/ConfigFileAppender.java       |   4 +-
 .../hadoop/hdds/conf/ConfigFileGenerator.java      |  19 +-
 .../apache/hadoop/ozone/HddsDatanodeService.java   |  12 +-
 .../container/common/helpers/ContainerUtils.java   |  14 +-
 .../container/common/helpers/DatanodeIdYaml.java   |  38 +-
 .../ozone/container/common/impl/ContainerData.java |   2 +-
 .../container/common/impl/ContainerDataYaml.java   |   4 +-
 .../container/common/report/ReportPublisher.java   |   2 +-
 .../common/statemachine/DatanodeStateMachine.java  |   2 +
 .../common/statemachine/StateContext.java          | 143 ++++-
 .../commandhandler/DeleteBlocksCommandHandler.java | 191 +++---
 .../SetNodeOperationalStateCommandHandler.java     | 157 +++++
 .../states/endpoint/HeartbeatEndpointTask.java     |  28 +-
 .../common/transport/server/XceiverServerGrpc.java |  20 +-
 .../server/ratis/ContainerStateMachine.java        |  23 +-
 .../transport/server/ratis/XceiverServerRatis.java |  23 +-
 .../container/keyvalue/KeyValueContainer.java      |  46 +-
 .../ozone/container/keyvalue/KeyValueHandler.java  |   9 +-
 .../background/BlockDeletingService.java           | 171 ++++-
 .../metadata/AbstractDatanodeDBDefinition.java     |   2 +-
 .../metadata/DatanodeSchemaOneDBDefinition.java    |   5 +
 .../metadata/DatanodeSchemaTwoDBDefinition.java    |  32 +-
 .../metadata/DatanodeStoreSchemaTwoImpl.java       |  14 +-
 ...mpl.java => DeletedBlocksTransactionCodec.java} |  35 +-
 .../ozone/container/ozoneimpl/OzoneContainer.java  |  39 +-
 .../replication/GrpcReplicationClient.java         |  15 +-
 .../container/replication/ReplicationServer.java   | 144 +++++
 .../replication/ReplicationSupervisor.java         |  22 +-
 .../replication/SimpleContainerDownloader.java     |   6 +-
 .../commands/SetNodeOperationalStateCommand.java   |  89 +++
 .../ozone/container/ContainerTestHelper.java       |  16 +-
 .../container/common/TestBlockDeletingService.java | 279 ++++++--
 .../common/statemachine/TestStateContext.java      | 286 ++++++++-
 .../TestCreatePipelineCommandHandler.java          |   6 +-
 .../container/keyvalue/TestKeyValueContainer.java  | 145 +++--
 .../container/keyvalue/TestTarContainerPacker.java |  11 +-
 .../ReplicationSupervisorScheduling.java           | 125 ++++
 .../replication/TestSimpleContainerDownloader.java |   4 +-
 .../org.mockito.plugins.MockMaker                  |   3 +-
 hadoop-hdds/docs/README.md                         |   2 +-
 hadoop-hdds/docs/config.yaml                       |   2 +
 hadoop-hdds/docs/content/_index.md                 |   2 +-
 hadoop-hdds/docs/content/_index.zh.md              |   4 +-
 .../docs/content/concept/OzoneManager.zh.md        |   4 +-
 hadoop-hdds/docs/content/concept/Recon.zh.md       | 116 ++++
 .../content/concept/StorageContainerManager.zh.md  |   6 +-
 hadoop-hdds/docs/content/design/decommissioning.md |  10 +-
 hadoop-hdds/docs/content/feature/Quota.md          |  66 +-
 hadoop-hdds/docs/content/feature/Quota.zh.md       |  62 +-
 hadoop-hdds/docs/content/feature/Recon.zh.md       |  23 +-
 hadoop-hdds/docs/content/interface/ReconApi.zh.md  | 502 +++++++++++++++
 hadoop-hdds/docs/content/interface/S3.md           |   6 +-
 hadoop-hdds/docs/content/recipe/Prometheus.md      |   6 +-
 hadoop-hdds/docs/content/recipe/Prometheus.zh.md   |   6 +-
 .../docs/content/security/SecuringOzoneHTTP.md     |  12 +-
 .../docs/content/security/SecurityWithRanger.md    |  21 +-
 .../docs/content/security/SecurityWithRanger.zh.md |  18 +-
 hadoop-hdds/docs/content/tools/AuditParser.md      |   2 +-
 hadoop-hdds/docs/dev-support/bin/generate-site.sh  |   9 +-
 hadoop-hdds/docs/pom.xml                           |   4 +-
 hadoop-hdds/docs/static/ozone-logo-monochrome.svg  |   4 +-
 .../themes/ozonedoc/layouts/_default/baseof.html   |   2 +
 .../themes/ozonedoc/layouts/_default/section.html  |   3 +
 .../themes/ozonedoc/layouts/_default/single.html   |   3 +
 .../docs/themes/ozonedoc/layouts/index.html        |  26 +-
 .../themes/ozonedoc/layouts/partials/footer.html   |   9 +
 .../themes/ozonedoc/layouts/partials/header.html   |   4 +-
 .../themes/ozonedoc/layouts/partials/navbar.html   |   4 +-
 .../docs/themes/ozonedoc/static/css/ozonedoc.css   |  30 +-
 .../hadoop/hdds/protocol/SCMSecurityProtocol.java  |  15 +
 .../SCMSecurityProtocolClientSideTranslatorPB.java |  27 +
 ...inerLocationProtocolClientSideTranslatorPB.java |  85 ++-
 .../hdds/security/token/BlockTokenVerifier.java    |  31 +-
 .../certificate/authority/CertificateServer.java   |  12 +
 .../certificate/authority/CertificateStore.java    |  16 +
 .../certificate/authority/DefaultCAServer.java     |  19 +
 .../hadoop/hdds/server/http/HtmlQuoting.java       |   2 +-
 .../hadoop/hdds/server/http/HttpServer2.java       |   3 +-
 .../server/http/RatisNameRewriteSampleBuilder.java |   2 +-
 .../org/apache/hadoop/hdds/utils/db/DBStore.java   |  14 +-
 .../org/apache/hadoop/hdds/utils/db/RDBStore.java  |   6 +-
 .../apache/hadoop/hdds/utils/db/TypedTable.java    |  24 +-
 .../{TableCacheImpl.java => FullTableCache.java}   | 143 ++---
 ...{TableCacheImpl.java => PartialTableCache.java} | 122 ++--
 .../hadoop/hdds/utils/db/cache/TableCache.java     |  30 +-
 .../x509/certificate/authority/MockCAStore.java    |  11 +
 .../client/TestDefaultCertificateClient.java       |  41 +-
 .../apache/hadoop/hdds/server/TestJsonUtils.java   |   2 +-
 .../server/http/TestRatisDropwizardExports.java    |   9 +-
 ...TestTableCacheImpl.java => TestTableCache.java} | 144 ++++-
 .../src/main/proto/ScmAdminProtocol.proto          |  59 +-
 .../interface-client/src/main/proto/hdds.proto     |  11 +-
 .../interface-client/src/main/resources/proto.lock |   8 -
 .../proto/ScmServerDatanodeHeartbeatProtocol.proto |  12 +-
 .../src/main/proto/ScmServerSecurityProtocol.proto |  26 +
 .../hadoop/hdds/scm/SCMCommonPlacementPolicy.java  |   4 +-
 .../hadoop/hdds/scm/block/BlockManagerImpl.java    |   6 +-
 .../hadoop/hdds/scm/block/DeletedBlockLogImpl.java |  74 ++-
 .../hdds/scm/block/SCMBlockDeletingService.java    |   9 +-
 .../hdds/scm/container/ContainerReplicaCount.java  | 271 ++++++++
 .../hdds/scm/container/ReplicationManager.java     | 304 ++++++---
 .../hdds/scm/container/SCMContainerManager.java    |  37 +-
 .../scm/container/states/ContainerStateMap.java    |   2 +-
 .../apache/hadoop/hdds/scm/events/SCMEvents.java   |   6 +
 .../hdds/scm/metadata/X509CertificateCodec.java    |   6 +-
 ...anagerMXBean.java => DatanodeAdminMonitor.java} |  27 +-
 .../hdds/scm/node/DatanodeAdminMonitorImpl.java    | 371 +++++++++++
 .../apache/hadoop/hdds/scm/node/DatanodeInfo.java  |  53 +-
 .../hadoop/hdds/scm/node/DeadNodeHandler.java      |   7 +-
 .../hdds/scm/node/InvalidHostStringException.java  |  34 +
 .../hdds/scm/node/InvalidNodeStateException.java   |  34 +
 .../hadoop/hdds/scm/node/NewNodeHandler.java       |  20 +
 .../hdds/scm/node/NodeDecommissionManager.java     | 369 +++++++++++
 .../apache/hadoop/hdds/scm/node/NodeManager.java   |  59 +-
 .../hadoop/hdds/scm/node/NodeManagerMXBean.java    |   2 +-
 .../hadoop/hdds/scm/node/NodeStateManager.java     | 422 ++++++------
 .../apache/hadoop/hdds/scm/node/NodeStatus.java    | 211 ++++++
 .../hadoop/hdds/scm/node/SCMNodeManager.java       | 247 +++++--
 .../hadoop/hdds/scm/node/SCMNodeMetrics.java       | 103 +--
 .../hdds/scm/node/StartDatanodeAdminHandler.java   |  68 ++
 .../hadoop/hdds/scm/node/states/NodeStateMap.java  | 264 ++++++--
 .../hdds/scm/pipeline/PipelinePlacementPolicy.java |  10 +-
 .../hadoop/hdds/scm/pipeline/PipelineProvider.java |   6 +-
 .../hdds/scm/pipeline/RatisPipelineProvider.java   |   6 +-
 .../hdds/scm/pipeline/SimplePipelineProvider.java  |   1 -
 .../SCMSecurityProtocolServerSideTranslatorPB.java |  25 +
 ...inerLocationProtocolServerSideTranslatorPB.java |  62 +-
 .../hdds/scm/safemode/ContainerSafeModeRule.java   |  19 +-
 .../hdds/scm/safemode/DataNodeSafeModeRule.java    |   5 +-
 .../scm/safemode/HealthyPipelineSafeModeRule.java  |   7 +-
 .../safemode/OneReplicaPipelineSafeModeRule.java   |  24 +-
 .../hadoop/hdds/scm/server/SCMCertStore.java       |  42 ++
 .../hdds/scm/server/SCMClientProtocolServer.java   | 111 +++-
 .../hdds/scm/server/SCMContainerMetrics.java       |  11 +-
 .../hdds/scm/server/SCMDatanodeProtocolServer.java |   8 +
 .../apache/hadoop/hdds/scm/server/SCMMXBean.java   |   6 +-
 .../hdds/scm/server/SCMSecurityProtocolServer.java |  32 +-
 .../hdds/scm/server/StorageContainerManager.java   |  43 +-
 .../main/resources/webapps/scm/scm-overview.html   |  12 +-
 .../hadoop/hdds/scm/block/TestBlockManager.java    |   5 +-
 .../hadoop/hdds/scm/container/MockNodeManager.java |  77 ++-
 .../hdds/scm/container/SimpleMockNodeManager.java  | 332 ++++++++++
 .../scm/container/TestContainerReportHandler.java  |  18 +-
 .../hdds/scm/container/TestReplicationManager.java | 285 ++++++++-
 .../scm/container/TestUnknownContainerReport.java  |   4 +-
 .../algorithms/TestContainerPlacementFactory.java  |  13 +-
 .../TestSCMContainerPlacementCapacity.java         |   4 +-
 .../TestSCMContainerPlacementRackAware.java        |  18 +-
 .../TestSCMContainerPlacementRandom.java           |   4 +-
 .../states/TestContainerReplicaCount.java          | 465 ++++++++++++++
 .../hdds/scm/node/TestContainerPlacement.java      |   2 +-
 .../hdds/scm/node/TestDatanodeAdminMonitor.java    | 530 +++++++++++++++
 .../hadoop/hdds/scm/node/TestDeadNodeHandler.java  |  31 +-
 .../hdds/scm/node/TestNodeDecommissionManager.java | 297 +++++++++
 .../hadoop/hdds/scm/node/TestNodeStateManager.java | 320 ++++++++++
 .../hadoop/hdds/scm/node/TestSCMNodeManager.java   | 226 +++++--
 .../hdds/scm/node/states/TestNodeStateMap.java     | 189 ++++++
 .../TestPipelineDatanodesIntersection.java         |   3 +-
 .../scm/pipeline/TestPipelinePlacementPolicy.java  |   9 +-
 .../scm/pipeline/TestRatisPipelineProvider.java    |   5 +-
 .../hdds/scm/pipeline/TestSCMPipelineManager.java  |  11 +-
 .../choose/algorithms/TestLeaderChoosePolicy.java  |   2 +-
 .../hdds/scm/server/TestSCMContainerMetrics.java   |   2 +
 .../ozone/container/common/TestEndPoint.java       |  50 +-
 .../placement/TestContainerPlacement.java          |   6 +-
 .../testutils/ReplicationNodeManagerMock.java      |  85 ++-
 .../hadoop/ozone/scm/node/TestSCMNodeMetrics.java  |  71 ++-
 hadoop-hdds/tools/pom.xml                          |   6 +
 .../hdds/scm/cli/ContainerOperationClient.java     |  35 +-
 .../hdds/scm/cli/SafeModeWaitSubcommand.java       |  20 +-
 .../org/apache/hadoop/hdds/scm/cli/ScmOption.java  |  13 +
 .../hadoop/hdds/scm/cli/TopologySubcommand.java    |  42 +-
 .../CertCommands.java}                             |  18 +-
 .../hadoop/hdds/scm/cli/cert/InfoSubcommand.java   |  73 +++
 .../hadoop/hdds/scm/cli/cert/ListSubcommand.java   | 102 +++
 .../hdds/scm/cli/cert/ScmCertSubcommand.java       |  33 +-
 .../hadoop/hdds/scm/cli/cert/package-info.java}    |  27 +-
 .../hdds/scm/cli/datanode/DatanodeCommands.java    |   5 +-
 ...deCommands.java => DecommissionSubCommand.java} |  47 +-
 .../hdds/scm/cli/datanode/ListInfoSubcommand.java  |  48 +-
 ...odeCommands.java => MaintenanceSubCommand.java} |  52 +-
 ...deCommands.java => RecommissionSubCommand.java} |  49 +-
 .../scm/cli/datanode/TestListInfoSubcommand.java   | 119 ++++
 .../org/apache/hadoop/ozone/client/BucketArgs.java |  22 +-
 .../apache/hadoop/ozone/client/OzoneBucket.java    |  62 +-
 .../apache/hadoop/ozone/client/OzoneVolume.java    |  66 +-
 .../org/apache/hadoop/ozone/client/VolumeArgs.java |  22 +-
 .../hadoop/ozone/client/io/KeyInputStream.java     |  11 +-
 .../hadoop/ozone/client/io/OzoneInputStream.java   |  11 +-
 .../ozone/client/protocol/ClientProtocol.java      |  12 +-
 .../apache/hadoop/ozone/client/rpc/RpcClient.java  |  40 +-
 .../java/org/apache/hadoop}/ozone/OFSPath.java     |   7 +-
 .../apache/hadoop/ozone/conf/OMClientConfig.java   |  19 +-
 .../org/apache/hadoop/ozone/om/OMConfigKeys.java   |  12 +-
 .../hadoop/ozone/om/exceptions/OMException.java    |   4 +-
 .../hadoop/ozone/om/helpers/OmBucketArgs.java      |  28 +-
 .../hadoop/ozone/om/helpers/OmBucketInfo.java      |  58 +-
 .../hadoop/ozone/om/helpers/OmOzoneAclMap.java     |  97 +--
 .../hadoop/ozone/om/helpers/OmVolumeArgs.java      |  73 ++-
 .../ozone/om/protocol/OzoneManagerProtocol.java    |   4 +-
 ...OzoneManagerProtocolClientSideTranslatorPB.java |   6 +-
 .../apache/hadoop/ozone/web/utils/OzoneUtils.java  |   4 +-
 .../hadoop/ozone/om/helpers/TestOmOzoneAclMap.java |  56 ++
 .../org/apache/hadoop/ozone/csi/NodeService.java   |   5 +-
 hadoop-ozone/dev-support/checks/acceptance.sh      |   2 +-
 hadoop-ozone/dev-support/checks/blockade.sh        |   2 +-
 hadoop-ozone/dev-support/checks/kubernetes.sh      |   2 +-
 hadoop-ozone/dev-support/intellij/ozone-site.xml   |   4 +
 .../dist/src/main/compose/ozone-ha/docker-config   |   1 +
 .../dist/src/main/compose/ozone-mr/test.sh         |   1 +
 hadoop-ozone/dist/src/main/compose/ozone/README.md |   2 +-
 .../dist/src/main/compose/ozone/docker-config      |   2 +
 .../dist/src/main/compose/ozonescripts/start.sh    |   8 +-
 .../dist/src/main/compose/ozonescripts/stop.sh     |   2 +-
 .../compose/{ozone-mr => ozonescripts}/test.sh     |  41 +-
 .../src/main/compose/ozonesecure/docker-config     |   2 +
 hadoop-ozone/dist/src/main/compose/testlib.sh      |   2 +-
 hadoop-ozone/dist/src/main/compose/upgrade/test.sh |   8 +-
 .../dist/src/main/dockerlibexec/transformation.py  |   8 +-
 .../src/main/k8s/definitions/ozone/config.yaml     |   1 +
 .../src/main/smoketest/basic/ozone-shell-lib.robot |  40 +-
 .../dist/src/main/smoketest/createbucketenv.robot  |   2 +-
 .../dist/src/main/smoketest/createmrenv.robot      |   2 +-
 .../src/main/smoketest/debug/ozone-debug.robot     |   2 +-
 .../dist/src/main/smoketest/freon/freon.robot      |  37 --
 .../{topology/cli.robot => freon/generate.robot}   |  26 +-
 .../dist/src/main/smoketest/freon/validate.robot   |  24 +-
 .../dist/src/main/smoketest/gdpr/gdpr.robot        |   2 +-
 .../dist/src/main/smoketest/mapreduce.robot        |   2 +-
 .../dist/src/main/smoketest/ozonefs/ozonefs.robot  |  15 +-
 .../cli.robot => security/admin-cert.robot}        |  29 +-
 .../dist/src/main/smoketest/topology/cli.robot     |   4 +-
 hadoop-ozone/dist/src/shell/ozone/ozone            |  19 +-
 .../fs/contract/AbstractContractUnbufferTest.java  | 159 +++++
 .../fs/ozone/TestOzoneFSWithObjectStoreCreate.java |   9 +
 .../hadoop/fs/ozone/TestOzoneFileSystem.java       |  72 ++-
 .../fs/ozone/TestOzoneFileSystemMetrics.java       | 157 +++++
 .../fs/ozone/TestOzoneFileSystemMissingParent.java | 127 ++++
 .../hadoop/fs/ozone/TestRootedOzoneFileSystem.java |  19 +-
 .../contract/ITestOzoneContractUnbuffer.java}      |  41 +-
 .../rooted/ITestRootedOzoneContractUnbuffer.java}  |  40 +-
 .../hadoop/hdds/scm/pipeline/TestNodeFailure.java  |   2 +
 .../TestRatisPipelineCreateAndDestroy.java         |   3 +-
 .../hadoop/hdds/scm/pipeline/TestSCMRestart.java   |   2 +
 .../hadoop/hdds/upgrade/TestHDDSUpgrade.java       |  10 +-
 .../apache/hadoop/ozone/MiniOzoneClusterImpl.java  |   3 +
 .../hadoop/ozone/MiniOzoneHAClusterImpl.java       |   2 +-
 .../apache/hadoop/ozone/TestMiniOzoneCluster.java  |   2 +
 .../hadoop/ozone/TestMiniOzoneHACluster.java       |   2 +-
 .../hadoop/ozone/TestOzoneConfigurationFields.java |   3 +-
 .../hadoop/ozone/TestStorageContainerManager.java  |  11 +-
 .../ozone/TestStorageContainerManagerHelper.java   |  30 +
 .../rpc/TestBlockOutputStreamWithFailures.java     |   3 +-
 .../rpc/TestCloseContainerHandlingByClient.java    |   2 +
 .../client/rpc/TestContainerStateMachine.java      |   2 +
 .../client/rpc/TestDeleteWithSlowFollower.java     |   1 +
 .../client/rpc/TestDiscardPreallocatedBlocks.java  |   1 +
 .../ozone/client/rpc/TestKeyInputStream.java       | 196 ++++--
 .../client/rpc/TestOzoneAtRestEncryption.java      |  20 +-
 .../rpc/TestOzoneClientRetriesOnException.java     |  32 +-
 .../client/rpc/TestOzoneRpcClientAbstract.java     | 175 ++++-
 .../ozone/client/rpc/TestWatchForCommit.java       |   5 +-
 .../apache/hadoop/ozone/container/TestHelper.java  |  85 ++-
 .../container/metrics/TestContainerMetrics.java    |  13 +-
 .../container/server/TestContainerServer.java      |  75 +--
 .../server/TestSecureContainerServer.java          |  57 +-
 .../apache/hadoop/ozone/om/TestKeyManagerImpl.java |   4 +-
 ...gerRestart.java => TestOMEpochForNonRatis.java} | 154 +----
 .../apache/hadoop/ozone/om/TestOzoneManagerHA.java |  14 +
 .../ozone/om/TestOzoneManagerHAMetadataOnly.java   |  61 +-
 .../hadoop/ozone/om/TestOzoneManagerRestart.java   | 101 ---
 .../ozone/om/parser/TestOMRatisLogParser.java      |  14 +-
 .../ozone/recon/TestReconWithOzoneManagerHA.java   |   2 +-
 .../hadoop/ozone/scm/TestSCMNodeManagerMXBean.java |  30 +-
 .../scm/node/TestDecommissionAndMaintenance.java   | 709 +++++++++++++++++++++
 .../hadoop/ozone/scm/node/TestQueryNode.java       |  67 +-
 .../hadoop/ozone/shell/TestOzoneShellHA.java       |   4 +-
 .../src/test/resources/contract/ozone.xml          |   5 +
 .../src/main/proto/OmClientProtocol.proto          |  15 +-
 .../apache/hadoop/ozone/om/OMMetadataManager.java  |  10 +
 hadoop-ozone/ozone-manager/pom.xml                 |   4 +-
 .../apache/hadoop/ozone/om/KeyDeletingService.java |  17 +-
 .../org/apache/hadoop/ozone/om/KeyManagerImpl.java | 153 +++--
 .../java/org/apache/hadoop/ozone/om/OMMetrics.java |   4 +
 .../hadoop/ozone/om/OmMetadataManagerImpl.java     |  22 +-
 .../org/apache/hadoop/ozone/om/OzoneManager.java   | 114 ++--
 .../org/apache/hadoop/ozone/om/OzoneTrash.java     |  39 +-
 .../hadoop/ozone/om/TrashOzoneFileSystem.java      | 457 +++++++++++++
 .../apache/hadoop/ozone/om/TrashPolicyOzone.java   |  94 ++-
 .../hadoop/ozone/om/codec/OMDBDefinition.java      |  38 +-
 .../apache/hadoop/ozone/om/ha/OMHANodeDetails.java |   3 -
 .../ozone/om/ratis/OzoneManagerDoubleBuffer.java   |  34 +-
 .../ozone/om/ratis/OzoneManagerRatisServer.java    | 204 ++----
 .../om/ratis/OzoneManagerRatisServerConfig.java    |  54 ++
 .../ozone/om/ratis/OzoneManagerStateMachine.java   |   1 -
 .../om/request/bucket/OMBucketCreateRequest.java   |  35 +-
 .../om/request/bucket/OMBucketDeleteRequest.java   |  17 +-
 .../request/bucket/OMBucketSetPropertyRequest.java |  28 +-
 .../om/request/bucket/acl/OMBucketAclRequest.java  |  26 +-
 .../request/bucket/acl/OMBucketAddAclRequest.java  |  20 +-
 .../bucket/acl/OMBucketRemoveAclRequest.java       |  20 +-
 .../request/bucket/acl/OMBucketSetAclRequest.java  |  20 +-
 .../om/request/file/OMDirectoryCreateRequest.java  |  13 +-
 .../ozone/om/request/file/OMFileCreateRequest.java |  14 +-
 .../om/request/key/OMAllocateBlockRequest.java     |   6 +-
 .../ozone/om/request/key/OMKeyCommitRequest.java   |  15 +-
 .../ozone/om/request/key/OMKeyCreateRequest.java   |  15 +-
 .../ozone/om/request/key/OMKeyDeleteRequest.java   |   6 +-
 .../hadoop/ozone/om/request/key/OMKeyRequest.java  |  61 +-
 .../ozone/om/request/key/OMKeysDeleteRequest.java  |   6 +-
 .../ozone/om/request/key/acl/OMKeyAclRequest.java  |  16 +-
 .../om/request/key/acl/OMKeyAddAclRequest.java     |  24 +-
 .../om/request/key/acl/OMKeyRemoveAclRequest.java  |  24 +-
 .../om/request/key/acl/OMKeySetAclRequest.java     |  24 +-
 .../request/key/acl/prefix/OMPrefixAclRequest.java |   9 +-
 .../key/acl/prefix/OMPrefixAddAclRequest.java      |  13 +-
 .../key/acl/prefix/OMPrefixRemoveAclRequest.java   |  13 +-
 .../key/acl/prefix/OMPrefixSetAclRequest.java      |  13 +-
 .../multipart/S3MultipartUploadAbortRequest.java   |   5 +-
 .../S3MultipartUploadCommitPartRequest.java        |   7 +-
 .../ozone/om/request/upgrade/OMPrepareRequest.java |   3 +-
 .../om/request/volume/OMVolumeSetQuotaRequest.java |  14 +-
 .../om/request/volume/acl/OMVolumeAclRequest.java  |  20 +-
 .../request/volume/acl/OMVolumeAddAclRequest.java  |  19 +-
 .../volume/acl/OMVolumeRemoveAclRequest.java       |  19 +-
 .../request/volume/acl/OMVolumeSetAclRequest.java  |  20 +-
 .../om/response/bucket/OMBucketCreateResponse.java |  17 +
 .../om/response/bucket/OMBucketDeleteResponse.java |  18 +
 .../om/response/file/OMFileCreateResponse.java     |   5 +-
 .../om/response/key/OMAllocateBlockResponse.java   |   9 +-
 .../ozone/om/response/key/OMKeyCommitResponse.java |   7 +-
 .../ozone/om/response/key/OMKeyCreateResponse.java |  10 +-
 .../ozone/om/response/key/OMKeyDeleteResponse.java |   7 +-
 .../om/response/key/OMKeysDeleteResponse.java      |   8 +-
 .../multipart/S3MultipartUploadAbortResponse.java  |   8 +-
 .../S3MultipartUploadCommitPartResponse.java       |   8 +-
 ...OzoneManagerProtocolServerSideTranslatorPB.java |  61 +-
 .../protocolPB/OzoneManagerRequestHandler.java     |   2 +-
 .../hadoop/ozone/security/AWSV4AuthValidator.java  |   9 +-
 .../security/OzoneBlockTokenSecretManager.java     |   8 +-
 .../apache/hadoop/ozone/om/TestChunkStreams.java   |   4 +-
 .../apache/hadoop/ozone/om/TestKeyManagerUnit.java | 119 +++-
 .../apache/hadoop/ozone/om/TestOMDBDefinition.java |  74 +++
 .../hadoop/ozone/om/TestOmMetadataManager.java     |  17 +-
 .../ozone/om/request/TestOMRequestUtils.java       |  14 +-
 .../bucket/TestOMBucketSetPropertyRequest.java     |   2 +-
 .../request/file/TestOMDirectoryCreateRequest.java |   2 +-
 .../volume/TestOMVolumeSetQuotaRequest.java        |  20 +-
 .../response/key/TestOMAllocateBlockResponse.java  |  11 +-
 .../om/response/key/TestOMKeyCommitResponse.java   |   7 +-
 .../om/response/key/TestOMKeyCreateResponse.java   |   7 +-
 .../om/response/key/TestOMKeyDeleteResponse.java   |  10 +-
 .../om/response/key/TestOMKeysDeleteResponse.java  |   9 +-
 .../s3/multipart/TestS3MultipartResponse.java      |   6 +-
 .../TestS3MultipartUploadAbortResponse.java        |  18 +-
 .../security/TestOzoneBlockTokenSecretManager.java |  38 ++
 .../TestOzoneDelegationTokenSecretManager.java     |  10 +-
 .../fs/ozone/BasicOzoneClientAdapterImpl.java      |   2 +-
 .../hadoop/fs/ozone/BasicOzoneFileSystem.java      |  27 +-
 .../ozone/BasicRootedOzoneClientAdapterImpl.java   |   3 +-
 .../fs/ozone/BasicRootedOzoneFileSystem.java       |  28 +-
 .../hadoop/fs/ozone/CapableOzoneFSInputStream.java |   3 +-
 .../apache/hadoop/fs/ozone/OzoneClientAdapter.java |   2 +-
 .../apache/hadoop/fs/ozone/OzoneFSInputStream.java |  10 +-
 .../hadoop/fs/ozone/OzoneStreamCapabilities.java   |   9 +
 .../hadoop/fs/ozone/TestBasicOzoneFileSystems.java |  89 +++
 .../org/apache/hadoop/fs/ozone/TestOFSPath.java    |   1 +
 hadoop-ozone/pom.xml                               |   2 +-
 .../recon/schema/ContainerSchemaDefinition.java    |  19 -
 .../hadoop/ozone/recon/ReconControllerModule.java  |   8 +-
 .../ozone/recon/api/ClusterStateEndpoint.java      |   6 +-
 .../hadoop/ozone/recon/api/ContainerEndpoint.java  |  25 +-
 .../hadoop/ozone/recon/api/NodeEndpoint.java       |   7 +-
 .../recon/api/types/MissingContainerMetadata.java  |   2 +-
 .../api/types/UnhealthyContainerMetadata.java      |   2 +-
 .../codec/ContainerReplicaHistoryListCodec.java    |  86 +++
 .../ozone/recon/fsck/ContainerHealthTask.java      |  14 +-
 ...ager.java => ContainerHealthSchemaManager.java} |  51 +-
 .../ozone/recon/persistence/ContainerHistory.java  |  79 +++
 .../ozone/recon/scm/ContainerReplicaHistory.java   |  62 ++
 .../recon/scm/ContainerReplicaHistoryList.java}    |  35 +-
 .../ozone/recon/scm/ReconContainerManager.java     | 215 ++++++-
 .../scm/ReconStorageContainerManagerFacade.java    |  20 +-
 .../recon/spi/ContainerDBServiceProvider.java      |  33 +
 .../spi/impl/ContainerDBServiceProviderImpl.java   |  91 ++-
 .../ozone/recon/spi/impl/ReconDBDefinition.java    |  14 +-
 .../ozone/recon/tasks/ContainerKeyMapperTask.java  |   7 +-
 .../ozone/recon/tasks/FileSizeCountTask.java       |   6 +-
 .../ozone/recon/tasks/OMUpdateEventBatch.java      |  21 +-
 .../hadoop/ozone/recon/tasks/ReconOmTask.java      |   9 -
 .../ozone/recon/tasks/ReconTaskControllerImpl.java |   8 +-
 .../hadoop/ozone/recon/tasks/TableCountTask.java   |   7 +-
 .../ozone/recon/api/TestContainerEndpoint.java     | 194 ++++--
 .../hadoop/ozone/recon/api/TestEndpoints.java      |   5 +-
 .../ozone/recon/fsck/TestContainerHealthTask.java  |  10 +-
 .../scm/AbstractReconContainerManagerTest.java     |   6 +-
 .../ozone/recon/scm/TestReconContainerManager.java | 102 ++-
 .../hadoop/ozone/recon/tasks/DummyReconDBTask.java |   1 -
 .../ozone/recon/tasks/TestFileSizeCountTask.java   |   8 +
 .../recon/tasks/TestReconTaskControllerImpl.java   |   7 -
 .../hadoop/ozone/s3/AWSSignatureProcessor.java     |   6 +-
 .../hadoop/ozone/s3/OzoneClientProducer.java       |   4 +-
 .../apache/hadoop/ozone/s3/SignatureProcessor.java |   2 -
 .../hadoop/ozone/s3/endpoint/BucketEndpoint.java   |  49 +-
 .../hadoop/ozone/s3/endpoint/EndpointBase.java     |  29 +-
 .../hadoop/ozone/s3/endpoint/ObjectEndpoint.java   |  23 +-
 .../hadoop/ozone/s3/exception/S3ErrorTable.java    |   4 +
 .../hadoop/ozone/client/ObjectStoreStub.java       |   2 +-
 .../hadoop/ozone/client/OzoneVolumeStub.java       |   5 +-
 .../ozone/s3/TestSignedChunksInputStream.java      |  12 +-
 .../ozone/s3/endpoint/TestPermissionCheck.java     | 268 ++++++++
 .../org/apache/hadoop/ozone/debug/DBScanner.java   |   6 +-
 .../hadoop/ozone/freon/BaseFreonGenerator.java     |  19 +-
 .../ozone/freon/ClosedContainerReplicator.java     | 213 +++++++
 .../hadoop/ozone/freon/DatanodeChunkGenerator.java | 147 +++--
 .../java/org/apache/hadoop/ozone/freon/Freon.java  |   3 +-
 .../hadoop/ozone/freon/HadoopFsGenerator.java      |  45 +-
 .../ozone/genesis/BenchMarkMetadataStoreReads.java |   8 +-
 .../genesis/BenchMarkMetadataStoreWrites.java      |   6 +-
 .../ozone/genesis/BenchMarkRocksDbStore.java       |   8 +-
 .../org/apache/hadoop/ozone/scm/cli/SQLCLI.java    |   2 -
 .../hadoop/ozone/shell/ClearSpaceQuotaOptions.java |  10 +-
 .../hadoop/ozone/shell/SetSpaceQuotaOptions.java   |   8 +-
 .../ozone/shell/bucket/ClearQuotaHandler.java      |   4 +-
 .../ozone/shell/bucket/CreateBucketHandler.java    |   4 +-
 .../hadoop/ozone/shell/bucket/SetQuotaHandler.java |  10 +-
 .../ozone/shell/volume/ClearQuotaHandler.java      |   4 +-
 .../ozone/shell/volume/CreateVolumeHandler.java    |   4 +-
 .../hadoop/ozone/shell/volume/SetQuotaHandler.java |  10 +-
 pom.xml                                            |  20 +-
 tools/fault-injection-service/README.md            |   2 +-
 459 files changed, 16493 insertions(+), 3903 deletions(-)

diff --git a/.github/workflows/post-commit.yml b/.github/workflows/post-commit.yml
index 44d14c0..497fbce 100644
--- a/.github/workflows/post-commit.yml
+++ b/.github/workflows/post-commit.yml
@@ -199,6 +199,7 @@ jobs:
         run: |
           mkdir -p /mnt/ozone/hadoop-ozone/dist/target
           tar xzvf hadoop-ozone*.tar.gz -C /mnt/ozone/hadoop-ozone/dist/target
+          sudo chmod -R a+rwX /mnt/ozone/hadoop-ozone/dist/target
       - name: Install robotframework
         run: sudo pip install robotframework
       - name: Execute tests
@@ -274,19 +275,12 @@ jobs:
         run: ./hadoop-ozone/dev-support/checks/coverage.sh
       - name: Upload coverage to Sonar
         uses: ./.github/buildenv
-        if: github.repository == 'apache/hadoop-ozone' && github.event_name != 'pull_request'
+        if: github.repository == 'apache/ozone' && github.event_name != 'pull_request'
         with:
           args: ./hadoop-ozone/dev-support/checks/sonar.sh
         env:
           SONAR_TOKEN: ${{ secrets.SONARCLOUD_TOKEN }}
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v1
-        if: github.repository == 'apache/hadoop-ozone' && github.event_name != 'pull_request'
-        with:
-          file: ./target/coverage/all.xml
-          name: codecov-umbrella
-          fail_ci_if_error: false
       - name: Archive build results
         uses: actions/upload-artifact@v2
         with:
diff --git a/HISTORY.md b/HISTORY.md
index 233471c..5069238 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -30,12 +30,12 @@ As a summary:
  * Ozone: provides Object Store semantics with the help of HDDS
  * CBlock: provides mountable volumes with the help of the HDDS layer (based on iScsi protocol)
 
-In the beginning of the year 2017 a new podling project was started inside [Apache Incubator](http://incubator.apache.org/): [Apache Ratis](https://ratis.apache.org/). Ratis is an embeddable RAFT protcol implementation it is which became the corner stone of consensus inside both Ozone and HDDS projects. (Started to [be used](https://issues.apache.org/jira/browse/HDFS-11519) by Ozone in March of 2017) 
+In the beginning of the year 2017 a new podling project was started inside [Apache Incubator](http://incubator.apache.org/): [Apache Ratis](https://ratis.apache.org/). Ratis is an embeddable RAFT protocol implementation it is which became the corner stone of consensus inside both Ozone and HDDS projects. (Started to [be used](https://issues.apache.org/jira/browse/HDFS-11519) by Ozone in March of 2017) 
 
 In the October of 2017 a [discussion](https://lists.apache.org/thread.html/3b5b65ce428f88299e6cb4c5d745ec65917490be9e417d361cc08d7e@%3Chdfs-dev.hadoop.apache.org%3E) has been started on hdfs-dev mailing list to merge the existing functionality to the Apache Hadoop trunk. After a long debate Owen O'Malley [suggested a consensus](https://lists.apache.org/thread.html/c85e5263dcc0ca1d13cbbe3bcfb53236784a39111b8c353f60582eb4@%3Chdfs-dev.hadoop.apache.org%3E) to merge it to the trunk but use s [...]
 
  > * HDSL become a subproject of Hadoop.
- > * HDSL will release separately from Hadoop. Hadoop releases will notcontain HDSL and vice versa.
+ > * HDSL will release separately from Hadoop. Hadoop releases will not contain HDSL and vice versa.
  > * HDSL will get its own jira instance so that the release tags stay separate.
  > * On trunk (as opposed to release branches) HDSL will be a separate module in Hadoop's source tree. This will enable the HDSL to work on their trunk and the Hadoop trunk without making releases for every change.
  > * Hadoop's trunk will only build HDSL if a non-default profile is enabled. When Hadoop creates a release branch, the RM will delete the HDSL module from the branch.
diff --git a/README.md b/README.md
index b4a40bf..2df0f8b 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,7 @@ Ozone is a top level project under the [Apache Software Foundation](https://apac
  * Chat: You can find the #ozone channel on the official ASF slack. Invite link is [here](http://s.apache.org/slack-invite).
  * There are Open [Weekly calls](https://cwiki.apache.org/confluence/display/HADOOP/Ozone+Community+Calls) where you can ask anything about Ozone.
      * Past meeting notes are also available from the wiki.
+ * Reporting security issues: Please consult with [SECURITY.md](./SECURITY.md) about reporting security vulnerabilities and issues.
 
 ## Download
 
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..8d8a42b
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,23 @@
+# Security Policy
+
+## Supported Versions
+
+The first stable release of Apache Ozone is 1.0, the previous alpha and beta releases are not supported by the community.
+
+| Version       | Supported          |
+| ------------- | ------------------ |
+| 0.3.0 (alpha) | :x:                |
+| 0.4.0 (alpha) | :x:                |
+| 0.4.1 (alpha) | :x:                |
+| 0.5.0 (beta)  | :x:                |
+| 1.0           | :white_check_mark: |
+
+## Reporting a Vulnerability
+
+To report any found security issues or vulnerabilities, please send a mail to security@ozone.apache.org, so that they may be investigated and fixed before the vulnerabilities is published.
+
+This email address is a private mailing list for discussion of potential security vulnerabilities issues.
+
+This mailing list is **NOT** for end-user questions and discussion on security. Please use the dev@ozone.apache.org list for such issues.
+
+In order to post to the list, it is **NOT** necessary to first subscribe to it.
diff --git a/hadoop-hdds/client/pom.xml b/hadoop-hdds/client/pom.xml
index e1b51e8..e3b0824 100644
--- a/hadoop-hdds/client/pom.xml
+++ b/hadoop-hdds/client/pom.xml
@@ -57,5 +57,10 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
       <scope>test</scope>
     </dependency>
 
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 </project>
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java
index b3c774a..c2e0148 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java
@@ -54,6 +54,18 @@ public class OzoneClientConfig {
       tags = ConfigTag.CLIENT)
   private int streamBufferSize = 4 * 1024 * 1024;
 
+  @Config(key = "stream.buffer.increment",
+      defaultValue = "0B",
+      type = ConfigType.SIZE,
+      description = "Buffer (defined by ozone.client.stream.buffer.size) "
+          + "will be incremented with this steps. If zero, the full buffer "
+          + "will "
+          + "be created at once. Setting it to a variable between 0 and "
+          + "ozone.client.stream.buffer.size can reduce the memory usage for "
+          + "very small keys, but has a performance overhead.",
+      tags = ConfigTag.CLIENT)
+  private int bufferIncrement = 0;
+
   @Config(key = "stream.buffer.flush.delay",
       defaultValue = "true",
       description = "Default true, when call flush() and determine whether "
@@ -118,6 +130,9 @@ public class OzoneClientConfig {
     Preconditions.checkState(streamBufferFlushSize > 0);
     Preconditions.checkState(streamBufferMaxSize > 0);
 
+    Preconditions.checkArgument(bufferIncrement < streamBufferSize,
+        "Buffer increment should be smaller than the size of the stream "
+            + "buffer");
     Preconditions.checkState(streamBufferMaxSize % streamBufferFlushSize == 0,
         "expected max. buffer size (%s) to be a multiple of flush size (%s)",
         streamBufferMaxSize, streamBufferFlushSize);
@@ -209,4 +224,7 @@ public class OzoneClientConfig {
     this.checksumVerify = checksumVerify;
   }
 
+  public int getBufferIncrement() {
+    return bufferIncrement;
+  }
 }
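
The new ozone.client.stream.buffer.increment setting introduced above lets the client grow its stream buffer in steps instead of allocating the full ozone.client.stream.buffer.size up front (the default of 0B keeps the old allocate-at-once behaviour). A minimal sketch of setting and reading it back, assuming the usual OzoneConfiguration/getObject wiring for @Config classes; the values are illustrative only:

    import org.apache.hadoop.hdds.conf.OzoneConfiguration;
    import org.apache.hadoop.hdds.scm.OzoneClientConfig;

    public class BufferIncrementSketch {
      public static void main(String[] args) {
        OzoneConfiguration conf = new OzoneConfiguration();
        // Grow the client stream buffer in 1 MB steps rather than allocating
        // the full ozone.client.stream.buffer.size (4 MB default) at once.
        conf.set("ozone.client.stream.buffer.increment", "1MB");
        OzoneClientConfig clientConfig = conf.getObject(OzoneClientConfig.class);
        System.out.println("buffer increment in bytes: "
            + clientConfig.getBufferIncrement());
      }
    }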
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
index 6e99bf3..49f0cca 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/XceiverClientRatis.java
@@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
 import java.util.stream.Collectors;
 
 import org.apache.hadoop.hdds.HddsUtils;
@@ -51,7 +52,6 @@ import org.apache.hadoop.hdds.tracing.TracingUtil;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
-import com.google.common.base.Supplier;
 import org.apache.ratis.client.RaftClient;
 import org.apache.ratis.grpc.GrpcTlsConfig;
 import org.apache.ratis.proto.RaftProtos;
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java
index 3578bd6..a5f3091 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockInputStream.java
@@ -24,8 +24,10 @@ import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.function.Function;
 
+import org.apache.hadoop.fs.CanUnbuffer;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
@@ -33,11 +35,13 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.DatanodeBl
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.GetBlockResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.XceiverClientFactory;
-import org.apache.hadoop.hdds.scm.XceiverClientManager;
 import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
 import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
+import org.apache.hadoop.io.retry.RetryPolicy;
 import org.apache.hadoop.security.token.Token;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -50,7 +54,8 @@ import org.slf4j.LoggerFactory;
  * This class encapsulates all state management for iterating
  * through the sequence of chunks through {@link ChunkInputStream}.
  */
-public class BlockInputStream extends InputStream implements Seekable {
+public class BlockInputStream extends InputStream
+    implements Seekable, CanUnbuffer {
 
   private static final Logger LOG =
       LoggerFactory.getLogger(BlockInputStream.class);
@@ -65,6 +70,9 @@ public class BlockInputStream extends InputStream implements Seekable {
   private XceiverClientFactory xceiverClientFactory;
   private XceiverClientSpi xceiverClient;
   private boolean initialized = false;
+  private final RetryPolicy retryPolicy =
+      HddsClientUtils.createRetryPolicy(3, TimeUnit.SECONDS.toMillis(1));
+  private int retries;
 
   // List of ChunkInputStreams, one for each chunk in the block
   private List<ChunkInputStream> chunkStreams;
@@ -95,25 +103,25 @@ public class BlockInputStream extends InputStream implements Seekable {
   // can be reset if a new position is seeked.
   private int chunkIndexOfPrevPosition;
 
-  private Function<BlockID, Pipeline> refreshPipelineFunction;
+  private final Function<BlockID, Pipeline> refreshPipelineFunction;
 
   public BlockInputStream(BlockID blockId, long blockLen, Pipeline pipeline,
       Token<OzoneBlockTokenIdentifier> token, boolean verifyChecksum,
-      XceiverClientFactory xceiverClientFctry,
+      XceiverClientFactory xceiverClientFactory,
       Function<BlockID, Pipeline> refreshPipelineFunction) {
     this.blockID = blockId;
     this.length = blockLen;
     this.pipeline = pipeline;
     this.token = token;
     this.verifyChecksum = verifyChecksum;
-    this.xceiverClientFactory = xceiverClientFctry;
+    this.xceiverClientFactory = xceiverClientFactory;
     this.refreshPipelineFunction = refreshPipelineFunction;
   }
 
   public BlockInputStream(BlockID blockId, long blockLen, Pipeline pipeline,
                           Token<OzoneBlockTokenIdentifier> token,
                           boolean verifyChecksum,
-                          XceiverClientManager xceiverClientFactory
+                          XceiverClientFactory xceiverClientFactory
   ) {
     this(blockId, blockLen, pipeline, token, verifyChecksum,
         xceiverClientFactory, null);
@@ -129,22 +137,12 @@ public class BlockInputStream extends InputStream implements Seekable {
       return;
     }
 
-    List<ChunkInfo> chunks = null;
+    List<ChunkInfo> chunks;
     try {
       chunks = getChunkInfos();
     } catch (ContainerNotFoundException ioEx) {
-      LOG.error("Unable to read block information from pipeline.");
-      if (refreshPipelineFunction != null) {
-        LOG.debug("Re-fetching pipeline for block {}", blockID);
-        Pipeline newPipeline = refreshPipelineFunction.apply(blockID);
-        if (newPipeline == null || newPipeline.equals(pipeline)) {
-          throw ioEx;
-        } else {
-          LOG.debug("New pipeline got for block {}", blockID);
-          this.pipeline = newPipeline;
-          chunks = getChunkInfos();
-        }
-      }
+      refreshPipeline(ioEx);
+      chunks = getChunkInfos();
     }
 
     if (chunks != null && !chunks.isEmpty()) {
@@ -171,6 +169,24 @@ public class BlockInputStream extends InputStream implements Seekable {
     }
   }
 
+  private void refreshPipeline(IOException cause) throws IOException {
+    LOG.info("Unable to read information for block {} from pipeline {}: {}",
+        blockID, pipeline.getId(), cause.getMessage());
+    if (refreshPipelineFunction != null) {
+      LOG.debug("Re-fetching pipeline for block {}", blockID);
+      Pipeline newPipeline = refreshPipelineFunction.apply(blockID);
+      if (newPipeline == null || newPipeline.sameDatanodes(pipeline)) {
+        LOG.warn("No new pipeline for block {}", blockID);
+        throw cause;
+      } else {
+        LOG.debug("New pipeline got for block {}", blockID);
+        this.pipeline = newPipeline;
+      }
+    } else {
+      throw cause;
+    }
+  }
+
   /**
    * Send RPC call to get the block info from the container.
    * @return List of chunks in this block.
@@ -182,7 +198,7 @@ public class BlockInputStream extends InputStream implements Seekable {
       pipeline = Pipeline.newBuilder(pipeline)
           .setType(HddsProtos.ReplicationType.STAND_ALONE).build();
     }
-    xceiverClient =  xceiverClientFactory.acquireClientForReadData(pipeline);
+    acquireClient();
     boolean success = false;
     List<ChunkInfo> chunks;
     try {
@@ -207,17 +223,25 @@ public class BlockInputStream extends InputStream implements Seekable {
     return chunks;
   }
 
+  protected void acquireClient() throws IOException {
+    xceiverClient = xceiverClientFactory.acquireClientForReadData(pipeline);
+  }
+
   /**
    * Append another ChunkInputStream to the end of the list. Note that the
    * ChunkInputStream is only created here. The chunk will be read from the
    * Datanode only when a read operation is performed on for that chunk.
    */
   protected synchronized void addStream(ChunkInfo chunkInfo) {
-    chunkStreams.add(new ChunkInputStream(chunkInfo, blockID,
-        xceiverClient, verifyChecksum, token));
+    chunkStreams.add(createChunkInputStream(chunkInfo));
+  }
+
+  protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+    return new ChunkInputStream(chunkInfo, blockID,
+        xceiverClientFactory, () -> pipeline, verifyChecksum, token);
   }
 
-  public synchronized long getRemaining() throws IOException {
+  public synchronized long getRemaining() {
     return length - getPos();
   }
 
@@ -266,7 +290,18 @@ public class BlockInputStream extends InputStream implements Seekable {
       // Get the current chunkStream and read data from it
       ChunkInputStream current = chunkStreams.get(chunkIndex);
       int numBytesToRead = Math.min(len, (int)current.getRemaining());
-      int numBytesRead = current.read(b, off, numBytesToRead);
+      int numBytesRead;
+      try {
+        numBytesRead = current.read(b, off, numBytesToRead);
+        retries = 0; // reset retries after successful read
+      } catch (StorageContainerException e) {
+        if (shouldRetryRead(e)) {
+          handleReadError(e);
+          continue;
+        } else {
+          throw e;
+        }
+      }
 
       if (numBytesRead != numBytesToRead) {
         // This implies that there is either data loss or corruption in the
@@ -356,7 +391,7 @@ public class BlockInputStream extends InputStream implements Seekable {
   }
 
   @Override
-  public synchronized long getPos() throws IOException {
+  public synchronized long getPos() {
     if (length == 0) {
       return 0;
     }
@@ -376,9 +411,13 @@ public class BlockInputStream extends InputStream implements Seekable {
 
   @Override
   public synchronized void close() {
+    releaseClient();
+    xceiverClientFactory = null;
+  }
+
+  private void releaseClient() {
     if (xceiverClientFactory != null && xceiverClient != null) {
       xceiverClientFactory.releaseClient(xceiverClient, false);
-      xceiverClientFactory = null;
       xceiverClient = null;
     }
   }
@@ -393,7 +432,7 @@ public class BlockInputStream extends InputStream implements Seekable {
    * @throws IOException if stream is closed
    */
   protected synchronized void checkOpen() throws IOException {
-    if (xceiverClient == null) {
+    if (xceiverClientFactory == null) {
       throw new IOException("BlockInputStream has been closed.");
     }
   }
@@ -416,8 +455,44 @@ public class BlockInputStream extends InputStream implements Seekable {
     return blockPosition;
   }
 
-  @VisibleForTesting
-  synchronized List<ChunkInputStream> getChunkStreams() {
-    return chunkStreams;
+  @Override
+  public void unbuffer() {
+    storePosition();
+    releaseClient();
+
+    final List<ChunkInputStream> inputStreams = this.chunkStreams;
+    if (inputStreams != null) {
+      for (ChunkInputStream is : inputStreams) {
+        is.unbuffer();
+      }
+    }
+  }
+
+  private synchronized void storePosition() {
+    blockPosition = getPos();
+  }
+
+  private boolean shouldRetryRead(IOException cause) throws IOException {
+    RetryPolicy.RetryAction retryAction;
+    try {
+      retryAction = retryPolicy.shouldRetry(cause, ++retries, 0, true);
+    } catch (IOException e) {
+      throw e;
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+    return retryAction.action == RetryPolicy.RetryAction.RetryDecision.RETRY;
+  }
+
+  private void handleReadError(IOException cause) throws IOException {
+    releaseClient();
+    final List<ChunkInputStream> inputStreams = this.chunkStreams;
+    if (inputStreams != null) {
+      for (ChunkInputStream is : inputStreams) {
+        is.releaseClient();
+      }
+    }
+
+    refreshPipeline(cause);
   }
 }
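
The BlockInputStream changes above route chunk reads through a Hadoop RetryPolicy: a StorageContainerException from ChunkInputStream#read is passed to shouldRetryRead(), and a RETRY decision triggers handleReadError(), which releases the cached client and refreshes the pipeline before the read loop continues. A minimal sketch of that decision pattern in isolation, using the stock RetryPolicies factory with the same 3-attempt / 1-second bound as the createRetryPolicy(3, ...) call in the diff; readOnce() is a hypothetical stand-in for the datanode read and the policy's suggested sleep is skipped:

    import java.io.IOException;
    import java.util.concurrent.TimeUnit;
    import org.apache.hadoop.io.retry.RetryPolicies;
    import org.apache.hadoop.io.retry.RetryPolicy;

    public class RetryDecisionSketch {
      public static void main(String[] args) throws Exception {
        RetryPolicy policy = RetryPolicies
            .retryUpToMaximumCountWithFixedSleep(3, 1, TimeUnit.SECONDS);
        int retries = 0;
        while (true) {
          try {
            readOnce();        // hypothetical stand-in for the chunk read
            break;             // success: BlockInputStream resets retries here
          } catch (IOException e) {
            RetryPolicy.RetryAction action =
                policy.shouldRetry(e, ++retries, 0, true);
            if (action.action == RetryPolicy.RetryAction.RetryDecision.RETRY) {
              // BlockInputStream releases the client and calls
              // refreshPipeline(e) at this point, then retries the read.
              continue;
            }
            throw e;           // FAIL: give up and surface the error
          }
        }
      }

      private static void readOnce() throws IOException {
        // placeholder body; a real read would go to the datanode here
      }
    }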
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java
index e29bbe3..272120b 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java
@@ -46,14 +46,13 @@ import org.apache.hadoop.ozone.common.Checksum;
 import org.apache.hadoop.ozone.common.ChecksumData;
 import org.apache.hadoop.ozone.common.ChunkBuffer;
 import org.apache.hadoop.ozone.common.OzoneChecksumException;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.security.token.TokenIdentifier;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.putBlockAsync;
 import static org.apache.hadoop.hdds.scm.storage.ContainerProtocolCalls.writeChunkAsync;
-
-import org.apache.hadoop.security.token.Token;
-import org.apache.hadoop.security.token.TokenIdentifier;
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -281,7 +280,7 @@ public class BlockOutputStream extends OutputStream {
 
   private void allocateNewBufferIfNeeded() {
     if (currentBufferRemaining == 0) {
-      currentBuffer = bufferPool.allocateBuffer(config.getBytesPerChecksum());
+      currentBuffer = bufferPool.allocateBuffer(config.getBufferIncrement());
       currentBufferRemaining = currentBuffer.remaining();
     }
   }
diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java
index cfb3a21..9c03453 100644
--- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java
+++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ChunkInputStream.java
@@ -20,14 +20,17 @@ package org.apache.hadoop.hdds.scm.storage;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import org.apache.hadoop.fs.CanUnbuffer;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandRequestProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadChunkResponseProto;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
 import org.apache.hadoop.hdds.scm.XceiverClientSpi;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.ozone.common.Checksum;
 import org.apache.hadoop.ozone.common.ChecksumData;
 import org.apache.hadoop.ozone.common.OzoneChecksumException;
@@ -40,18 +43,22 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
 import java.util.List;
+import java.util.function.Supplier;
 
 /**
  * An {@link InputStream} called from BlockInputStream to read a chunk from the
  * container. Each chunk may contain multiple underlying {@link ByteBuffer}
  * instances.
  */
-public class ChunkInputStream extends InputStream implements Seekable {
+public class ChunkInputStream extends InputStream
+    implements Seekable, CanUnbuffer {
 
   private ChunkInfo chunkInfo;
   private final long length;
   private final BlockID blockID;
+  private final XceiverClientFactory xceiverClientFactory;
   private XceiverClientSpi xceiverClient;
+  private final Supplier<Pipeline> pipelineSupplier;
   private boolean verifyChecksum;
   private boolean allocated = false;
   // Buffer to store the chunk data read from the DN container
@@ -69,9 +76,8 @@ public class ChunkInputStream extends InputStream implements Seekable {
 
   // Position of the ChunkInputStream is maintained by this variable (if a
   // seek is performed. This position is w.r.t to the chunk only and not the
-  // block or key. This variable is set only if either the buffers are not
-  // yet allocated or the if the allocated buffers do not cover the seeked
-  // position. Once the chunk is read, this variable is reset.
+  // block or key. This variable is also set before attempting a read to enable
+  // retry.  Once the chunk is read, this variable is reset.
   private long chunkPosition = -1;
 
   private final Token<? extends TokenIdentifier> token;
@@ -79,17 +85,19 @@ public class ChunkInputStream extends InputStream implements Seekable {
   private static final int EOF = -1;
 
   ChunkInputStream(ChunkInfo chunkInfo, BlockID blockId,
-      XceiverClientSpi xceiverClient, boolean verifyChecksum,
-      Token<? extends TokenIdentifier> token) {
+      XceiverClientFactory xceiverClientFactory,
+      Supplier<Pipeline> pipelineSupplier,
+      boolean verifyChecksum, Token<? extends TokenIdentifier> token) {
     this.chunkInfo = chunkInfo;
     this.length = chunkInfo.getLen();
     this.blockID = blockId;
-    this.xceiverClient = xceiverClient;
+    this.xceiverClientFactory = xceiverClientFactory;
+    this.pipelineSupplier = pipelineSupplier;
     this.verifyChecksum = verifyChecksum;
     this.token = token;
   }
 
-  public synchronized long getRemaining() throws IOException {
+  public synchronized long getRemaining() {
     return length - getPos();
   }
 
@@ -98,7 +106,7 @@ public class ChunkInputStream extends InputStream implements Seekable {
    */
   @Override
   public synchronized int read() throws IOException {
-    checkOpen();
+    acquireClient();
     int available = prepareRead(1);
     int dataout = EOF;
 
@@ -143,7 +151,7 @@ public class ChunkInputStream extends InputStream implements Seekable {
     if (len == 0) {
       return 0;
     }
-    checkOpen();
+    acquireClient();
     int total = 0;
     while (len > 0) {
       int available = prepareRead(len);
@@ -196,7 +204,7 @@ public class ChunkInputStream extends InputStream implements Seekable {
   }
 
   @Override
-  public synchronized long getPos() throws IOException {
+  public synchronized long getPos() {
     if (chunkPosition >= 0) {
       return chunkPosition;
     }
@@ -219,19 +227,23 @@ public class ChunkInputStream extends InputStream implements Seekable {
 
   @Override
   public synchronized void close() {
-    if (xceiverClient != null) {
+    releaseClient();
+  }
+
+  protected synchronized void releaseClient() {
+    if (xceiverClientFactory != null && xceiverClient != null) {
+      xceiverClientFactory.releaseClient(xceiverClient, false);
       xceiverClient = null;
     }
   }
 
   /**
-   * Checks if the stream is open.  If not, throw an exception.
-   *
-   * @throws IOException if stream is closed
+   * Acquire new client if previous one was released.
    */
-  protected synchronized void checkOpen() throws IOException {
-    if (xceiverClient == null) {
-      throw new IOException("BlockInputStream has been closed.");
+  protected synchronized void acquireClient() throws IOException {
+    if (xceiverClientFactory != null && xceiverClient == null) {
+      xceiverClient = xceiverClientFactory.acquireClientForReadData(
+          pipelineSupplier.get());
     }
   }
 
@@ -292,6 +304,11 @@ public class ChunkInputStream extends InputStream implements Seekable {
       startByteIndex = bufferOffset + bufferLength;
     }
 
+    // bufferOffset and bufferLength are updated below, but if the read fails
+    // and is retried, we need the previous position. The position is reset
+    // after a successful read in adjustBufferPosition().
+    storePosition();
+
     if (verifyChecksum) {
       // Update the bufferOffset and bufferLength as per the checksum
       // boundary requirement.
@@ -437,7 +454,8 @@ public class ChunkInputStream extends InputStream implements Seekable {
   /**
    * Check if the buffers have been allocated data and false otherwise.
    */
-  private boolean buffersAllocated() {
+  @VisibleForTesting
+  protected boolean buffersAllocated() {
     return buffers != null && !buffers.isEmpty();
   }
 
@@ -538,6 +556,10 @@ public class ChunkInputStream extends InputStream implements Seekable {
     this.chunkPosition = -1;
   }
 
+  private void storePosition() {
+    chunkPosition = getPos();
+  }
+
   String getChunkName() {
     return chunkInfo.getChunkName();
   }
@@ -550,4 +572,11 @@ public class ChunkInputStream extends InputStream implements Seekable {
   protected long getChunkPosition() {
     return chunkPosition;
   }
+
+  @Override
+  public synchronized void unbuffer() {
+    storePosition();
+    releaseBuffers();
+    releaseClient();
+  }
 }
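
For illustration only (not part of this patch): a minimal sketch of how the new
acquire/release lifecycle might be driven by a caller. The constructor is
package-private, so in practice BlockInputStream does this; chunkInfo, blockId,
clientFactory and pipelineRef are placeholders assumed to exist in the
surrounding code.

    ChunkInputStream in = new ChunkInputStream(
        chunkInfo, blockId, clientFactory, pipelineRef::get,
        true /* verifyChecksum */, null /* token */);

    byte[] buf = new byte[4096];
    int n = in.read(buf, 0, buf.length);  // acquireClient() runs lazily here

    in.unbuffer();                        // store position, drop buffers and client

    // A later read re-acquires a client for whatever pipeline the supplier
    // returns at that point and resumes from the stored position.
    n = in.read(buf, 0, buf.length);
    in.close();                           // releases the client if still held
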
diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStream.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStream.java
index 5db722a..1c7968b 100644
--- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStream.java
+++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStream.java
@@ -24,7 +24,7 @@ import java.util.function.Function;
 
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
-import org.apache.hadoop.hdds.scm.XceiverClientManager;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.security.token.OzoneBlockTokenIdentifier;
 import org.apache.hadoop.security.token.Token;
@@ -34,9 +34,9 @@ import org.apache.hadoop.security.token.Token;
  */
 class DummyBlockInputStream extends BlockInputStream {
 
-  private List<ChunkInfo> chunks;
+  private final List<ChunkInfo> chunks;
 
-  private Map<String, byte[]> chunkDataMap;
+  private final Map<String, byte[]> chunkDataMap;
 
   @SuppressWarnings("parameternumber")
   DummyBlockInputStream(
@@ -45,23 +45,7 @@ class DummyBlockInputStream extends BlockInputStream {
       Pipeline pipeline,
       Token<OzoneBlockTokenIdentifier> token,
       boolean verifyChecksum,
-      XceiverClientManager xceiverClientManager,
-      List<ChunkInfo> chunkList,
-      Map<String, byte[]> chunkMap) {
-    super(blockId, blockLen, pipeline, token, verifyChecksum,
-        xceiverClientManager);
-    this.chunks = chunkList;
-    this.chunkDataMap = chunkMap;
-  }
-
-  @SuppressWarnings("parameternumber")
-  DummyBlockInputStream(
-      BlockID blockId,
-      long blockLen,
-      Pipeline pipeline,
-      Token<OzoneBlockTokenIdentifier> token,
-      boolean verifyChecksum,
-      XceiverClientManager xceiverClientManager,
+      XceiverClientFactory xceiverClientManager,
       Function<BlockID, Pipeline> refreshFunction,
       List<ChunkInfo> chunkList,
       Map<String, byte[]> chunks) {
@@ -78,11 +62,10 @@ class DummyBlockInputStream extends BlockInputStream {
   }
 
   @Override
-  protected void addStream(ChunkInfo chunkInfo) {
-    TestChunkInputStream testChunkInputStream = new TestChunkInputStream();
-    getChunkStreams().add(new DummyChunkInputStream(testChunkInputStream,
+  protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+    return new DummyChunkInputStream(
         chunkInfo, null, null, false,
-        chunkDataMap.get(chunkInfo.getChunkName()).clone()));
+        chunkDataMap.get(chunkInfo.getChunkName()).clone(), null);
   }
 
   @Override
diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStreamWithRetry.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStreamWithRetry.java
index 1686ed4..51ba2c6 100644
--- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStreamWithRetry.java
+++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyBlockInputStreamWithRetry.java
@@ -26,7 +26,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import org.apache.hadoop.hdds.scm.XceiverClientManager;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
 import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
@@ -49,7 +49,7 @@ final class DummyBlockInputStreamWithRetry
       Pipeline pipeline,
       Token<OzoneBlockTokenIdentifier> token,
       boolean verifyChecksum,
-      XceiverClientManager xceiverClientManager,
+      XceiverClientFactory xceiverClientManager,
       List<ChunkInfo> chunkList,
       Map<String, byte[]> chunkMap,
       AtomicBoolean isRerfreshed) {
diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyChunkInputStream.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyChunkInputStream.java
index e654d11..15f7eda 100644
--- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyChunkInputStream.java
+++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/DummyChunkInputStream.java
@@ -22,8 +22,9 @@ import java.util.List;
 
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
-import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
 
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 
 /**
@@ -31,18 +32,18 @@ import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
  */
 public class DummyChunkInputStream extends ChunkInputStream {
 
-  private byte[] chunkData;
+  private final byte[] chunkData;
 
   // Stores the read chunk data in each readChunk call
-  private List<ByteString> readByteBuffers = new ArrayList<>();
+  private final List<ByteString> readByteBuffers = new ArrayList<>();
 
-  public DummyChunkInputStream(TestChunkInputStream testChunkInputStream,
-      ChunkInfo chunkInfo,
+  public DummyChunkInputStream(ChunkInfo chunkInfo,
       BlockID blockId,
-      XceiverClientSpi xceiverClient,
+      XceiverClientFactory xceiverClientFactory,
       boolean verifyChecksum,
-      byte[] data) {
-    super(chunkInfo, blockId, xceiverClient, verifyChecksum, null);
+      byte[] data, Pipeline pipeline) {
+    super(chunkInfo, blockId, xceiverClientFactory, () -> pipeline,
+        verifyChecksum, null);
     this.chunkData = data;
   }
 
@@ -56,10 +57,15 @@ public class DummyChunkInputStream extends ChunkInputStream {
   }
 
   @Override
-  protected void checkOpen() {
+  protected void acquireClient() {
     // No action needed
   }
 
+  @Override
+  protected void releaseClient() {
+    // no-op
+  }
+
   public List<ByteString> getReadByteBuffers() {
     return readByteBuffers;
   }
diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockInputStream.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockInputStream.java
index 3f5e12a..940caa7 100644
--- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockInputStream.java
+++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockInputStream.java
@@ -21,27 +21,50 @@ package org.apache.hadoop.hdds.scm.storage;
 import com.google.common.primitives.Bytes;
 import org.apache.hadoop.hdds.client.BlockID;
 import org.apache.hadoop.hdds.client.ContainerBlockID;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
+import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.scm.pipeline.MockPipeline;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.ozone.common.Checksum;
 
+import org.apache.hadoop.ozone.common.OzoneChecksumException;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.mockito.Mock;
+import org.mockito.junit.MockitoJUnitRunner;
 
 import java.io.EOFException;
+import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
 
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_NOT_FOUND;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_UNHEALTHY;
 import static org.apache.hadoop.hdds.scm.storage.TestChunkInputStream.generateRandomData;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
 
 /**
  * Tests for {@link BlockInputStream}'s functionality.
  */
+@RunWith(MockitoJUnitRunner.class)
 public class TestBlockInputStream {
 
   private static final int CHUNK_SIZE = 100;
@@ -52,7 +75,9 @@ public class TestBlockInputStream {
   private int blockSize;
   private List<ChunkInfo> chunks;
   private Map<String, byte[]> chunkDataMap;
-  private AtomicBoolean isRefreshed = new AtomicBoolean();
+
+  @Mock
+  private Function<BlockID, Pipeline> refreshPipeline;
 
   @Before
   public void setup() throws Exception {
@@ -61,7 +86,7 @@ public class TestBlockInputStream {
     createChunkList(5);
 
     blockStream = new DummyBlockInputStream(blockID, blockSize, null, null,
-        false, null, chunks, chunkDataMap);
+        false, null, refreshPipeline, chunks, chunkDataMap);
   }
 
   /**
@@ -199,9 +224,11 @@ public class TestBlockInputStream {
   @Test
   public void testRefreshPipelineFunction() throws Exception {
     BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
+    AtomicBoolean isRefreshed = new AtomicBoolean();
     createChunkList(5);
     BlockInputStream blockInputStreamWithRetry =
-        new DummyBlockInputStreamWithRetry(blockID, blockSize, null, null,
+        new DummyBlockInputStreamWithRetry(blockID, blockSize,
+            MockPipeline.createSingleNodePipeline(), null,
             false, null, chunks, chunkDataMap, isRefreshed);
 
     Assert.assertFalse(isRefreshed.get());
@@ -210,4 +237,160 @@ public class TestBlockInputStream {
     blockInputStreamWithRetry.read(b, 0, 200);
     Assert.assertTrue(isRefreshed.get());
   }
+
+  @Test
+  public void testRefreshOnReadFailure() throws Exception {
+    // GIVEN
+    BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
+    Pipeline pipeline = MockPipeline.createSingleNodePipeline();
+    Pipeline newPipeline = MockPipeline.createSingleNodePipeline();
+
+    final int len = 200;
+    final ChunkInputStream stream = mock(ChunkInputStream.class);
+    when(stream.read(any(), anyInt(), anyInt()))
+        .thenThrow(new StorageContainerException("test", CONTAINER_NOT_FOUND))
+        .thenReturn(len);
+    when(stream.getRemaining())
+        .thenReturn((long) len);
+
+    when(refreshPipeline.apply(blockID))
+        .thenReturn(newPipeline);
+
+    BlockInputStream subject = new DummyBlockInputStream(blockID, blockSize,
+        pipeline, null, false, null, refreshPipeline, chunks, null) {
+      @Override
+      protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+        return stream;
+      }
+    };
+    subject.initialize();
+
+    // WHEN
+    byte[] b = new byte[len];
+    int bytesRead = subject.read(b, 0, len);
+
+    // THEN
+    Assert.assertEquals(len, bytesRead);
+    verify(refreshPipeline).apply(blockID);
+  }
+
+  @Test
+  public void testRefreshExitsIfPipelineHasSameNodes() throws Exception {
+    // GIVEN
+    BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
+    Pipeline pipeline = MockPipeline.createSingleNodePipeline();
+
+    final int len = 200;
+    final ChunkInputStream stream = mock(ChunkInputStream.class);
+    when(stream.read(any(), anyInt(), anyInt()))
+        .thenThrow(new StorageContainerException("test", CONTAINER_UNHEALTHY));
+    when(stream.getRemaining())
+        .thenReturn((long) len);
+
+    when(refreshPipeline.apply(blockID))
+        .thenAnswer(invocation -> samePipelineWithNewId(pipeline));
+
+    BlockInputStream subject = new DummyBlockInputStream(blockID, blockSize,
+        pipeline, null, false, null, refreshPipeline, chunks, null) {
+      @Override
+      protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+        return stream;
+      }
+    };
+    subject.initialize();
+
+    // WHEN
+    byte[] b = new byte[len];
+    LambdaTestUtils.intercept(StorageContainerException.class,
+        () -> subject.read(b, 0, len));
+
+    // THEN
+    verify(refreshPipeline).apply(blockID);
+  }
+
+  @Test
+  public void testReadNotRetriedOnOtherException() throws Exception {
+    // GIVEN
+    BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
+    Pipeline pipeline = MockPipeline.createSingleNodePipeline();
+
+    final int len = 200;
+    final ChunkInputStream stream = mock(ChunkInputStream.class);
+    when(stream.read(any(), anyInt(), anyInt()))
+        .thenThrow(new OzoneChecksumException("checksum missing"));
+    when(stream.getRemaining())
+        .thenReturn((long) len);
+
+    BlockInputStream subject = new DummyBlockInputStream(blockID, blockSize,
+        pipeline, null, false, null, refreshPipeline, chunks, null) {
+      @Override
+      protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+        return stream;
+      }
+    };
+    subject.initialize();
+
+    // WHEN
+    byte[] b = new byte[len];
+    LambdaTestUtils.intercept(OzoneChecksumException.class,
+        () -> subject.read(b, 0, len));
+
+    // THEN
+    verify(refreshPipeline, never()).apply(blockID);
+  }
+
+  private Pipeline samePipelineWithNewId(Pipeline pipeline) {
+    List<DatanodeDetails> reverseOrder = new ArrayList<>(pipeline.getNodes());
+    Collections.reverse(reverseOrder);
+    return MockPipeline.createPipeline(reverseOrder);
+  }
+
+  @Test
+  public void testRefreshOnReadFailureAfterUnbuffer() throws Exception {
+    // GIVEN
+    BlockID blockID = new BlockID(new ContainerBlockID(1, 1));
+    Pipeline pipeline = MockPipeline.createSingleNodePipeline();
+    Pipeline newPipeline = MockPipeline.createSingleNodePipeline();
+    XceiverClientFactory clientFactory = mock(XceiverClientFactory.class);
+    XceiverClientSpi client = mock(XceiverClientSpi.class);
+    when(clientFactory.acquireClientForReadData(pipeline))
+        .thenReturn(client);
+
+    final int len = 200;
+    final ChunkInputStream stream = mock(ChunkInputStream.class);
+    when(stream.read(any(), anyInt(), anyInt()))
+        .thenThrow(new StorageContainerException("test", CONTAINER_NOT_FOUND))
+        .thenReturn(len);
+    when(stream.getRemaining())
+        .thenReturn((long) len);
+
+    when(refreshPipeline.apply(blockID))
+        .thenReturn(newPipeline);
+
+    BlockInputStream subject = new BlockInputStream(blockID, blockSize,
+        pipeline, null, false, clientFactory, refreshPipeline) {
+      @Override
+      protected List<ChunkInfo> getChunkInfos() throws IOException {
+        acquireClient();
+        return chunks;
+      }
+
+      @Override
+      protected ChunkInputStream createChunkInputStream(ChunkInfo chunkInfo) {
+        return stream;
+      }
+    };
+    subject.initialize();
+    subject.unbuffer();
+
+    // WHEN
+    byte[] b = new byte[len];
+    int bytesRead = subject.read(b, 0, len);
+
+    // THEN
+    Assert.assertEquals(len, bytesRead);
+    verify(refreshPipeline).apply(blockID);
+    verify(clientFactory).acquireClientForReadData(pipeline);
+    verify(clientFactory).releaseClient(client, false);
+  }
 }
diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestChunkInputStream.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestChunkInputStream.java
index eea8e1f..cb110b1 100644
--- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestChunkInputStream.java
+++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestChunkInputStream.java
@@ -20,16 +20,26 @@ package org.apache.hadoop.hdds.scm.storage;
 
 import java.io.EOFException;
 import java.util.Random;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChecksumType;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo;
+import org.apache.hadoop.hdds.scm.XceiverClientFactory;
+import org.apache.hadoop.hdds.scm.XceiverClientSpi;
+import org.apache.hadoop.hdds.scm.pipeline.MockPipeline;
+import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
 import org.apache.hadoop.ozone.common.Checksum;
 import org.apache.hadoop.test.GenericTestUtils;
 
+import org.apache.ratis.thirdparty.com.google.protobuf.ByteString;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
 /**
  * Tests for {@link ChunkInputStream}'s functionality.
  */
@@ -59,8 +69,8 @@ public class TestChunkInputStream {
             chunkData, 0, CHUNK_SIZE).getProtoBufMessage())
         .build();
 
-    chunkStream =
-        new DummyChunkInputStream(this, chunkInfo, null, null, true, chunkData);
+    chunkStream = new DummyChunkInputStream(chunkInfo, null, null, true,
+        chunkData, null);
   }
 
   static byte[] generateRandomData(int length) {
@@ -174,4 +184,50 @@ public class TestChunkInputStream {
     chunkStream.read(b2, 0, 20);
     matchWithInputData(b2, 70, 20);
   }
-}
\ No newline at end of file
+
+  @Test
+  public void testUnbuffer() throws Exception {
+    byte[] b1 = new byte[20];
+    chunkStream.read(b1, 0, 20);
+    matchWithInputData(b1, 0, 20);
+
+    chunkStream.unbuffer();
+
+    Assert.assertFalse(chunkStream.buffersAllocated());
+
+    // Next read should start from the position of the last read + 1 i.e. 20
+    byte[] b2 = new byte[20];
+    chunkStream.read(b2, 0, 20);
+    matchWithInputData(b2, 20, 20);
+  }
+
+  @Test
+  public void connectsToNewPipeline() throws Exception {
+    // GIVEN
+    Pipeline pipeline = MockPipeline.createSingleNodePipeline();
+    Pipeline newPipeline = MockPipeline.createSingleNodePipeline();
+    XceiverClientFactory clientFactory = mock(XceiverClientFactory.class);
+    XceiverClientSpi client = mock(XceiverClientSpi.class);
+    when(clientFactory.acquireClientForReadData(pipeline))
+        .thenReturn(client);
+
+    AtomicReference<Pipeline> pipelineRef = new AtomicReference<>(pipeline);
+
+    ChunkInputStream subject = new ChunkInputStream(chunkInfo, null,
+        clientFactory, pipelineRef::get, false, null) {
+      @Override
+      protected ByteString readChunk(ChunkInfo readChunkInfo) {
+        return ByteString.copyFrom(chunkData);
+      }
+    };
+
+    // WHEN
+    subject.unbuffer();
+    pipelineRef.set(newPipeline);
+    int b = subject.read();
+
+    // THEN
+    Assert.assertNotEquals(-1, b);
+    verify(clientFactory).acquireClientForReadData(newPipeline);
+  }
+}
diff --git a/hadoop-hdds/client/src/test/resources/log4j.properties b/hadoop-hdds/client/src/test/resources/log4j.properties
new file mode 100644
index 0000000..bb5cbe5
--- /dev/null
+++ b/hadoop-hdds/client/src/test/resources/log4j.properties
@@ -0,0 +1,23 @@
+#
+#   Licensed to the Apache Software Foundation (ASF) under one or more
+#   contributor license agreements.  See the NOTICE file distributed with
+#   this work for additional information regarding copyright ownership.
+#   The ASF licenses this file to You under the Apache License, Version 2.0
+#   (the "License"); you may not use this file except in compliance with
+#   the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+#
+# log4j configuration used during build and unit tests
+
+log4j.rootLogger=INFO,stdout
+log4j.threshold=ALL
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/OzoneQuota.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/OzoneQuota.java
index b778e03..56a87e8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/OzoneQuota.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/OzoneQuota.java
@@ -43,13 +43,25 @@ public final class OzoneQuota {
   public static final String OZONE_QUOTA_TB = "TB";
 
   /** Quota Units.*/
-  public enum Units {UNDEFINED, BYTES, KB, MB, GB, TB}
+  public enum Units {BYTES, KB, MB, GB, TB}
 
   // Quota to decide how many buckets can be created.
-  private long quotaInCounts;
+  private long quotaInNamespace;
   // Quota to decide how many storage space will be used in bytes.
   private long quotaInBytes;
   private RawQuotaInBytes rawQuotaInBytes;
+  // Data class of Quota.
+  private static QuotaList quotaList;
+
+  /** Setting QuotaList parameters from large to small. */
+  static {
+    quotaList = new QuotaList();
+    quotaList.addQuotaList(OZONE_QUOTA_TB, Units.TB, TB);
+    quotaList.addQuotaList(OZONE_QUOTA_GB, Units.GB, GB);
+    quotaList.addQuotaList(OZONE_QUOTA_MB, Units.MB, MB);
+    quotaList.addQuotaList(OZONE_QUOTA_KB, Units.KB, KB);
+    quotaList.addQuotaList(OZONE_QUOTA_BYTES, Units.BYTES, 1L);
+  }
 
   /**
    * Used to convert user input values into bytes such as: 1MB-> 1048576.
@@ -72,24 +84,17 @@ public final class OzoneQuota {
     }
 
     /**
-     * Returns size in Bytes or -1 if there is no Quota.
+     * Returns the size in bytes, or a negative number if there is no quota.
      */
     public long sizeInBytes() {
-      switch (this.unit) {
-      case BYTES:
-        return this.getSize();
-      case KB:
-        return this.getSize() * KB;
-      case MB:
-        return this.getSize() * MB;
-      case GB:
-        return this.getSize() * GB;
-      case TB:
-        return this.getSize() * TB;
-      case UNDEFINED:
-      default:
-        return -1;
+      long sQuota = -1L;
+      for (Units quota : quotaList.getUnitQuotaArray()) {
+        if (quota == this.unit) {
+          sQuota = quotaList.getQuotaSize(quota);
+          break;
+        }
       }
+      return this.getSize() * sQuota;
     }
 
     @Override
@@ -120,11 +125,11 @@ public final class OzoneQuota {
   /**
    * Constructor for Ozone Quota.
    *
-   * @param quotaInCounts Volume quota in counts
+   * @param quotaInNamespace Volume quota in counts
    * @param rawQuotaInBytes RawQuotaInBytes value
    */
-  private OzoneQuota(long quotaInCounts, RawQuotaInBytes rawQuotaInBytes) {
-    this.quotaInCounts = quotaInCounts;
+  private OzoneQuota(long quotaInNamespace, RawQuotaInBytes rawQuotaInBytes) {
+    this.quotaInNamespace = quotaInNamespace;
     this.rawQuotaInBytes = rawQuotaInBytes;
     this.quotaInBytes = rawQuotaInBytes.sizeInBytes();
   }
@@ -144,12 +149,12 @@ public final class OzoneQuota {
    * Quota Object.
    *
    * @param quotaInBytes Volume quota in bytes
-   * @param quotaInCounts Volume quota in counts
+   * @param quotaInNamespace Volume quota in counts
    *
    * @return OzoneQuota object
    */
   public static OzoneQuota parseQuota(String quotaInBytes,
-      long quotaInCounts) {
+      long quotaInNamespace) {
 
     if (Strings.isNullOrEmpty(quotaInBytes)) {
       throw new IllegalArgumentException(
@@ -164,46 +169,22 @@ public final class OzoneQuota {
     long quotaMultiplyExact = 0;
 
     try {
-      if (uppercase.endsWith(OZONE_QUOTA_KB)) {
-        size = uppercase
-            .substring(0, uppercase.length() - OZONE_QUOTA_KB.length());
-        currUnit = Units.KB;
-        quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size), KB);
-      }
-
-      if (uppercase.endsWith(OZONE_QUOTA_MB)) {
-        size = uppercase
-            .substring(0, uppercase.length() - OZONE_QUOTA_MB.length());
-        currUnit = Units.MB;
-        quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size), MB);
-      }
-
-      if (uppercase.endsWith(OZONE_QUOTA_GB)) {
-        size = uppercase
-            .substring(0, uppercase.length() - OZONE_QUOTA_GB.length());
-        currUnit = Units.GB;
-        quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size), GB);
-      }
-
-      if (uppercase.endsWith(OZONE_QUOTA_TB)) {
-        size = uppercase
-            .substring(0, uppercase.length() - OZONE_QUOTA_TB.length());
-        currUnit = Units.TB;
-        quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size), TB);
-      }
-
-      if (uppercase.endsWith(OZONE_QUOTA_BYTES)) {
-        size = uppercase
-            .substring(0, uppercase.length() - OZONE_QUOTA_BYTES.length());
-        currUnit = Units.BYTES;
-        quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size), 1L);
+      for (String quota : quotaList.getOzoneQuotaArray()) {
+        if (uppercase.endsWith((quota))) {
+          size = uppercase
+              .substring(0, uppercase.length() - quota.length());
+          currUnit = quotaList.getUnits(quota);
+          quotaMultiplyExact = Math.multiplyExact(Long.parseLong(size),
+              quotaList.getQuotaSize(currUnit));
+          break;
+        }
       }
       nSize = Long.parseLong(size);
     } catch (NumberFormatException e) {
       throw new IllegalArgumentException("Invalid values for quota, to ensure" +
-          " that the Quota format is legal(supported values are BYTES, KB, " +
-          "MB, GB and TB).");
-    } catch  (ArithmeticException e) {
+          " that the Quota format is legal(supported values are BYTES, " +
+          " KB, MB, GB and TB).");
+    } catch (ArithmeticException e) {
       LOG.debug("long overflow:\n{}", quotaMultiplyExact);
       throw new IllegalArgumentException("Invalid values for quota, the quota" +
           " value cannot be greater than Long.MAX_VALUE BYTES");
@@ -213,7 +194,7 @@ public final class OzoneQuota {
       throw new IllegalArgumentException("Quota cannot be negative.");
     }
 
-    return new OzoneQuota(quotaInCounts,
+    return new OzoneQuota(quotaInNamespace,
         new RawQuotaInBytes(currUnit, nSize));
   }
 
@@ -222,35 +203,25 @@ public final class OzoneQuota {
    * Returns OzoneQuota corresponding to size in bytes.
    *
    * @param quotaInBytes in bytes to be converted
-   * @param quotaInCounts in counts to be converted
+   * @param quotaInNamespace in counts to be converted
    *
    * @return OzoneQuota object
    */
   public static OzoneQuota getOzoneQuota(long quotaInBytes,
-      long quotaInCounts) {
-    long size;
-    Units unit;
-    if (quotaInBytes % TB == 0) {
-      size = quotaInBytes / TB;
-      unit = Units.TB;
-    } else if (quotaInBytes % GB == 0) {
-      size = quotaInBytes / GB;
-      unit = Units.GB;
-    } else if (quotaInBytes % MB == 0) {
-      size = quotaInBytes / MB;
-      unit = Units.MB;
-    } else if (quotaInBytes % KB == 0) {
-      size = quotaInBytes / KB;
-      unit = Units.KB;
-    } else {
-      size = quotaInBytes;
-      unit = Units.BYTES;
+      long quotaInNamespace) {
+    long size = 1L;
+    Units unit = Units.BYTES;
+    for (Long quota : quotaList.getSizeQuotaArray()) {
+      if (quotaInBytes % quota == 0) {
+        size = quotaInBytes / quota;
+        unit = quotaList.getQuotaUnit(quota);
+      }
     }
-    return new OzoneQuota(quotaInCounts, new RawQuotaInBytes(unit, size));
+    return new OzoneQuota(quotaInNamespace, new RawQuotaInBytes(unit, size));
   }
 
-  public long getQuotaInCounts() {
-    return quotaInCounts;
+  public long getQuotaInNamespace() {
+    return quotaInNamespace;
   }
 
   public long getQuotaInBytes() {
@@ -260,6 +231,6 @@ public final class OzoneQuota {
   @Override
   public String toString() {
     return "Space Bytes Quota: " + rawQuotaInBytes.toString() + "\n" +
-        "Counts Quota: " + quotaInCounts;
+        "Counts Quota: " + quotaInNamespace;
   }
 }
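
A quick usage sketch of the renamed namespace-quota accessors (illustrative
values, not taken from the patch):

    OzoneQuota quota = OzoneQuota.parseQuota("5GB", 1000);
    long spaceBytes = quota.getQuotaInBytes();      // 5 GB in bytes
    long namespace  = quota.getQuotaInNamespace();  // 1000, was getQuotaInCounts()
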
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/QuotaList.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/QuotaList.java
new file mode 100644
index 0000000..205cca1
--- /dev/null
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/client/QuotaList.java
@@ -0,0 +1,67 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdds.client;
+
+import java.util.ArrayList;
+
+/**
+ * This class holds the parallel lists of storage unit constants used by
+ * OzoneQuota.
+ */
+public class QuotaList {
+  private ArrayList<String> ozoneQuota;
+  private ArrayList<OzoneQuota.Units> unitQuota;
+  private ArrayList<Long> sizeQuota;
+
+  public QuotaList(){
+    ozoneQuota = new ArrayList<String>();
+    unitQuota = new ArrayList<OzoneQuota.Units>();
+    sizeQuota = new ArrayList<Long>();
+  }
+
+  public void addQuotaList(String oQuota, OzoneQuota.Units uQuota, Long sQuota){
+    ozoneQuota.add(oQuota);
+    unitQuota.add(uQuota);
+    sizeQuota.add(sQuota);
+  }
+
+  public ArrayList<String> getOzoneQuotaArray() {
+    return this.ozoneQuota;
+  }
+
+  public ArrayList<Long> getSizeQuotaArray() {
+    return this.sizeQuota;
+  }
+
+  public ArrayList<OzoneQuota.Units> getUnitQuotaArray() {
+    return this.unitQuota;
+  }
+
+  public OzoneQuota.Units getUnits(String oQuota){
+    return unitQuota.get(ozoneQuota.indexOf(oQuota));
+  }
+
+  public Long getQuotaSize(OzoneQuota.Units uQuota){
+    return sizeQuota.get(unitQuota.indexOf(uQuota));
+  }
+
+  public OzoneQuota.Units getQuotaUnit(Long sQuota){
+    return unitQuota.get(sizeQuota.indexOf(sQuota));
+  }
+
+}
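
The three parallel lists resolve between representations by index; roughly
(illustrative sketch mirroring the static block in OzoneQuota, not part of the
patch):

    QuotaList list = new QuotaList();
    list.addQuotaList("GB", OzoneQuota.Units.GB, 1024L * 1024 * 1024);
    list.addQuotaList("MB", OzoneQuota.Units.MB, 1024L * 1024);

    OzoneQuota.Units unit = list.getUnits("GB");          // Units.GB
    long bytesPerUnit     = list.getQuotaSize(unit);      // 1073741824
    OzoneQuota.Units back = list.getQuotaUnit(1048576L);  // Units.MB
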
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java
index 9cfe0f6..d73c605 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/conf/OzoneConfiguration.java
@@ -38,9 +38,13 @@ import java.util.stream.Collectors;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource;
 
 import com.google.common.base.Preconditions;
+import org.apache.ratis.server.RaftServerConfigKeys;
+
+import static org.apache.hadoop.hdds.ratis.RatisHelper.HDDS_DATANODE_RATIS_PREFIX_KEY;
 
 /**
  * Configuration for ozone.
@@ -49,6 +53,8 @@ import com.google.common.base.Preconditions;
 public class OzoneConfiguration extends Configuration
     implements MutableConfigurationSource {
   static {
+    addDeprecatedKeys();
+
     activate();
   }
 
@@ -287,4 +293,15 @@ public class OzoneConfiguration extends Configuration
     }
     return configMap;
   }
+
+  private static void addDeprecatedKeys(){
+    Configuration.addDeprecations(new DeprecationDelta[]{
+        new DeprecationDelta("ozone.datanode.pipeline.limit",
+            ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT),
+        new DeprecationDelta(HDDS_DATANODE_RATIS_PREFIX_KEY + "."
+           + RaftServerConfigKeys.PREFIX + "." + "rpcslowness.timeout",
+           HDDS_DATANODE_RATIS_PREFIX_KEY + "."
+           + RaftServerConfigKeys.PREFIX + "." + "rpc.slowness.timeout")
+    });
+  }
 }
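
A sketch of what the deprecation delta buys (illustrative, not part of the
patch): configs that still set the old pipeline-limit key keep working and
resolve to the new key.

    OzoneConfiguration conf = new OzoneConfiguration();
    conf.set("ozone.datanode.pipeline.limit", "3");   // old, now deprecated key

    // Resolves through the deprecation mapping to the new key and returns 3;
    // Hadoop's Configuration logs a one-time deprecation warning.
    int limit = conf.getInt(ScmConfigKeys.OZONE_DATANODE_PIPELINE_LIMIT, 2);
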
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
index 1a42f3a..60fedd2 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/protocol/DatanodeDetails.java
@@ -18,17 +18,19 @@
 
 package org.apache.hadoop.hdds.protocol;
 
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
 import org.apache.hadoop.hdds.annotation.InterfaceStability;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.net.NetConstants;
 import org.apache.hadoop.hdds.scm.net.NodeImpl;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.UUID;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
 
 /**
  * DatanodeDetails class contains details about DataNode like:
@@ -57,6 +59,8 @@ public class DatanodeDetails extends NodeImpl implements
   private long setupTime;
   private String revision;
   private String buildDate;
+  private HddsProtos.NodeOperationalState persistedOpState;
+  private long persistedOpStateExpiryEpochSec = 0;
 
   /**
    * Constructs DatanodeDetails instance. DatanodeDetails.Builder is used
@@ -71,11 +75,16 @@ public class DatanodeDetails extends NodeImpl implements
    * @param setupTime the setup time of DataNode
    * @param revision DataNodes's revision
    * @param buildDate DataNodes's build timestamp
+   * @param persistedOpState Operational State stored on DN.
+   * @param persistedOpStateExpiryEpochSec Seconds after the epoch the stored
+   *                                       state should expire.
    */
   @SuppressWarnings("parameternumber")
   private DatanodeDetails(UUID uuid, String ipAddress, String hostName,
       String networkLocation, List<Port> ports, String certSerialId,
-      String version, long setupTime, String revision, String buildDate) {
+      String version, long setupTime, String revision, String buildDate,
+      HddsProtos.NodeOperationalState persistedOpState,
+      long persistedOpStateExpiryEpochSec) {
     super(hostName, networkLocation, NetConstants.NODE_COST_DEFAULT);
     this.uuid = uuid;
     this.uuidString = uuid.toString();
@@ -87,6 +96,8 @@ public class DatanodeDetails extends NodeImpl implements
     this.setupTime = setupTime;
     this.revision = revision;
     this.buildDate = buildDate;
+    this.persistedOpState = persistedOpState;
+    this.persistedOpStateExpiryEpochSec = persistedOpStateExpiryEpochSec;
   }
 
   public DatanodeDetails(DatanodeDetails datanodeDetails) {
@@ -103,6 +114,9 @@ public class DatanodeDetails extends NodeImpl implements
     this.setupTime = datanodeDetails.setupTime;
     this.revision = datanodeDetails.revision;
     this.buildDate = datanodeDetails.buildDate;
+    this.persistedOpState = datanodeDetails.getPersistedOpState();
+    this.persistedOpStateExpiryEpochSec =
+        datanodeDetails.getPersistedOpStateExpiryEpochSec();
   }
 
   /**
@@ -171,6 +185,10 @@ public class DatanodeDetails extends NodeImpl implements
     ports.add(port);
   }
 
+  public void setPort(Name name, int port) {
+    setPort(new Port(name, port));
+  }
+
   /**
    * Returns all the Ports used by DataNode.
    *
@@ -181,6 +199,46 @@ public class DatanodeDetails extends NodeImpl implements
   }
 
   /**
+   * Return the persistedOpState. If the stored value is null, return the
+   * default value of IN_SERVICE.
+   *
+   * @return The OperationalState persisted on the datanode.
+   */
+  public HddsProtos.NodeOperationalState getPersistedOpState() {
+    if (persistedOpState == null) {
+      return HddsProtos.NodeOperationalState.IN_SERVICE;
+    } else {
+      return persistedOpState;
+    }
+  }
+
+  /**
+   * Set the persistedOpState for this instance.
+   *
+   * @param state The new operational state.
+   */
+  public void setPersistedOpState(HddsProtos.NodeOperationalState state) {
+    this.persistedOpState = state;
+  }
+
+  /**
+   * Get the persistedOpStateExpiryEpochSec for the instance.
+   * @return Seconds from the epoch when the operational state should expire.
+   */
+  public long getPersistedOpStateExpiryEpochSec() {
+    return persistedOpStateExpiryEpochSec;
+  }
+
+  /**
+   * Set persistedOpStateExpiryEpochSec.
+   * @param expiry The number of seconds after the epoch when the operational
+   *               state should expire.
+   */
+  public void setPersistedOpStateExpiryEpochSec(long expiry) {
+    this.persistedOpStateExpiryEpochSec = expiry;
+  }
+
+  /**
    * Given the name returns port number, null if the asked port is not found.
    *
    * @param name Name of the port
@@ -231,6 +289,13 @@ public class DatanodeDetails extends NodeImpl implements
     if (datanodeDetailsProto.hasNetworkLocation()) {
       builder.setNetworkLocation(datanodeDetailsProto.getNetworkLocation());
     }
+    if (datanodeDetailsProto.hasPersistedOpState()) {
+      builder.setPersistedOpState(datanodeDetailsProto.getPersistedOpState());
+    }
+    if (datanodeDetailsProto.hasPersistedOpStateExpiry()) {
+      builder.setPersistedOpStateExpiry(
+          datanodeDetailsProto.getPersistedOpStateExpiry());
+    }
     return builder.build();
   }
 
@@ -294,6 +359,10 @@ public class DatanodeDetails extends NodeImpl implements
     if (!Strings.isNullOrEmpty(getNetworkLocation())) {
       builder.setNetworkLocation(getNetworkLocation());
     }
+    if (persistedOpState != null) {
+      builder.setPersistedOpState(persistedOpState);
+    }
+    builder.setPersistedOpStateExpiry(persistedOpStateExpiryEpochSec);
 
     for (Port port : ports) {
       builder.addPorts(HddsProtos.Port.newBuilder()
@@ -342,6 +411,8 @@ public class DatanodeDetails extends NodeImpl implements
         ", networkLocation: " +
         getNetworkLocation() +
         ", certSerialId: " + certSerialId +
+        ", persistedOpState: " + persistedOpState +
+        ", persistedOpStateExpiryEpochSec: " + persistedOpStateExpiryEpochSec +
         "}";
   }
 
@@ -385,6 +456,8 @@ public class DatanodeDetails extends NodeImpl implements
     private long setupTime;
     private String revision;
     private String buildDate;
+    private HddsProtos.NodeOperationalState persistedOpState;
+    private long persistedOpStateExpiryEpochSec = 0;
 
     /**
      * Default private constructor. To create Builder instance use
@@ -412,6 +485,9 @@ public class DatanodeDetails extends NodeImpl implements
       this.setupTime = details.getSetupTime();
       this.revision = details.getRevision();
       this.buildDate = details.getBuildDate();
+      this.persistedOpState = details.getPersistedOpState();
+      this.persistedOpStateExpiryEpochSec =
+          details.getPersistedOpStateExpiryEpochSec();
       return this;
     }
 
@@ -542,6 +618,31 @@ public class DatanodeDetails extends NodeImpl implements
       return this;
     }
 
+    /**
+     * Adds persistedOpState.
+     *
+     * @param state The operational state persisted on the datanode
+     *
+     * @return DatanodeDetails.Builder
+     */
+    public Builder setPersistedOpState(HddsProtos.NodeOperationalState state){
+      this.persistedOpState = state;
+      return this;
+    }
+
+    /**
+     * Adds persistedOpStateExpiryEpochSec.
+     *
+     * @param expiry The seconds after the epoch the operational state should
+     *              expire.
+     *
+     * @return DatanodeDetails.Builder
+     */
+    public Builder setPersistedOpStateExpiry(long expiry){
+      this.persistedOpStateExpiryEpochSec = expiry;
+      return this;
+    }
+
     /**
      * Builds and returns DatanodeDetails instance.
      *
@@ -553,8 +654,8 @@ public class DatanodeDetails extends NodeImpl implements
         networkLocation = NetConstants.DEFAULT_RACK;
       }
       DatanodeDetails dn = new DatanodeDetails(id, ipAddress, hostName,
-          networkLocation, ports, certSerialId,
-          version, setupTime, revision, buildDate);
+          networkLocation, ports, certSerialId, version, setupTime, revision,
+          buildDate, persistedOpState, persistedOpStateExpiryEpochSec);
       if (networkName != null) {
         dn.setNetworkName(networkName);
       }
@@ -583,7 +684,7 @@ public class DatanodeDetails extends NodeImpl implements
      * Ports that are supported in DataNode.
      */
     public enum Name {
-      STANDALONE, RATIS, REST
+      STANDALONE, RATIS, REST, REPLICATION
     }
 
     private Name name;
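
A sketch of the new persisted operational-state fields (test-style; assumes the
MockDatanodeDetails helper from the hdds-common test sources, which this patch
also touches):

    DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails();

    // Nothing persisted yet: the getter falls back to IN_SERVICE.
    HddsProtos.NodeOperationalState state = dn.getPersistedOpState();

    // Persist a non-default state; the expiry is meaningful for states that
    // should lapse automatically, such as maintenance.
    dn.setPersistedOpState(HddsProtos.NodeOperationalState.DECOMMISSIONED);
    dn.setPersistedOpStateExpiryEpochSec(
        java.time.Instant.now().getEpochSecond() + 24 * 3600);
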
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfig.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfig.java
index e9c283d..7a144d8 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfig.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfig.java
@@ -79,7 +79,7 @@ public class ScmConfig {
 
   @Config(key = "block.deletion.per-interval.max",
       type = ConfigType.INT,
-      defaultValue = "10000",
+      defaultValue = "20000",
       tags = { ConfigTag.SCM, ConfigTag.DELETION},
       description =
           "Maximum number of blocks which SCM processes during an interval. "
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index 7b01e07..e5958b7 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -239,7 +239,7 @@ public final class ScmConfigKeys {
   public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT =
       "ozone.scm.heartbeat.rpc-timeout";
   public static final String OZONE_SCM_HEARTBEAT_RPC_TIMEOUT_DEFAULT =
-      "1s";
+      "5s";
 
   public static final String OZONE_SCM_HEARTBEAT_RPC_RETRY_COUNT =
       "ozone.scm.heartbeat.rpc-retry-count";
@@ -298,7 +298,7 @@ public final class ScmConfigKeys {
   // Pipeline placement policy:
   // Upper limit for how many pipelines a datanode can engage in.
   public static final String OZONE_DATANODE_PIPELINE_LIMIT =
-          "ozone.datanode.pipeline.limit";
+          "ozone.scm.datanode.pipeline.limit";
   public static final int OZONE_DATANODE_PIPELINE_LIMIT_DEFAULT = 2;
 
   // Upper limit for how many pipelines can be created
@@ -364,6 +364,11 @@ public final class ScmConfigKeys {
   public static final String HDDS_TRACING_ENABLED = "hdds.tracing.enabled";
   public static final boolean HDDS_TRACING_ENABLED_DEFAULT = false;
 
+  public static final String OZONE_SCM_DATANODE_ADMIN_MONITOR_INTERVAL =
+      "ozone.scm.datanode.admin.monitor.interval";
+  public static final String OZONE_SCM_DATANODE_ADMIN_MONITOR_INTERVAL_DEFAULT =
+      "30s";
+
   /**
    * Never constructed.
    */
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
index 84831c1..bab99b4 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
@@ -145,15 +145,52 @@ public interface ScmClient extends Closeable {
       String owner) throws IOException;
 
   /**
-   * Returns a set of Nodes that meet a query criteria.
-   * @param nodeStatuses - Criteria that we want the node to have.
+   * Returns a set of Nodes that meet a query criteria. Passing null for opState
+   * or nodeState acts like a wild card, returning all nodes in that state.
+   * @param opState - Operational State of the node, eg IN_SERVICE,
+   *                DECOMMISSIONED, etc
+   * @param nodeState - Health state that we want the node to have,
+   *                  eg HEALTHY, STALE etc
    * @param queryScope - Query scope - Cluster or pool.
    * @param poolName - if it is pool, a pool name is required.
    * @return A set of nodes that meet the requested criteria.
    * @throws IOException
    */
-  List<HddsProtos.Node> queryNode(HddsProtos.NodeState nodeStatuses,
-      HddsProtos.QueryScope queryScope, String poolName) throws IOException;
+  List<HddsProtos.Node> queryNode(HddsProtos.NodeOperationalState opState,
+      HddsProtos.NodeState nodeState, HddsProtos.QueryScope queryScope,
+      String poolName) throws IOException;
+
+  /**
+   * Allows a list of hosts to be decommissioned. The hosts are identified
+   * by their hostname and optionally port in the format foo.com:port.
+   * @param hosts A list of hostnames, optionally with port
+   * @throws IOException
+   */
+  void decommissionNodes(List<String> hosts) throws IOException;
+
+  /**
+   * Allows a list of hosts in maintenance or decommission states to be placed
+   * back in service. The hosts are identified by their hostname and optionally
+   * port in the format foo.com:port.
+   * @param hosts A list of hostnames, optionally with port
+   * @throws IOException
+   */
+  void recommissionNodes(List<String> hosts) throws IOException;
+
+  /**
+   * Place the list of datanodes into maintenance mode. If a non-zero number
+   * of hours is passed, the hosts will automatically exit maintenance mode
+   * after that time has elapsed. Passing an end time of zero means the hosts
+   * will remain in maintenance indefinitely.
+   * The hosts are identified by their hostname and optionally port in the
+   * format foo.com:port.
+   * @param hosts A list of hostnames, optionally with port
+   * @param endHours The number of hours from now after which maintenance will
+   *                 end, or zero if maintenance must be manually ended.
+   * @throws IOException
+   */
+  void startMaintenanceNodes(List<String> hosts, int endHours)
+      throws IOException;
 
   /**
    * Creates a specified replication pipeline.
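
Illustrative sketch of driving the new admin operations through an ScmClient
implementation (scmClient and the host names are placeholders, not from the
patch):

    List<String> hosts = Arrays.asList("dn1.example.com:9858", "dn2.example.com");

    scmClient.decommissionNodes(hosts);

    // null nodeState acts as a wildcard: any health, but only nodes whose
    // operational state is DECOMMISSIONED.
    List<HddsProtos.Node> decommissioned = scmClient.queryNode(
        HddsProtos.NodeOperationalState.DECOMMISSIONED, null,
        HddsProtos.QueryScope.CLUSTER, "");

    // Maintenance for 24 hours, then back in service.
    scmClient.startMaintenanceNodes(hosts, 24);
    scmClient.recommissionNodes(hosts);
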
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index 94ef442..3739ed3 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -120,12 +120,22 @@ public interface StorageContainerLocationProtocol extends Closeable {
   void deleteContainer(long containerID) throws IOException;
 
   /**
-   *  Queries a list of Node Statuses.
-   * @param state
+   *  Queries a list of Node Statuses. Passing a null for either opState or
+   *  state acts like a wildcard returning all nodes in that state.
+   * @param opState The node operational state
+   * @param state The node health
    * @return List of Datanodes.
    */
-  List<HddsProtos.Node> queryNode(HddsProtos.NodeState state,
-      HddsProtos.QueryScope queryScope, String poolName) throws IOException;
+  List<HddsProtos.Node> queryNode(HddsProtos.NodeOperationalState opState,
+      HddsProtos.NodeState state, HddsProtos.QueryScope queryScope,
+      String poolName) throws IOException;
+
+  void decommissionNodes(List<String> nodes) throws IOException;
+
+  void recommissionNodes(List<String> nodes) throws IOException;
+
+  void startMaintenanceNodes(List<String> nodes, int endInHours)
+      throws IOException;
 
   /**
    * Close a container.
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
index 81470b2..591bf3a 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.ratis.thirdparty.io.grpc.Context;
 import org.apache.ratis.thirdparty.io.grpc.Metadata;
 
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.util.regex.Pattern;
 
 import static org.apache.ratis.thirdparty.io.grpc.Metadata.ASCII_STRING_MARSHALLER;
@@ -253,11 +255,15 @@ public final class OzoneConsts {
   // versions, requiring this property to be tracked on a per container basis.
   // V1: All data in default column family.
   public static final String SCHEMA_V1 = "1";
-  // V2: Metadata, block data, and deleted blocks in their own column families.
+  // V2: Metadata, block data, and delete transactions in their own
+  // column families.
   public static final String SCHEMA_V2 = "2";
   // Most recent schema version that all new containers should be created with.
   public static final String SCHEMA_LATEST = SCHEMA_V2;
 
+  public static final String[] SCHEMA_VERSIONS =
+      new String[] {SCHEMA_V1, SCHEMA_V2};
+
   // Supported store types.
   public static final String OZONE = "ozone";
   public static final String S3 = "s3";
@@ -269,8 +275,9 @@ public final class OzoneConsts {
   public static final String SRC_KEY = "srcKey";
   public static final String DST_KEY = "dstKey";
   public static final String USED_BYTES = "usedBytes";
+  public static final String USED_NAMESPACE = "usedNamespace";
   public static final String QUOTA_IN_BYTES = "quotaInBytes";
-  public static final String QUOTA_IN_COUNTS = "quotaInCounts";
+  public static final String QUOTA_IN_NAMESPACE = "quotaInNamespace";
   public static final String OBJECT_ID = "objectID";
   public static final String UPDATE_ID = "updateID";
   public static final String CLIENT_ID = "clientID";
@@ -352,7 +359,7 @@ public final class OzoneConsts {
   public static final String GDPR_FLAG = "gdprEnabled";
   public static final String GDPR_ALGORITHM_NAME = "AES";
   public static final int GDPR_DEFAULT_RANDOM_SECRET_LENGTH = 16;
-  public static final String GDPR_CHARSET = "UTF-8";
+  public static final Charset GDPR_CHARSET = StandardCharsets.UTF_8;
   public static final String GDPR_LENGTH = "length";
   public static final String GDPR_SECRET = "secret";
   public static final String GDPR_ALGORITHM = "algorithm";
@@ -383,6 +390,10 @@ public final class OzoneConsts {
   // An on-disk transient marker file used when replacing DB with checkpoint
   public static final String DB_TRANSIENT_MARKER = "dbInconsistentMarker";
 
+  public static final String OM_RATIS_SNAPSHOT_DIR = "snapshot";
+
+  public static final long DEFAULT_OM_UPDATE_ID = -1L;  
+
   // An on-disk marker file used to indicate that the OM is in prepare and
   // should remain prepared even after a restart.
   public static final String PREPARE_MARKER = "prepareMarker";
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java
index 5e52b40..68ae49b 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/lease/LeaseManager.java
@@ -17,9 +17,6 @@
 
 package org.apache.hadoop.ozone.lease;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Callable;
@@ -28,6 +25,8 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 
 import static org.apache.hadoop.ozone.lease.Lease.messageForResource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * LeaseManager is someone who can provide you leases based on your
@@ -46,6 +45,7 @@ public class LeaseManager<T> {
 
   private final String name;
   private final long defaultTimeout;
+  private final Object monitor = new Object();
   private Map<T, Lease<T>> activeLeases;
   private LeaseMonitor leaseMonitor;
   private Thread leaseMonitorThread;
@@ -115,12 +115,14 @@ public class LeaseManager<T> {
     if (LOG.isDebugEnabled()) {
       LOG.debug("Acquiring lease on {} for {} milliseconds", resource, timeout);
     }
-    if(activeLeases.containsKey(resource)) {
+    if (activeLeases.containsKey(resource)) {
       throw new LeaseAlreadyExistException(messageForResource(resource));
     }
     Lease<T> lease = new Lease<>(resource, timeout);
     activeLeases.put(resource, lease);
-    leaseMonitorThread.interrupt();
+    synchronized (monitor) {
+      monitor.notifyAll();
+    }
     return lease;
   }
 
@@ -135,7 +137,7 @@ public class LeaseManager<T> {
   public Lease<T> get(T resource) throws LeaseNotFoundException {
     checkStatus();
     Lease<T> lease = activeLeases.get(resource);
-    if(lease != null) {
+    if (lease != null) {
       return lease;
     }
     throw new LeaseNotFoundException(messageForResource(resource));
@@ -156,7 +158,7 @@ public class LeaseManager<T> {
       LOG.debug("Releasing lease on {}", resource);
     }
     Lease<T> lease = activeLeases.remove(resource);
-    if(lease == null) {
+    if (lease == null) {
       throw new LeaseNotFoundException(messageForResource(resource));
     }
     lease.invalidate();
@@ -171,11 +173,13 @@ public class LeaseManager<T> {
     checkStatus();
     LOG.debug("Shutting down LeaseManager service");
     leaseMonitor.disable();
-    leaseMonitorThread.interrupt();
-    for(T resource : activeLeases.keySet()) {
+    synchronized (monitor) {
+      monitor.notifyAll();
+    }
+    for (T resource : activeLeases.keySet()) {
       try {
         release(resource);
-      }  catch(LeaseNotFoundException ex) {
+      } catch (LeaseNotFoundException ex) {
         //Ignore the exception, someone might have released the lease
       }
     }
@@ -187,7 +191,7 @@ public class LeaseManager<T> {
    * running.
    */
   private void checkStatus() {
-    if(!isRunning) {
+    if (!isRunning) {
       throw new LeaseManagerNotRunningException("LeaseManager not running.");
     }
   }
@@ -198,8 +202,8 @@ public class LeaseManager<T> {
    */
   private final class LeaseMonitor implements Runnable {
 
-    private volatile boolean monitor = true;
     private final ExecutorService executorService;
+    private volatile boolean running = true;
 
     private LeaseMonitor() {
       this.executorService = Executors.newCachedThreadPool();
@@ -207,7 +211,7 @@ public class LeaseManager<T> {
 
     @Override
     public void run() {
-      while (monitor) {
+      while (running) {
         LOG.debug("{}-LeaseMonitor: checking for lease expiry", name);
         long sleepTime = Long.MAX_VALUE;
 
@@ -230,12 +234,12 @@ public class LeaseManager<T> {
         }
 
         try {
-          if(!Thread.interrupted()) {
-            Thread.sleep(sleepTime);
+          synchronized (monitor) {
+            monitor.wait(sleepTime);
           }
         } catch (InterruptedException e) {
           // This means a new lease is added to activeLeases.
-          LOG.error("Execution was interrupted ", e);
+          LOG.warn("Lease manager is interrupted. Shutting down...", e);
           Thread.currentThread().interrupt();
         }
       }
@@ -246,7 +250,7 @@ public class LeaseManager<T> {
      * will stop lease monitor.
      */
     public void disable() {
-      monitor = false;
+      running = false;
     }
   }
 
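For readers following the LeaseManager change above: the monitor thread now parks on a private lock object with a timed wait() and is woken with notifyAll(), instead of being interrupt()ed whenever a lease is added or the manager shuts down. A minimal, self-contained sketch of that wake-up pattern (hypothetical class and method names, not Ozone code):

import java.util.concurrent.atomic.AtomicBoolean;

public class TimedWakeupLoop {
  private final Object monitor = new Object();
  private final AtomicBoolean running = new AtomicBoolean(true);

  // Background loop: sleeps up to sleepMillis per iteration, but can be
  // woken early by wakeUp() when new work arrives.
  public void runLoop(long sleepMillis) throws InterruptedException {
    while (running.get()) {
      // ... scan for expired leases here ...
      synchronized (monitor) {
        monitor.wait(sleepMillis);
      }
    }
  }

  // Called when a new lease is added or on shutdown, instead of
  // interrupting the monitor thread.
  public void wakeUp() {
    synchronized (monitor) {
      monitor.notifyAll();
    }
  }

  public void stop() {
    running.set(false);
    wakeUp();
  }
}

The benefit over interrupt() is that the interrupted flag is left free to signal a genuine shutdown, which is why the catch block above now logs a warning and re-interrupts instead of treating interruption as routine.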
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index d8402f7..e20d416 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -394,7 +394,7 @@
   </property>
   <property>
     <name>ozone.key.deleting.limit.per.task</name>
-    <value>1000</value>
+    <value>20000</value>
     <tag>OM, PERFORMANCE</tag>
     <description>
       A maximum number of keys to be scanned by key deleting service
@@ -776,10 +776,12 @@
     </description>
   </property>
   <property>
-  <name>ozone.datanode.pipeline.limit</name>
+  <name>ozone.scm.datanode.pipeline.limit</name>
   <value>2</value>
   <tag>OZONE, SCM, PIPELINE</tag>
   <description>Max number of pipelines per datanode can be engaged in.
+    Setting the value to 0 means the pipeline limit per datanode will be
+    determined by the number of metadata volumes reported per datanode.
   </description>
   </property>
   <property>
@@ -807,7 +809,7 @@
   <property>
     <name>ozone.scm.pipeline.leader-choose.policy</name>
     <value>
-      org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.DefaultLeaderChoosePolicy
+      org.apache.hadoop.hdds.scm.pipeline.leader.choose.algorithms.MinLeaderCountChoosePolicy
     </value>
     <tag>OZONE, SCM, PIPELINE</tag>
     <description>
@@ -943,7 +945,7 @@
   </property>
   <property>
     <name>ozone.scm.heartbeat.rpc-timeout</name>
-    <value>1s</value>
+    <value>5s</value>
     <tag>OZONE, MANAGEMENT</tag>
     <description>
       Timeout value for the RPC from Datanode to SCM.
@@ -1523,7 +1525,7 @@
 
   <property>
     <name>ozone.om.ratis.enable</name>
-    <value>false</value>
+    <value>true</value>
     <tag>OZONE, OM, RATIS, MANAGEMENT</tag>
     <description>Property to enable or disable Ratis server on OM.
     Please note - this is a temporary property to disable OM Ratis server.
@@ -1644,24 +1646,13 @@
   </property>
 
   <property>
-    <name>ozone.om.ratis.server.role.check.interval</name>
-    <value>15s</value>
-    <tag>OZONE, OM, RATIS, MANAGEMENT</tag>
-    <description>The interval between OM leader performing a role
-      check on its ratis server. Ratis server informs OM if it
-      loses the leader role. The scheduled check is an secondary
-      check to ensure that the leader role is updated periodically
-      .</description>
-  </property>
-
-  <property>
     <name>ozone.om.ratis.snapshot.dir</name>
     <value/>
     <tag>OZONE, OM, STORAGE, MANAGEMENT, RATIS</tag>
     <description>This directory is used for storing OM's snapshot
       related files like the ratisSnapshotIndex and DB checkpoint from leader
       OM.
-      If undefined, OM snapshot dir will fallback to ozone.om.ratis.storage.dir.
+      If undefined, OM snapshot dir will fallback to ozone.metadata.dirs.
       This fallback approach is not recommended for production environments.
     </description>
   </property>
@@ -2336,6 +2327,17 @@
     </description>
   </property>
   <property>
+    <name>ozone.scm.datanode.admin.monitor.interval</name>
+    <value>30s</value>
+    <tag>SCM</tag>
+    <description>
+      This sets how frequently the datanode admin monitor runs to check for
+      nodes added to the admin workflow or removed from it. The progress
+      of decommissioning and entering maintenance nodes is also checked to see
+      if they have completed.
+    </description>
+  </property>
+  <property>
     <name>ozone.client.list.trash.keys.max</name>
     <value>1000</value>
     <tag>OZONE, CLIENT</tag>
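The ozone-default.xml changes above only move defaults (key-deletion batch size, per-datanode pipeline limit, heartbeat RPC timeout, OM Ratis on by default); each can still be overridden in ozone-site.xml. As a rough illustration, assuming only the standard Hadoop Configuration accessors that OzoneConfiguration inherits, client code would observe the new values like this:

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;

public final class ShowDefaultsSketch {
  private ShowDefaultsSketch() { }

  public static void main(String[] args) {
    OzoneConfiguration conf = new OzoneConfiguration();

    // Key deletion batch size, now defaulting to 20000.
    int keyDeletingLimit =
        conf.getInt("ozone.key.deleting.limit.per.task", 20000);

    // Per-datanode pipeline limit; 0 means "derive it from the number of
    // metadata volumes the datanode reports".
    int pipelineLimit =
        conf.getInt("ozone.scm.datanode.pipeline.limit", 2);

    // Heartbeat RPC timeout, now 5s by default.
    long heartbeatTimeoutMs = conf.getTimeDuration(
        "ozone.scm.heartbeat.rpc-timeout", 5000, TimeUnit.MILLISECONDS);

    System.out.printf("deleteLimit=%d pipelineLimit=%d hbTimeoutMs=%d%n",
        keyDeletingLimit, pipelineLimit, heartbeatTimeoutMs);
  }
}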
diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/MockDatanodeDetails.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/MockDatanodeDetails.java
index 06a1bf0..41ae6ec 100644
--- a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/MockDatanodeDetails.java
+++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/protocol/MockDatanodeDetails.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hdds.protocol;
 
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+
 import java.io.IOException;
 import java.net.ServerSocket;
 import java.util.Random;
@@ -101,6 +103,8 @@ public final class MockDatanodeDetails {
         .addPort(ratisPort)
         .addPort(restPort)
         .setNetworkLocation(networkLocation)
+        .setPersistedOpState(HddsProtos.NodeOperationalState.IN_SERVICE)
+        .setPersistedOpStateExpiry(0)
         .build();
   }
 
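The two extra builder calls above give mock datanodes the newly persisted operational state. The same state can also be stamped onto an existing DatanodeDetails through the setters used later in this commit (see SetNodeOperationalStateCommandHandler below); a small hedged sketch, where only the chosen values are illustrative:

import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;

public final class PersistedOpStateSketch {
  private PersistedOpStateSketch() { }

  // Marks a datanode as IN_SERVICE with no expiry (0), mirroring the
  // builder defaults added to MockDatanodeDetails above.
  public static void markInService(DatanodeDetails dn) {
    dn.setPersistedOpState(HddsProtos.NodeOperationalState.IN_SERVICE);
    dn.setPersistedOpStateExpiryEpochSec(0);
  }
}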
diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileAppender.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileAppender.java
index 9f1c087..4256ac8 100644
--- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileAppender.java
+++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileAppender.java
@@ -28,6 +28,7 @@ import javax.xml.transform.dom.DOMSource;
 import javax.xml.transform.stream.StreamResult;
 import java.io.InputStream;
 import java.io.Writer;
+import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
 import java.util.stream.Collectors;
 
@@ -117,7 +118,8 @@ public class ConfigFileAppender {
       factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
       Transformer transformer = factory.newTransformer();
 
-      transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+      transformer.setOutputProperty(OutputKeys.ENCODING,
+              StandardCharsets.UTF_8.name());
       transformer.setOutputProperty(OutputKeys.INDENT, "yes");
       transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount",
           "2");
diff --git a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java
index f3d71be..a4d7dc8 100644
--- a/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java
+++ b/hadoop-hdds/config/src/main/java/org/apache/hadoop/hdds/conf/ConfigFileGenerator.java
@@ -67,6 +67,7 @@ public class ConfigFileGenerator extends AbstractProcessor {
     try {
 
       //load existing generated config (if exists)
+      boolean resourceExists = true;
       ConfigFileAppender appender = new ConfigFileAppender();
       try (InputStream input = filer
           .getResource(StandardLocation.CLASS_OUTPUT, "",
@@ -74,6 +75,7 @@ public class ConfigFileGenerator extends AbstractProcessor {
         appender.load(input);
       } catch (FileNotFoundException | NoSuchFileException ex) {
         appender.init();
+        resourceExists = false;
       }
 
       Set<? extends Element> annotatedElements =
@@ -100,15 +102,16 @@ public class ConfigFileGenerator extends AbstractProcessor {
         }
 
       }
-      FileObject resource = filer
-          .createResource(StandardLocation.CLASS_OUTPUT, "",
-              OUTPUT_FILE_NAME);
-
-      try (Writer writer = new OutputStreamWriter(
-          resource.openOutputStream(), StandardCharsets.UTF_8)) {
-        appender.write(writer);
+      if (!resourceExists) {
+        FileObject resource = filer
+            .createResource(StandardLocation.CLASS_OUTPUT, "",
+                OUTPUT_FILE_NAME);
+
+        try (Writer writer = new OutputStreamWriter(
+            resource.openOutputStream(), StandardCharsets.UTF_8)) {
+          appender.write(writer);
+        }
       }
-
     } catch (IOException e) {
       processingEnv.getMessager().printMessage(Kind.ERROR,
           "Can't generate the config file from annotation: " + e);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
index cfb22e3..2d1d4e3 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/HddsDatanodeService.java
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.ozone;
 
+import javax.management.ObjectName;
 import java.io.File;
 import java.io.IOException;
 import java.net.InetAddress;
@@ -27,10 +28,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
-import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 
-import com.sun.jmx.mbeanserver.Introspector;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.hdds.DFSConfigKeysLegacy;
 import org.apache.hadoop.hdds.HddsUtils;
@@ -42,7 +42,6 @@ import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto;
 import org.apache.hadoop.hdds.protocolPB.SCMSecurityProtocolClientSideTranslatorPB;
-import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.security.x509.SecurityConfig;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
@@ -50,6 +49,7 @@ import org.apache.hadoop.hdds.security.x509.certificate.client.DNCertificateClie
 import org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest;
 import org.apache.hadoop.hdds.server.http.RatisDropwizardExports;
 import org.apache.hadoop.hdds.tracing.TracingUtil;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
 import org.apache.hadoop.hdds.utils.HddsVersionInfo;
 import org.apache.hadoop.metrics2.util.MBeans;
 import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
@@ -61,22 +61,20 @@ import org.apache.hadoop.security.SecurityUtil;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.authentication.client.AuthenticationException;
 import org.apache.hadoop.util.ServicePlugin;
+import org.apache.hadoop.util.Time;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
+import com.sun.jmx.mbeanserver.Introspector;
 import static org.apache.hadoop.hdds.security.x509.certificate.utils.CertificateCodec.getX509Certificate;
 import static org.apache.hadoop.hdds.security.x509.certificates.utils.CertificateSignRequest.getEncodedString;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.HDDS_DATANODE_PLUGINS_KEY;
 import static org.apache.hadoop.util.ExitUtil.terminate;
-
-import org.apache.hadoop.util.Time;
 import org.bouncycastle.pkcs.PKCS10CertificationRequest;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import picocli.CommandLine.Command;
 
-import javax.management.ObjectName;
-
 /**
  * Datanode service plugin to start the HDDS container services.
  */
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
index d9f3221..b53fe7e 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/ContainerUtils.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.ozone.container.common.helpers;
 import static org.apache.commons.io.FilenameUtils.removeExtension;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_CHECKSUM_ERROR;
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.NO_SUCH_ALGORITHM;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CLOSED_CONTAINER_IO;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Result.CONTAINER_NOT_OPEN;
 import static org.apache.hadoop.hdds.scm.protocolPB.ContainerCommandResponseBuilders.getContainerCommandResponse;
 import static org.apache.hadoop.ozone.container.common.impl.ContainerData.CHARSET_ENCODING;
 
@@ -77,8 +79,16 @@ public final class ContainerUtils {
       ContainerCommandRequestProto request) {
     String logInfo = "Operation: {} , Trace ID: {} , Message: {} , " +
         "Result: {} , StorageContainerException Occurred.";
-    log.info(logInfo, request.getCmdType(), request.getTraceID(),
-        ex.getMessage(), ex.getResult().getValueDescriptor().getName(), ex);
+    if (ex.getResult() == CLOSED_CONTAINER_IO ||
+        ex.getResult() == CONTAINER_NOT_OPEN) {
+      if (log.isDebugEnabled()) {
+        log.debug(logInfo, request.getCmdType(), request.getTraceID(),
+            ex.getMessage(), ex.getResult().getValueDescriptor().getName(), ex);
+      }
+    } else {
+      log.info(logInfo, request.getCmdType(), request.getTraceID(),
+          ex.getMessage(), ex.getResult().getValueDescriptor().getName(), ex);
+    }
     return getContainerCommandResponse(request, ex.getResult(), ex.getMessage())
         .build();
   }
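The ContainerUtils change above demotes two expected failure results (writes against closed or not-open containers) to DEBUG so they stop flooding the INFO log. A generic sketch of that pattern, with a hypothetical isExpected flag standing in for the result-code check:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class LogLevelByResultSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(LogLevelByResultSketch.class);

  private LogLevelByResultSketch() { }

  public static void logFailure(boolean isExpected, String detail) {
    if (isExpected) {
      // Routine, high-volume failures stay out of the INFO log.
      if (LOG.isDebugEnabled()) {
        LOG.debug("Operation failed with an expected result: {}", detail);
      }
    } else {
      LOG.info("Operation failed: {}", detail);
    }
  }
}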
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
index 44a12c2..3b14641 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/helpers/DatanodeIdYaml.java
@@ -23,6 +23,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.StandardCharsets;
 import java.util.LinkedHashMap;
 import java.util.Map;
 import java.util.UUID;
@@ -30,6 +31,7 @@ import java.util.UUID;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.MapUtils;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.yaml.snakeyaml.DumperOptions;
 import org.yaml.snakeyaml.Yaml;
 
@@ -57,7 +59,7 @@ public final class DatanodeIdYaml {
     Yaml yaml = new Yaml(options);
 
     try (Writer writer = new OutputStreamWriter(
-        new FileOutputStream(path), "UTF-8")) {
+        new FileOutputStream(path), StandardCharsets.UTF_8)) {
       yaml.dump(getDatanodeDetailsYaml(datanodeDetails), writer);
     }
   }
@@ -83,6 +85,12 @@ public final class DatanodeIdYaml {
           .setIpAddress(datanodeDetailsYaml.getIpAddress())
           .setHostName(datanodeDetailsYaml.getHostName())
           .setCertSerialId(datanodeDetailsYaml.getCertSerialId());
+      if (datanodeDetailsYaml.getPersistedOpState() != null) {
+        builder.setPersistedOpState(HddsProtos.NodeOperationalState.valueOf(
+            datanodeDetailsYaml.getPersistedOpState()));
+      }
+      builder.setPersistedOpStateExpiry(
+          datanodeDetailsYaml.getPersistedOpStateExpiryEpochSec());
 
       if (!MapUtils.isEmpty(datanodeDetailsYaml.getPortDetails())) {
         for (Map.Entry<String, Integer> portEntry :
@@ -106,6 +114,8 @@ public final class DatanodeIdYaml {
     private String ipAddress;
     private String hostName;
     private String certSerialId;
+    private String persistedOpState;
+    private long persistedOpStateExpiryEpochSec = 0;
     private Map<String, Integer> portDetails;
 
     public DatanodeDetailsYaml() {
@@ -114,11 +124,15 @@ public final class DatanodeIdYaml {
 
     private DatanodeDetailsYaml(String uuid, String ipAddress,
                                 String hostName, String certSerialId,
+                                String persistedOpState,
+                                long persistedOpStateExpiryEpochSec,
                                 Map<String, Integer> portDetails) {
       this.uuid = uuid;
       this.ipAddress = ipAddress;
       this.hostName = hostName;
       this.certSerialId = certSerialId;
+      this.persistedOpState = persistedOpState;
+      this.persistedOpStateExpiryEpochSec = persistedOpStateExpiryEpochSec;
       this.portDetails = portDetails;
     }
 
@@ -138,6 +152,14 @@ public final class DatanodeIdYaml {
       return certSerialId;
     }
 
+    public String getPersistedOpState() {
+      return persistedOpState;
+    }
+
+    public long getPersistedOpStateExpiryEpochSec() {
+      return persistedOpStateExpiryEpochSec;
+    }
+
     public Map<String, Integer> getPortDetails() {
       return portDetails;
     }
@@ -158,6 +180,14 @@ public final class DatanodeIdYaml {
       this.certSerialId = certSerialId;
     }
 
+    public void setPersistedOpState(String persistedOpState) {
+      this.persistedOpState = persistedOpState;
+    }
+
+    public void setPersistedOpStateExpiryEpochSec(long opStateExpiryEpochSec) {
+      this.persistedOpStateExpiryEpochSec = opStateExpiryEpochSec;
+    }
+
     public void setPortDetails(Map<String, Integer> portDetails) {
       this.portDetails = portDetails;
     }
@@ -173,11 +203,17 @@ public final class DatanodeIdYaml {
       }
     }
 
+    String persistedOpString = null;
+    if (datanodeDetails.getPersistedOpState() != null) {
+      persistedOpString = datanodeDetails.getPersistedOpState().name();
+    }
     return new DatanodeDetailsYaml(
         datanodeDetails.getUuid().toString(),
         datanodeDetails.getIpAddress(),
         datanodeDetails.getHostName(),
         datanodeDetails.getCertSerialId(),
+        persistedOpString,
+        datanodeDetails.getPersistedOpStateExpiryEpochSec(),
         portDetails);
   }
 }
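With the DatanodeIdYaml change above, datanode.id now carries the persisted operational state and its expiry next to the identity fields. A hedged SnakeYAML sketch of what gets serialized; the field names follow the DatanodeDetailsYaml getters in the diff, and all concrete values are illustrative:

import java.util.LinkedHashMap;
import java.util.Map;
import org.yaml.snakeyaml.DumperOptions;
import org.yaml.snakeyaml.Yaml;

public final class DatanodeIdYamlSketch {
  private DatanodeIdYamlSketch() { }

  public static String dumpExample() {
    DumperOptions options = new DumperOptions();
    options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK);
    Yaml yaml = new Yaml(options);

    Map<String, Object> dn = new LinkedHashMap<>();
    dn.put("uuid", "0fba-example");               // hypothetical
    dn.put("ipAddress", "10.0.0.1");              // hypothetical
    dn.put("hostName", "datanode-0.example");     // hypothetical
    dn.put("persistedOpState", "IN_SERVICE");
    dn.put("persistedOpStateExpiryEpochSec", 0L);
    return yaml.dump(dn);
  }
}

The read path in the diff does the inverse: if persistedOpState is present it is parsed back into HddsProtos.NodeOperationalState and applied to the DatanodeDetails builder along with the expiry.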
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
index ba34a29..19cc1e2 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerData.java
@@ -291,7 +291,7 @@ public abstract class ContainerData {
    * @return - boolean
    */
   public synchronized boolean isValid() {
-    return !(ContainerDataProto.State.INVALID == state);
+    return ContainerDataProto.State.INVALID != state;
   }
 
   /**
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java
index 74cbbc0..757d7e8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerDataYaml.java
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.ozone.container.common.impl;
 
-import java.beans.IntrospectionException;
 import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.FileInputStream;
@@ -201,8 +200,7 @@ public final class ContainerDataYaml {
    */
   private static class ContainerDataRepresenter extends Representer {
     @Override
-    protected Set<Property> getProperties(Class<? extends Object> type)
-        throws IntrospectionException {
+    protected Set<Property> getProperties(Class<? extends Object> type) {
       Set<Property> set = super.getProperties(type);
       Set<Property> filtered = new TreeSet<Property>();
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
index 685a1d9..5d181ec 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/report/ReportPublisher.java
@@ -69,7 +69,7 @@ public abstract class ReportPublisher<T extends GeneratedMessage>
   public void run() {
     publishReport();
     if (!executor.isShutdown() &&
-        !(context.getState() == DatanodeStates.SHUTDOWN)) {
+        (context.getState() != DatanodeStates.SHUTDOWN)) {
       executor.schedule(this,
           getReportFrequency(), TimeUnit.MILLISECONDS);
     }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
index c0e57e8..d0034df 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/DatanodeStateMachine.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.Dele
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.DeleteContainerCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.FinalizeNewLayoutVersionCommandHandler;
 import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.ReplicateContainerCommandHandler;
+import org.apache.hadoop.ozone.container.common.statemachine.commandhandler.SetNodeOperationalStateCommandHandler;
 import org.apache.hadoop.ozone.container.keyvalue.TarContainerPacker;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.container.replication.ContainerReplicator;
@@ -178,6 +179,7 @@ public class DatanodeStateMachine implements Closeable {
         .addHandler(new ClosePipelineCommandHandler())
         .addHandler(new CreatePipelineCommandHandler(conf))
         .addHandler(new FinalizeNewLayoutVersionCommandHandler())
+        .addHandler(new SetNodeOperationalStateCommandHandler(conf))
         .setConnectionManager(connectionManager)
         .setContainer(container)
         .setContext(context)
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
index 4cd769f..f87561a 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/StateContext.java
@@ -32,15 +32,23 @@ import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.function.Consumer;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Sets;
+import com.google.protobuf.Descriptors.Descriptor;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatus.Status;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.CommandStatusReportsProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.IncrementalContainerReportProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.NodeReportProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineAction;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
 import org.apache.hadoop.ozone.container.common.states.DatanodeState;
 import org.apache.hadoop.ozone.container.common.states.datanode.InitDatanodeState;
@@ -63,6 +71,27 @@ import org.slf4j.LoggerFactory;
  * Current Context of State Machine.
  */
 public class StateContext {
+
+  @VisibleForTesting
+  final static String CONTAINER_REPORTS_PROTO_NAME =
+      ContainerReportsProto.getDescriptor().getFullName();
+  @VisibleForTesting
+  final static String NODE_REPORT_PROTO_NAME =
+      NodeReportProto.getDescriptor().getFullName();
+  @VisibleForTesting
+  final static String PIPELINE_REPORTS_PROTO_NAME =
+      PipelineReportsProto.getDescriptor().getFullName();
+  @VisibleForTesting
+  final static String COMMAND_STATUS_REPORTS_PROTO_NAME =
+      CommandStatusReportsProto.getDescriptor().getFullName();
+  @VisibleForTesting
+  final static String INCREMENTAL_CONTAINER_REPORT_PROTO_NAME =
+      IncrementalContainerReportProto.getDescriptor().getFullName();
+  // Accepted types of reports that can be queued to incrementalReportsQueue
+  private final static Set<String> ACCEPTED_INCREMENTAL_REPORT_TYPE_SET =
+      Sets.newHashSet(COMMAND_STATUS_REPORTS_PROTO_NAME,
+          INCREMENTAL_CONTAINER_REPORT_PROTO_NAME);
+
   static final Logger LOG =
       LoggerFactory.getLogger(StateContext.class);
   private final Queue<SCMCommand> commandQueue;
@@ -72,7 +101,13 @@ public class StateContext {
   private final AtomicLong stateExecutionCount;
   private final ConfigurationSource conf;
   private final Set<InetSocketAddress> endpoints;
-  private final Map<InetSocketAddress, List<GeneratedMessage>> reports;
+  // Only the latest full report of each type is kept
+  private final AtomicReference<GeneratedMessage> containerReports;
+  private final AtomicReference<GeneratedMessage> nodeReport;
+  private final AtomicReference<GeneratedMessage> pipelineReports;
+  // Incremental reports are queued in the map below
+  private final Map<InetSocketAddress, List<GeneratedMessage>>
+      incrementalReportsQueue;
   private final Map<InetSocketAddress, Queue<ContainerAction>> containerActions;
   private final Map<InetSocketAddress, Queue<PipelineAction>> pipelineActions;
   private DatanodeStateMachine.DatanodeStates state;
@@ -102,7 +137,10 @@ public class StateContext {
     this.parent = parent;
     commandQueue = new LinkedList<>();
     cmdStatusMap = new ConcurrentHashMap<>();
-    reports = new HashMap<>();
+    incrementalReportsQueue = new HashMap<>();
+    containerReports = new AtomicReference<>();
+    nodeReport = new AtomicReference<>();
+    pipelineReports = new AtomicReference<>();
     endpoints = new HashSet<>();
     containerActions = new HashMap<>();
     pipelineActions = new HashMap<>();
@@ -190,17 +228,34 @@ public class StateContext {
   public boolean getShutdownOnError() {
     return shutdownOnError;
   }
+
   /**
    * Adds the report to report queue.
    *
    * @param report report to be added
    */
   public void addReport(GeneratedMessage report) {
-    if (report != null) {
-      synchronized (reports) {
-        for (InetSocketAddress endpoint : endpoints) {
-          reports.get(endpoint).add(report);
+    if (report == null) {
+      return;
+    }
+    final Descriptor descriptor = report.getDescriptorForType();
+    Preconditions.checkState(descriptor != null);
+    final String reportType = descriptor.getFullName();
+    Preconditions.checkState(reportType != null);
+    for (InetSocketAddress endpoint : endpoints) {
+      if (reportType.equals(CONTAINER_REPORTS_PROTO_NAME)) {
+        containerReports.set(report);
+      } else if (reportType.equals(NODE_REPORT_PROTO_NAME)) {
+        nodeReport.set(report);
+      } else if (reportType.equals(PIPELINE_REPORTS_PROTO_NAME)) {
+        pipelineReports.set(report);
+      } else if (ACCEPTED_INCREMENTAL_REPORT_TYPE_SET.contains(reportType)) {
+        synchronized (incrementalReportsQueue) {
+          incrementalReportsQueue.get(endpoint).add(report);
         }
+      } else {
+        throw new IllegalArgumentException(
+            "Unidentified report message type: " + reportType);
       }
     }
   }
@@ -214,9 +269,24 @@ public class StateContext {
    */
   public void putBackReports(List<GeneratedMessage> reportsToPutBack,
                              InetSocketAddress endpoint) {
-    synchronized (reports) {
-      if (reports.containsKey(endpoint)){
-        reports.get(endpoint).addAll(0, reportsToPutBack);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("endpoint: {}, size of reportsToPutBack: {}",
+          endpoint, reportsToPutBack.size());
+    }
+    // We don't expect too many reports to be put back
+    for (GeneratedMessage report : reportsToPutBack) {
+      final Descriptor descriptor = report.getDescriptorForType();
+      Preconditions.checkState(descriptor != null);
+      final String reportType = descriptor.getFullName();
+      Preconditions.checkState(reportType != null);
+      if (!ACCEPTED_INCREMENTAL_REPORT_TYPE_SET.contains(reportType)) {
+        throw new IllegalArgumentException(
+            "Unaccepted report message type: " + reportType);
+      }
+    }
+    synchronized (incrementalReportsQueue) {
+      if (incrementalReportsQueue.containsKey(endpoint)){
+        incrementalReportsQueue.get(endpoint).addAll(0, reportsToPutBack);
       }
     }
   }
@@ -232,6 +302,22 @@ public class StateContext {
     return getReports(endpoint, Integer.MAX_VALUE);
   }
 
+  List<GeneratedMessage> getIncrementalReports(
+      InetSocketAddress endpoint, int maxLimit) {
+    List<GeneratedMessage> reportsToReturn = new LinkedList<>();
+    synchronized (incrementalReportsQueue) {
+      List<GeneratedMessage> reportsForEndpoint =
+          incrementalReportsQueue.get(endpoint);
+      if (reportsForEndpoint != null) {
+        List<GeneratedMessage> tempList = reportsForEndpoint.subList(
+            0, min(reportsForEndpoint.size(), maxLimit));
+        reportsToReturn.addAll(tempList);
+        tempList.clear();
+      }
+    }
+    return reportsToReturn;
+  }
+
   /**
    * Returns available reports from the report queue with a max limit on
    * list size, or empty list if the queue is empty.
@@ -240,15 +326,19 @@ public class StateContext {
    */
   public List<GeneratedMessage> getReports(InetSocketAddress endpoint,
                                            int maxLimit) {
-    List<GeneratedMessage> reportsToReturn = new LinkedList<>();
-    synchronized (reports) {
-      List<GeneratedMessage> reportsForEndpoint = reports.get(endpoint);
-      if (reportsForEndpoint != null) {
-        List<GeneratedMessage> tempList = reportsForEndpoint.subList(
-            0, min(reportsForEndpoint.size(), maxLimit));
-        reportsToReturn.addAll(tempList);
-        tempList.clear();
-      }
+    List<GeneratedMessage> reportsToReturn =
+        getIncrementalReports(endpoint, maxLimit);
+    GeneratedMessage report = containerReports.get();
+    if (report != null) {
+      reportsToReturn.add(report);
+    }
+    report = nodeReport.get();
+    if (report != null) {
+      reportsToReturn.add(report);
+    }
+    report = pipelineReports.get();
+    if (report != null) {
+      reportsToReturn.add(report);
     }
     return reportsToReturn;
   }
@@ -580,7 +670,22 @@ public class StateContext {
       this.endpoints.add(endpoint);
       this.containerActions.put(endpoint, new LinkedList<>());
       this.pipelineActions.put(endpoint, new LinkedList<>());
-      this.reports.put(endpoint, new LinkedList<>());
+      this.incrementalReportsQueue.put(endpoint, new LinkedList<>());
     }
   }
+
+  @VisibleForTesting
+  public GeneratedMessage getContainerReports() {
+    return containerReports.get();
+  }
+
+  @VisibleForTesting
+  public GeneratedMessage getNodeReport() {
+    return nodeReport.get();
+  }
+
+  @VisibleForTesting
+  public GeneratedMessage getPipelineReports() {
+    return pipelineReports.get();
+  }
 }
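To summarize the StateContext refactor above: container, node and pipeline reports are full snapshots, so only the latest of each is kept in an AtomicReference, while command-status and incremental container reports must all reach SCM and therefore stay in a per-endpoint queue. A simplified, hypothetical model of that split (generic type R instead of the protobuf GeneratedMessage):

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

public class ReportBuffer<R> {
  private final AtomicReference<R> latestFullReport = new AtomicReference<>();
  private final List<R> incrementalReports = new LinkedList<>();

  public void addFullReport(R report) {
    // Overwrites any older full report; only the newest is ever sent.
    latestFullReport.set(report);
  }

  public void addIncrementalReport(R report) {
    // Incremental reports must all be delivered, so they are queued.
    synchronized (incrementalReports) {
      incrementalReports.add(report);
    }
  }

  public List<R> drain(int maxIncremental) {
    List<R> out = new ArrayList<>();
    synchronized (incrementalReports) {
      int n = Math.min(incrementalReports.size(), maxIncremental);
      List<R> head = incrementalReports.subList(0, n);
      out.addAll(head);
      head.clear();  // remove the drained prefix from the queue
    }
    R full = latestFullReport.get();
    if (full != null) {
      out.add(full);
    }
    return out;
  }
}

getReports() above drains the incremental queue first and then appends whichever full snapshots are currently set, which is what drain() mimics here.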
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
index 91ab4c9..10e6797 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/DeleteBlocksCommandHandler.java
@@ -42,6 +42,8 @@ import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
 import org.apache.hadoop.ozone.container.common.statemachine
     .SCMConnectionManager;
 import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStore;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaTwoImpl;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.protocol.commands.CommandStatus;
 import org.apache.hadoop.ozone.protocol.commands.DeleteBlockCommandStatus;
@@ -59,6 +61,8 @@ import java.util.function.Consumer;
 
 import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos
     .Result.CONTAINER_NOT_FOUND;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V1;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V2;
 
 /**
  * Handle block deletion commands.
@@ -116,6 +120,7 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
             DeleteBlockTransactionResult.newBuilder();
         txResultBuilder.setTxID(entry.getTxID());
         long containerId = entry.getContainerID();
+        int newDeletionBlocks = 0;
         try {
           Container cont = containerSet.getContainer(containerId);
           if (cont == null) {
@@ -129,7 +134,16 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
                 cont.getContainerData();
             cont.writeLock();
             try {
-              deleteKeyValueContainerBlocks(containerData, entry);
+              if (containerData.getSchemaVersion().equals(SCHEMA_V1)) {
+                markBlocksForDeletionSchemaV1(containerData, entry);
+              } else if (containerData.getSchemaVersion().equals(SCHEMA_V2)) {
+                markBlocksForDeletionSchemaV2(containerData, entry,
+                    newDeletionBlocks, entry.getTxID());
+              } else {
+                throw new UnsupportedOperationException(
+                    "Only schema version 1 and schema version 2 are "
+                        + "supported.");
+              }
             } finally {
               cont.writeUnlock();
             }
@@ -187,107 +201,140 @@ public class DeleteBlocksCommandHandler implements CommandHandler {
    * @param delTX a block deletion transaction.
    * @throws IOException if I/O error occurs.
    */
-  private void deleteKeyValueContainerBlocks(
-      KeyValueContainerData containerData, DeletedBlocksTransaction delTX)
-      throws IOException {
+
+  private void markBlocksForDeletionSchemaV2(
+      KeyValueContainerData containerData, DeletedBlocksTransaction delTX,
+      int newDeletionBlocks, long txnID) throws IOException {
     long containerId = delTX.getContainerID();
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Processing Container : {}, DB path : {}", containerId,
-          containerData.getMetadataPath());
+    if (!isTxnIdValid(containerId, containerData, delTX)) {
+      return;
     }
-
-    if (delTX.getTxID() < containerData.getDeleteTransactionId()) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("Ignoring delete blocks for containerId: %d."
-                + " Outdated delete transactionId %d < %d", containerId,
-            delTX.getTxID(), containerData.getDeleteTransactionId()));
+    try (ReferenceCountedDB containerDB = BlockUtils
+        .getDB(containerData, conf)) {
+      DatanodeStore ds = containerDB.getStore();
+      DatanodeStoreSchemaTwoImpl dnStoreTwoImpl =
+          (DatanodeStoreSchemaTwoImpl) ds;
+      Table<Long, DeletedBlocksTransaction> delTxTable =
+          dnStoreTwoImpl.getDeleteTransactionTable();
+      try (BatchOperation batch = containerDB.getStore().getBatchHandler()
+          .initBatchOperation()) {
+        delTxTable.putWithBatch(batch, txnID, delTX);
+        newDeletionBlocks += delTX.getLocalIDList().size();
+        updateMetaData(containerData, delTX, newDeletionBlocks, containerDB,
+            batch);
+        containerDB.getStore().getBatchHandler().commitBatchOperation(batch);
       }
-      return;
     }
+  }
 
+  private void markBlocksForDeletionSchemaV1(
+      KeyValueContainerData containerData, DeletedBlocksTransaction delTX)
+      throws IOException {
+    long containerId = delTX.getContainerID();
+    if (!isTxnIdValid(containerId, containerData, delTX)) {
+      return;
+    }
     int newDeletionBlocks = 0;
-    try(ReferenceCountedDB containerDB =
-            BlockUtils.getDB(containerData, conf)) {
+    try (ReferenceCountedDB containerDB = BlockUtils
+        .getDB(containerData, conf)) {
       Table<String, BlockData> blockDataTable =
-              containerDB.getStore().getBlockDataTable();
+          containerDB.getStore().getBlockDataTable();
       Table<String, ChunkInfoList> deletedBlocksTable =
-              containerDB.getStore().getDeletedBlocksTable();
+          containerDB.getStore().getDeletedBlocksTable();
 
-      for (Long blkLong : delTX.getLocalIDList()) {
-        String blk = blkLong.toString();
-        BlockData blkInfo = blockDataTable.get(blk);
-        if (blkInfo != null) {
-          String deletingKey = OzoneConsts.DELETING_KEY_PREFIX + blk;
-
-          if (blockDataTable.get(deletingKey) != null
-              || deletedBlocksTable.get(blk) != null) {
-            if (LOG.isDebugEnabled()) {
-              LOG.debug(String.format(
-                  "Ignoring delete for block %s in container %d."
-                      + " Entry already added.", blk, containerId));
+      try (BatchOperation batch = containerDB.getStore().getBatchHandler()
+          .initBatchOperation()) {
+        for (Long blkLong : delTX.getLocalIDList()) {
+          String blk = blkLong.toString();
+          BlockData blkInfo = blockDataTable.get(blk);
+          if (blkInfo != null) {
+            String deletingKey = OzoneConsts.DELETING_KEY_PREFIX + blk;
+            if (blockDataTable.get(deletingKey) != null
+                || deletedBlocksTable.get(blk) != null) {
+              if (LOG.isDebugEnabled()) {
+                LOG.debug(String.format(
+                    "Ignoring delete for block %s in container %d."
+                        + " Entry already added.", blk, containerId));
+              }
+              continue;
             }
-            continue;
-          }
-
-          try(BatchOperation batch = containerDB.getStore()
-              .getBatchHandler().initBatchOperation()) {
             // Found the block in container db,
             // use an atomic update to change its state to deleting.
             blockDataTable.putWithBatch(batch, deletingKey, blkInfo);
             blockDataTable.deleteWithBatch(batch, blk);
-            containerDB.getStore().getBatchHandler()
-                .commitBatchOperation(batch);
             newDeletionBlocks++;
             if (LOG.isDebugEnabled()) {
               LOG.debug("Transited Block {} to DELETING state in container {}",
                   blk, containerId);
             }
-          } catch (IOException e) {
-            // if some blocks failed to delete, we fail this TX,
-            // without sending this ACK to SCM, SCM will resend the TX
-            // with a certain number of retries.
-            throw new IOException(
-                "Failed to delete blocks for TXID = " + delTX.getTxID(), e);
-          }
-        } else {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Block {} not found or already under deletion in"
-                + " container {}, skip deleting it.", blk, containerId);
+          } else {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Block {} not found or already under deletion in"
+                  + " container {}, skip deleting it.", blk, containerId);
+            }
           }
         }
+        updateMetaData(containerData, delTX, newDeletionBlocks, containerDB,
+            batch);
+        containerDB.getStore().getBatchHandler().commitBatchOperation(batch);
+      } catch (IOException e) {
+        // if some blocks failed to delete, we fail this TX,
+        // without sending this ACK to SCM, SCM will resend the TX
+        // with a certain number of retries.
+        throw new IOException(
+            "Failed to delete blocks for TXID = " + delTX.getTxID(), e);
       }
+    }
+  }
 
-      if (newDeletionBlocks > 0) {
-        // Finally commit the DB counters.
-        try(BatchOperation batchOperation =
-                containerDB.getStore().getBatchHandler().initBatchOperation()) {
-          Table< String, Long > metadataTable = containerDB.getStore()
-              .getMetadataTable();
+  private void updateMetaData(KeyValueContainerData containerData,
+      DeletedBlocksTransaction delTX, int newDeletionBlocks,
+      ReferenceCountedDB containerDB, BatchOperation batchOperation)
+      throws IOException {
+    if (newDeletionBlocks > 0) {
+      // Finally commit the DB counters.
+      Table<String, Long> metadataTable =
+          containerDB.getStore().getMetadataTable();
 
-          // In memory is updated only when existing delete transactionID is
-          // greater.
-          if (delTX.getTxID() > containerData.getDeleteTransactionId()) {
-            // Update in DB pending delete key count and delete transaction ID.
-            metadataTable.putWithBatch(batchOperation,
-                OzoneConsts.DELETE_TRANSACTION_KEY, delTX.getTxID());
-          }
+      // In memory is updated only when existing delete transactionID is
+      // greater.
+      if (delTX.getTxID() > containerData.getDeleteTransactionId()) {
+        // Update in DB pending delete key count and delete transaction ID.
+        metadataTable
+            .putWithBatch(batchOperation, OzoneConsts.DELETE_TRANSACTION_KEY,
+                delTX.getTxID());
+      }
 
-          long pendingDeleteBlocks =
-              containerData.getNumPendingDeletionBlocks() + newDeletionBlocks;
-          metadataTable.putWithBatch(batchOperation,
-              OzoneConsts.PENDING_DELETE_BLOCK_COUNT, pendingDeleteBlocks);
+      long pendingDeleteBlocks =
+          containerData.getNumPendingDeletionBlocks() + newDeletionBlocks;
+      metadataTable
+          .putWithBatch(batchOperation, OzoneConsts.PENDING_DELETE_BLOCK_COUNT,
+              pendingDeleteBlocks);
 
-          containerDB.getStore().getBatchHandler()
-              .commitBatchOperation(batchOperation);
+      // update pending deletion blocks count and delete transaction ID in
+      // in-memory container status
+      containerData.updateDeleteTransactionId(delTX.getTxID());
+      containerData.incrPendingDeletionBlocks(newDeletionBlocks);
+    }
+  }
 
-          // update pending deletion blocks count and delete transaction ID in
-          // in-memory container status
-          containerData.updateDeleteTransactionId(delTX.getTxID());
+  private boolean isTxnIdValid(long containerId,
+      KeyValueContainerData containerData, DeletedBlocksTransaction delTX) {
+    boolean b = true;
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Processing Container : {}, DB path : {}", containerId,
+          containerData.getMetadataPath());
+    }
 
-          containerData.incrPendingDeletionBlocks(newDeletionBlocks);
-        }
+    if (delTX.getTxID() < containerData.getDeleteTransactionId()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("Ignoring delete blocks for containerId: %d."
+                + " Outdated delete transactionId %d < %d", containerId,
+            delTX.getTxID(), containerData.getDeleteTransactionId()));
       }
+      b = false;
     }
+    return b;
   }
 
   @Override
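The DeleteBlocksCommandHandler refactor above replaces the per-block batch commit in the schema V1 path with a single batch carrying every block transition plus the container counters, and adds a schema V2 path that only records the transaction. A hedged sketch of the batching shape, using hypothetical key/value interfaces in place of the Ozone Table/BatchOperation types (key names are illustrative):

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

// Hypothetical stand-ins for the Ozone Table/BatchOperation abstractions.
interface KvBatch extends AutoCloseable {
  void put(String key, byte[] value);
  void delete(String key);
  @Override
  void close();
}

interface KvStore {
  KvBatch newBatch();
  byte[] get(String key) throws IOException;
  void commit(KvBatch batch) throws IOException;
}

public final class BatchedDeleteSketch {
  private BatchedDeleteSketch() { }

  // Every "mark block as deleting" mutation and the pending-delete counter
  // go into one batch that is committed once, instead of one commit per
  // block as before.
  public static void markBlocksForDeletion(KvStore store, List<String> blocks)
      throws IOException {
    int newDeletionBlocks = 0;
    try (KvBatch batch = store.newBatch()) {
      for (String blk : blocks) {
        byte[] info = store.get(blk);
        if (info == null) {
          continue;  // block unknown or already marked, skip it
        }
        batch.put("#deleting#" + blk, info);  // illustrative key prefix
        batch.delete(blk);
        newDeletionBlocks++;
      }
      if (newDeletionBlocks > 0) {
        batch.put("#PENDINGDELETEBLOCKCOUNT",  // illustrative metadata key
            Integer.toString(newDeletionBlocks)
                .getBytes(StandardCharsets.UTF_8));
      }
      store.commit(batch);  // single commit for the whole transaction
    }
  }
}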
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java
new file mode 100644
index 0000000..4a46d5f
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/SetNodeOperationalStateCommandHandler.java
@@ -0,0 +1,157 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.container.common.statemachine.commandhandler;
+
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.utils.HddsServerUtil;
+import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
+import org.apache.hadoop.ozone.container.common.statemachine.SCMConnectionManager;
+import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
+import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
+import org.apache.hadoop.ozone.protocol.commands.SCMCommand;
+import org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hdds.protocol.proto.
+    StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+
+/**
+ * Handle the SetNodeOperationalStateCommand sent from SCM to the datanode
+ * to persist the current operational state.
+ */
+public class SetNodeOperationalStateCommandHandler implements CommandHandler {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(SetNodeOperationalStateCommandHandler.class);
+  private final ConfigurationSource conf;
+  private final AtomicInteger invocationCount = new AtomicInteger(0);
+  private final AtomicLong totalTime = new AtomicLong(0);
+
+  /**
+   * Set Node State command handler.
+   *
+   * @param conf - Configuration for the datanode.
+   */
+  public SetNodeOperationalStateCommandHandler(ConfigurationSource conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * Handles a given SCM command.
+   *
+   * @param command - SCM Command
+   * @param container - Ozone Container.
+   * @param context - Current Context.
+   * @param connectionManager - The SCMs that we are talking to.
+   */
+  @Override
+  public void handle(SCMCommand command, OzoneContainer container,
+      StateContext context, SCMConnectionManager connectionManager) {
+    long startTime = Time.monotonicNow();
+    invocationCount.incrementAndGet();
+    StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto
+        setNodeCmdProto = null;
+
+    if (command.getType() != Type.setNodeOperationalStateCommand) {
+      LOG.warn("Skipping handling command, expected command "
+              + "type {} but found {}",
+          Type.setNodeOperationalStateCommand, command.getType());
+      return;
+    }
+    SetNodeOperationalStateCommand setNodeCmd =
+        (SetNodeOperationalStateCommand) command;
+    setNodeCmdProto = setNodeCmd.getProto();
+    DatanodeDetails dni = context.getParent().getDatanodeDetails();
+    dni.setPersistedOpState(setNodeCmdProto.getNodeOperationalState());
+    dni.setPersistedOpStateExpiryEpochSec(
+        setNodeCmd.getStateExpiryEpochSeconds());
+    try {
+      persistDatanodeDetails(dni);
+    } catch (IOException ioe) {
+      LOG.error("Failed to persist the datanode state", ioe);
+      // TODO - this should probably be raised, but it will break the command
+      //      handler interface.
+    }
+    totalTime.addAndGet(Time.monotonicNow() - startTime);
+  }
+
+  // TODO - this duplicates code in HddsDatanodeService and InitDatanodeState
+  //        Need to refactor.
+  private void persistDatanodeDetails(DatanodeDetails dnDetails)
+      throws IOException {
+    String idFilePath = HddsServerUtil.getDatanodeIdFilePath(conf);
+    if (idFilePath == null || idFilePath.isEmpty()) {
+      LOG.error("A valid path is needed for config setting {}",
+          ScmConfigKeys.OZONE_SCM_DATANODE_ID_DIR);
+      throw new IllegalArgumentException(
+          ScmConfigKeys.OZONE_SCM_DATANODE_ID_DIR +
+              " must be defined. See" +
+              " https://wiki.apache.org/hadoop/Ozone#Configuration" +
+              " for details on configuring Ozone.");
+    }
+
+    Preconditions.checkNotNull(idFilePath);
+    File idFile = new File(idFilePath);
+    ContainerUtils.writeDatanodeDetailsTo(dnDetails, idFile);
+  }
+
+  /**
+   * Returns the command type that this command handler handles.
+   *
+   * @return Type
+   */
+  @Override
+  public StorageContainerDatanodeProtocolProtos.SCMCommandProto.Type
+      getCommandType() {
+    return Type.setNodeOperationalStateCommand;
+  }
+
+  /**
+   * Returns number of times this handler has been invoked.
+   *
+   * @return int
+   */
+  @Override
+  public int getInvocationCount() {
+    return invocationCount.intValue();
+  }
+
+  /**
+   * Returns the average time this function takes to run.
+   *
+   * @return long
+   */
+  @Override
+  public long getAverageRunTime() {
+    final int invocations = invocationCount.get();
+    return invocations == 0 ?
+        0 : totalTime.get() / invocations;
+  }
+}
\ No newline at end of file
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
index 4a40496..7c6819d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/HeartbeatEndpointTask.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
 import org.apache.hadoop.ozone.protocol.commands.FinalizeNewLayoutVersionCommand;
 import org.apache.hadoop.ozone.protocol.commands.ReplicateContainerCommand;
 
+import org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -177,15 +178,15 @@ public class HeartbeatEndpointTask
       if (LOG.isDebugEnabled()) {
         LOG.debug("Sending heartbeat message :: {}", request.toString());
       }
-      SCMHeartbeatResponseProto reponse = rpcEndpoint.getEndPoint()
+      SCMHeartbeatResponseProto response = rpcEndpoint.getEndPoint()
           .sendHeartbeat(request);
-      processResponse(reponse, datanodeDetailsProto);
+      processResponse(response, datanodeDetailsProto);
       rpcEndpoint.setLastSuccessfulHeartbeat(ZonedDateTime.now());
       rpcEndpoint.zeroMissedCount();
     } catch (IOException ex) {
+      Preconditions.checkState(requestBuilder != null);
       // put back the reports which failed to be sent
       putBackReports(requestBuilder);
-
       rpcEndpoint.logIfNeeded(ex);
     } finally {
       rpcEndpoint.unlock();
@@ -196,12 +197,9 @@ public class HeartbeatEndpointTask
   // TODO: Make it generic.
   private void putBackReports(SCMHeartbeatRequestProto.Builder requestBuilder) {
     List<GeneratedMessage> reports = new LinkedList<>();
-    if (requestBuilder.hasContainerReport()) {
-      reports.add(requestBuilder.getContainerReport());
-    }
-    if (requestBuilder.hasNodeReport()) {
-      reports.add(requestBuilder.getNodeReport());
-    }
+    // We only put back CommandStatusReports and IncrementalContainerReport
+    // because those are incremental. Container/Node/PipelineReport are
+    // accumulative so we can keep only the latest of each.
     if (requestBuilder.getCommandStatusReportsCount() != 0) {
       reports.addAll(requestBuilder.getCommandStatusReportsList());
     }
@@ -229,6 +227,7 @@ public class HeartbeatEndpointTask
           } else {
             requestBuilder.setField(descriptor, report);
           }
+          break;
         }
       }
     }
@@ -377,6 +376,17 @@ public class HeartbeatEndpointTask
         }
         this.context.addCommand(finalizeNewLayoutVersionCommand);
         break;
+      case setNodeOperationalStateCommand:
+        SetNodeOperationalStateCommand setNodeOperationalStateCommand =
+            SetNodeOperationalStateCommand.getFromProtobuf(
+                commandResponseProto.getSetNodeOperationalStateCommandProto());
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Received SCM set operational state command. State: {} " +
+              "Expiry: {}", setNodeOperationalStateCommand.getOpState(),
+              setNodeOperationalStateCommand.getStateExpiryEpochSeconds());
+        }
+        this.context.addCommand(setNodeOperationalStateCommand);
+        break;
       default:
         throw new IllegalArgumentException("Unknown response : "
             + commandResponseProto.getCommandType().name());
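The putBackReports() change above stops re-queueing full container and node reports after a failed heartbeat, since StateContext now always holds the latest snapshot of those anyway; only the incremental report types are worth putting back. A rough sketch of that filter, where the two type-name strings are illustrative placeholders for the descriptor full names computed in StateContext:

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import com.google.protobuf.GeneratedMessage;

public final class PutBackFilterSketch {
  private PutBackFilterSketch() { }

  // Illustrative placeholders; the real code derives these from the
  // protobuf descriptors of CommandStatusReportsProto and
  // IncrementalContainerReportProto.
  private static final Set<String> INCREMENTAL_TYPES = new HashSet<>(
      Arrays.asList(
          "example.CommandStatusReportsProto",
          "example.IncrementalContainerReportProto"));

  public static List<GeneratedMessage> incrementalOnly(
      List<GeneratedMessage> reports) {
    return reports.stream()
        .filter(r -> INCREMENTAL_TYPES.contains(
            r.getDescriptorForType().getFullName()))
        .collect(Collectors.toList());
  }
}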
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
index 3647af1..d59efdc 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/XceiverServerGrpc.java
@@ -45,9 +45,7 @@ import com.google.common.base.Preconditions;
 import io.opentracing.Scope;
 import io.opentracing.Span;
 import io.opentracing.util.GlobalTracer;
-import org.apache.ratis.thirdparty.io.grpc.BindableService;
 import org.apache.ratis.thirdparty.io.grpc.Server;
-import org.apache.ratis.thirdparty.io.grpc.ServerBuilder;
 import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors;
 import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts;
 import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder;
@@ -78,8 +76,7 @@ public final class XceiverServerGrpc implements XceiverServerSpi {
    */
   public XceiverServerGrpc(DatanodeDetails datanodeDetails,
       ConfigurationSource conf,
-      ContainerDispatcher dispatcher, CertificateClient caClient,
-      BindableService... additionalServices) {
+      ContainerDispatcher dispatcher, CertificateClient caClient) {
     Preconditions.checkNotNull(conf);
 
     this.id = datanodeDetails.getUuid();
@@ -92,17 +89,10 @@ public final class XceiverServerGrpc implements XceiverServerSpi {
       this.port = 0;
     }
 
-    NettyServerBuilder nettyServerBuilder =
-        ((NettyServerBuilder) ServerBuilder.forPort(port))
-            .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE);
-
-    GrpcServerInterceptor tracingInterceptor = new GrpcServerInterceptor();
-    nettyServerBuilder.addService(ServerInterceptors.intercept(
-        new GrpcXceiverService(dispatcher), tracingInterceptor));
-
-    for (BindableService service : additionalServices) {
-      nettyServerBuilder.addService(service);
-    }
+    NettyServerBuilder nettyServerBuilder = NettyServerBuilder.forPort(port)
+        .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
+        .addService(ServerInterceptors.intercept(
+            new GrpcXceiverService(dispatcher), new GrpcServerInterceptor()));
 
     SecurityConfig secConf = new SecurityConfig(conf);
     if (secConf.isGrpcTlsEnabled()) {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
index 42373bd..5182279 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/ContainerStateMachine.java
@@ -299,8 +299,8 @@ public class ContainerStateMachine extends BaseStateMachine {
             snapshotFile);
         throw ioe;
       }
-      LOG.info("{}: Finished taking a snapshot at:{} file:{} time:{}", gid, ti,
-          snapshotFile, (Time.monotonicNow() - startTime));
+      LOG.info("{}: Finished taking a snapshot at:{} file:{} took: {} ms",
+          gid, ti, snapshotFile, (Time.monotonicNow() - startTime));
       return ti.getIndex();
     }
     return -1;
@@ -418,9 +418,9 @@ public class ContainerStateMachine extends BaseStateMachine {
       ContainerCommandRequestProto requestProto, long entryIndex, long term,
       long startTime) {
     final WriteChunkRequestProto write = requestProto.getWriteChunk();
-    RaftServer server = ratisServer.getServer();
     try {
-      if (server.getDivision(gid).getInfo().isLeader()) {
+      RaftServer.Division division = ratisServer.getServerDivision();
+      if (division.getInfo().isLeader()) {
         stateMachineDataCache.put(entryIndex, write.getData());
       }
     } catch (InterruptedException ioe) {
@@ -445,7 +445,7 @@ public class ContainerStateMachine extends BaseStateMachine {
             return runCommand(requestProto, context);
           } catch (Exception e) {
             LOG.error("{}: writeChunk writeStateMachineData failed: blockId" +
-                "{} logIndex {} chunkName {} {}", gid, write.getBlockID(),
+                "{} logIndex {} chunkName {}", gid, write.getBlockID(),
                 entryIndex, write.getChunkData().getChunkName(), e);
             metrics.incNumWriteDataFails();
             // write chunks go in parallel. It's possible that one write chunk
@@ -458,8 +458,8 @@ public class ContainerStateMachine extends BaseStateMachine {
 
     writeChunkFutureMap.put(entryIndex, writeChunkFuture);
     if (LOG.isDebugEnabled()) {
-      LOG.error("{}: writeChunk writeStateMachineData : blockId" +
-              "{} logIndex {} chunkName {} {}", gid, write.getBlockID(),
+      LOG.debug("{}: writeChunk writeStateMachineData : blockId" +
+              "{} logIndex {} chunkName {}", gid, write.getBlockID(),
           entryIndex, write.getChunkData().getChunkName());
     }
     // Remove the future once it finishes execution from the
@@ -760,7 +760,8 @@ public class ContainerStateMachine extends BaseStateMachine {
             }
           }, getCommandExecutor(requestProto));
       future.thenApply(r -> {
-        if (trx.getServerRole() == RaftPeerRole.LEADER) {
+        if (trx.getServerRole() == RaftPeerRole.LEADER
+            && trx.getStateMachineContext() != null) {
           long startTime = (long) trx.getStateMachineContext();
           metrics.incPipelineLatency(cmdType,
               Time.monotonicNowNanos() - startTime);
@@ -808,6 +809,12 @@ public class ContainerStateMachine extends BaseStateMachine {
         }
         return applyTransactionFuture;
       }).whenComplete((r, t) ->  {
+        if (t != null) {
+          stateMachineHealthy.set(false);
+          LOG.error("gid {} : ApplyTransaction failed. cmd {} logIndex "
+                  + "{} exception {}", gid, requestProto.getCmdType(),
+              index, t);
+        }
         applyTransactionSemaphore.release();
         metrics.recordApplyTransactionCompletion(
             Time.monotonicNowNanos() - applyTxnStartTime);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
index eca0b1c..faa69a8 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/transport/server/ratis/XceiverServerRatis.java
@@ -515,6 +515,15 @@ public final class XceiverServerRatis implements XceiverServerSpi {
     return server;
   }
 
+  public RaftServer.Division getServerDivision() throws IOException {
+    return getServerDivision(server.getGroupIds().iterator().next());
+  }
+
+  public RaftServer.Division getServerDivision(RaftGroupId id)
+      throws IOException {
+    return server.getDivision(id);
+  }
+
   private void processReply(RaftClientReply reply) throws IOException {
     // NotLeader exception is thrown only when the raft server to which the
     // request is submitted is not the leader. The request will be rejected
@@ -596,10 +605,16 @@ public final class XceiverServerRatis implements XceiverServerSpi {
   private RaftClientRequest createRaftClientRequest(
       ContainerCommandRequestProto request, HddsProtos.PipelineID pipelineID,
       RaftClientRequest.Type type) {
-    return new RaftClientRequest(clientId, server.getId(),
-        RaftGroupId.valueOf(PipelineID.getFromProtobuf(pipelineID).getId()),
-        nextCallId(), ContainerCommandRequestMessage.toMessage(request, null),
-        type, null);
+    return RaftClientRequest.newBuilder()
+        .setClientId(clientId)
+        .setServerId(server.getId())
+        .setGroupId(
+            RaftGroupId.valueOf(
+                PipelineID.getFromProtobuf(pipelineID).getId()))
+        .setCallId(nextCallId())
+        .setMessage(ContainerCommandRequestMessage.toMessage(request, null))
+        .setType(type)
+        .build();
   }
 
   private GroupInfoRequest createGroupInfoRequest(
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
index a239b5f..53d6162 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueContainer.java
@@ -516,22 +516,38 @@ public class KeyValueContainer implements Container<KeyValueContainerData> {
   @Override
   public void exportContainerData(OutputStream destination,
       ContainerPacker<KeyValueContainerData> packer) throws IOException {
-    // Closed/ Quasi closed containers are considered for replication by
-    // replication manager if they are under-replicated.
-    ContainerProtos.ContainerDataProto.State state =
-        getContainerData().getState();
-    if (!(state == ContainerProtos.ContainerDataProto.State.CLOSED ||
-        state == ContainerDataProto.State.QUASI_CLOSED)) {
-      throw new IllegalStateException(
-          "Only closed/quasi closed containers could be exported: " +
-              "Where as ContainerId="
-              + getContainerData().getContainerID() + " is in state " + state);
+    writeLock();
+    try {
+      // Closed/ Quasi closed containers are considered for replication by
+      // replication manager if they are under-replicated.
+      ContainerProtos.ContainerDataProto.State state =
+          getContainerData().getState();
+      if (!(state == ContainerProtos.ContainerDataProto.State.CLOSED ||
+          state == ContainerDataProto.State.QUASI_CLOSED)) {
+        throw new IllegalStateException(
+            "Only (quasi)closed containers can be exported, but " +
+                "ContainerId=" + getContainerData().getContainerID() +
+                " is in state " + state);
+      }
+
+      try {
+        compactDB();
+        // Close DB (and remove from cache) to avoid concurrent modification
+        // while packing it.
+        BlockUtils.removeDB(containerData, config);
+      } finally {
+        readLock();
+        writeUnlock();
+      }
+
+      packer.pack(this, destination);
+    } finally {
+      if (lock.isWriteLockedByCurrentThread()) {
+        writeUnlock();
+      } else {
+        readUnlock();
+      }
     }
-    compactDB();
-    // Close DB (and remove from cache) to avoid concurrent modification while
-    // packing it.
-    BlockUtils.removeDB(containerData, config);
-    packer.pack(this, destination);
   }
 
   /**
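[Editorial sketch] The rewritten exportContainerData() above holds the write lock only for the state check and the DB compaction/close, then downgrades to the read lock before the potentially long packer.pack() call, so concurrent readers are not blocked for the whole export. A minimal, self-contained sketch of that downgrade idiom using a plain ReentrantReadWriteLock (the lock.isWriteLockedByCurrentThread() call above suggests the container's readLock()/writeUnlock() helpers wrap the same kind of lock, but that is an assumption of this sketch):

    import java.util.concurrent.locks.ReentrantReadWriteLock;

    /** Minimal sketch of the write-to-read lock downgrade used above. */
    final class LockDowngradeSketch {

      private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

      void export(Runnable mutateState, Runnable longRunningRead) {
        lock.writeLock().lock();
        try {
          mutateState.run();          // e.g. state check, compactDB(), removeDB()
          // Downgrade: take the read lock before releasing the write lock so no
          // writer can sneak in between the two steps.
          lock.readLock().lock();
          lock.writeLock().unlock();
          longRunningRead.run();      // e.g. packer.pack(...)
        } finally {
          if (lock.isWriteLockedByCurrentThread()) {
            lock.writeLock().unlock();  // mutateState threw before the downgrade
          } else {
            lock.readLock().unlock();
          }
        }
      }
    }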
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
index 70f4ffc..dbc2a97 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/KeyValueHandler.java
@@ -933,13 +933,8 @@ public class KeyValueHandler extends Handler {
       final OutputStream outputStream,
       final TarContainerPacker packer)
       throws IOException{
-    container.readLock();
-    try {
-      final KeyValueContainer kvc = (KeyValueContainer) container;
-      kvc.exportContainerData(outputStream, packer);
-    } finally {
-      container.readUnlock();
-    }
+    final KeyValueContainer kvc = (KeyValueContainer) container;
+    kvc.exportContainerData(outputStream, packer);
   }
 
   @Override
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
index b03b7d7..3dab1fa 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/keyvalue/statemachine/background/BlockDeletingService.java
@@ -20,11 +20,12 @@ package org.apache.hadoop.ozone.container.keyvalue.statemachine.background;
 
 import java.io.File;
 import java.io.IOException;
+import java.util.UUID;
 import java.util.LinkedList;
+import java.util.Objects;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
-import java.util.UUID;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
@@ -32,14 +33,15 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
 import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.utils.BackgroundService;
-import org.apache.hadoop.hdds.utils.BackgroundTask;
-import org.apache.hadoop.hdds.utils.BackgroundTaskQueue;
 import org.apache.hadoop.hdds.utils.BackgroundTaskResult;
 import org.apache.hadoop.hdds.utils.db.BatchOperation;
 import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
+import org.apache.hadoop.hdds.utils.BackgroundTaskQueue;
+import org.apache.hadoop.hdds.utils.BackgroundService;
+import org.apache.hadoop.hdds.utils.BackgroundTask;
 import org.apache.hadoop.hdds.utils.MetadataKeyFilters.KeyPrefixFilter;
 import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.impl.ContainerData;
 import org.apache.hadoop.ozone.container.common.impl.TopNOrderedContainerDeletionChoosingPolicy;
@@ -50,14 +52,22 @@ import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverSe
 import org.apache.hadoop.ozone.container.common.utils.ReferenceCountedDB;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStore;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaTwoImpl;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.util.Time;
+import org.apache.hadoop.hdds.protocol.proto
+    .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 
 import com.google.common.collect.Lists;
+
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL_DEFAULT;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER_DEFAULT;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V1;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V2;
+
 import org.apache.ratis.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -66,7 +76,7 @@ import org.slf4j.LoggerFactory;
  * A per-datanode container block deleting service takes charge
  * of deleting stale ozone blocks.
  */
-// TODO: Fix BlockDeletingService to work with new StorageLayer
+
 public class BlockDeletingService extends BackgroundService {
 
   private static final Logger LOG =
@@ -244,21 +254,54 @@ public class BlockDeletingService extends BackgroundService {
 
     @Override
     public BackgroundTaskResult call() throws Exception {
-      ContainerBackgroundTaskResult crr = new ContainerBackgroundTaskResult();
+      ContainerBackgroundTaskResult crr;
       final Container container = ozoneContainer.getContainerSet()
           .getContainer(containerData.getContainerID());
       container.writeLock();
+      File dataDir = new File(containerData.getChunksPath());
       long startTime = Time.monotonicNow();
       // Scan container's db and get list of under deletion blocks
       try (ReferenceCountedDB meta = BlockUtils.getDB(containerData, conf)) {
+        if (containerData.getSchemaVersion().equals(SCHEMA_V1)) {
+          crr = deleteViaSchema1(meta, container, dataDir, startTime);
+        } else if (containerData.getSchemaVersion().equals(SCHEMA_V2)) {
+          crr = deleteViaSchema2(meta, container, dataDir, startTime);
+        } else {
+          throw new UnsupportedOperationException(
+              "Only schema version 1 and schema version 2 are supported.");
+        }
+        return crr;
+      } finally {
+        container.writeUnlock();
+      }
+    }
+
+    public boolean checkDataDir(File dataDir) {
+      boolean b = true;
+      if (!dataDir.exists() || !dataDir.isDirectory()) {
+        LOG.error("Invalid container data dir {} : "
+            + "does not exist or not a directory", dataDir.getAbsolutePath());
+        b = false;
+      }
+      return b;
+    }
+
+    public ContainerBackgroundTaskResult deleteViaSchema1(
+        ReferenceCountedDB meta, Container container, File dataDir,
+        long startTime) throws IOException {
+      ContainerBackgroundTaskResult crr = new ContainerBackgroundTaskResult();
+      if (!checkDataDir(dataDir)) {
+        return crr;
+      }
+      try {
         Table<String, BlockData> blockDataTable =
-                meta.getStore().getBlockDataTable();
+            meta.getStore().getBlockDataTable();
 
         // # of blocks to delete is throttled
         KeyPrefixFilter filter = MetadataKeyFilters.getDeletingKeyFilter();
         List<? extends Table.KeyValue<String, BlockData>> toDeleteBlocks =
             blockDataTable.getSequentialRangeKVs(null, blockLimitPerTask,
-                    filter);
+                filter);
         if (toDeleteBlocks.isEmpty()) {
           LOG.debug("No under deletion block found in container : {}",
               containerData.getContainerID());
@@ -267,12 +310,6 @@ public class BlockDeletingService extends BackgroundService {
         List<String> succeedBlocks = new LinkedList<>();
         LOG.debug("Container : {}, To-Delete blocks : {}",
             containerData.getContainerID(), toDeleteBlocks.size());
-        File dataDir = new File(containerData.getChunksPath());
-        if (!dataDir.exists() || !dataDir.isDirectory()) {
-          LOG.error("Invalid container data dir {} : "
-              + "does not exist or not a directory", dataDir.getAbsolutePath());
-          return crr;
-        }
 
         Handler handler = Objects.requireNonNull(ozoneContainer.getDispatcher()
             .getHandler(container.getContainerType()));
@@ -292,7 +329,7 @@ public class BlockDeletingService extends BackgroundService {
 
         // Once blocks are deleted... remove the blockID from blockDataTable.
         try(BatchOperation batch = meta.getStore().getBatchHandler()
-                .initBatchOperation()) {
+            .initBatchOperation()) {
           for (String entry : succeedBlocks) {
             blockDataTable.deleteWithBatch(batch, entry);
           }
@@ -312,8 +349,106 @@ public class BlockDeletingService extends BackgroundService {
         }
         crr.addAll(succeedBlocks);
         return crr;
-      } finally {
-        container.writeUnlock();
+      } catch (IOException exception) {
+        LOG.warn(
+            "Deletion operation was not successful for container: " + container
+                .getContainerData().getContainerID(), exception);
+        throw exception;
+      }
+    }
+
+    public ContainerBackgroundTaskResult deleteViaSchema2(
+        ReferenceCountedDB meta, Container container, File dataDir,
+        long startTime) throws IOException {
+      ContainerBackgroundTaskResult crr = new ContainerBackgroundTaskResult();
+      if (!checkDataDir(dataDir)) {
+        return crr;
+      }
+      try {
+        Table<String, BlockData> blockDataTable =
+            meta.getStore().getBlockDataTable();
+        DatanodeStore ds = meta.getStore();
+        DatanodeStoreSchemaTwoImpl dnStoreTwoImpl =
+            (DatanodeStoreSchemaTwoImpl) ds;
+        Table<Long, DeletedBlocksTransaction>
+            deleteTxns = dnStoreTwoImpl.getDeleteTransactionTable();
+        List<DeletedBlocksTransaction> delBlocks = new ArrayList<>();
+        int totalBlocks = 0;
+        try (TableIterator<Long,
+            ? extends Table.KeyValue<Long, DeletedBlocksTransaction>> iter =
+            dnStoreTwoImpl.getDeleteTransactionTable().iterator()) {
+          while (iter.hasNext() && (totalBlocks < blockLimitPerTask)) {
+            DeletedBlocksTransaction delTx = iter.next().getValue();
+            totalBlocks += delTx.getLocalIDList().size();
+            delBlocks.add(delTx);
+          }
+        }
+
+        if (delBlocks.isEmpty()) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("No transaction found in container : {}",
+                containerData.getContainerID());
+          }
+          return crr;
+        }
+
+        LOG.debug("Container : {}, To-Delete blocks : {}",
+            containerData.getContainerID(), delBlocks.size());
+
+        Handler handler = Objects.requireNonNull(ozoneContainer.getDispatcher()
+            .getHandler(container.getContainerType()));
+
+        deleteTransactions(delBlocks, handler, blockDataTable, container);
+
+        // Once blocks are deleted... remove the blockID from blockDataTable
+        // and also remove the transactions from txnTable.
+        try(BatchOperation batch = meta.getStore().getBatchHandler()
+            .initBatchOperation()) {
+          for (DeletedBlocksTransaction delTx : delBlocks) {
+            deleteTxns.deleteWithBatch(batch, delTx.getTxID());
+            for (Long blk : delTx.getLocalIDList()) {
+              String bID = blk.toString();
+              meta.getStore().getBlockDataTable().deleteWithBatch(batch, bID);
+            }
+          }
+          meta.getStore().getBatchHandler().commitBatchOperation(batch);
+          containerData.updateAndCommitDBCounters(meta, batch,
+              totalBlocks);
+          // update count of pending deletion blocks and block count in
+          // in-memory container status.
+          containerData.decrPendingDeletionBlocks(totalBlocks);
+          containerData.decrKeyCount(totalBlocks);
+        }
+
+        LOG.info("Container: {}, deleted blocks: {}, task elapsed time: {}ms",
+            containerData.getContainerID(), totalBlocks,
+            Time.monotonicNow() - startTime);
+
+        return crr;
+      } catch (IOException exception) {
+        LOG.warn(
+            "Deletion operation was not successful for container: " + container
+                .getContainerData().getContainerID(), exception);
+        throw exception;
+      }
+    }
+
+    private void deleteTransactions(List<DeletedBlocksTransaction> delBlocks,
+        Handler handler, Table<String, BlockData> blockDataTable,
+        Container container) throws IOException {
+      for (DeletedBlocksTransaction entry : delBlocks) {
+        for (Long blkLong : entry.getLocalIDList()) {
+          String blk = blkLong.toString();
+          BlockData blkInfo = blockDataTable.get(blk);
+          LOG.debug("Deleting block {}", blk);
+          try {
+            handler.deleteBlock(container, blkInfo);
+          } catch (InvalidProtocolBufferException e) {
+            LOG.error("Failed to parse block info for block {}", blk, e);
+          } catch (IOException e) {
+            LOG.error("Failed to delete files for block {}", blk, e);
+          }
+        }
       }
     }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java
index 8895475..2fb1174 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeDBDefinition.java
@@ -60,7 +60,7 @@ public abstract class AbstractDatanodeDBDefinition implements DBDefinition {
   @Override
   public DBColumnFamilyDefinition[] getColumnFamilies() {
     return new DBColumnFamilyDefinition[] {getBlockDataColumnFamily(),
-            getMetadataColumnFamily(), getDeletedBlocksColumnFamily()};
+        getMetadataColumnFamily(), getDeletedBlocksColumnFamily()};
   }
 
   public abstract DBColumnFamilyDefinition<String, BlockData>
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
index faf399d..7d5e053 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaOneDBDefinition.java
@@ -88,4 +88,9 @@ public class DatanodeSchemaOneDBDefinition
       getDeletedBlocksColumnFamily() {
     return DELETED_BLOCKS;
   }
+
+  @Override
+  public DBColumnFamilyDefinition[] getColumnFamilies() {
+    return new DBColumnFamilyDefinition[] {getBlockDataColumnFamily(),
+        getMetadataColumnFamily(), getDeletedBlocksColumnFamily() };
+  }
 }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java
index 2ac56f2..1fabd13 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeSchemaTwoDBDefinition.java
@@ -17,16 +17,19 @@
  */
 package org.apache.hadoop.ozone.container.metadata;
 
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
 import org.apache.hadoop.hdds.utils.db.DBColumnFamilyDefinition;
 import org.apache.hadoop.hdds.utils.db.LongCodec;
 import org.apache.hadoop.hdds.utils.db.StringCodec;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList;
+import org.apache.hadoop.hdds.protocol.proto
+    .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 
 /**
  * This class defines the RocksDB structure for datanodes following schema
- * version 2, where the block data, metadata, and deleted block ids are put in
- * their own separate column families.
+ * version 2, where the block data, metadata, deleted block ids and delete
+ * transactions are each kept in their own column family.
  */
 public class DatanodeSchemaTwoDBDefinition extends
         AbstractDatanodeDBDefinition {
@@ -34,7 +37,7 @@ public class DatanodeSchemaTwoDBDefinition extends
   public static final DBColumnFamilyDefinition<String, BlockData>
           BLOCK_DATA =
           new DBColumnFamilyDefinition<>(
-                  "block_data",
+                  "blockData",
                   String.class,
                   new StringCodec(),
                   BlockData.class,
@@ -52,17 +55,33 @@ public class DatanodeSchemaTwoDBDefinition extends
   public static final DBColumnFamilyDefinition<String, ChunkInfoList>
           DELETED_BLOCKS =
           new DBColumnFamilyDefinition<>(
-                  "deleted_blocks",
+                  "deletedBlocks",
                   String.class,
                   new StringCodec(),
                   ChunkInfoList.class,
                   new ChunkInfoListCodec());
 
+  public static final DBColumnFamilyDefinition<Long, DeletedBlocksTransaction>
+      DELETE_TRANSACTION =
+      new DBColumnFamilyDefinition<>(
+          "deleteTxns",
+          Long.class,
+          new LongCodec(),
+          StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction.class,
+          new DeletedBlocksTransactionCodec());
+
   protected DatanodeSchemaTwoDBDefinition(String dbPath) {
     super(dbPath);
   }
 
   @Override
+  public DBColumnFamilyDefinition[] getColumnFamilies() {
+    return new DBColumnFamilyDefinition[] {getBlockDataColumnFamily(),
+        getMetadataColumnFamily(), getDeletedBlocksColumnFamily(),
+        getDeleteTransactionsColumnFamily()};
+  }
+
+  @Override
   public DBColumnFamilyDefinition<String, BlockData>
       getBlockDataColumnFamily() {
     return BLOCK_DATA;
@@ -78,4 +97,9 @@ public class DatanodeSchemaTwoDBDefinition extends
       getDeletedBlocksColumnFamily() {
     return DELETED_BLOCKS;
   }
+
+  public DBColumnFamilyDefinition<Long, DeletedBlocksTransaction>
+      getDeleteTransactionsColumnFamily() {
+    return DELETE_TRANSACTION;
+  }
 }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java
index df9b8c0..db8fe6b 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java
@@ -18,6 +18,9 @@
 package org.apache.hadoop.ozone.container.metadata;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.protocol.proto.
+    StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
+import org.apache.hadoop.hdds.utils.db.Table;
 
 import java.io.IOException;
 
@@ -26,10 +29,13 @@ import java.io.IOException;
  * three column families/tables:
  * 1. A block data table.
  * 2. A metadata table.
- * 3. A deleted blocks table.
+ * 3. A delete transaction table.
  */
 public class DatanodeStoreSchemaTwoImpl extends AbstractDatanodeStore {
 
+  private final Table<Long, DeletedBlocksTransaction>
+      deleteTransactionTable;
+
   /**
    * Constructs the datanode store and starts the DB Services.
    *
@@ -41,5 +47,11 @@ public class DatanodeStoreSchemaTwoImpl extends AbstractDatanodeStore {
       throws IOException {
     super(config, containerID, new DatanodeSchemaTwoDBDefinition(dbPath),
         openReadOnly);
+    this.deleteTransactionTable = new DatanodeSchemaTwoDBDefinition(dbPath)
+        .getDeleteTransactionsColumnFamily().getTable(getStore());
+  }
+
+  public Table<Long, DeletedBlocksTransaction> getDeleteTransactionTable() {
+    return deleteTransactionTable;
   }
 }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DeletedBlocksTransactionCodec.java
similarity index 54%
copy from hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java
copy to hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DeletedBlocksTransactionCodec.java
index df9b8c0..90c26fe 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DatanodeStoreSchemaTwoImpl.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/DeletedBlocksTransactionCodec.java
@@ -17,29 +17,30 @@
  */
 package org.apache.hadoop.ozone.container.metadata;
 
-import org.apache.hadoop.hdds.conf.ConfigurationSource;
+import org.apache.hadoop.hdds.utils.db.Codec;
+import org.apache.hadoop.hdds.protocol.proto
+    .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 
 import java.io.IOException;
 
 /**
- * Constructs a datanode store in accordance with schema version 2, which uses
- * three column families/tables:
- * 1. A block data table.
- * 2. A metadata table.
- * 3. A deleted blocks table.
+ * Supports encoding and decoding {@link DeletedBlocksTransaction} objects.
  */
-public class DatanodeStoreSchemaTwoImpl extends AbstractDatanodeStore {
+public class DeletedBlocksTransactionCodec
+    implements Codec<DeletedBlocksTransaction> {
 
-  /**
-   * Constructs the datanode store and starts the DB Services.
-   *
-   * @param config - Ozone Configuration.
-   * @throws IOException - on Failure.
-   */
-  public DatanodeStoreSchemaTwoImpl(ConfigurationSource config,
-      long containerID, String dbPath, boolean openReadOnly)
+  @Override public byte[] toPersistedFormat(
+      DeletedBlocksTransaction deletedBlocksTransaction) {
+    return deletedBlocksTransaction.toByteArray();
+  }
+
+  @Override public DeletedBlocksTransaction fromPersistedFormat(byte[] rawData)
       throws IOException {
-    super(config, containerID, new DatanodeSchemaTwoDBDefinition(dbPath),
-        openReadOnly);
+    return DeletedBlocksTransaction.parseFrom(rawData);
+  }
+
+  @Override public DeletedBlocksTransaction copyObject(
+      DeletedBlocksTransaction deletedBlocksTransaction) {
+    throw new UnsupportedOperationException();
   }
 }
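[Editorial sketch] A short round trip for the new codec, written as a throwaway main(). The DeletedBlocksTransaction builder methods used here (setTxID, setContainerID, addLocalID, setCount) are inferred from the accessors this patch already uses (getTxID, getLocalIDList) and should be treated as assumptions:

    import org.apache.hadoop.hdds.protocol.proto
        .StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
    import org.apache.hadoop.ozone.container.metadata.DeletedBlocksTransactionCodec;

    public final class DeletedBlocksTransactionCodecSketch {
      public static void main(String[] args) throws Exception {
        DeletedBlocksTransactionCodec codec = new DeletedBlocksTransactionCodec();

        DeletedBlocksTransaction tx = DeletedBlocksTransaction.newBuilder()
            .setTxID(1L)        // matches delTx.getTxID() in BlockDeletingService
            .setContainerID(7L)
            .addLocalID(100L)   // matches delTx.getLocalIDList()
            .setCount(0)
            .build();

        byte[] raw = codec.toPersistedFormat(tx);       // plain protobuf bytes
        DeletedBlocksTransaction decoded = codec.fromPersistedFormat(raw);

        assert decoded.getTxID() == 1L;
        assert decoded.getLocalIDList().size() == 1;
      }
    }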
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
index a44ef38..3ecddac 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/OzoneContainer.java
@@ -29,6 +29,7 @@ import java.util.function.Consumer;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.DatanodeDetails.Port.Name;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerType;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
@@ -52,8 +53,8 @@ import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverSe
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.keyvalue.statemachine.background.BlockDeletingService;
-import org.apache.hadoop.ozone.container.replication.GrpcReplicationService;
-import org.apache.hadoop.ozone.container.replication.OnDemandContainerReplicationSource;
+import org.apache.hadoop.ozone.container.replication.ReplicationServer;
+import org.apache.hadoop.ozone.container.replication.ReplicationServer.ReplicationConfig;
 import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -85,19 +86,25 @@ public class OzoneContainer {
   private List<ContainerDataScanner> dataScanners;
   private final BlockDeletingService blockDeletingService;
   private final GrpcTlsConfig tlsClientConfig;
+  private final ReplicationServer replicationServer;
+  private DatanodeDetails datanodeDetails;
 
   /**
    * Construct OzoneContainer object.
+   *
    * @param datanodeDetails
    * @param conf
    * @param certClient
    * @throws DiskOutOfSpaceException
    * @throws IOException
    */
-  public OzoneContainer(DatanodeDetails datanodeDetails, ConfigurationSource
-      conf, StateContext context, CertificateClient certClient)
+  public OzoneContainer(
+      DatanodeDetails datanodeDetails, ConfigurationSource conf,
+      StateContext context, CertificateClient certClient
+  )
       throws IOException {
     config = conf;
+    this.datanodeDetails = datanodeDetails;
     volumeSet = new MutableVolumeSet(datanodeDetails.getUuidString(), conf);
     volumeSet.setFailedVolumeListener(this::handleVolumeFailures);
     containerSet = new ContainerSet();
@@ -135,14 +142,22 @@ public class OzoneContainer {
      * XceiverServerGrpc is the read channel
      */
     controller = new ContainerController(containerSet, handlers);
+
     writeChannel = XceiverServerRatis.newXceiverServerRatis(
         datanodeDetails, config, hddsDispatcher, controller, certClient,
         context);
+
+    replicationServer = new ReplicationServer(
+        controller,
+        conf.getObject(ReplicationConfig.class),
+        secConf,
+        certClient);
+
     readChannel = new XceiverServerGrpc(
-        datanodeDetails, config, hddsDispatcher, certClient,
-        createReplicationService());
+        datanodeDetails, config, hddsDispatcher, certClient);
     Duration svcInterval = conf.getObject(
             DatanodeConfiguration.class).getBlockDeletionInterval();
+
     long serviceTimeout = config
         .getTimeDuration(OZONE_BLOCK_DELETING_SERVICE_TIMEOUT,
             OZONE_BLOCK_DELETING_SERVICE_TIMEOUT_DEFAULT,
@@ -158,10 +173,7 @@ public class OzoneContainer {
     return tlsClientConfig;
   }
 
-  private GrpcReplicationService createReplicationService() {
-    return new GrpcReplicationService(
-        new OnDemandContainerReplicationSource(controller));
-  }
+
 
   /**
    * Build's container map.
@@ -169,7 +181,7 @@ public class OzoneContainer {
   private void buildContainerSet() {
     Iterator<HddsVolume> volumeSetIterator = volumeSet.getVolumesList()
         .iterator();
-    ArrayList<Thread> volumeThreads = new ArrayList<Thread>();
+    ArrayList<Thread> volumeThreads = new ArrayList<>();
     long startTime = System.currentTimeMillis();
 
     //TODO: diskchecker should be run before this, to see how disks are.
@@ -242,6 +254,10 @@ public class OzoneContainer {
   public void start(String scmId) throws IOException {
     LOG.info("Attempting to start container services.");
     startContainerScrub();
+
+    replicationServer.start();
+    datanodeDetails.setPort(Name.REPLICATION, replicationServer.getPort());
+
     writeChannel.start();
     readChannel.start();
     hddsDispatcher.init();
@@ -256,6 +272,7 @@ public class OzoneContainer {
     //TODO: at end of container IO integration work.
     LOG.info("Attempting to stop container services.");
     stopContainerScrub();
+    replicationServer.stop();
     writeChannel.stop();
     readChannel.stop();
     this.handlers.values().forEach(Handler::stop);
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java
index 275321d..53dac9d 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/GrpcReplicationClient.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.ozone.container.replication;
 
+import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -40,6 +41,7 @@ import org.apache.ratis.thirdparty.io.grpc.ManagedChannel;
 import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts;
 import org.apache.ratis.thirdparty.io.grpc.netty.NettyChannelBuilder;
 import org.apache.ratis.thirdparty.io.grpc.stub.StreamObserver;
+import org.apache.ratis.thirdparty.io.netty.handler.ssl.ClientAuth;
 import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -58,20 +60,27 @@ public class GrpcReplicationClient implements AutoCloseable{
 
   private final Path workingDirectory;
 
-  public GrpcReplicationClient(String host, int port, Path workingDir,
-      SecurityConfig secConfig, X509Certificate caCert) throws IOException {
+  public GrpcReplicationClient(
+      String host, int port, Path workingDir,
+      SecurityConfig secConfig, X509Certificate caCert
+  ) throws IOException {
     NettyChannelBuilder channelBuilder =
         NettyChannelBuilder.forAddress(host, port)
             .usePlaintext()
             .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE);
 
-    if (secConfig.isGrpcTlsEnabled()) {
+    if (secConfig.isSecurityEnabled()) {
       channelBuilder.useTransportSecurity();
 
       SslContextBuilder sslContextBuilder = GrpcSslContexts.forClient();
       if (caCert != null) {
         sslContextBuilder.trustManager(caCert);
       }
+
+      sslContextBuilder.clientAuth(ClientAuth.REQUIRE);
+      sslContextBuilder.keyManager(
+          new File(secConfig.getCertificateFileName()),
+          new File(secConfig.getPrivateKeyFileName()));
       if (secConfig.useTestCert()) {
         channelBuilder.overrideAuthority("localhost");
       }
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java
new file mode 100644
index 0000000..e8f831b
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationServer.java
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.replication;
+
+import javax.net.ssl.SSLException;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.hdds.conf.Config;
+import org.apache.hadoop.hdds.conf.ConfigGroup;
+import org.apache.hadoop.hdds.conf.ConfigTag;
+import org.apache.hadoop.hdds.security.x509.SecurityConfig;
+import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
+import org.apache.hadoop.hdds.tracing.GrpcServerInterceptor;
+import org.apache.hadoop.ozone.OzoneConsts;
+import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
+
+import org.apache.ratis.thirdparty.io.grpc.Server;
+import org.apache.ratis.thirdparty.io.grpc.ServerInterceptors;
+import org.apache.ratis.thirdparty.io.grpc.netty.GrpcSslContexts;
+import org.apache.ratis.thirdparty.io.grpc.netty.NettyServerBuilder;
+import org.apache.ratis.thirdparty.io.netty.handler.ssl.ClientAuth;
+import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Separated network server for server2server container replication.
+ */
+public class ReplicationServer {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(ReplicationServer.class);
+
+  private Server server;
+
+  private SecurityConfig secConf;
+
+  private CertificateClient caClient;
+
+  private ContainerController controller;
+
+  private int port;
+
+  public ReplicationServer(
+      ContainerController controller,
+      ReplicationConfig replicationConfig,
+      SecurityConfig secConf,
+      CertificateClient caClient
+  ) {
+    this.secConf = secConf;
+    this.caClient = caClient;
+    this.controller = controller;
+    this.port = replicationConfig.getPort();
+    init();
+  }
+
+  public void init() {
+    NettyServerBuilder nettyServerBuilder = NettyServerBuilder.forPort(port)
+        .maxInboundMessageSize(OzoneConsts.OZONE_SCM_CHUNK_MAX_SIZE)
+        .addService(ServerInterceptors.intercept(new GrpcReplicationService(
+            new OnDemandContainerReplicationSource(controller)
+        ), new GrpcServerInterceptor()));
+
+    if (secConf.isSecurityEnabled()) {
+      try {
+        SslContextBuilder sslContextBuilder = SslContextBuilder.forServer(
+            caClient.getPrivateKey(), caClient.getCertificate());
+
+        sslContextBuilder = GrpcSslContexts.configure(
+            sslContextBuilder, secConf.getGrpcSslProvider());
+
+        sslContextBuilder.clientAuth(ClientAuth.REQUIRE);
+        sslContextBuilder.trustManager(caClient.getCACertificate());
+
+        nettyServerBuilder.sslContext(sslContextBuilder.build());
+      } catch (SSLException ex) {
+        throw new IllegalArgumentException(
+            "Unable to setup TLS for secure datanode replication GRPC "
+                + "endpoint.", ex);
+      }
+    }
+
+    server = nettyServerBuilder.build();
+  }
+
+  public void start() throws IOException {
+    server.start();
+
+    if (port == 0) {
+      LOG.info("{} is started using port {}", getClass().getSimpleName(),
+          server.getPort());
+    }
+
+    port = server.getPort();
+
+  }
+
+  public void stop() {
+    try {
+      server.shutdown().awaitTermination(10L, TimeUnit.SECONDS);
+    } catch (InterruptedException ex) {
+      LOG.warn("{} couldn't be stopped gracefully", getClass().getSimpleName());
+    }
+  }
+
+  public int getPort() {
+    return port;
+  }
+
+  @ConfigGroup(prefix = "hdds.datanode.replication")
+  public static final class ReplicationConfig {
+
+    @Config(key = "port", defaultValue = "9886", description = "Port used for"
+        + " the server2server replication server", tags = {
+        ConfigTag.MANAGEMENT})
+    private int port;
+
+    public int getPort() {
+      return port;
+    }
+
+    public ReplicationConfig setPort(int portParam) {
+      this.port = portParam;
+      return this;
+    }
+  }
+
+}
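[Editorial sketch] The port for this new endpoint comes from the hdds.datanode.replication.port key defined by the ReplicationConfig group above (default 9886). Below is a sketch of how a test might stand the server up, mirroring the conf.getObject(ReplicationConfig.class) call that OzoneContainer makes elsewhere in this patch; the helper name and the use of port 0 for an ephemeral port are assumptions of the sketch:

    import java.io.IOException;

    import org.apache.hadoop.hdds.conf.OzoneConfiguration;
    import org.apache.hadoop.hdds.security.x509.SecurityConfig;
    import org.apache.hadoop.ozone.container.ozoneimpl.ContainerController;
    import org.apache.hadoop.ozone.container.replication.ReplicationServer;

    /** Sketch only; the ContainerController comes from the surrounding datanode/test setup. */
    final class ReplicationServerSketch {

      static ReplicationServer startForTest(ContainerController controller)
          throws IOException {
        OzoneConfiguration conf = new OzoneConfiguration();
        // 0 asks the OS for a free port; getPort() reports the bound port after start().
        conf.set("hdds.datanode.replication.port", "0");

        ReplicationServer.ReplicationConfig replicationConf =
            conf.getObject(ReplicationServer.ReplicationConfig.class);

        ReplicationServer server = new ReplicationServer(
            controller, replicationConf, new SecurityConfig(conf),
            null /* CertificateClient is only consulted when security is enabled */);
        server.start();
        return server;
      }
    }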
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
index cb281f0..6becf62 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisor.java
@@ -25,11 +25,11 @@ import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
 import org.apache.hadoop.ozone.container.replication.ReplicationTask.Status;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,6 +44,7 @@ public class ReplicationSupervisor {
   private final ContainerSet containerSet;
   private final ContainerReplicator replicator;
   private final ExecutorService executor;
+
   private final AtomicLong requestCounter = new AtomicLong();
   private final AtomicLong successCounter = new AtomicLong();
   private final AtomicLong failureCounter = new AtomicLong();
@@ -58,7 +59,8 @@ public class ReplicationSupervisor {
   @VisibleForTesting
   ReplicationSupervisor(
       ContainerSet containerSet, ContainerReplicator replicator,
-      ExecutorService executor) {
+      ExecutorService executor
+  ) {
     this.containerSet = containerSet;
     this.replicator = replicator;
     this.containersInFlight = ConcurrentHashMap.newKeySet();
@@ -67,9 +69,10 @@ public class ReplicationSupervisor {
 
   public ReplicationSupervisor(
       ContainerSet containerSet,
-      ContainerReplicator replicator, int poolSize) {
+      ContainerReplicator replicator, int poolSize
+  ) {
     this(containerSet, replicator, new ThreadPoolExecutor(
-        0, poolSize, 60, TimeUnit.SECONDS,
+        poolSize, poolSize, 60, TimeUnit.SECONDS,
         new LinkedBlockingQueue<>(),
         new ThreadFactoryBuilder().setDaemon(true)
             .setNameFormat("ContainerReplicationThread-%d")
@@ -85,6 +88,12 @@ public class ReplicationSupervisor {
     }
   }
 
+  @VisibleForTesting
+  public void shutdownAfterFinish() throws InterruptedException {
+    executor.shutdown();
+    executor.awaitTermination(1L, TimeUnit.DAYS);
+  }
+
   public void stop() {
     try {
       executor.shutdown();
@@ -100,6 +109,7 @@ public class ReplicationSupervisor {
   /**
    * Get the number of containers currently being downloaded
    * or scheduled for download.
+   *
    * @return Count of in-flight replications.
    */
   @VisibleForTesting
@@ -107,10 +117,10 @@ public class ReplicationSupervisor {
     return containersInFlight.size();
   }
 
-  private final class TaskRunner implements Runnable {
+  public final class TaskRunner implements Runnable {
     private final ReplicationTask task;
 
-    private TaskRunner(ReplicationTask task) {
+    public TaskRunner(ReplicationTask task) {
       this.task = task;
     }
 
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java
index 5d8a86b..0967503 100644
--- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/replication/SimpleContainerDownloader.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.ozone.container.replication;
 
+import java.io.IOException;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.security.cert.X509Certificate;
@@ -87,6 +88,7 @@ public class SimpleContainerDownloader implements ContainerDownloader {
         if (result == null) {
           result = downloadContainer(containerId, datanode);
         } else {
+
           result = result.exceptionally(t -> {
             LOG.error("Error on replicating container: " + containerId, t);
             try {
@@ -128,11 +130,11 @@ public class SimpleContainerDownloader implements ContainerDownloader {
   protected CompletableFuture<Path> downloadContainer(
       long containerId,
       DatanodeDetails datanode
-  ) throws Exception {
+  ) throws IOException {
     CompletableFuture<Path> result;
     GrpcReplicationClient grpcReplicationClient =
         new GrpcReplicationClient(datanode.getIpAddress(),
-            datanode.getPort(Name.STANDALONE).getValue(),
+            datanode.getPort(Name.REPLICATION).getValue(),
             workingDirectory, securityConfig, caCert);
     result = grpcReplicationClient.download(containerId)
         .thenApply(r -> {
diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SetNodeOperationalStateCommand.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SetNodeOperationalStateCommand.java
new file mode 100644
index 0000000..3ff7949
--- /dev/null
+++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/protocol/commands/SetNodeOperationalStateCommand.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership.  The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.ozone.protocol.commands;
+
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import org.apache.hadoop.hdds.protocol.proto
+    .StorageContainerDatanodeProtocolProtos.SCMCommandProto;
+import org.apache.hadoop.hdds.protocol.proto
+    .StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto;
+
+/**
+ * A command used to persist the current node operational state on the datanode.
+ */
+public class SetNodeOperationalStateCommand
+    extends SCMCommand<SetNodeOperationalStateCommandProto> {
+
+  private final HddsProtos.NodeOperationalState opState;
+  private long stateExpiryEpochSeconds;
+
+  /**
+   * Ctor that creates a SetNodeOperationalStateCommand.
+   *
+   * @param id    - Command ID. Something like a timestamp would suffice.
+   * @param state - The operational state the node should be set to.
+   * @param stateExpiryEpochSeconds The epoch time when the state should
+   *                                expire, or zero for the state to remain
+   *                                indefinitely.
+   */
+  public SetNodeOperationalStateCommand(long id,
+      HddsProtos.NodeOperationalState state, long stateExpiryEpochSeconds) {
+    super(id);
+    this.opState = state;
+    this.stateExpiryEpochSeconds = stateExpiryEpochSeconds;
+  }
+
+  /**
+   * Returns the type of this command.
+   *
+   * @return Type  - This is setNodeOperationalStateCommand.
+   */
+  @Override
+  public SCMCommandProto.Type getType() {
+    return SCMCommandProto.Type.setNodeOperationalStateCommand;
+  }
+
+  /**
+   * Gets the protobuf message of this object.
+   *
+   * @return A protobuf message.
+   */
+  @Override
+  public SetNodeOperationalStateCommandProto getProto() {
+    return SetNodeOperationalStateCommandProto.newBuilder()
+        .setCmdId(getId())
+        .setNodeOperationalState(opState)
+        .setStateExpiryEpochSeconds(stateExpiryEpochSeconds).build();
+  }
+
+  public HddsProtos.NodeOperationalState getOpState() {
+    return opState;
+  }
+
+  public long getStateExpiryEpochSeconds() {
+    return stateExpiryEpochSeconds;
+  }
+
+  public static SetNodeOperationalStateCommand getFromProtobuf(
+      SetNodeOperationalStateCommandProto cmdProto) {
+    Preconditions.checkNotNull(cmdProto);
+    return new SetNodeOperationalStateCommand(cmdProto.getCmdId(),
+        cmdProto.getNodeOperationalState(),
+        cmdProto.getStateExpiryEpochSeconds());
+  }
+}
\ No newline at end of file
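[Editorial sketch] A small encode/decode round trip for the new command, matching how HeartbeatEndpointTask rebuilds it from the heartbeat response earlier in this patch. The DECOMMISSIONING constant is assumed to exist in HddsProtos.NodeOperationalState; substitute whichever operational state applies:

    import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
    import org.apache.hadoop.hdds.protocol.proto
        .StorageContainerDatanodeProtocolProtos.SetNodeOperationalStateCommandProto;
    import org.apache.hadoop.ozone.protocol.commands.SetNodeOperationalStateCommand;

    public final class SetNodeOperationalStateCommandSketch {
      public static void main(String[] args) {
        SetNodeOperationalStateCommand cmd = new SetNodeOperationalStateCommand(
            System.currentTimeMillis(),                       // command id; a timestamp suffices
            HddsProtos.NodeOperationalState.DECOMMISSIONING,  // assumed enum constant
            0L);                                              // 0 = state does not expire

        SetNodeOperationalStateCommandProto proto = cmd.getProto();
        SetNodeOperationalStateCommand decoded =
            SetNodeOperationalStateCommand.getFromProtobuf(proto);

        assert decoded.getOpState() == cmd.getOpState();
        assert decoded.getStateExpiryEpochSeconds() == cmd.getStateExpiryEpochSeconds();
      }
    }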
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
index 24c598a..40cfbba 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ContainerTestHelper.java
@@ -44,13 +44,11 @@ import org.apache.hadoop.ozone.common.ChunkBuffer;
 import org.apache.hadoop.ozone.common.OzoneChecksumException;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
 import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
-import org.apache.hadoop.ozone.container.common.transport.server.XceiverServerSpi;
 import org.apache.hadoop.ozone.container.common.transport.server.ratis.XceiverServerRatis;
 import org.apache.hadoop.security.token.Token;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Strings;
-import org.apache.ratis.protocol.RaftGroupId;
 import org.apache.ratis.server.RaftServer;
 import org.apache.ratis.statemachine.StateMachine;
 import org.junit.Assert;
@@ -589,14 +587,12 @@ public final class ContainerTestHelper {
           " not exist in datanode:" + dn.getDatanodeDetails().getUuid());
     }
 
-    XceiverServerSpi serverSpi = dn.getDatanodeStateMachine().
-        getContainer().getWriteChannel();
-    RaftServer server = (((XceiverServerRatis) serverSpi).getServer());
-    RaftGroupId groupId =
-        pipeline == null ? server.getGroupIds().iterator().next() :
-            RatisHelper.newRaftGroup(pipeline).getGroupId();
-
-    return server.getDivision(groupId);
+    XceiverServerRatis server =
+        (XceiverServerRatis) (dn.getDatanodeStateMachine().
+        getContainer().getWriteChannel());
+    return pipeline == null ? server.getServerDivision() :
+        server.getServerDivision(
+            RatisHelper.newRaftGroup(pipeline).getGroupId());
   }
 
   public static StateMachine getStateMachine(HddsDatanodeService dn,
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
index 2eb6a39..96d4228 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/TestBlockDeletingService.java
@@ -21,9 +21,10 @@ package org.apache.hadoop.ozone.container.common;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.UUID;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
-import java.util.UUID;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 
@@ -34,9 +35,13 @@ import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.MutableConfigurationSource;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
 import org.apache.hadoop.hdds.utils.BackgroundService;
 import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
+import org.apache.hadoop.hdds.utils.db.BatchOperation;
+import org.apache.hadoop.hdds.utils.db.Table;
+import org.apache.hadoop.hdds.utils.db.TableIterator;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.common.Checksum;
 import org.apache.hadoop.ozone.common.ChunkBuffer;
@@ -55,7 +60,6 @@ import org.apache.hadoop.ozone.container.common.utils.ReferenceCountedDB;
 import org.apache.hadoop.ozone.container.common.volume.MutableVolumeSet;
 import org.apache.hadoop.ozone.container.common.volume.RoundRobinVolumeChoosingPolicy;
 import org.apache.hadoop.ozone.container.common.volume.VolumeSet;
-import org.apache.hadoop.ozone.container.keyvalue.ChunkLayoutTestInfo;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
 import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler;
@@ -64,11 +68,14 @@ import org.apache.hadoop.ozone.container.keyvalue.impl.FilePerBlockStrategy;
 import org.apache.hadoop.ozone.container.keyvalue.impl.FilePerChunkStrategy;
 import org.apache.hadoop.ozone.container.keyvalue.interfaces.ChunkManager;
 import org.apache.hadoop.ozone.container.keyvalue.statemachine.background.BlockDeletingService;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStore;
+import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaTwoImpl;
 import org.apache.hadoop.ozone.container.ozoneimpl.OzoneContainer;
 import org.apache.hadoop.ozone.container.testutils.BlockDeletingServiceTestImpl;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
 
+import static java.util.stream.Collectors.toList;
 import static org.apache.commons.lang3.RandomStringUtils.randomAlphanumeric;
 
 import org.junit.AfterClass;
@@ -82,7 +89,11 @@ import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_CONTA
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER_DEFAULT;
 import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_BLOCK_DELETING_SERVICE_INTERVAL;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_VERSIONS;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V1;
+import static org.apache.hadoop.ozone.OzoneConsts.SCHEMA_V2;
 import static org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion.FILE_PER_BLOCK;
+import static org.apache.hadoop.ozone.container.common.states.endpoint.VersionEndpointTask.LOG;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -101,16 +112,38 @@ public class TestBlockDeletingService {
   private static MutableConfigurationSource conf;
 
   private final ChunkLayOutVersion layout;
+  private final String schemaVersion;
   private int blockLimitPerTask;
   private static VolumeSet volumeSet;
 
-  public TestBlockDeletingService(ChunkLayOutVersion layout) {
-    this.layout = layout;
+  public TestBlockDeletingService(LayoutInfo layoutInfo) {
+    this.layout = layoutInfo.layout;
+    this.schemaVersion = layoutInfo.schemaVersion;
   }
 
   @Parameterized.Parameters
   public static Iterable<Object[]> parameters() {
-    return ChunkLayoutTestInfo.chunkLayoutParameters();
+    return LayoutInfo.layoutList.stream().map(each -> new Object[] {each})
+        .collect(toList());
+  }
+
+  public static class LayoutInfo {
+    private final String schemaVersion;
+    private final ChunkLayOutVersion layout;
+
+    public LayoutInfo(String schemaVersion, ChunkLayOutVersion layout) {
+      this.schemaVersion = schemaVersion;
+      this.layout = layout;
+    }
+
+    private static List<LayoutInfo> layoutList = new ArrayList<>();
+    static {
+      for (ChunkLayOutVersion ch : ChunkLayOutVersion.getAllVersions()) {
+        for (String sch : SCHEMA_VERSIONS) {
+          layoutList.add(new LayoutInfo(sch, ch));
+        }
+      }
+    }
   }
 
   @BeforeClass
@@ -158,6 +191,7 @@ public class TestBlockDeletingService {
     }
     byte[] arr = randomAlphanumeric(1048576).getBytes(UTF_8);
     ChunkBuffer buffer = ChunkBuffer.wrap(ByteBuffer.wrap(arr));
+    int txnID = 0;
     for (int x = 0; x < numOfContainers; x++) {
       long containerID = ContainerTestHelper.getTestContainerID();
       KeyValueContainerData data =
@@ -165,55 +199,164 @@ public class TestBlockDeletingService {
               ContainerTestHelper.CONTAINER_MAX_SIZE,
               UUID.randomUUID().toString(), datanodeUuid);
       data.closeContainer();
+      data.setSchemaVersion(schemaVersion);
       KeyValueContainer container = new KeyValueContainer(data, conf);
       container.create(volumeSet,
           new RoundRobinVolumeChoosingPolicy(), scmId);
       containerSet.addContainer(container);
       data = (KeyValueContainerData) containerSet.getContainer(
           containerID).getContainerData();
-      long chunkLength = 100;
-      try(ReferenceCountedDB metadata = BlockUtils.getDB(data, conf)) {
-        for (int j = 0; j < numOfBlocksPerContainer; j++) {
-          BlockID blockID =
-              ContainerTestHelper.getTestBlockID(containerID);
-          String deleteStateName = OzoneConsts.DELETING_KEY_PREFIX +
-              blockID.getLocalID();
-          BlockData kd = new BlockData(blockID);
-          List<ContainerProtos.ChunkInfo> chunks = Lists.newArrayList();
-          for (int k = 0; k < numOfChunksPerBlock; k++) {
-            final String chunkName = String.format("block.%d.chunk.%d", j, k);
-            final long offset = k * chunkLength;
-            ContainerProtos.ChunkInfo info =
-                ContainerProtos.ChunkInfo.newBuilder()
-                    .setChunkName(chunkName)
-                    .setLen(chunkLength)
-                    .setOffset(offset)
-                    .setChecksumData(Checksum.getNoChecksumDataProto())
-                    .build();
-            chunks.add(info);
-            ChunkInfo chunkInfo = new ChunkInfo(chunkName, offset, chunkLength);
-            ChunkBuffer chunkData = buffer.duplicate(0, (int) chunkLength);
-            chunkManager.writeChunk(container, blockID, chunkInfo, chunkData,
-                WRITE_STAGE);
-            chunkManager.writeChunk(container, blockID, chunkInfo, chunkData,
-                COMMIT_STAGE);
-          }
-          kd.setChunks(chunks);
-          metadata.getStore().getBlockDataTable().put(
-                  deleteStateName, kd);
-          container.getContainerData().incrPendingDeletionBlocks(1);
-        }
-        container.getContainerData().setKeyCount(numOfBlocksPerContainer);
-        // Set block count, bytes used and pending delete block count.
-        metadata.getStore().getMetadataTable().put(
-                OzoneConsts.BLOCK_COUNT, (long)numOfBlocksPerContainer);
-        metadata.getStore().getMetadataTable().put(
-                OzoneConsts.CONTAINER_BYTES_USED,
-            chunkLength * numOfChunksPerBlock * numOfBlocksPerContainer);
-        metadata.getStore().getMetadataTable().put(
-                OzoneConsts.PENDING_DELETE_BLOCK_COUNT,
-                (long)numOfBlocksPerContainer);
+      if (data.getSchemaVersion().equals(SCHEMA_V1)) {
+        createPendingDeleteBlocksSchema1(numOfBlocksPerContainer, data,
+            containerID, numOfChunksPerBlock, buffer, chunkManager, container);
+      } else if (data.getSchemaVersion().equals(SCHEMA_V2)) {
+        createPendingDeleteBlocksSchema2(numOfBlocksPerContainer, txnID,
+            containerID, numOfChunksPerBlock, buffer, chunkManager, container,
+            data);
+      } else {
+        throw new UnsupportedOperationException(
+            "Only schema version 1 and schema version 2 are "
+                + "supported.");
+      }
+    }
+  }
+
+  @SuppressWarnings("checkstyle:parameternumber")
+  private void createPendingDeleteBlocksSchema1(int numOfBlocksPerContainer,
+      KeyValueContainerData data, long containerID, int numOfChunksPerBlock,
+      ChunkBuffer buffer, ChunkManager chunkManager,
+      KeyValueContainer container) {
+    BlockID blockID = null;
+    try (ReferenceCountedDB metadata = BlockUtils.getDB(data, conf)) {
+      for (int j = 0; j < numOfBlocksPerContainer; j++) {
+        blockID = ContainerTestHelper.getTestBlockID(containerID);
+        String deleteStateName =
+            OzoneConsts.DELETING_KEY_PREFIX + blockID.getLocalID();
+        BlockData kd = new BlockData(blockID);
+        List<ContainerProtos.ChunkInfo> chunks = Lists.newArrayList();
+        putChunksInBlock(numOfChunksPerBlock, j, chunks, buffer, chunkManager,
+            container, blockID);
+        kd.setChunks(chunks);
+        metadata.getStore().getBlockDataTable().put(deleteStateName, kd);
+        container.getContainerData().incrPendingDeletionBlocks(1);
+      }
+      updateMetaData(data, container, numOfBlocksPerContainer,
+          numOfChunksPerBlock);
+    } catch (IOException exception) {
+      LOG.info("Exception " + exception);
+      LOG.warn("Failed to put block: " + blockID + " in BlockDataTable.");
+    }
+  }
+
+  @SuppressWarnings("checkstyle:parameternumber")
+  private void createPendingDeleteBlocksSchema2(int numOfBlocksPerContainer,
+      int txnID, long containerID, int numOfChunksPerBlock, ChunkBuffer buffer,
+      ChunkManager chunkManager, KeyValueContainer container,
+      KeyValueContainerData data) {
+    List<Long> containerBlocks = new ArrayList<>();
+    int blockCount = 0;
+    for (int i = 0; i < numOfBlocksPerContainer; i++) {
+      txnID = txnID + 1;
+      BlockID blockID = ContainerTestHelper.getTestBlockID(containerID);
+      BlockData kd = new BlockData(blockID);
+      List<ContainerProtos.ChunkInfo> chunks = Lists.newArrayList();
+      putChunksInBlock(numOfChunksPerBlock, i, chunks, buffer, chunkManager,
+          container, blockID);
+      kd.setChunks(chunks);
+      String bID = null;
+      try (ReferenceCountedDB metadata = BlockUtils.getDB(data, conf)) {
+        bID = String.valueOf(blockID.getLocalID());
+        metadata.getStore().getBlockDataTable().put(bID, kd);
+      } catch (IOException exception) {
+        LOG.info("Exception = " + exception);
+        LOG.warn("Failed to put block: " + bID + " in BlockDataTable.");
+      }
+      container.getContainerData().incrPendingDeletionBlocks(1);
+
+      // The check below handles containers that hold more blocks than
+      // 'blockLimitPerTask': we create
+      // (totalBlocksInContainer / blockLimitPerTask) transactions of
+      // 'blockLimitPerTask' blocks each, plus a final transaction with the
+      // remaining (totalBlocksInContainer % blockLimitPerTask) blocks.
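+      // Example: with numOfBlocksPerContainer = 8 and blockLimitPerTask = 3,
+      // the blocks are grouped into transactions of sizes 3, 3 and 2.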
+      containerBlocks.add(blockID.getLocalID());
+      blockCount++;
+      if (blockCount == blockLimitPerTask || i == (numOfBlocksPerContainer
+          - 1)) {
+        createTxn(data, containerBlocks, txnID, containerID);
+        containerBlocks.clear();
+        blockCount = 0;
+      }
+    }
+    updateMetaData(data, container, numOfBlocksPerContainer,
+        numOfChunksPerBlock);
+  }
+
+  private void createTxn(KeyValueContainerData data, List<Long> containerBlocks,
+      int txnID, long containerID) {
+    try (ReferenceCountedDB metadata = BlockUtils.getDB(data, conf)) {
+      StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction dtx =
+          StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction
+              .newBuilder().setTxID(txnID).setContainerID(containerID)
+              .addAllLocalID(containerBlocks).setCount(0).build();
+      try (BatchOperation batch = metadata.getStore().getBatchHandler()
+          .initBatchOperation()) {
+        DatanodeStore ds = metadata.getStore();
+        DatanodeStoreSchemaTwoImpl dnStoreTwoImpl =
+            (DatanodeStoreSchemaTwoImpl) ds;
+        dnStoreTwoImpl.getDeleteTransactionTable()
+            .putWithBatch(batch, (long) txnID, dtx);
+        metadata.getStore().getBatchHandler().commitBatchOperation(batch);
       }
+    } catch (IOException exception) {
+      LOG.warn("Transaction creation was not successful for txnID: " + txnID
+          + " consisting of " + containerBlocks.size() + " blocks.");
+    }
+  }
+
+  private void putChunksInBlock(int numOfChunksPerBlock, int i,
+      List<ContainerProtos.ChunkInfo> chunks, ChunkBuffer buffer,
+      ChunkManager chunkManager, KeyValueContainer container, BlockID blockID) {
+    long chunkLength = 100;
+    try {
+      for (int k = 0; k < numOfChunksPerBlock; k++) {
+        final String chunkName = String.format("block.%d.chunk.%d", i, k);
+        final long offset = k * chunkLength;
+        ContainerProtos.ChunkInfo info =
+            ContainerProtos.ChunkInfo.newBuilder().setChunkName(chunkName)
+                .setLen(chunkLength).setOffset(offset)
+                .setChecksumData(Checksum.getNoChecksumDataProto()).build();
+        chunks.add(info);
+        ChunkInfo chunkInfo = new ChunkInfo(chunkName, offset, chunkLength);
+        ChunkBuffer chunkData = buffer.duplicate(0, (int) chunkLength);
+        chunkManager
+            .writeChunk(container, blockID, chunkInfo, chunkData, WRITE_STAGE);
+        chunkManager
+            .writeChunk(container, blockID, chunkInfo, chunkData, COMMIT_STAGE);
+      }
+    } catch (IOException ex) {
+      LOG.warn("Putting chunks in blocks was not successful for BlockID: "
+          + blockID);
+    }
+  }
+
+  private void updateMetaData(KeyValueContainerData data,
+      KeyValueContainer container, int numOfBlocksPerContainer,
+      int numOfChunksPerBlock) {
+    long chunkLength = 100;
+    try (ReferenceCountedDB metadata = BlockUtils.getDB(data, conf)) {
+      container.getContainerData().setKeyCount(numOfBlocksPerContainer);
+      // Set block count, bytes used and pending delete block count.
+      metadata.getStore().getMetadataTable()
+          .put(OzoneConsts.BLOCK_COUNT, (long) numOfBlocksPerContainer);
+      metadata.getStore().getMetadataTable()
+          .put(OzoneConsts.CONTAINER_BYTES_USED,
+              chunkLength * numOfChunksPerBlock * numOfBlocksPerContainer);
+      metadata.getStore().getMetadataTable()
+          .put(OzoneConsts.PENDING_DELETE_BLOCK_COUNT,
+              (long) numOfBlocksPerContainer);
+    } catch (IOException exception) {
+      LOG.warn("Meta Data update was not successful for container: "+container);
     }
   }
 
@@ -231,11 +374,32 @@ public class TestBlockDeletingService {
    * Get under deletion blocks count from DB,
    * note this info is parsed from container.db.
    */
-  private int getUnderDeletionBlocksCount(ReferenceCountedDB meta)
-      throws IOException {
-    return meta.getStore().getBlockDataTable()
-        .getRangeKVs(null, 100,
-        MetadataKeyFilters.getDeletingKeyFilter()).size();
+  private int getUnderDeletionBlocksCount(ReferenceCountedDB meta,
+      KeyValueContainerData data) throws IOException {
+    if (data.getSchemaVersion().equals(SCHEMA_V1)) {
+      return meta.getStore().getBlockDataTable()
+          .getRangeKVs(null, 100, MetadataKeyFilters.getDeletingKeyFilter())
+          .size();
+    } else if (data.getSchemaVersion().equals(SCHEMA_V2)) {
+      int pendingBlocks = 0;
+      DatanodeStore ds = meta.getStore();
+      DatanodeStoreSchemaTwoImpl dnStoreTwoImpl =
+          (DatanodeStoreSchemaTwoImpl) ds;
+      try (
+          TableIterator<Long, ? extends Table.KeyValue<Long,
+              StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction>>
+              iter = dnStoreTwoImpl.getDeleteTransactionTable().iterator()) {
+        while (iter.hasNext()) {
+          StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction
+              delTx = iter.next().getValue();
+          pendingBlocks += delTx.getLocalIDList().size();
+        }
+      }
+      return pendingBlocks;
+    } else {
+      throw new UnsupportedOperationException(
+          "Only schema version 1 and schema version 2 are supported.");
+    }
   }
 
 
@@ -261,6 +425,7 @@ public class TestBlockDeletingService {
     // Ensure 1 container was created
     List<ContainerData> containerData = Lists.newArrayList();
     containerSet.listContainer(0L, 1, containerData);
+    KeyValueContainerData data = (KeyValueContainerData) containerData.get(0);
     Assert.assertEquals(1, containerData.size());
 
     try(ReferenceCountedDB meta = BlockUtils.getDB(
@@ -280,7 +445,7 @@ public class TestBlockDeletingService {
       Assert.assertEquals(0, transactionId);
 
       // Ensure there are 3 blocks under deletion and 0 deleted blocks
-      Assert.assertEquals(3, getUnderDeletionBlocksCount(meta));
+      Assert.assertEquals(3, getUnderDeletionBlocksCount(meta, data));
       Assert.assertEquals(3, meta.getStore().getMetadataTable()
           .get(OzoneConsts.PENDING_DELETE_BLOCK_COUNT).longValue());
 
@@ -348,6 +513,9 @@ public class TestBlockDeletingService {
   public void testBlockDeletionTimeout() throws Exception {
     conf.setInt(OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL, 10);
     conf.setInt(OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER, 2);
+    this.blockLimitPerTask =
+        conf.getInt(OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER,
+            OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER_DEFAULT);
     ContainerSet containerSet = new ContainerSet();
     createToDeleteBlocks(containerSet, 1, 3, 1);
     ContainerMetrics metrics = ContainerMetrics.create(conf);
@@ -394,7 +562,7 @@ public class TestBlockDeletingService {
       LogCapturer newLog = LogCapturer.captureLogs(BackgroundService.LOG);
       GenericTestUtils.waitFor(() -> {
         try {
-          return getUnderDeletionBlocksCount(meta) == 0;
+          return getUnderDeletionBlocksCount(meta, data) == 0;
         } catch (IOException ignored) {
         }
         return false;
@@ -445,6 +613,9 @@ public class TestBlockDeletingService {
         TopNOrderedContainerDeletionChoosingPolicy.class.getName());
     conf.setInt(OZONE_BLOCK_DELETING_CONTAINER_LIMIT_PER_INTERVAL, 1);
     conf.setInt(OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER, 1);
+    this.blockLimitPerTask =
+        conf.getInt(OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER,
+            OZONE_BLOCK_DELETING_LIMIT_PER_CONTAINER_DEFAULT);
     ContainerSet containerSet = new ContainerSet();
 
     int containerCount = 2;
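
For reference, a condensed sketch of how pending deletes are recorded under the two schemas exercised by this test (names such as blockDataTable, deleteTxnTable, batch and localBlockIds are shorthand for the accessors used in the helpers above, not literal fields):

    // Schema V1: mark the block itself in the block data table by prefixing
    // its key, so the deleting-key filter can find it later.
    blockDataTable.put(
        OzoneConsts.DELETING_KEY_PREFIX + blockID.getLocalID(), blockData);

    // Schema V2: keep the block entry keyed by its plain local ID and record
    // a DeletedBlocksTransaction in the separate delete transaction table.
    blockDataTable.put(String.valueOf(blockID.getLocalID()), blockData);
    deleteTxnTable.putWithBatch(batch, txnID,
        DeletedBlocksTransaction.newBuilder()
            .setTxID(txnID).setContainerID(containerID)
            .addAllLocalID(localBlockIds).setCount(0).build());
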
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java
index d3032c3..e9c39d3 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/TestStateContext.java
@@ -23,11 +23,19 @@ import static org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProt
 import static org.apache.hadoop.test.GenericTestUtils.waitFor;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
 import java.net.InetSocketAddress;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ExecutorService;
@@ -37,6 +45,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
+import com.google.protobuf.Descriptors.Descriptor;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerAction;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineAction;
@@ -53,6 +62,271 @@ import com.google.protobuf.GeneratedMessage;
  */
 public class TestStateContext {
 
+  /**
+   * Only accepted types of reports can be put back to the report queue.
+   */
+  @Test
+  public void testPutBackReports() {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    DatanodeStateMachine datanodeStateMachineMock =
+        mock(DatanodeStateMachine.class);
+
+    StateContext ctx = new StateContext(conf, DatanodeStates.getInitState(),
+        datanodeStateMachineMock);
+    InetSocketAddress scm1 = new InetSocketAddress("scm1", 9001);
+    ctx.addEndpoint(scm1);
+    InetSocketAddress scm2 = new InetSocketAddress("scm2", 9001);
+    ctx.addEndpoint(scm2);
+
+    Map<String, Integer> expectedReportCount = new HashMap<>();
+
+    // Case 1: Put back an incremental report
+
+    ctx.putBackReports(Collections.singletonList(newMockReport(
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME)), scm1);
+    // scm2 report queue should be empty
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+    // Check scm1 queue
+    expectedReportCount.put(
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    // getReports dequeues incremental reports
+    expectedReportCount.clear();
+
+    ctx.putBackReports(Collections.singletonList(newMockReport(
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME)), scm2);
+    // scm1 report queue should be empty
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    // Check scm2 queue
+    expectedReportCount.put(
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+    // getReports dequeues incremental reports
+    expectedReportCount.clear();
+
+    // Case 2: Attempt to put back a full report
+
+    try {
+      ctx.putBackReports(Collections.singletonList(
+          newMockReport(StateContext.CONTAINER_REPORTS_PROTO_NAME)), scm1);
+      fail("Should throw exception when putting back unaccepted reports!");
+    } catch (IllegalArgumentException ignored) {
+    }
+    try {
+      ctx.putBackReports(Collections.singletonList(
+          newMockReport(StateContext.NODE_REPORT_PROTO_NAME)), scm2);
+      fail("Should throw exception when putting back unaccepted reports!");
+    } catch (IllegalArgumentException ignored) {
+    }
+    try {
+      ctx.putBackReports(Collections.singletonList(
+          newMockReport(StateContext.PIPELINE_REPORTS_PROTO_NAME)), scm1);
+      fail("Should throw exception when putting back unaccepted reports!");
+    } catch (IllegalArgumentException ignored) {
+    }
+
+    // Case 3: Put back mixed types of incremental reports
+
+    ctx.putBackReports(Arrays.asList(
+        newMockReport(StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME),
+        newMockReport(StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME),
+        newMockReport(StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME),
+        newMockReport(StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME),
+        newMockReport(StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME)
+    ), scm1);
+    // scm2 report queue should be empty
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+    // Check scm1 queue
+    expectedReportCount.put(
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME, 2);
+    expectedReportCount.put(
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME, 3);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    // getReports dequeues incremental reports
+    expectedReportCount.clear();
+
+    // Case 4: Attempt to put back mixed types of full reports
+
+    try {
+      ctx.putBackReports(Arrays.asList(
+          newMockReport(StateContext.CONTAINER_REPORTS_PROTO_NAME),
+          newMockReport(StateContext.NODE_REPORT_PROTO_NAME),
+          newMockReport(StateContext.PIPELINE_REPORTS_PROTO_NAME)
+      ), scm1);
+      fail("Should throw exception when putting back unaccepted reports!");
+    } catch (IllegalArgumentException ignored) {
+    }
+
+    // Case 5: Attempt to put back mixed full and incremental reports
+
+    try {
+      ctx.putBackReports(Arrays.asList(
+          newMockReport(StateContext.CONTAINER_REPORTS_PROTO_NAME),
+          newMockReport(StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME),
+          newMockReport(StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME)
+      ), scm2);
+      fail("Should throw exception when putting back unaccepted reports!");
+    } catch (IllegalArgumentException ignored) {
+    }
+  }
+
+  @Test
+  public void testReportQueueWithAddReports() {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    DatanodeStateMachine datanodeStateMachineMock =
+        mock(DatanodeStateMachine.class);
+
+    StateContext ctx = new StateContext(conf, DatanodeStates.getInitState(),
+        datanodeStateMachineMock);
+    InetSocketAddress scm1 = new InetSocketAddress("scm1", 9001);
+    ctx.addEndpoint(scm1);
+    InetSocketAddress scm2 = new InetSocketAddress("scm2", 9001);
+    ctx.addEndpoint(scm2);
+    // Check initial state
+    assertEquals(0, ctx.getAllAvailableReports(scm1).size());
+    assertEquals(0, ctx.getAllAvailableReports(scm2).size());
+
+    Map<String, Integer> expectedReportCount = new HashMap<>();
+
+    // Add a bunch of ContainerReports
+    batchAddReports(ctx, StateContext.CONTAINER_REPORTS_PROTO_NAME, 128);
+    // Should only keep the latest one
+    expectedReportCount.put(StateContext.CONTAINER_REPORTS_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+
+    // Add a bunch of NodeReport
+    batchAddReports(ctx, StateContext.NODE_REPORT_PROTO_NAME, 128);
+    // Should only keep the latest one
+    expectedReportCount.put(StateContext.NODE_REPORT_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+
+    // Add a bunch of PipelineReports
+    batchAddReports(ctx, StateContext.PIPELINE_REPORTS_PROTO_NAME, 128);
+    // Should only keep the latest one
+    expectedReportCount.put(StateContext.PIPELINE_REPORTS_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+
+    // Add a bunch of PipelineReports
+    batchAddReports(ctx, StateContext.PIPELINE_REPORTS_PROTO_NAME, 128);
+    // Should only keep the latest one
+    expectedReportCount.put(StateContext.PIPELINE_REPORTS_PROTO_NAME, 1);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+
+    // Add a bunch of CommandStatusReports
+    batchAddReports(ctx,
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME, 128);
+    expectedReportCount.put(
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME, 128);
+    // Should keep all of them
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+    // getReports dequeues incremental reports
+    expectedReportCount.remove(
+        StateContext.COMMAND_STATUS_REPORTS_PROTO_NAME);
+
+    // Add a bunch of IncrementalContainerReport
+    batchAddReports(ctx,
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME, 128);
+    // Should keep all of them
+    expectedReportCount.put(
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME, 128);
+    checkReportCount(ctx.getAllAvailableReports(scm1), expectedReportCount);
+    checkReportCount(ctx.getAllAvailableReports(scm2), expectedReportCount);
+    // getReports dequeues incremental reports
+    expectedReportCount.remove(
+        StateContext.INCREMENTAL_CONTAINER_REPORT_PROTO_NAME);
+  }
+
+  void batchAddReports(StateContext ctx, String reportName, int count) {
+    for (int i = 0; i < count; i++) {
+      ctx.addReport(newMockReport(reportName));
+    }
+  }
+
+  void checkReportCount(List<GeneratedMessage> reports,
+      Map<String, Integer> expectedReportCount) {
+    Map<String, Integer> reportCount = new HashMap<>();
+    for (GeneratedMessage report : reports) {
+      final String reportName = report.getDescriptorForType().getFullName();
+      reportCount.put(reportName, reportCount.getOrDefault(reportName, 0) + 1);
+    }
+    // Verify
+    assertEquals(expectedReportCount, reportCount);
+  }
+
+  /**
+   * Check if Container, Node and Pipeline report APIs work as expected.
+   */
+  @Test
+  public void testContainerNodePipelineReportAPIs() {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    DatanodeStateMachine datanodeStateMachineMock =
+        mock(DatanodeStateMachine.class);
+
+    // ContainerReports
+    StateContext context1 = newStateContext(conf, datanodeStateMachineMock);
+    assertNull(context1.getContainerReports());
+    assertNull(context1.getNodeReport());
+    assertNull(context1.getPipelineReports());
+    GeneratedMessage containerReports =
+        newMockReport(StateContext.CONTAINER_REPORTS_PROTO_NAME);
+    context1.addReport(containerReports);
+
+    assertNotNull(context1.getContainerReports());
+    assertEquals(StateContext.CONTAINER_REPORTS_PROTO_NAME,
+        context1.getContainerReports().getDescriptorForType().getFullName());
+    assertNull(context1.getNodeReport());
+    assertNull(context1.getPipelineReports());
+
+    // NodeReport
+    StateContext context2 = newStateContext(conf, datanodeStateMachineMock);
+    GeneratedMessage nodeReport =
+        newMockReport(StateContext.NODE_REPORT_PROTO_NAME);
+    context2.addReport(nodeReport);
+
+    assertNull(context2.getContainerReports());
+    assertNotNull(context2.getNodeReport());
+    assertEquals(StateContext.NODE_REPORT_PROTO_NAME,
+        context2.getNodeReport().getDescriptorForType().getFullName());
+    assertNull(context2.getPipelineReports());
+
+    // PipelineReports
+    StateContext context3 = newStateContext(conf, datanodeStateMachineMock);
+    GeneratedMessage pipelineReports =
+        newMockReport(StateContext.PIPELINE_REPORTS_PROTO_NAME);
+    context3.addReport(pipelineReports);
+
+    assertNull(context3.getContainerReports());
+    assertNull(context3.getNodeReport());
+    assertNotNull(context3.getPipelineReports());
+    assertEquals(StateContext.PIPELINE_REPORTS_PROTO_NAME,
+        context3.getPipelineReports().getDescriptorForType().getFullName());
+  }
+
+  private StateContext newStateContext(OzoneConfiguration conf,
+      DatanodeStateMachine datanodeStateMachineMock) {
+    StateContext stateContext = new StateContext(conf,
+        DatanodeStates.getInitState(), datanodeStateMachineMock);
+    InetSocketAddress scm1 = new InetSocketAddress("scm1", 9001);
+    stateContext.addEndpoint(scm1);
+    InetSocketAddress scm2 = new InetSocketAddress("scm2", 9001);
+    stateContext.addEndpoint(scm2);
+    return stateContext;
+  }
+
+  private GeneratedMessage newMockReport(String messageType) {
+    GeneratedMessage pipelineReports = mock(GeneratedMessage.class);
+    when(pipelineReports.getDescriptorForType()).thenReturn(
+        mock(Descriptor.class));
+    when(pipelineReports.getDescriptorForType().getFullName()).thenReturn(
+        messageType);
+    return pipelineReports;
+  }
+
   @Test
   public void testReportAPIs() {
     OzoneConfiguration conf = new OzoneConfiguration();
@@ -64,8 +338,14 @@ public class TestStateContext {
     InetSocketAddress scm1 = new InetSocketAddress("scm1", 9001);
     InetSocketAddress scm2 = new InetSocketAddress("scm2", 9001);
 
-    // Try to add report with endpoint. Should not be stored.
-    stateContext.addReport(mock(GeneratedMessage.class));
+    GeneratedMessage generatedMessage = mock(GeneratedMessage.class);
+    when(generatedMessage.getDescriptorForType()).thenReturn(
+        mock(Descriptor.class));
+    when(generatedMessage.getDescriptorForType().getFullName()).thenReturn(
+        "hadoop.hdds.CommandStatusReportsProto");
+
+    // Try to add a report with zero endpoints. Should not be stored.
+    stateContext.addReport(generatedMessage);
     assertTrue(stateContext.getAllAvailableReports(scm1).isEmpty());
 
     // Add 2 scm endpoints.
@@ -73,7 +353,7 @@ public class TestStateContext {
     stateContext.addEndpoint(scm2);
 
     // Add report. Should be added to all endpoints.
-    stateContext.addReport(mock(GeneratedMessage.class));
+    stateContext.addReport(generatedMessage);
     List<GeneratedMessage> allAvailableReports =
         stateContext.getAllAvailableReports(scm1);
     assertEquals(1, allAvailableReports.size());
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCreatePipelineCommandHandler.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCreatePipelineCommandHandler.java
index febd1c3..d23f1c4 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCreatePipelineCommandHandler.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/statemachine/commandhandler/TestCreatePipelineCommandHandler.java
@@ -44,6 +44,7 @@ import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.mockito.Mockito;
+import org.mockito.stubbing.Answer;
 import org.powermock.api.mockito.PowerMockito;
 import org.powermock.core.classloader.annotations.PrepareForTest;
 import org.powermock.modules.junit4.PowerMockRunner;
@@ -79,7 +80,10 @@ public class TestCreatePipelineCommandHandler {
     Mockito.when(raftClient.getGroupManagementApi(
         Mockito.any(RaftPeerId.class))).thenReturn(raftClientGroupManager);
     PowerMockito.mockStatic(RaftClient.class);
-    PowerMockito.when(RaftClient.newBuilder()).thenReturn(builder);
+    // Workaround for PowerMock bug:
+    // https://github.com/powermock/powermock/issues/992
+    PowerMockito.when(RaftClient.newBuilder()).thenAnswer(
+        (Answer<RaftClient.Builder>) invocation -> builder);
   }
 
   private RaftClient.Builder mockRaftClientBuilder() {
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
index 25d8b1d..4000e34 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestKeyValueContainer.java
@@ -29,7 +29,6 @@ import org.apache.hadoop.hdds.scm.container.common.helpers
 import org.apache.hadoop.hdds.utils.db.Table;
 import org.apache.hadoop.ozone.OzoneConsts;
 import org.apache.hadoop.ozone.container.common.helpers.BlockData;
-import org.apache.hadoop.ozone.container.common.helpers.ChunkInfo;
 import org.apache.hadoop.ozone.container.common.impl.ChunkLayOutVersion;
 import org.apache.hadoop.ozone.container.common.impl.ContainerDataYaml;
 import org.apache.hadoop.ozone.container.common.volume.HddsVolume;
@@ -59,15 +58,19 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.util.ArrayList;
+import java.io.OutputStream;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.List;
 import java.util.UUID;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import static org.apache.ratis.util.Preconditions.assertTrue;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.fail;
 import static org.mockito.ArgumentMatchers.anyList;
 import static org.mockito.ArgumentMatchers.anyLong;
@@ -125,36 +128,9 @@ public class TestKeyValueContainer {
     keyValueContainer = new KeyValueContainer(keyValueContainerData, CONF);
   }
 
-  private void addBlocks(int count) throws Exception {
-    long containerId = keyValueContainerData.getContainerID();
-
-    try(ReferenceCountedDB metadataStore = BlockUtils.getDB(keyValueContainer
-        .getContainerData(), CONF)) {
-      for (int i = 0; i < count; i++) {
-        // Creating BlockData
-        BlockID blockID = new BlockID(containerId, i);
-        BlockData blockData = new BlockData(blockID);
-        blockData.addMetadata(OzoneConsts.VOLUME, OzoneConsts.OZONE);
-        blockData.addMetadata(OzoneConsts.OWNER,
-            OzoneConsts.OZONE_SIMPLE_HDFS_USER);
-        List<ContainerProtos.ChunkInfo> chunkList = new ArrayList<>();
-        ChunkInfo info = new ChunkInfo(String.format("%d.data.%d", blockID
-            .getLocalID(), 0), 0, 1024);
-        chunkList.add(info.getProtoBufMessage());
-        blockData.setChunks(chunkList);
-        metadataStore.getStore().getBlockDataTable()
-                .put(Long.toString(blockID.getLocalID()), blockData);
-      }
-    }
-  }
-
   @Test
   public void testCreateContainer() throws Exception {
-
-    // Create Container.
-    keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId);
-
-    keyValueContainerData = keyValueContainer.getContainerData();
+    createContainer();
 
     String containerMetaDataPath = keyValueContainerData.getMetadataPath();
     String chunksPath = keyValueContainerData.getChunksPath();
@@ -171,38 +147,11 @@ public class TestKeyValueContainer {
 
   @Test
   public void testContainerImportExport() throws Exception {
-
     long containerId = keyValueContainer.getContainerData().getContainerID();
-    // Create Container.
-    keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId);
-
-
-    keyValueContainerData = keyValueContainer
-        .getContainerData();
-
-    keyValueContainerData.setState(
-        ContainerProtos.ContainerDataProto.State.CLOSED);
-
+    createContainer();
     long numberOfKeysToWrite = 12;
-    //write one few keys to check the key count after import
-    try(ReferenceCountedDB metadataStore =
-        BlockUtils.getDB(keyValueContainerData, CONF)) {
-      Table<String, BlockData> blockDataTable =
-              metadataStore.getStore().getBlockDataTable();
-
-      for (long i = 0; i < numberOfKeysToWrite; i++) {
-        blockDataTable.put("test" + i, new BlockData(new BlockID(i, i)));
-      }
-
-      // As now when we put blocks, we increment block count and update in DB.
-      // As for test, we are doing manually so adding key count to DB.
-      metadataStore.getStore().getMetadataTable()
-              .put(OzoneConsts.BLOCK_COUNT, numberOfKeysToWrite);
-    }
-
-    Map<String, String> metadata = new HashMap<>();
-    metadata.put("key1", "value1");
-    keyValueContainer.update(metadata, true);
+    closeContainer();
+    populate(numberOfKeysToWrite);
 
     //destination path
     File folderToExport = folder.newFile("exported.tar.gz");
@@ -261,6 +210,76 @@ public class TestKeyValueContainer {
 
   }
 
+  /**
+   * Create the container on disk.
+   */
+  private void createContainer() throws StorageContainerException {
+    keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId);
+    keyValueContainerData = keyValueContainer.getContainerData();
+  }
+
+  /**
+   * Add some keys to the container.
+   */
+  private void populate(long numberOfKeysToWrite) throws IOException {
+    try (ReferenceCountedDB metadataStore =
+        BlockUtils.getDB(keyValueContainer.getContainerData(), CONF)) {
+      Table<String, BlockData> blockDataTable =
+              metadataStore.getStore().getBlockDataTable();
+
+      for (long i = 0; i < numberOfKeysToWrite; i++) {
+        blockDataTable.put("test" + i, new BlockData(new BlockID(i, i)));
+      }
+
+      // When blocks are put through the normal write path, the block count
+      // is incremented and persisted in the DB automatically. This test adds
+      // blocks manually, so the key count is written to the DB here as well.
+      metadataStore.getStore().getMetadataTable()
+              .put(OzoneConsts.BLOCK_COUNT, numberOfKeysToWrite);
+    }
+
+    Map<String, String> metadata = new HashMap<>();
+    metadata.put("key1", "value1");
+    keyValueContainer.update(metadata, true);
+  }
+
+  /**
+   * Set container state to CLOSED.
+   */
+  private void closeContainer() {
+    keyValueContainerData.setState(
+        ContainerProtos.ContainerDataProto.State.CLOSED);
+  }
+
+  @Test
+  public void concurrentExport() throws Exception {
+    createContainer();
+    populate(100);
+    closeContainer();
+
+    AtomicReference<String> failed = new AtomicReference<>();
+
+    TarContainerPacker packer = new TarContainerPacker();
+    List<Thread> threads = IntStream.range(0, 20)
+        .mapToObj(i -> new Thread(() -> {
+          try {
+            File file = folder.newFile("concurrent" + i + ".tar.gz");
+            try (OutputStream out = new FileOutputStream(file)) {
+              keyValueContainer.exportContainerData(out, packer);
+            }
+          } catch (Exception e) {
+            failed.compareAndSet(null, e.getMessage());
+          }
+        }))
+        .collect(Collectors.toList());
+
+    threads.forEach(Thread::start);
+    for (Thread thread : threads) {
+      thread.join();
+    }
+
+    assertNull(failed.get());
+  }
+
   @Test
   public void testDuplicateContainer() throws Exception {
     try {
@@ -293,8 +312,7 @@ public class TestKeyValueContainer {
 
   @Test
   public void testDeleteContainer() throws Exception {
-    keyValueContainerData.setState(ContainerProtos.ContainerDataProto.State
-        .CLOSED);
+    closeContainer();
     keyValueContainer = new KeyValueContainer(
         keyValueContainerData, CONF);
     keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId);
@@ -373,8 +391,7 @@ public class TestKeyValueContainer {
   @Test
   public void testUpdateContainerUnsupportedRequest() throws Exception {
     try {
-      keyValueContainerData.setState(
-          ContainerProtos.ContainerDataProto.State.CLOSED);
+      closeContainer();
       keyValueContainer = new KeyValueContainer(keyValueContainerData, CONF);
       keyValueContainer.create(volumeSet, volumeChoosingPolicy, scmId);
       Map<String, String> metadata = new HashMap<>();
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java
index bee77c7..d248ac1 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/keyvalue/TestTarContainerPacker.java
@@ -23,7 +23,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
@@ -55,7 +55,6 @@ import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.commons.compress.compressors.CompressorStreamFactory.GZIP;
 
 /**
@@ -187,7 +186,7 @@ public class TestTarContainerPacker {
     //read the container descriptor only
     try (FileInputStream input = new FileInputStream(targetFile.toFile())) {
       String containerYaml = new String(packer.unpackContainerDescriptor(input),
-          Charset.forName(UTF_8.name()));
+          StandardCharsets.UTF_8);
       Assert.assertEquals(TEST_DESCRIPTOR_FILE_CONTENT, containerYaml);
     }
 
@@ -203,7 +202,7 @@ public class TestTarContainerPacker {
     try (FileInputStream input = new FileInputStream(targetFile.toFile())) {
       descriptor =
           new String(packer.unpackContainerData(destinationContainer, input),
-              Charset.forName(UTF_8.name()));
+              StandardCharsets.UTF_8);
     }
 
     assertExampleMetadataDbIsGood(
@@ -359,7 +358,7 @@ public class TestTarContainerPacker {
 
     try (FileInputStream testFile = new FileInputStream(dbFile.toFile())) {
       List<String> strings = IOUtils
-          .readLines(testFile, Charset.forName(UTF_8.name()));
+          .readLines(testFile, StandardCharsets.UTF_8);
       Assert.assertEquals(1, strings.size());
       Assert.assertEquals(TEST_DB_FILE_CONTENT, strings.get(0));
     }
@@ -377,7 +376,7 @@ public class TestTarContainerPacker {
 
     try (FileInputStream testFile = new FileInputStream(chunkFile.toFile())) {
       List<String> strings = IOUtils
-          .readLines(testFile, Charset.forName(UTF_8.name()));
+          .readLines(testFile, StandardCharsets.UTF_8);
       Assert.assertEquals(1, strings.size());
       Assert.assertEquals(TEST_CHUNK_FILE_CONTENT, strings.get(0));
     }
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java
new file mode 100644
index 0000000..2c517cb
--- /dev/null
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/ReplicationSupervisorScheduling.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.ozone.container.replication;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.UUID;
+
+import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import org.apache.hadoop.ozone.container.common.impl.ContainerSet;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Helper to check scheduling efficiency.
+ * <p>
+ * This test is not picked up by the default test run (the class name does
+ * not start with Test), but it can be used to validate changes manually.
+ */
+public class ReplicationSupervisorScheduling {
+
+  private final Random random = new Random();
+
+  @Test
+  public void test() throws InterruptedException {
+    List<DatanodeDetails> datanodes = new ArrayList<>();
+    datanodes.add(MockDatanodeDetails.randomDatanodeDetails());
+    datanodes.add(MockDatanodeDetails.randomDatanodeDetails());
+
+    //locks representing the limited resource of remote and local disks
+
+    //datanode -> disk -> lock object (remote resources)
+    Map<UUID, Map<Integer, Object>> volumeLocks = new HashMap<>();
+
+    //disk -> lock (local resources)
+    Map<Integer, Object> destinationLocks = new HashMap<>();
+
+    //init the locks
+    for (DatanodeDetails datanode : datanodes) {
+      volumeLocks.put(datanode.getUuid(), new HashMap<>());
+      for (int i = 0; i < 10; i++) {
+        volumeLocks.get(datanode.getUuid()).put(i, new Object());
+      }
+    }
+
+    for (int i = 0; i < 10; i++) {
+      destinationLocks.put(i, new Object());
+    }
+
+    ContainerSet cs = new ContainerSet();
+
+    ReplicationSupervisor rs = new ReplicationSupervisor(cs,
+
+        //simplified executor emulating the current sequential download +
+        //import.
+        task -> {
+
+          //download, limited by the number of source datanodes
+          final DatanodeDetails sourceDatanode =
+              task.getSources().get(random.nextInt(task.getSources().size()));
+
+          final Map<Integer, Object> volumes =
+              volumeLocks.get(sourceDatanode.getUuid());
+          synchronized (volumes.get(random.nextInt(volumes.size()))) {
+            System.out.println("Downloading " + task.getContainerId() + " from "
+                + sourceDatanode.getUuid());
+            try {
+              Thread.sleep(1000);
+            } catch (InterruptedException ex) {
+              ex.printStackTrace();
+            }
+          }
+
+          //import, limited by the destination datanode
+          final int volumeIndex = random.nextInt(destinationLocks.size());
+          synchronized (destinationLocks.get(volumeIndex)) {
+            System.out.println(
+                "Importing " + task.getContainerId() + " to disk "
+                    + volumeIndex);
+
+            try {
+              Thread.sleep(1000);
+            } catch (InterruptedException ex) {
+              ex.printStackTrace();
+            }
+          }
+
+        }, 10);
+
+    final long start = System.currentTimeMillis();
+
+    //schedule 100 container replications
+    for (int i = 0; i < 100; i++) {
+      List<DatanodeDetails> sources = new ArrayList<>();
+      sources.add(datanodes.get(random.nextInt(datanodes.size())));
+      rs.addTask(new ReplicationTask(i, sources));
+    }
+    rs.shutdownAfterFinish();
+    final long executionTime = System.currentTimeMillis() - start;
+    System.out.println(executionTime);
+    Assert.assertTrue("Execution was too slow : " + executionTime + " ms",
+        executionTime < 100_000);
+  }
+
+}
diff --git a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java
index f29b157..7070425 100644
--- a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java
+++ b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/replication/TestSimpleContainerDownloader.java
@@ -115,7 +115,7 @@ public class TestSimpleContainerDownloader {
           @Override
           protected CompletableFuture<Path> downloadContainer(
               long containerId, DatanodeDetails datanode
-          ) throws Exception {
+          ) {
             //download is always successful.
             return CompletableFuture
                 .completedFuture(Paths.get(datanode.getUuidString()));
@@ -169,7 +169,7 @@ public class TestSimpleContainerDownloader {
       protected CompletableFuture<Path> downloadContainer(
           long containerId,
           DatanodeDetails datanode
-      ) throws Exception {
+      ) {
 
         if (datanodes.contains(datanode)) {
           if (directException) {
diff --git a/hadoop-ozone/dist/src/main/compose/ozonescripts/stop.sh b/hadoop-hdds/container-service/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
old mode 100755
new mode 100644
similarity index 91%
copy from hadoop-ozone/dist/src/main/compose/ozonescripts/stop.sh
copy to hadoop-hdds/container-service/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
index a3ce08a..3c9e1c8
--- a/hadoop-ozone/dist/src/main/compose/ozonescripts/stop.sh
+++ b/hadoop-hdds/container-service/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker
@@ -1,4 +1,3 @@
-#!/usr/bin/env bash
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -14,4 +13,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-docker-compose exec scm /opt/hadoop/sbin/stop-ozone.sh
+mock-maker-inline
\ No newline at end of file
diff --git a/hadoop-hdds/docs/README.md b/hadoop-hdds/docs/README.md
index 8d5cdb7..c5c9167 100644
--- a/hadoop-hdds/docs/README.md
+++ b/hadoop-hdds/docs/README.md
@@ -14,7 +14,7 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-# Hadoop Ozone/HDDS docs
+# Apache Ozone/HDDS docs
 
 This subproject contains the inline documentation for Ozone/HDDS components.
 
diff --git a/hadoop-hdds/docs/config.yaml b/hadoop-hdds/docs/config.yaml
index d0c69c6..44af7c4 100644
--- a/hadoop-hdds/docs/config.yaml
+++ b/hadoop-hdds/docs/config.yaml
@@ -24,6 +24,8 @@ languages:
     languageName: 中文
     weight: 2
 title: "Ozone"
+params:
+  ghrepo: https://github.com/apache/ozone/
 theme: "ozonedoc"
 pygmentsCodeFences: true
 uglyurls: true
diff --git a/hadoop-hdds/docs/content/_index.md b/hadoop-hdds/docs/content/_index.md
index 7890f6f..be0b303 100644
--- a/hadoop-hdds/docs/content/_index.md
+++ b/hadoop-hdds/docs/content/_index.md
@@ -21,7 +21,7 @@ weight: -10
   limitations under the License.
 -->
 
-# Apache Hadoop Ozone
+# Apache Ozone
 
 {{<figure class="ozone-usage" src="/ozone-usage.png" width="60%">}}
 
diff --git a/hadoop-hdds/docs/content/_index.zh.md b/hadoop-hdds/docs/content/_index.zh.md
index 689490b..57011d1 100644
--- a/hadoop-hdds/docs/content/_index.zh.md
+++ b/hadoop-hdds/docs/content/_index.zh.md
@@ -20,7 +20,7 @@ weight: -10
   limitations under the License.
 -->
 
-# Apache Hadoop Ozone
+# Apache Ozone
 
 {{<figure src="/ozone-usage.png" width="60%">}}
 
@@ -29,7 +29,7 @@ Ozone 不仅能存储数十亿个不同大小的对象,还支持在容器化
 
 Apache Spark、Hive 和 YARN 等应用无需任何修改即可使用 Ozone。Ozone 提供了 [Java API]({{<
 ref "JavaApi.zh.md" >}})、[S3 接口]({{< ref "S3.zh.md" >}})和命令行接口,极大地方便了 Ozone
- 在不同应用场景下的的使用。
+ 在不同应用场景下的使用。
 
 Ozone 的管理由卷、桶和键组成:
 
diff --git a/hadoop-hdds/docs/content/concept/OzoneManager.zh.md b/hadoop-hdds/docs/content/concept/OzoneManager.zh.md
index 3fc7fbf..2767805 100644
--- a/hadoop-hdds/docs/content/concept/OzoneManager.zh.md
+++ b/hadoop-hdds/docs/content/concept/OzoneManager.zh.md
@@ -77,6 +77,8 @@ Ozone Manager 使用 Apache Ratis(一种 Raft 协议的开源实现)来复
 
 为了详细地了解 Ozone Manager ,本节针对它所提供的网络服务和持久化状态提供一个快速概述。
 
+### Ozone Manager 提供的网络服务
+
 Ozone 为客户端和管理命令提供网络服务,主要的服务如下:
 
  * 键、桶、卷 / 增删改查
@@ -93,7 +95,7 @@ Ozone 为客户端和管理命令提供网络服务,主要的服务如下:
    * ServiceList(用于服务发现)
    * DBUpdates(用于 [Recon]({{< ref path="feature/Recon.md" lang="en" >}}) 下载快照)
  
- **持久化状态**
+### 持久化状态
 
 以下数据将保存在 Ozone Manager 端的指定 RocksDB 目录中:
 
diff --git a/hadoop-hdds/docs/content/concept/Recon.zh.md b/hadoop-hdds/docs/content/concept/Recon.zh.md
new file mode 100644
index 0000000..5c67351
--- /dev/null
+++ b/hadoop-hdds/docs/content/concept/Recon.zh.md
@@ -0,0 +1,116 @@
+---
+title: "Recon"
+date: "2020-10-27"
+weight: 8
+menu: 
+  main:
+     parent: 概念
+summary: Recon 作为 Ozone 的管理和监视控制台。
+---
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+Recon 充当 Ozone 的管理和监视控制台。它提供了 Ozone 的鸟瞰图,并通过基于 REST 的 API 和丰富的网页用户界面(Web UI)展示了集群的当前状态,从而帮助用户解决任何问题。
+
+
+## 高层次设计
+
+{{<figure src="/concept/ReconHighLevelDesign.png" width="800px">}}
+
+<br/>
+
+在较高的层次上,Recon 收集和汇总来自 Ozone Manager(OM)、Storage Container Manager(SCM)和数据节点(DN)的元数据,并充当中央管理和监视控制台。Ozone 管理员可以使用 Recon 查询系统的当前状态,而不会使 OM  或 SCM 过载。
+
+Recon 维护多个数据库,以支持批处理,更快的查询和持久化聚合信息。它维护 OM DB 和 SCM DB 的本地副本,以及用于持久存储聚合信息的 SQL 数据库。
+
+Recon 还与 Prometheus 集成,提供一个 HTTP 端点来查询 Prometheus 的 Ozone 指标,并在网页用户界面(Web UI)中显示一些关键时间点的指标。
+
+## Recon 和 Ozone Manager
+
+{{<figure src="/concept/ReconOmDesign.png" width="800px">}}
+
+<br/>
+
+Recon 最初从领导者 OM 的 HTTP 端点获取 OM rocks DB 的完整快照,解压缩文件并初始化 RocksDB 以进行本地查询。通过对最后一个应用的序列 ID 的 RPC 调用,定期请求领导者 OM 进行增量更新,从而使数据库保持同步。如果由于某种原因而无法检索增量更新或将其应用于本地数据库,则会再次请求一个完整快照以使本地数据库与 OM DB 保持同步。因此,Recon 可能会显示陈旧的信息,因为本地数据库不会总是同步的。
+
+## Recon 和 Storage Container Manager
+
+{{<figure src="/concept/ReconScmDesign.png" width="800px">}}
+
+<br/>
+
+Recon 还充当数据节点的被动 SCM。在集群中配置 Recon 时,所有数据节点都向 Recon 注册,并像 SCM 一样向 Recon 发送心跳、容器报告、增量容器报告等。Recon 使用它从数据节点得到的所有信息在本地构建自己的 SCM rocks DB 副本。Recon 从不向数据节点发送任何命令作为响应,而只是充当被动 SCM 以更快地查找 SCM 元数据。
+
+## <a name="task-framework"></a> 任务框架
+
+Recon 有其自己的任务框架,可对从 OM 和 SCM 获得的数据进行批处理。一个任务可以在 OM DB 或 SCM DB 上监听和操作数据库事件,如`PUT`、`DELETE`、`UPDATE`等。在此基础上,任务实现`org.apache.hadoop.ozone.recon.tasks.ReconOmTask`或者扩展`org.apache.hadoop.ozone.recon.scm.ReconScmTask`。
+
+`ReconOmTask`的一个示例是`ContainerKeyMapperTask`,它在 RocksDB 中持久化保留了容器 -> 键映射。当容器被报告丢失或处于不健康的运行状态时,这有助于了解哪些键是容器的一部分。另一个示例是`FileSizeCountTask`,它跟踪 SQL 数据库中给定文件大小范围内的文件计数。这些任务有两种情况的实现:
+ 
+ - 完整快照(reprocess())
+ - 增量更新(process())
+ 
+当从领导者 OM 获得 OM DB 的完整快照时,将对所有注册的 OM 任务调用 reprocess()。在随后的增量更新中,将在这些 OM 任务上调用 process()。
+
+`ReconScmTask`的示例是`ContainerHealthTask`,它以可配置的时间间隔运行,扫描所有容器的列表,并将不健康容器的状态(`MISSING`、`MIS_REPLICATED`、`UNDER_REPLICATED`、`OVER_REPLICATED`)持久化保留在 SQL 表中。此信息用于确定集群中是否有丢失的容器。
+
+## Recon 和 Prometheus
+
+Recon 可以与任何配置为收集指标的 Prometheus 实例集成,并且可以在数据节点和 Pipelines 页面的 Recon UI 中显示有用的信息。Recon 还公开了一个代理端点 ([/指标]({{< ref path="interface/ReconApi.zh.md#metrics" >}})) 来查询 Prometheus。可以通过将此配置`ozone.recon.prometheus.http.endpoint`设置为 Prometheus 端点如`ozone.recon.prometheus.http.endpoint=localhost:9090`来启用此集成。
+
+## API 参考
+
+[链接到完整的 API 参考]({{< ref path="interface/ReconApi.zh.md" >}})
+   
+## 持久化状态
+
+ * [OM database]({{< ref "concept/OzoneManager.zh.md#持久化状态" >}})的本地副本
+ * [SCM database]({{< ref "concept/StorageContainerManager.zh.md#持久化状态" >}})的本地副本
+ * 以下数据在 Recon 中持久化在指定的 RocksDB 目录下: 
+     * ContainerKey 表
+         * 存储映射(容器,键) -> 计数
+     * ContainerKeyCount 表
+         * 存储容器 ID  -> 容器内的键数
+ * 以下数据存储在已配置的 SQL 数据库中(默认为 Derby ):
+     * GlobalStats 表
+         * 一个键 -> Value table 用于存储集群中出现的卷/桶/键的总数等聚合信息
+     * FileCountBySize 表
+         * 跟踪集群中文件大小范围内的文件数量
+     * ReconTaskStatus 表
+         * 跟踪在[Recon 任务框架](#task-framework)中已注册的 OM 和 SCM DB 任务的状态和最后运行时间戳
+     * ContainerHistory 表
+         * 存储容器副本 -> 具有最新已知时间戳记的数据节点映射。当一个容器被报告丢失时,它被用来确定最后已知的数据节点。
+     * UnhealthyContainers 表
+         * 随时跟踪集群中所有不健康的容器(MISSING、UNDER_REPLICATED、OVER_REPLICATED、MIS_REPLICATED)
+
+
+## 需要关注的配置项
+
+配置项 |默认值 | <div style="width:300px;">描述</div>
+----|---------|------------
+ozone.recon.http-address | 0.0.0.0:9888 | Recon web UI 监听的地址和基本端口。
+ozone.recon.address | 0.0.0.0:9891 | Recon 的 RPC 地址。
+ozone.recon.db.dir | none | Recon Server 存储其元数据的目录。
+ozone.recon.om.db.dir | none | Recon Server 存储其 OM 快照 DB 的目录。
+ozone.recon.om.snapshot<br>.task.interval.delay | 10m | Recon 以分钟间隔请求 OM DB 快照。
+ozone.recon.task<br>.missingcontainer.interval | 300s | 定期检查集群中不健康容器的时间间隔。
+ozone.recon.sql.db.jooq.dialect | DERBY | 请参考 [SQL 方言](https://www.jooq.org/javadoc/latest/org.jooq/org/jooq/SQLDialect.html) 来指定不同的方言。
+ozone.recon.sql.db.jdbc.url | jdbc:derby:${ozone.recon.db.dir}<br>/ozone_recon_derby.db | Recon SQL database 的 jdbc url。
+ozone.recon.sql.db.username | none | Recon SQL数据库的用户名。
+ozone.recon.sql.db.password | none | Recon SQL数据库的密码。
+ozone.recon.sql.db.driver | org.apache.derby.jdbc<br>.EmbeddedDriver | Recon SQL数据库的 jdbc driver。
+
diff --git a/hadoop-hdds/docs/content/concept/StorageContainerManager.zh.md b/hadoop-hdds/docs/content/concept/StorageContainerManager.zh.md
index 1c63f1b..7adecde 100644
--- a/hadoop-hdds/docs/content/concept/StorageContainerManager.zh.md
+++ b/hadoop-hdds/docs/content/concept/StorageContainerManager.zh.md
@@ -44,7 +44,7 @@ SCM 负责创建 Ozone 集群。当通过 `init` 命令启动 SCM 时,SCM 将
 
 针对 Storage Container Manager 的详细视图,本节提供有关网络服务和持久化数据的快速概述。
 
-**Storage Container Manager 提供的网络服务:**
+### Storage Container Manager 提供的网络服务:
 
  * 管道: 列出/删除/激活/停用
     * 管道是形成一组复制组的数据节点
@@ -62,7 +62,7 @@ SCM 负责创建 Ozone 集群。当通过 `init` 命令启动 SCM 时,SCM 将
    
  注意:客户端不能直接连接 SCM 。
  
-**持久化状态**
+### 持久化状态
  
  以下数据持久化在 Storage Container Manager 端的指定 RocksDB 目录中
  
@@ -83,4 +83,4 @@ ozone.scm.container.size | 5GB | Ozone 使用的默认容器的大小
 ozone.scm.block.size | 256MB |  数据块的默认大小
 hdds.scm.safemode.min.datanode | 1 | 能够启动实际工作所需的最小数据节点数
 ozone.scm.http-address | 0.0.0.0:9876 | SCM 服务端使用的 HTTP 地址
-ozone.metadata.dirs | none | 存储持久化数据的目录(RocksDB)
\ No newline at end of file
+ozone.metadata.dirs | none | 存储持久化数据的目录(RocksDB)
diff --git a/hadoop-hdds/docs/content/design/decommissioning.md b/hadoop-hdds/docs/content/design/decommissioning.md
index 6c8e08e..e4abdf6 100644
--- a/hadoop-hdds/docs/content/design/decommissioning.md
+++ b/hadoop-hdds/docs/content/design/decommissioning.md
@@ -36,7 +36,7 @@ Goals:
  * The progress of the decommissioning should be trackable
  * The nodes under decommissioning / maintenance mode should not been used for new pipelines / containers
  * The state of the datanodes should be persisted / replicated by the SCM (in HDFS the decommissioning info exclude/include lists are replicated manually by the admin). If datanode is marked for decommissioning this state be available after SCM and/or Datanode restarts.
- * We need to support validations before decommissioing (but the violations can be ignored by the admin).
+ * We need to support validations before decommissioning (but the violations can be ignored by the admin).
  * The administrator should be notified when a node can be turned off.
  * The maintenance mode can be time constrained: if the node marked for maintenance for 1 week and the node is not up after one week, the containers should be considered as lost (DEAD node) and should be replicated.
 
@@ -128,7 +128,7 @@ One other difference with maintenance mode and decommissioning, is that you must
   STALE                | Some heartbeats were missing for an already missing nodes.
   DEAD                 | The stale node has not been recovered.
   ENTERING_MAINTENANCE | The in-progress state, scheduling is disabled but the node can't not been turned off due to in-progress replication.
-  IN_MAINTENANCE       | Node can be turned off but we expecteed to get it back and have all the replicas.
+  IN_MAINTENANCE       | Node can be turned off but we expected to get it back and have all the replicas.
   DECOMMISSIONING      | The in-progress state, scheduling is disabled, all the containers should be replicated to other nodes.
   DECOMMISSIONED       | The node can be turned off, all the containers are replicated to other machine
 
@@ -148,7 +148,7 @@ The Algorithm is pretty simple from the Decommission or Maintenance point of vie
 
    * Container is closed.
    * We have at least one HEALTHY copy at all times.
-   * For entering DECOMMISSIONED mode `maintenance + healthy` must equal to `expectedeCount`
+   * For entering DECOMMISSIONED mode `maintenance + healthy` must equal to `expectedCount`
 
  5. We will update the node state to DECOMMISSIONED or IN_MAINTENANCE reached state.
 
@@ -186,7 +186,7 @@ Replica count = expectedCount - currentCount
 
 In case the _Replica count_ is positive, it means that we need to make more replicas. If the number is negative, it means that we are over replicated and we need to remove some replicas of this container. If the Replica count for a container is zero; it means that we have the expected number of containers in the cluster.
 
-To support idempontent placement strategies we should substract the in-fligt replications from the result: If there are one in-flight replication process and two replicas we won't start a new replication command unless the original command is timed out. The timeout is configured with `hdds.scm.replication.event.timeout` and the default value is 10 minutes.
+To support idempotent placement strategies we should subtract the in-flight replications from the result: If there are one in-flight replication process and two replicas we won't start a new replication command unless the original command is timed out. The timeout is configured with `hdds.scm.replication.event.timeout` and the default value is 10 minutes.
 
 More preciously the current algorithm is the following:
 
@@ -249,7 +249,7 @@ The following conditions should be true for all the containers and all the conta
 **From DECOMMISSIONING to DECOMMISSIONED**:
 
  * There are at least one healthy replica
- * We have three replicas (both helthy and maintenance)
+ * We have three replicas (both healthy and maintenance)
 
 Which means that our stop condition can be formalized as:
 
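The replica accounting described in the decommissioning design above (expected count minus current count, further reduced by the in-flight replications) can be restated as a tiny sketch. Everything below is illustrative only and not part of this patch; class, method and variable names are hypothetical, and the counts are assumed to be already known.

```java
// Minimal sketch of the replica delta from the decommissioning design notes:
// positive -> schedule more copies, negative -> remove copies, zero -> nothing to do.
public final class ReplicaDeltaSketch {

  // Subtracting in-flight replications keeps placement idempotent: an already
  // scheduled copy is not scheduled again until it times out
  // (hdds.scm.replication.event.timeout, 10 minutes by default).
  static int replicaDelta(int expectedCount, int currentCount, int inFlight) {
    return expectedCount - currentCount - inFlight;
  }

  public static void main(String[] args) {
    // 3 expected replicas, 1 present, 1 replication already in flight -> 1 more needed.
    System.out.println(replicaDelta(3, 1, 1));
  }
}
```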
diff --git a/hadoop-hdds/docs/content/feature/Quota.md b/hadoop-hdds/docs/content/feature/Quota.md
index 933bbb5..ecab238 100644
--- a/hadoop-hdds/docs/content/feature/Quota.md
+++ b/hadoop-hdds/docs/content/feature/Quota.md
@@ -32,11 +32,32 @@ So far, we know that Ozone allows users to create volumes, buckets, and keys. A
 1. Storage Space level quota
 
 Administrators should be able to define how much storage space a Volume or Bucket can use. The following Settings for Storage space quota are currently supported:
+
 a. By default, the quota for volume and bucket is not enabled.
-b. When volume quota is enabled, the total size of bucket quota cannot exceed volume.
+
+b. When volume quota is enabled, the total quota of the buckets cannot exceed the volume quota.
+
 c. Bucket quota can be set separately without enabling Volume quota. The size of bucket quota is unrestricted at this point.
+
 d. Volume quota is not currently supported separately, and volume quota takes effect only if bucket quota is set. Because ozone only check the usedBytes of the bucket when we write the key.
 
+e. If the cluster was upgraded from a version older than 1.1.0, using quota on pre-existing volumes and buckets is not recommended (check the volume or bucket info: a quota value of -2 means the volume or bucket is old). Because old keys are not counted in the bucket's usedBytes, the quota accounting would be inaccurate in that case.
+
+f. If the volume quota is enabled, the bucket quota cannot be cleared.
+
+2. Namespace quota
+
+Administrators should be able to define how much namespace (that is, how many names) a Volume or Bucket can use. The following settings for namespace quota are supported:
+
+a. By default, the namespace quota for volume and bucket is not enabled (thus unlimited quota).
+
+b. When volume namespace quota is enabled, the total number of buckets under the volume cannot exceed the volume namespace quota.
+
+c. When bucket namespace quota is enabled, the total number of keys under the bucket cannot exceed the bucket namespace quota.
+
+d. Linked buckets do not consume namespace quota.
+
+e. If the cluster was upgraded from a version older than 1.1.0, using quota on pre-existing volumes and buckets is not recommended (check the volume or bucket info: a quota value of -2 means the volume or bucket is old). Because old keys are not counted in the bucket's namespace quota, the quota accounting would be inaccurate in that case.
 
 ## Client usage
 ### Storage Space level quota
@@ -66,9 +87,10 @@ This behavior changes the quota for Bucket1 to 10GB
 
 Total bucket quota should not be greater than its Volume quota. If we have a 10MB Volume, The sum of the sizes of all buckets under this volume cannot exceed 10MB, otherwise the bucket set quota fails.
 
-#### Clear the quota for Volume1. The Bucket cleanup command is similar.
+#### Clear the quota for volume and bucket
 ```shell
 bin/ozone sh volume clrquota --space-quota /volume1
+bin/ozone sh bucket clrquota --space-quota /volume1/bucket1
 ```
 
 #### Check quota and usedBytes for volume and bucket
@@ -76,4 +98,42 @@ bin/ozone sh volume clrquota --space-quota /volume1
 bin/ozone sh volume info /volume1
 bin/ozone sh bucket info /volume1/bucket1
 ```
-We can get the quota value and usedBytes in the info of volume and bucket.
\ No newline at end of file
+We can get the quota value and usedBytes in the info of volume and bucket.
+
+### Namespace quota
+Namespace quota is a number that represents how many unique names can be used. This number cannot be greater than Long.MAX_VALUE in Java.
+
+#### Volume Namespace quota
+```shell
+bin/ozone sh volume create --namespace-quota 100 /volume1
+```
+This means setting the namespace quota of Volume1 to 100.
+
+```shell
+bin/ozone sh volume setquota --namespace-quota 1000 /volume1
+```
+This behavior changes the namespace quota of Volume1 to 1000.
+
+#### Bucket Namespace quota
+```shell
+bin/ozone sh bucket create --namespace-quota 100 /volume1/bucket1
+```
+This means bucket1 is allowed to use at most 100 names (keys).
+
+```shell
+bin/ozone sh bucket setquota --namespace-quota 1000 /volume1/bucket1 
+```
+This behavior changes the namespace quota for Bucket1 to 1000.
+
+#### Clear the quota for volume and bucket
+```shell
+bin/ozone sh volume clrquota --namespace-quota /volume1
+bin/ozone sh bucket clrquota --namespace-quota /volume1/bucket1
+```
+
+#### Check quota and usedNamespace for volume and bucket
+```shell
+bin/ozone sh volume info /volume1
+bin/ozone sh bucket info /volume1/bucket1
+```
+We can get the quota value and usedNamespace in the info of volume and bucket.
\ No newline at end of file
diff --git a/hadoop-hdds/docs/content/feature/Quota.zh.md b/hadoop-hdds/docs/content/feature/Quota.zh.md
index b3f0c3c..eb6e084 100644
--- a/hadoop-hdds/docs/content/feature/Quota.zh.md
+++ b/hadoop-hdds/docs/content/feature/Quota.zh.md
@@ -30,10 +30,32 @@ menu:
 1. Storage space级别配额
 
  管理员应该能够定义一个Volume或Bucket可以使用多少存储空间。目前支持以下storage space quota的设置:
+ 
  a. 默认情况下volume和bucket的quota不启用。
+ 
  b. 当volume quota启用时,bucket quota的总大小不能超过volume。
+ 
  c. 可以在不启用volume quota的情况下单独给bucket设置quota。此时bucket quota的大小是不受限制的。
+ 
  d. 目前不支持单独设置volume quota,只有在设置了bucket quota的情况下volume quota才会生效。因为ozone在写入key时只检查bucket的usedBytes。
+ 
+ e. 如果集群从小于1.1.0的旧版本升级而来,则不建议在旧volume和bucket(可以通过查看volume或者bucket的info确认,如果quota值是-2,那么这个volume或者bucket就是旧的)上使用配额。由于旧的key没有计算到bucket的usedBytes中,所以此时配额设置是不准确的。
+ 
+ f. 如果volume quota被启用,那么bucket quota将不能被清除。
+
+2. 命名空间配额
+
+ 管理员应当能够定义一个Volume或Bucket可以使用多少命名空间。目前支持命名空间的配额设置为:
+
+ a. 默认情况下volume和bucket的命名空间配额不启用(即无限配额)。
+
+ b. 当volume命名空间配额启用时,该volume的bucket数目不能超过此配额。
+
+ c. 当bucket的命名空间配额启用时,该bucket的key数目不能超过此配额。
+
+ d. Linked bucket不消耗命名空间配额。
+
+ e. 如果集群从小于1.1.0的旧版本升级而来,则不建议在旧volume和bucket(可以通过查看volume或者bucket的info确认,如果quota值是-2,那么这个volume或者bucket就是旧的)上使用配额。由于旧的key没有计算到bucket的命名空间配额中,所以此时配额设置是不准确的。
 
 ## 客户端用法
 ### Storage space级别配额
@@ -62,9 +84,10 @@ bin/ozone sh bucket setquota  --space-quota 10GB /volume1/bucket1
 
 bucket的总配额 不应大于其Volume的配额。让我们看一个例子,如果我们有一个10MB的Volume,该volume下所有bucket的大小之和不能超过10MB,否则设置bucket quota将失败。
 
-#### 清除Volume1的配额, Bucket清除命令与此类似
+#### 清除volume和bucket的配额
 ```shell
 bin/ozone sh volume clrquota --space-quota /volume1
+bin/ozone sh bucket clrquota --space-quota /volume1/bucket1
 ```
 #### 查看volume和bucket的quota值以及usedBytes
 ```shell
@@ -72,3 +95,40 @@ bin/ozone sh volume info /volume1
 bin/ozone sh bucket info /volume1/bucket1
 ```
 我们能够在volume和bucket的info中查看quota及usedBytes的值
+
+### Namespace quota
+命名空间配额是一个数字,其代表可以使用多少个不同的名字。这个数字不能超过Java long数据类型的最大值。
+
+#### Volume Namespace quota
+```shell
+bin/ozone sh volume create --namespace-quota 100 /volume1
+```
+这意味着将volume1的命名空间配额设置为100。
+
+```shell
+bin/ozone sh volume setquota --namespace-quota 1000 /volume1
+```
+此行为将volume1的命名空间配额更改为1000。
+
+#### Bucket Namespace quota
+```shell
+bin/ozone sh bucket create --namespace-quota 100 /volume1/bucket1
+```
+这意味着bucket1允许我们使用100的命名空间。
+
+```shell
+bin/ozone sh bucket setquota --namespace-quota 1000 /volume1/bucket1 
+```
+该行为将bucket1的命名空间配额更改为1000。
+
+#### 清除volume和bucket的配额
+```shell
+bin/ozone sh volume clrquota --namespace-quota /volume1
+bin/ozone sh bucket clrquota --namespace-quota /volume1/bucket1
+```
+#### 查看volume和bucket的quota值以及usedNamespace
+```shell
+bin/ozone sh volume info /volume1
+bin/ozone sh bucket info /volume1/bucket1
+```
+我们能够在volume和bucket的info中查看quota及usedNamespace的值
diff --git a/hadoop-hdds/docs/content/feature/Recon.zh.md b/hadoop-hdds/docs/content/feature/Recon.zh.md
index 5a41620..b7d04a7 100644
--- a/hadoop-hdds/docs/content/feature/Recon.zh.md
+++ b/hadoop-hdds/docs/content/feature/Recon.zh.md
@@ -1,5 +1,5 @@
 ---
-title: "Recon"
+title: "Recon 服务器"
 weight: 7
 menu:
    main:
@@ -23,27 +23,10 @@ summary: Recon 是 Ozone 中用于分析服务的网页用户界面(Web UI)
   limitations under the License.
 -->
 
-Recon 是 Ozone 中用于分析服务的网页用户界面(Web UI)。它是一个可选组件,但强烈建议您使用,因为它可以增加可视性。
+Recon 作为 Ozone 的管理和监视控制台。它是一个可选组件,但强烈建议将其添加到集群中,因为 Recon 可以在关键时刻帮助您对集群进行故障排除。请参阅 [Recon 架构]({{< ref "concept/Recon.zh.md" >}}) 以获得详细的架构概述和 [Recon API]({{< ref path="interface/ReconApi.zh.md" >}}) 文档,以获得 HTTP API 参考。
 
-Recon 从 Ozone 集群中**收集**所有数据,并将其存储在 SQL数据库中,以便进一步分析。
-
- 1. Ozone Manager 的数据是通过异步过程在后台下载的。OM 会定期创建 RocksDB 快照,并将增量数据复制到 Recon 进行处理。
-
- 2. 数据节点不仅可以将心跳发送到 SCM,也能发送到 Recon。Recon 可以成为心跳的唯读(Read-only)监听器,并根据收到的信息更新本地数据库。
-
-当 Recon 配置完成时,我们便可以启动服务。
+Recon 是一个自带 HTTP 网页服务器的服务,可以通过以下命令启动。
 
 {{< highlight bash >}}
 ozone --daemon start recon
 {{< /highlight >}}
-
-## 需要关注的配置项
-
-配置项 | 默认值 | 描述
--------|--------|-----
-ozone.recon.http-address | 0.0.0.0:9888 | Recon web UI 监听的地址和基本端口。
-ozone.recon.address | 0.0.0.0:9891 | Recon 的 RPC 地址。
-ozone.recon.db.dir | none | Recon Server 存储其元数据的目录。
-ozone.recon.om.db.dir | none | Recon Server 存储其 OM 快照 DB 的目录。
-ozone.recon.om.snapshot.task.interval.delay | 10m | Recon 以分钟间隔请求 OM DB 快照。
-
diff --git a/hadoop-hdds/docs/content/interface/ReconApi.zh.md b/hadoop-hdds/docs/content/interface/ReconApi.zh.md
new file mode 100644
index 0000000..a134fd4
--- /dev/null
+++ b/hadoop-hdds/docs/content/interface/ReconApi.zh.md
@@ -0,0 +1,502 @@
+---
+title: Recon API
+weight: 4
+menu:
+   main:
+      parent: "编程接口"
+summary: Recon 服务器支持 HTTP 端点,以帮助故障排除和监视 Ozone 集群。
+---
+
+<!---
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+Recon API v1 是一组 HTTP 端点,可以帮助您了解 Ozone 集群的当前状态,并在需要时进行故障排除。
+
+### HTTP 端点
+
+#### 容器
+
+* **/containers**
+
+    **URL 结构**
+    ```
+    GET /api/v1/containers
+    ```
+
+    **参数**
+
+    * prevKey (可选)
+    
+        只回传ID大于给定的 prevKey 的容器。
+        示例:prevKey=1
+
+    * limit (可选)
+    
+        只回传有限数量的结果。默认限制是1000。
+    
+    **回传**
+    
+    回传所有 ContainerMetadata 对象。
+    
+    ```json
+    {
+      "data": {
+        "totalCount": 3,
+        "containers": [
+          {
+            "ContainerID": 1,
+            "NumberOfKeys": 834
+          },
+          {
+            "ContainerID": 2,
+            "NumberOfKeys": 833
+          },
+          {
+            "ContainerID": 3,
+            "NumberOfKeys": 833
+          }
+        ]
+      }
+    }
+    ```
+
+* **/containers/:id/keys**
+
+    **URL 结构**
+    ```
+    GET /api/v1/containers/:id/keys
+    ```
+    
+    **参数**
+    
+    * prevKey (可选)
+     
+        只回传在给定的 prevKey 键前缀之后的键。
+        示例:prevKey=/vol1/bucket1/key1
+        
+    * limit (可选)
+    
+        只回传有限数量的结果。默认限制是1000。
+        
+    **回传**
+    
+    回传给定容器 ID 的所有 KeyMetadata 对象。
+    
+    ```json
+    {
+      "totalCount":7,
+      "keys": [
+        {
+          "Volume":"vol-1-73141",
+          "Bucket":"bucket-3-35816",
+          "Key":"key-0-43637",
+          "DataSize":1000,
+          "Versions":[0],
+          "Blocks": {
+            "0": [
+              {
+                "containerID":1,
+                "localID":105232659753992201
+              }
+            ]
+          },
+          "CreationTime":"2020-11-18T18:09:17.722Z",
+          "ModificationTime":"2020-11-18T18:09:30.405Z"
+        },
+        ...
+      ]
+    }
+    ```
+* **/containers/missing**
+    
+    **URL 结构**
+    ```
+    GET /api/v1/containers/missing
+    ```
+    
+    **参数**
+    
+    没有参数。
+    
+    **回传**
+    
+    回传所有丢失容器的 MissingContainerMetadata 对象。
+    
+    ```json
+    {
+    	"totalCount": 26,
+    	"containers": [{
+    		"containerID": 1,
+    		"missingSince": 1605731029145,
+    		"keys": 7,
+    		"pipelineID": "88646d32-a1aa-4e1a",
+    		"replicas": [{
+    			"containerId": 1,
+    			"datanodeHost": "localhost-1",
+    			"firstReportTimestamp": 1605724047057,
+    			"lastReportTimestamp": 1605731201301
+    		}, 
+            ...
+            ]
+    	},
+        ...
+        ]
+    }
+    ```
+* **/containers/:id/replicaHistory**
+
+    **URL 结构**
+    ```
+    GET /api/v1/containers/:id/replicaHistory
+    ```
+    
+    **参数**
+    
+    没有参数。
+    
+    **回传**
+
+    回传给定容器 ID 的所有 ContainerHistory 对象。
+    
+    ```json
+    [
+      {
+        "containerId": 1,
+        "datanodeHost": "localhost-1",
+        "firstReportTimestamp": 1605724047057,
+        "lastReportTimestamp": 1605730421294
+      },
+      ...
+    ]
+    ```
+* **/containers/unhealthy**
+
+    **URL 结构**
+     ```
+     GET /api/v1/containers/unhealthy
+     ```
+     
+    **参数**
+    
+    * batchNum (可选)
+
+        回传结果的批号(如“页码”)。
+        传递1,将回传记录1到limit。传递2,将回传limit + 1到2 * limit,依此类推。
+        
+    * limit (可选)
+    
+        只回传有限数量的结果。默认限制是1000。
+        
+    **回传**
+    
+    回传所有不健康容器的 UnhealthyContainerMetadata 对象。
+    
+     ```json
+     {
+     	"missingCount": 2,
+     	"underReplicatedCount": 0,
+     	"overReplicatedCount": 0,
+     	"misReplicatedCount": 0,
+     	"containers": [{
+     		"containerID": 1,
+     		"containerState": "MISSING",
+     		"unhealthySince": 1605731029145,
+     		"expectedReplicaCount": 3,
+     		"actualReplicaCount": 0,
+     		"replicaDeltaCount": 3,
+     		"reason": null,
+     		"keys": 7,
+     		"pipelineID": "88646d32-a1aa-4e1a",
+     		"replicas": [{
+     			"containerId": 1,
+     			"datanodeHost": "localhost-1",
+     			"firstReportTimestamp": 1605722960125,
+     			"lastReportTimestamp": 1605731230509
+     		}, 
+            ...
+            ]
+     	},
+        ...
+        ]
+     } 
+     ```
+     
+* **/containers/unhealthy/:state**
+
+    **URL 结构**
+    ```
+    GET /api/v1/containers/unhealthy/:state
+    ```
+     
+    **参数**
+    
+    * batchNum (可选)
+    
+        回传结果的批号(如“页码”)。
+        传递1,将回传记录1到limit。传递2,将回传limit + 1到2 * limit,依此类推。
+        
+    * limit (可选)
+    
+        只回传有限数量的结果。默认限制是1000。
+        
+    **回传**
+    
+    回传处于给定状态的容器的 UnhealthyContainerMetadata 对象。
+    不健康的容器状态可能为`MISSING`, `MIS_REPLICATED`, `UNDER_REPLICATED`, `OVER_REPLICATED`。
+    响应结构与`/containers/unhealthy`相同。
+    
+#### 集群状态
+
+* **/clusterState**
+
+    **URL 结构**
+    ```
+    GET /api/v1/clusterState
+    ```
+     
+    **参数**
+    
+    没有参数。
+    
+    **回传**
+    
+    返回 Ozone 集群当前状态的摘要。
+    
+     ```json
+     {
+     	"pipelines": 5,
+     	"totalDatanodes": 4,
+     	"healthyDatanodes": 4,
+     	"storageReport": {
+     		"capacity": 1081719668736,
+     		"used": 1309212672,
+     		"remaining": 597361258496
+     	},
+     	"containers": 26,
+     	"volumes": 6,
+     	"buckets": 26,
+     	"keys": 25
+     }
+     ```
+     
+#### 数据节点
+
+* **/datanodes**
+
+    **URL 结构**
+    ```
+    GET /api/v1/datanodes
+    ```
+    
+    **参数**
+    
+    没有参数。
+    
+    **回传**
+    
+    回传集群中的所有数据节点。
+    
+    ```json
+    {
+     	"totalCount": 4,
+     	"datanodes": [{
+     		"uuid": "f8f8cb45-3ab2-4123",
+     		"hostname": "localhost-1",
+     		"state": "HEALTHY",
+     		"lastHeartbeat": 1605738400544,
+     		"storageReport": {
+     			"capacity": 270429917184,
+     			"used": 358805504,
+     			"remaining": 119648149504
+     		},
+     		"pipelines": [{
+     			"pipelineID": "b9415b20-b9bd-4225",
+     			"replicationType": "RATIS",
+     			"replicationFactor": 3,
+     			"leaderNode": "localhost-2"
+     		}, {
+     			"pipelineID": "3bf4a9e9-69cc-4d20",
+     			"replicationType": "RATIS",
+     			"replicationFactor": 1,
+     			"leaderNode": "localhost-1"
+     		}],
+     		"containers": 17,
+     		"leaderCount": 1
+     	},
+        ...
+        ]
+     }
+     ```
+     
+#### 管道
+
+* **/pipelines**
+
+    **URL 结构**
+    ```
+    GET /api/v1/pipelines
+    ```
+    **参数**
+    
+    没有参数
+    
+    **回传**
+    
+    回传在集群中的所有管道。
+    
+    ```json
+     {
+     	"totalCount": 5,
+     	"pipelines": [{
+     		"pipelineId": "b9415b20-b9bd-4225",
+     		"status": "OPEN",
+     		"leaderNode": "localhost-1",
+     		"datanodes": ["localhost-1", "localhost-2", "localhost-3"],
+     		"lastLeaderElection": 0,
+     		"duration": 23166128,
+     		"leaderElections": 0,
+     		"replicationType": "RATIS",
+     		"replicationFactor": 3,
+     		"containers": 0
+     	},
+        ...
+        ]
+     }
+     ```  
+
+#### 任务
+
+* **/task/status**
+
+    **URL 结构**
+    ```
+    GET /api/v1/task/status
+    ```
+    
+    **参数**
+    
+    没有参数
+    
+    **回传**
+    
+    回传所有 Recon 任务的状态。
+  
+    ```json
+     [
+       {
+     	"taskName": "OmDeltaRequest",
+     	"lastUpdatedTimestamp": 1605724099147,
+     	"lastUpdatedSeqNumber": 186
+       },
+       ...
+     ]
+    ```
+    
+#### 使用率
+
+* **/utilization/fileCount**
+
+    **URL 结构**
+    ```
+    GET /api/v1/utilization/fileCount
+    ```
+    
+    **参数**
+    
+    * volume (可选)
+    
+        根据给定的卷名过滤结果。
+        
+    * bucket (可选)
+    
+        根据给定的桶名过滤结果。
+        
+    * fileSize (可选)
+
+        根据给定的文件大小筛选结果。
+        
+    **回传**
+    
+    回传不同文件范围内的文件计数,其中响应对象中的`fileSize`是文件大小范围的上限。
+    
+    ```json
+     [{
+     	"volume": "vol-2-04168",
+     	"bucket": "bucket-0-11685",
+     	"fileSize": 1024,
+     	"count": 1
+     }, {
+     	"volume": "vol-2-04168",
+     	"bucket": "bucket-1-41795",
+     	"fileSize": 1024,
+     	"count": 1
+     }, {
+     	"volume": "vol-2-04168",
+     	"bucket": "bucket-2-93377",
+     	"fileSize": 1024,
+     	"count": 1
+     }, {
+     	"volume": "vol-2-04168",
+     	"bucket": "bucket-3-50336",
+     	"fileSize": 1024,
+     	"count": 2
+     }]
+    ```
+    
+#### <a name="metrics"></a> 指标
+
+* **/metrics/:api**
+
+    **URL 结构**
+    ```
+    GET /api/v1/metrics/:api
+    ```
+    
+    **参数**
+
+    请参阅 [Prometheus HTTP API 参考](https://prometheus.io/docs/prometheus/latest/querying/api/) 以获取完整的查询文档。
+
+    **回传**
+
+    这是 Prometheus 的代理端点,并回传与 Prometheus 端点相同的响应。
+    示例:/api/v1/metrics/query?query=ratis_leader_election_electionCount
+    
+     ```json
+     {
+       "status": "success",
+       "data": {
+         "resultType": "vector",
+         "result": [
+           {
+             "metric": {
+               "__name__": "ratis_leader_election_electionCount",
+               "exported_instance": "33a5ac1d-8c65-4c74-a0b8-9314dfcccb42",
+               "group": "group-03CA9397D54B",
+               "instance": "ozone_datanode_1:9882",
+               "job": "ozone"
+             },
+             "value": [
+               1599159384.455,
+               "5"
+             ]
+           }
+         ]
+       }
+     }
+     ```
+
+
diff --git a/hadoop-hdds/docs/content/interface/S3.md b/hadoop-hdds/docs/content/interface/S3.md
index 3404cb8..6511642 100644
--- a/hadoop-hdds/docs/content/interface/S3.md
+++ b/hadoop-hdds/docs/content/interface/S3.md
@@ -120,10 +120,10 @@ Ozone has one more element in the name-space hierarchy compared to S3: the volum
 To make any other buckets available with the S3 interface a "symbolic linked" bucket can be created:
 
 ```bash
-ozone sh create volume /s3v
-ozone sh create volume /vol1
+ozone sh volume create /s3v
+ozone sh volume create /vol1
 
-ozone sh create bucket /vol1/bucket1
+ozone sh bucket create /vol1/bucket1
 ozone sh bucket link /vol1/bucket1 /s3v/common-bucket
 ```
 
diff --git a/hadoop-hdds/docs/content/recipe/Prometheus.md b/hadoop-hdds/docs/content/recipe/Prometheus.md
index f63b46e..9c852e0 100644
--- a/hadoop-hdds/docs/content/recipe/Prometheus.md
+++ b/hadoop-hdds/docs/content/recipe/Prometheus.md
@@ -46,9 +46,9 @@ _Note_: for Docker compose based pseudo cluster put the \
 
 * Restart the Ozone Manager and Storage Container Manager and check the prometheus endpoints:
 
- * http://scm:9874/prom
+ * http://scm:9876/prom
 
- * http://ozoneManager:9876/prom
+ * http://ozoneManager:9874/prom
 
 * Create a prometheus.yaml configuration with the previous endpoints:
 
@@ -93,4 +93,4 @@ The ozone distribution contains a ready-to-use, dockerized environment to try ou
 cd compose/ozone
 export COMPOSE_FILE=docker-compose.yaml:monitoring.yaml
 docker-compose up -d
-```
\ No newline at end of file
+```
diff --git a/hadoop-hdds/docs/content/recipe/Prometheus.zh.md b/hadoop-hdds/docs/content/recipe/Prometheus.zh.md
index 069b340..bb64edc 100644
--- a/hadoop-hdds/docs/content/recipe/Prometheus.zh.md
+++ b/hadoop-hdds/docs/content/recipe/Prometheus.zh.md
@@ -44,9 +44,9 @@ _注意_: 对于基于 docker-compose 方式的伪集群,在 `docker-config` 
 
 * 重启 OM 和 SCM,检查端点:
 
- * http://scm:9874/prom
+ * http://scm:9876/prom
 
- * http://ozoneManager:9876/prom
+ * http://ozoneManager:9874/prom
 
 * 根据这两个端点,创建 prometheus.yaml 配置文件:
 
@@ -91,4 +91,4 @@ Ozone 发行包中包含了一个即开即用的容器化环境来试用 Ozone 
 cd compose/ozone
 export COMPOSE_FILE=docker-compose.yaml:monitoring.yaml
 docker-compose up -d
-```
\ No newline at end of file
+```
diff --git a/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md b/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md
index b08536e..62916a9 100644
--- a/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md
+++ b/hadoop-hdds/docs/content/security/SecuringOzoneHTTP.md
@@ -128,17 +128,17 @@ change ozone.recon.http.auth.simple.anonymous_allowed to true.
 ### Enable SIMPLE authentication for SCM HTTP
 Property| Value
 -----------------------------------|-----------------------------------------
-ozone.scm.http.auth.type | simple
-ozone.scm.http.auth.simple.anonymous_allowed | false
+hdds.scm.http.auth.type | simple
+hdds.scm.http.auth.simple.anonymous_allowed | false
 
 If you don't want to specify the user.name in the query string parameter, 
-change ozone.scm.http.auth.simple.anonymous_allowed to true.
+change hdds.scm.http.auth.simple.anonymous_allowed to true.
 
 ### Enable SIMPLE authentication for DATANODE HTTP
 Property| Value
 -----------------------------------|-----------------------------------------
-ozone.datanode.http.auth.type | simple
-ozone.datanode.http.auth.simple.anonymous_allowed | false
+hdds.datanode.http.auth.type | simple
+hdds.datanode.http.auth.simple.anonymous_allowed | false
 
 If you don't want to specify the user.name in the query string parameter, 
-change ozone.datanode.http.auth.simple.anonymous_allowed to true.
+change hdds.datanode.http.auth.simple.anonymous_allowed to true.
diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.md
index 7daaf81..ee86a11 100644
--- a/hadoop-hdds/docs/content/security/SecurityWithRanger.md
+++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.md
@@ -27,8 +27,9 @@ icon: user
 
 
 Apache Ranger™ is a framework to enable, monitor and manage comprehensive data
-security across the Hadoop platform. Any version of Apache Ranger which is greater
-than 1.20 is aware of Ozone, and can manage an Ozone cluster.
+security across the Hadoop platform. Apache Ranger has supported Ozone authorization
+since version 2.0. However, due to some bugs in 2.0, Apache Ranger 
+2.1 and later versions are recommended.
 
 
 To use Apache Ranger, you must have Apache Ranger installed in your Hadoop
@@ -44,3 +45,19 @@ Property|Value
 --------|------------------------------------------------------------
 ozone.acl.enabled         | true
 ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer
+
+The Ranger permissions corresponding to the Ozone operations are as follows:
+
+| operation&permission | Volume  permission | Bucket permission | Key permission |
+| :--- | :--- | :--- | :--- |
+| Create  volume | CREATE | | |
+| List volume | LIST | | |
+| Get volume Info | READ | | |
+| Delete volume | DELETE | | |
+| Create  bucket | READ | CREATE | |
+| List bucket | LIST, READ | | |
+| Get bucket info | READ | READ | |
+| Delete bucket | READ | DELETE | |
+| List key | READ | LIST, READ | |
+| Write key | READ | READ | CREATE, WRITE |
+| Read key | READ | READ | READ |
diff --git a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md
index 4d40a17..e7ff33e 100644
--- a/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md
+++ b/hadoop-hdds/docs/content/security/SecurityWithRanger.zh.md
@@ -26,7 +26,7 @@ icon: user
 -->
 
 
-Apache Ranger™ 是一个用于管理和监控 Hadoop 平台复杂数据权限的框架。版本大于 1.20 的 Apache Ranger 都可以用于管理 Ozone 集群。
+Apache Ranger™ 是一个用于管理和监控 Hadoop 平台复杂数据权限的框架。Apache Ranger 从2.0版本开始支持Ozone鉴权。但由于在2.0中存在一些bug,因此我们更推荐使用Apache Ranger 2.1及以后版本。
 
 你需要先在你的 Hadoop 集群上安装 Apache Ranger,安装指南可以参考 [Apache Ranger 官网](https://ranger.apache.org/index.html).
 
@@ -36,3 +36,19 @@ Apache Ranger™ 是一个用于管理和监控 Hadoop 平台复杂数据权限
 --------|------------------------------------------------------------
 ozone.acl.enabled         | true
 ozone.acl.authorizer.class| org.apache.ranger.authorization.ozone.authorizer.RangerOzoneAuthorizer
+
+Ozone各类操作对应Ranger权限如下:
+
+| operation&permission | Volume  permission | Bucket permission | Key permission |
+| :--- | :--- | :--- | :--- |
+| Create volume | CREATE | | |
+| List volume | LIST | | |
+| Get volume Info | READ | | |
+| Delete volume | DELETE | | |
+| Create  bucket | READ | CREATE | |
+| List bucket | LIST, READ | | |
+| Get bucket info | READ | READ | |
+| Delete bucket | READ | DELETE | |
+| List key | READ | LIST, READ | |
+| Write key | READ | READ | CREATE, WRITE |
+| Read key | READ | READ | READ |
\ No newline at end of file
diff --git a/hadoop-hdds/docs/content/tools/AuditParser.md b/hadoop-hdds/docs/content/tools/AuditParser.md
index e4da208..ee2acd9 100644
--- a/hadoop-hdds/docs/content/tools/AuditParser.md
+++ b/hadoop-hdds/docs/content/tools/AuditParser.md
@@ -21,7 +21,7 @@ summary: Audit Parser tool can be used for querying the ozone audit logs.
 -->
 
 Audit Parser tool can be used for querying the ozone audit logs.
-This tool creates a sqllite database at the specified path. If the database
+This tool creates a sqlite database at the specified path. If the database
 already exists, it will avoid creating a database.
 
 The database contains only one table called `audit` defined as:
diff --git a/hadoop-hdds/docs/dev-support/bin/generate-site.sh b/hadoop-hdds/docs/dev-support/bin/generate-site.sh
index 4dfbebc..3d7baa8 100755
--- a/hadoop-hdds/docs/dev-support/bin/generate-site.sh
+++ b/hadoop-hdds/docs/dev-support/bin/generate-site.sh
@@ -24,8 +24,15 @@ if [ ! "$(which hugo)" ]; then
    exit 0
 fi
 
+export OZONE_VERSION=$(mvn help:evaluate -Dexpression=ozone.version -q -DforceStdout)
+
+ENABLE_GIT_INFO=
+if git -C $(pwd) status >& /dev/null; then
+  ENABLE_GIT_INFO="--enableGitInfo"
+fi
+
 DESTDIR="$DOCDIR/target/classes/docs"
 mkdir -p "$DESTDIR"
 cd "$DOCDIR"
-hugo -d "$DESTDIR" "$@"
+hugo "${ENABLE_GIT_INFO}" -d "$DESTDIR" "$@"
 cd -
diff --git a/hadoop-hdds/docs/pom.xml b/hadoop-hdds/docs/pom.xml
index 404b6c2..3a6aea0 100644
--- a/hadoop-hdds/docs/pom.xml
+++ b/hadoop-hdds/docs/pom.xml
@@ -24,8 +24,8 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
   </parent>
   <artifactId>hadoop-hdds-docs</artifactId>
   <version>1.1.0-SNAPSHOT</version>
-  <description>Apache Hadoop HDDS/Ozone Documentation</description>
-  <name>Apache Hadoop HDDS/Ozone Documentation</name>
+  <description>Apache Ozone/HDDS Documentation</description>
+  <name>Apache Ozone/HDDS Documentation</name>
   <packaging>jar</packaging>
 
   <dependencies>
diff --git a/hadoop-hdds/docs/static/ozone-logo-monochrome.svg b/hadoop-hdds/docs/static/ozone-logo-monochrome.svg
index cd046a0..89cc166 100644
--- a/hadoop-hdds/docs/static/ozone-logo-monochrome.svg
+++ b/hadoop-hdds/docs/static/ozone-logo-monochrome.svg
@@ -28,7 +28,7 @@
    sodipodi:docname="ozone_bolt.svg"
    inkscape:version="0.92.4 (5da689c313, 2019-01-14)">
   <title
-     id="title39">Apache Hadoop Ozone Logo</title>
+     id="title39">Apache Ozone Logo</title>
   <metadata
      id="metadata32">
     <rdf:RDF>
@@ -37,7 +37,7 @@
         <dc:format>image/svg+xml</dc:format>
         <dc:type
            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title>Apache Hadoop Ozone Logo</dc:title>
+        <dc:title>Apache Ozone Logo</dc:title>
         <cc:license
            rdf:resource="https://www.apache.org/licenses/LICENSE-2.0" />
       </cc:Work>
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/baseof.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/baseof.html
index c46f829..0d3810d 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/baseof.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/baseof.html
@@ -20,6 +20,7 @@
 
 {{ partial "navbar.html" . }}
 
+<div class="wrapper">
 <div class="container-fluid">
     <div class="row">
         {{ partial "sidebar.html" . }}
@@ -33,6 +34,7 @@
         </div>
     </div>
 </div>
+    <div class="push"></div>
 </div>
 
 {{ partial "footer.html" . }}
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/section.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/section.html
index c4408d5..2963c80 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/section.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/section.html
@@ -20,6 +20,7 @@
 
 {{ partial "navbar.html" . }}
 
+<div class="wrapper">
 <div class="container-fluid">
     <div class="row">
         {{ partial "sidebar.html" . }}
@@ -67,6 +68,8 @@
         </div>
     </div>
 </div>
+    <div class="push"></div>
+</div>
 
 {{ partial "footer.html" . }}
 
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/single.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/single.html
index 208a971..d4e439a 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/single.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/_default/single.html
@@ -20,6 +20,7 @@
 
   {{ partial "navbar.html" . }}
 
+  <div class="wrapper">
   <div class="container-fluid">
     <div class="row">
       {{ partial "sidebar.html" . }}
@@ -53,6 +54,8 @@
       </div>
     </div>
   </div>
+    <div class="push"></div>
+  </div>
 
   {{ partial "footer.html" . }}
 
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/index.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/index.html
index 75725a2..3a6bce1 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/index.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/index.html
@@ -18,20 +18,22 @@
 
   <body>
 
-{{ partial "navbar.html" . }}
-
-    <div class="container-fluid">
-      <div class="row">
-        {{ partial "sidebar.html" . }}
-        <div class="col-sm-10 col-sm-offset-2 col-md-10 col-md-offset-2 main">
-            {{ partial "languages.html" .}}
-
-            {{ .Content }}
+    <div class="wrapper">
+        {{ partial "navbar.html" . }}
+
+        <div class="container-fluid">
+          <div class="row">
+            {{ partial "sidebar.html" . }}
+            <div class="col-sm-10 col-sm-offset-2 col-md-10 col-md-offset-2 main">
+                {{ partial "languages.html" .}}
+
+                {{ .Content }}
+            </div>
+          </div>
         </div>
-      </div>
+        <div class="push"></div>
     </div>
-
-{{ partial "footer.html" . }}
+    {{ partial "footer.html" . }}
 
   </body>
 </html>
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/footer.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/footer.html
index 20bf76e..7683482 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/footer.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/footer.html
@@ -14,6 +14,15 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
+
+<footer class="footer">
+  <div class="container">
+    <span class="small text-muted">
+      Version: {{ getenv "OZONE_VERSION" }}{{ with .GitInfo }}, Last Modified: {{ .AuthorDate.Format "January 2, 2006" }} <a class="hide-child link primary-color" href="{{$.Site.Params.ghrepo}}commit/{{ .Hash }}">{{ .AbbreviatedHash }}</a>{{end }}
+    </span>
+  </div>
+</footer>
+
 <!-- Bootstrap core JavaScript
 ================================================== -->
 <!-- Placed at the end of the document so the pages load faster -->
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html
index a4e24c9..8f475b6 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/header.html
@@ -21,9 +21,9 @@
     <meta http-equiv="X-UA-Compatible" content="IE=edge">
     <meta name="viewport" content="width=device-width, initial-scale=1">
     <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
-    <meta name="description" content="Hadoop Ozone Documentation">
+    <meta name="description" content="Apache Ozone Documentation">
 
-    <title>Documentation for Apache Hadoop Ozone</title>
+    <title>Documentation for Apache Ozone</title>
 
     <!-- Bootstrap core CSS -->
     <link href="{{ "css/bootstrap.min.css" | relURL}}" rel="stylesheet">
diff --git a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/navbar.html b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/navbar.html
index f942e4a..d4c9f2e 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/navbar.html
+++ b/hadoop-hdds/docs/themes/ozonedoc/layouts/partials/navbar.html
@@ -27,9 +27,9 @@
         <img src="{{ "ozone-logo-small.png" | relURL }}"/>
       </a>
       <a class="navbar-brand hidden-xs" href="{{ "index.html" | relLangURL }}">
-        Apache Hadoop Ozone/HDDS documentation
+        Apache Ozone/HDDS documentation
       </a>
-      <a class="navbar-brand visible-xs-inline" href="#">Hadoop Ozone</a>
+      <a class="navbar-brand visible-xs-inline" href="#">Apache Ozone</a>
     </div>
     <div id="navbar" class="navbar-collapse collapse">
       <ul class="nav navbar-nav navbar-right">
diff --git a/hadoop-hdds/docs/themes/ozonedoc/static/css/ozonedoc.css b/hadoop-hdds/docs/themes/ozonedoc/static/css/ozonedoc.css
index aa57c92..90068cc 100644
--- a/hadoop-hdds/docs/themes/ozonedoc/static/css/ozonedoc.css
+++ b/hadoop-hdds/docs/themes/ozonedoc/static/css/ozonedoc.css
@@ -20,6 +20,11 @@
  * Base structure
  */
 
+html, body {
+  height: 100%;
+  margin: 0;
+}
+
 /* Move down content because we have a fixed navbar that is 50px tall */
 body {
   padding-top: 50px;
@@ -181,4 +186,27 @@ figure.ozone-usage {
 
 table.table {
   margin: 20px 20px 40px;
-}
\ No newline at end of file
+}
+
+.footer,
+.push {
+  height: 50px;
+}
+
+.footer {
+  background-color: #f5f5f5;
+}
+
+.wrapper {
+  min-height: 100%;
+
+  /* Equal to height of footer */
+  /* But also accounting for potential margin-bottom of last child */
+  margin-bottom: -50px;
+}
+
+.footer .container {
+  padding-top: 10px;
+  padding-bottom: 10px;
+  text-align: center;
+}
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java
index 2df063f..52dc033 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocol/SCMSecurityProtocol.java
@@ -17,7 +17,10 @@
 package org.apache.hadoop.hdds.protocol;
 
 import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.hdds.annotation.InterfaceAudience;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.OzoneManagerDetailsProto;
 import org.apache.hadoop.hdds.scm.ScmConfig;
@@ -77,4 +80,16 @@ public interface SCMSecurityProtocol {
    */
   String getCACertificate() throws IOException;
 
+  /**
+   * Get the list of certificates that meet the query criteria.
+   *
+   * @param type            - node type: OM/SCM/DN.
+   * @param startSerialId   - start certificate serial id.
+   * @param count           - max number of certificates returned in a batch.
+   * @param isRevoked       - whether to list revoked certificates only.
+   * @return list of PEM encoded certificate strings.
+   */
+  List<String> listCertificate(HddsProtos.NodeType type, long startSerialId,
+      int count, boolean isRevoked) throws IOException;
+
 }
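As a rough illustration of how the new listCertificate API might be consumed, the sketch below assumes an already wired-up SCMSecurityProtocol client (obtaining one is outside this patch) and that datanode certificates are the ones of interest; only the method signature above comes from this change.

```java
// Hypothetical caller of the new listCertificate() API; the surrounding wiring is assumed.
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;

public final class ListCertificateSketch {
  // Fetch up to 20 non-revoked datanode certificates, starting from serial id 0.
  public static List<String> firstBatch(SCMSecurityProtocol scmSecurityClient)
      throws IOException {
    return scmSecurityClient.listCertificate(
        HddsProtos.NodeType.DATANODE, 0L, 20, false);
  }
}
```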
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java
index efe79a7..aeef50e 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/protocolPB/SCMSecurityProtocolClientSideTranslatorPB.java
@@ -18,9 +18,11 @@ package org.apache.hadoop.hdds.protocolPB;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.List;
 import java.util.function.Consumer;
 
 import org.apache.hadoop.hdds.protocol.SCMSecurityProtocol;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.DatanodeDetailsProto;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos.OzoneManagerDetailsProto;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos;
@@ -28,6 +30,7 @@ import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCAC
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertResponseProto;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetCertificateRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMGetDataNodeCertRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMListCertificateRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMSecurityRequest;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMSecurityRequest.Builder;
 import org.apache.hadoop.hdds.protocol.proto.SCMSecurityProtocolProtos.SCMSecurityResponse;
@@ -202,6 +205,30 @@ public class SCMSecurityProtocolClientSideTranslatorPB implements
   }
 
   /**
+   * Get the list of certificates that meet the query criteria.
+   *
+   * @param role            - node type: OM/SCM/DN.
+   * @param startSerialId   - start certificate serial id.
+   * @param count           - max number of certificates returned in a batch.
+   * @param isRevoked       - whether to return revoked certificates only.
+   * @return list of PEM encoded certificate strings.
+   * @throws IOException
+   */
+  @Override
+  public List<String> listCertificate(HddsProtos.NodeType role,
+      long startSerialId, int count, boolean isRevoked) throws IOException {
+    SCMListCertificateRequestProto protoIns = SCMListCertificateRequestProto
+        .newBuilder()
+        .setRole(role)
+        .setStartCertId(startSerialId)
+        .setCount(count)
+        .setIsRevoked(isRevoked)
+        .build();
+    return submitRequest(Type.ListCertificate,
+        builder -> builder.setListCertificateRequest(protoIns))
+        .getListCertificateResponseProto().getCertificatesList();
+  }
+
+  /**
    * Return the proxy object underlying this protocol translator.
    *
    * @return the proxy object underlying this protocol translator.
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index 318b424..f21bfdb 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -67,6 +67,9 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolPro
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ScmContainerLocationResponse;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartReplicationManagerRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StopReplicationManagerRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartMaintenanceNodesRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionNodesRequestProto;
+import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.RecommissionNodesRequestProto;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.Type;
 import org.apache.hadoop.hdds.scm.ScmInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@@ -292,23 +295,89 @@ public final class StorageContainerLocationProtocolClientSideTranslatorPB
   }
 
   /**
-   * Queries a list of Node Statuses.
+   * Queries a list of Nodes based on their operational state or health state.
+   * Passing a null for either value acts as a wildcard for that state.
+   *
+   * @param opState The operational state of the node
+   * @param nodeState The health of the node
+   * @return List of Datanodes.
    */
   @Override
-  public List<HddsProtos.Node> queryNode(HddsProtos.NodeState
-      nodeStatuses, HddsProtos.QueryScope queryScope, String poolName)
+  public List<HddsProtos.Node> queryNode(
+      HddsProtos.NodeOperationalState opState, HddsProtos.NodeState
+      nodeState, HddsProtos.QueryScope queryScope, String poolName)
       throws IOException {
     // TODO : We support only cluster wide query right now. So ignoring checking
     // queryScope and poolName
-    Preconditions.checkNotNull(nodeStatuses);
-    NodeQueryRequestProto request = NodeQueryRequestProto.newBuilder()
-        .setState(nodeStatuses)
+    NodeQueryRequestProto.Builder builder = NodeQueryRequestProto.newBuilder()
         .setTraceID(TracingUtil.exportCurrentSpan())
-        .setScope(queryScope).setPoolName(poolName).build();
+        .setScope(queryScope).setPoolName(poolName);
+    if (opState != null) {
+      builder.setOpState(opState);
+    }
+    if (nodeState != null) {
+      builder.setState(nodeState);
+    }
+    NodeQueryRequestProto request = builder.build();
     NodeQueryResponseProto response = submitRequest(Type.QueryNode,
-        builder -> builder.setNodeQueryRequest(request)).getNodeQueryResponse();
+        builder1 -> builder1.setNodeQueryRequest(request))
+        .getNodeQueryResponse();
     return response.getDatanodesList();
+  }
+
+  /**
+   * Attempts to decommission the list of nodes.
+   * @param nodes The list of hostnames or hostname:ports to decommission
+   * @throws IOException
+   */
+  @Override
+  public void decommissionNodes(List<String> nodes) throws IOException {
+    Preconditions.checkNotNull(nodes);
+    DecommissionNodesRequestProto request =
+        DecommissionNodesRequestProto.newBuilder()
+        .addAllHosts(nodes)
+        .build();
+    submitRequest(Type.DecommissionNodes,
+        builder -> builder.setDecommissionNodesRequest(request));
+  }
+
+  /**
+   * Attempts to recommission the list of nodes.
+   * @param nodes The list of hostnames or hostname:ports to recommission
+   * @throws IOException
+   */
+  @Override
+  public void recommissionNodes(List<String> nodes) throws IOException {
+    Preconditions.checkNotNull(nodes);
+    RecommissionNodesRequestProto request =
+        RecommissionNodesRequestProto.newBuilder()
+            .addAllHosts(nodes)
+            .build();
+    submitRequest(Type.RecommissionNodes,
+        builder -> builder.setRecommissionNodesRequest(request));
+  }
 
+  /**
+   * Attempts to put the list of nodes into maintenance mode.
+   *
+   * @param nodes The list of hostnames or hostname:ports to put into
+   *              maintenance
+   * @param endInHours The number of hours from now after which the nodes will
+   *                   be taken out of maintenance automatically. Passing zero
+   *                   allows the nodes to stay in maintenance indefinitely.
+   * @throws IOException
+   */
+  @Override
+  public void startMaintenanceNodes(List<String> nodes, int endInHours)
+      throws IOException {
+    Preconditions.checkNotNull(nodes);
+    StartMaintenanceNodesRequestProto request =
+        StartMaintenanceNodesRequestProto.newBuilder()
+            .addAllHosts(nodes)
+            .setEndInHours(endInHours)
+            .build();
+    submitRequest(Type.StartMaintenanceNodes,
+        builder -> builder.setStartMaintenanceNodesRequest(request));
   }
 
   /**
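A sketch of how the new node administration calls might be driven end to end is shown below; the client instance, host names and ports are assumptions, and only the method signatures come from this change.

```java
// Hypothetical admin flow using the RPCs added above: query, maintenance,
// decommission and recommission. Host names/ports are placeholders.
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;

public final class NodeAdminSketch {
  public static void run(
      StorageContainerLocationProtocolClientSideTranslatorPB scmClient)
      throws IOException {
    // Null health state acts as a wildcard: list all IN_SERVICE nodes.
    List<HddsProtos.Node> inService = scmClient.queryNode(
        HddsProtos.NodeOperationalState.IN_SERVICE, null,
        HddsProtos.QueryScope.CLUSTER, "");
    System.out.println("in-service nodes: " + inService.size());

    // Put one node into maintenance for at most 4 hours (0 = indefinitely).
    scmClient.startMaintenanceNodes(Arrays.asList("dn1.example.com:9858"), 4);

    // Decommission a node permanently, and bring a repaired one back.
    scmClient.decommissionNodes(Arrays.asList("dn2.example.com"));
    scmClient.recommissionNodes(Arrays.asList("dn3.example.com"));
  }
}
```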
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/token/BlockTokenVerifier.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/token/BlockTokenVerifier.java
index ea222df..0c2249a 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/token/BlockTokenVerifier.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/token/BlockTokenVerifier.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdds.security.token;
 import com.google.common.base.Strings;
 import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
 import org.apache.hadoop.hdds.security.x509.SecurityConfig;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
@@ -35,6 +36,13 @@ import java.io.DataInputStream;
 import java.io.IOException;
 import java.security.cert.X509Certificate;
 
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.GetBlock;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.GetSmallFile;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.PutBlock;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.PutSmallFile;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.ReadChunk;
+import static org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type.WriteChunk;
+
 
 /**
  * Verify token and return a UGI with token if authenticated.
@@ -73,14 +81,13 @@ public class BlockTokenVerifier implements TokenVerifier {
     OzoneBlockTokenIdentifier tokenId = new OzoneBlockTokenIdentifier();
     try {
       token.decodeFromUrlString(tokenStr);
-      if (LOGGER.isDebugEnabled()) {
-        LOGGER.debug("Verifying token:{} for user:{} ", token, user);
-      }
       ByteArrayInputStream buf = new ByteArrayInputStream(
           token.getIdentifier());
       DataInputStream in = new DataInputStream(buf);
       tokenId.readFields(in);
-
+      if (LOGGER.isDebugEnabled()) {
+        LOGGER.debug("Verifying token:{} for user:{} ", tokenId, user);
+      }
     } catch (IOException ex) {
       throw new BlockTokenException("Failed to decode token : " + tokenStr);
     }
@@ -118,7 +125,21 @@ public class BlockTokenVerifier implements TokenVerifier {
           " by user: " + tokenUser);
     }
 
-    // TODO: check cmd type and the permissions(AccessMode) in the token
+    if (cmd == ReadChunk || cmd == GetBlock || cmd == GetSmallFile) {
+      if (!tokenId.getAccessModes().contains(
+          HddsProtos.BlockTokenSecretProto.AccessModeProto.READ)) {
+        throw new BlockTokenException("Block token with " + id
+            + " doesn't have READ permission");
+      }
+    } else if (cmd == WriteChunk || cmd == PutBlock || cmd == PutSmallFile) {
+      if (!tokenId.getAccessModes().contains(
+          HddsProtos.BlockTokenSecretProto.AccessModeProto.WRITE)) {
+        throw new BlockTokenException("Block token with " + id
+            + " doesn't have WRITE permission");
+      }
+    } else {
+      throw new BlockTokenException("Block token does not support " + cmd);
+    }
   }
 
   public static boolean isTestStub() {
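
The verifier above now ties each datanode command to the access mode that must be carried in the block token: read-type commands require READ, write-type commands require WRITE, and anything else is rejected. The helper below is a purely illustrative restatement of that rule (it is not part of the patch), using the proto enums imported at the top of the file.

    import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.Type;
    import org.apache.hadoop.hdds.protocol.proto.HddsProtos.BlockTokenSecretProto.AccessModeProto;

    final class RequiredAccessMode {
      private RequiredAccessMode() { }

      static AccessModeProto forCommand(Type cmd) {
        switch (cmd) {
        case ReadChunk:
        case GetBlock:
        case GetSmallFile:
          return AccessModeProto.READ;   // read path needs READ in the token
        case WriteChunk:
        case PutBlock:
        case PutSmallFile:
          return AccessModeProto.WRITE;  // write path needs WRITE in the token
        default:
          // Everything else is refused by the verifier.
          throw new IllegalArgumentException("Block tokens do not cover " + cmd);
        }
      }
    }
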
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateServer.java
index b1d7d6b..76512c5 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateServer.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateServer.java
@@ -19,6 +19,7 @@
 
 package org.apache.hadoop.hdds.security.x509.certificate.authority;
 
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
 import org.apache.hadoop.hdds.security.x509.SecurityConfig;
 import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateApprover.ApprovalType;
@@ -28,6 +29,7 @@ import org.bouncycastle.pkcs.PKCS10CertificationRequest;
 import java.io.IOException;
 import java.security.cert.CertificateException;
 import java.security.cert.X509Certificate;
+import java.util.List;
 import java.util.concurrent.Future;
 
 /**
@@ -112,6 +114,16 @@ public interface CertificateServer {
    * framework.
    */
 
+  /**
+   * List certificates.
+   * @param type            - node type: OM/SCM/DN
+   * @param startSerialId   - start certificate serial id
+   * @param count           - max number of certificates returned in a batch
+   * @return list of certificates matching the query.
+   * @throws IOException
+   */
+  List<X509Certificate> listCertificate(HddsProtos.NodeType type,
+      long startSerialId, int count, boolean isRevoked) throws IOException;
 
   /**
    * Make it explicit what type of CertificateServer we are creating here.
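
The new listCertificate call gives callers a paged view of CA-issued certificates. A hedged sketch of a first-page query follows; HddsProtos.NodeType.DATANODE and the batch size of 100 are illustrative assumptions, not values mandated by the interface.

    import java.io.IOException;
    import java.security.cert.X509Certificate;
    import java.util.List;
    import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
    import org.apache.hadoop.hdds.security.x509.certificate.authority.CertificateServer;

    final class ListCertsSketch {
      private ListCertsSketch() { }

      static List<X509Certificate> firstBatch(CertificateServer ca)
          throws IOException {
        // startSerialId 0 starts from the beginning, count caps the batch,
        // and isRevoked=false asks for currently valid certificates.
        return ca.listCertificate(HddsProtos.NodeType.DATANODE, 0L, 100, false);
      }
    }
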
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateStore.java
index 961d048..3ddb640 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateStore.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/CertificateStore.java
@@ -19,9 +19,12 @@
 
 package org.apache.hadoop.hdds.security.x509.certificate.authority;
 
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+
 import java.io.IOException;
 import java.math.BigInteger;
 import java.security.cert.X509Certificate;
+import java.util.List;
 
 /**
  * This interface allows the DefaultCA to be portable and use different DB
@@ -70,6 +73,19 @@ public interface CertificateStore {
       throws IOException;
 
   /**
+   * List certificates issued for the given role.
+   * @param role - role of the certificate owner (OM/DN).
+   * @param startSerialID - start cert serial id.
+   * @param count - max number of certs returned.
+   * @param certType cert type (valid/revoked).
+   * @return list of X509 certificates.
+   * @throws IOException
+   */
+  List<X509Certificate> listCertificate(HddsProtos.NodeType role,
+      BigInteger startSerialID, int count, CertType certType)
+      throws IOException;
+
+  /**
    * Different kind of Certificate stores.
    */
   enum CertType {
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java
index 2378260..0523209 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/security/x509/certificate/authority/DefaultCAServer.java
@@ -22,6 +22,7 @@ package org.apache.hadoop.hdds.security.x509.certificate.authority;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Preconditions;
 import org.apache.commons.validator.routines.DomainValidator;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
 import org.apache.hadoop.hdds.security.x509.SecurityConfig;
 import org.apache.hadoop.hdds.security.x509.certificate.authority.PKIProfiles.DefaultProfile;
@@ -51,6 +52,7 @@ import java.security.spec.InvalidKeySpecException;
 import java.time.LocalDate;
 import java.time.LocalDateTime;
 import java.time.LocalTime;
+import java.util.List;
 import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.Future;
 import java.util.function.Consumer;
@@ -289,6 +291,23 @@ public class DefaultCAServer implements CertificateServer {
   }
 
   /**
+   * List certificates issued by this CA.
+   * @param role            - node type: OM/SCM/DN.
+   * @param startSerialId   - start cert serial id.
+   * @param count           - max number of certificates returned in a batch.
+   * @param isRevoked       - whether to return revoked certificates only.
+   * @return list of certificates.
+   * @throws IOException
+   */
+  @Override
+  public List<X509Certificate> listCertificate(HddsProtos.NodeType role,
+      long startSerialId, int count, boolean isRevoked) throws IOException {
+    return store.listCertificate(role, BigInteger.valueOf(startSerialId), count,
+        isRevoked ? CertificateStore.CertType.REVOKED_CERTS :
+            CertificateStore.CertType.VALID_CERTS);
+  }
+
+  /**
    * Generates a Self Signed CertificateServer. These are the steps in
    * generating a Self-Signed CertificateServer.
    * <p>
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HtmlQuoting.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HtmlQuoting.java
index f4262f9..44a1d00 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HtmlQuoting.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HtmlQuoting.java
@@ -127,7 +127,7 @@ public final class HtmlQuoting {
       ByteArrayOutputStream buffer = new ByteArrayOutputStream();
       try {
         quoteHtmlChars(buffer, bytes, 0, bytes.length);
-        return buffer.toString("UTF-8");
+        return buffer.toString(StandardCharsets.UTF_8.name());
       } catch (IOException ioe) {
         // Won't happen, since it is a bytearrayoutputstream
         return null;
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HttpServer2.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HttpServer2.java
index 9282c84..9aad94a 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HttpServer2.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/HttpServer2.java
@@ -38,6 +38,7 @@ import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URL;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Enumeration;
@@ -1522,7 +1523,7 @@ public final class HttpServer2 implements FilterContainer {
       }
       response.setContentType("text/plain; charset=UTF-8");
       try (PrintStream out = new PrintStream(
-          response.getOutputStream(), false, "UTF-8")) {
+          response.getOutputStream(), false, StandardCharsets.UTF_8.name())) {
         ReflectionUtils.printThreadInfo(out, "");
       }
       ReflectionUtils.logThreadInfo(LOG, "jsp requested", 1);
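
Both hunks above replace the hard-coded "UTF-8" literal with the StandardCharsets constant. A small, self-contained sketch of the same pattern (class and method names are made up for illustration):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import java.io.UnsupportedEncodingException;
    import java.nio.charset.StandardCharsets;

    final class CharsetSketch {
      private CharsetSketch() { }

      static String render() throws UnsupportedEncodingException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // StandardCharsets.UTF_8.name() avoids typos in the charset name and
        // keeps the encoding choice in one well-known constant.
        try (PrintStream out = new PrintStream(buffer, false,
            StandardCharsets.UTF_8.name())) {
          out.print("thread dump goes here");
        }
        return buffer.toString(StandardCharsets.UTF_8.name());
      }
    }
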
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisNameRewriteSampleBuilder.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisNameRewriteSampleBuilder.java
index cbee652..e3fb737 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisNameRewriteSampleBuilder.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/server/http/RatisNameRewriteSampleBuilder.java
@@ -26,7 +26,7 @@ import java.util.regex.Pattern;
 import io.prometheus.client.Collector.MetricFamilySamples.Sample;
 import io.prometheus.client.dropwizard.samplebuilder.DefaultSampleBuilder;
 import org.apache.logging.log4j.util.Strings;
-import static org.apache.ratis.server.metrics.RaftLogMetrics.RATIS_APPLICATION_NAME_METRICS;
+import static org.apache.ratis.metrics.RatisMetrics.RATIS_APPLICATION_NAME_METRICS;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
index 71766bd..f0096ed 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStore.java
@@ -25,7 +25,7 @@ import java.util.ArrayList;
 import java.util.Map;
 
 import org.apache.hadoop.hdds.annotation.InterfaceStability;
-import org.apache.hadoop.hdds.utils.db.cache.TableCacheImpl;
+import org.apache.hadoop.hdds.utils.db.cache.TableCache;
 
 /**
  * The DBStore interface provides the ability to create Tables, which store
@@ -49,8 +49,7 @@ public interface DBStore extends AutoCloseable, BatchOperationHandler {
 
   /**
    * Gets an existing TableStore with implicit key/value conversion and
-   * with default cleanup policy for cache. Default cache clean up policy is
-   * manual.
+   * with the default cache type. The default cache type is partial cache.
    *
    * @param name - Name of the TableStore to get
    * @param keyType
@@ -63,12 +62,17 @@ public interface DBStore extends AutoCloseable, BatchOperationHandler {
 
   /**
    * Gets an existing TableStore with implicit key/value conversion and
-   * with specified cleanup policy for cache.
+   * with the specified cache type.
+   * @param name - Name of the TableStore to get
+   * @param keyType
+   * @param valueType
+   * @param cacheType
+   * @return - TableStore.
    * @throws IOException
    */
   <KEY, VALUE> Table<KEY, VALUE> getTable(String name,
       Class<KEY> keyType, Class<VALUE> valueType,
-      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException;
+      TableCache.CacheType cacheType) throws IOException;
 
   /**
    * Lists the Known list of Tables in a DB.
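
Callers of DBStore now pick a cache type instead of a cleanup policy when opening a table. A hedged sketch of the reworked overload; the table name, key/value classes, and the assumption that codecs for them are registered are illustrative, not part of the patch.

    import java.io.IOException;
    import org.apache.hadoop.hdds.utils.db.DBStore;
    import org.apache.hadoop.hdds.utils.db.Table;
    import org.apache.hadoop.hdds.utils.db.cache.TableCache;

    final class TableOpenSketch {
      private TableOpenSketch() { }

      static Table<String, String> openFullyCached(DBStore store)
          throws IOException {
        // FULL_CACHE keeps the whole table in memory (cache state == DB state);
        // the three-argument overload above defaults to PARTIAL_CACHE.
        return store.getTable("exampleTable", String.class, String.class,
            TableCache.CacheType.FULL_CACHE);
      }
    }
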
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
index adbd2eb..252363c 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java
@@ -33,10 +33,10 @@ import java.util.stream.Collectors;
 import org.apache.hadoop.hdds.HddsUtils;
 import org.apache.hadoop.hdds.StringUtils;
 import org.apache.hadoop.hdds.utils.RocksDBStoreMBean;
+import org.apache.hadoop.hdds.utils.db.cache.TableCache;
 import org.apache.hadoop.metrics2.util.MBeans;
 
 import com.google.common.base.Preconditions;
-import org.apache.hadoop.hdds.utils.db.cache.TableCacheImpl;
 import org.apache.ratis.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.rocksdb.ColumnFamilyDescriptor;
 import org.rocksdb.ColumnFamilyHandle;
@@ -310,9 +310,9 @@ public class RDBStore implements DBStore {
   @Override
   public <K, V> Table<K, V> getTable(String name,
       Class<K> keyType, Class<V> valueType,
-      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException {
+      TableCache.CacheType cacheType) throws IOException {
     return new TypedTable<>(getTable(name), codecRegistry, keyType,
-        valueType, cleanupPolicy);
+        valueType, cacheType);
   }
 
   @Override
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
index 1c88290..5e44384 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/TypedTable.java
@@ -30,9 +30,10 @@ import org.apache.hadoop.hdds.utils.MetadataKeyFilters;
 import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
 import org.apache.hadoop.hdds.utils.db.cache.CacheResult;
 import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
-import org.apache.hadoop.hdds.utils.db.cache.TableCacheImpl;
+import org.apache.hadoop.hdds.utils.db.cache.FullTableCache;
+import org.apache.hadoop.hdds.utils.db.cache.PartialTableCache;
+import org.apache.hadoop.hdds.utils.db.cache.TableCache.CacheType;
 import org.apache.hadoop.hdds.utils.db.cache.TableCache;
-import org.apache.hadoop.hdds.utils.db.cache.TableCacheImpl.CacheCleanupPolicy;
 
 import static org.apache.hadoop.hdds.utils.db.cache.CacheResult.CacheStatus.EXISTS;
 import static org.apache.hadoop.hdds.utils.db.cache.CacheResult.CacheStatus.NOT_EXIST;
@@ -61,8 +62,7 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
 
   /**
    * Create an TypedTable from the raw table.
-   * Default cleanup policy used for the table is
-   * {@link CacheCleanupPolicy#MANUAL}.
+   * Default cache type for the table is {@link CacheType#PARTIAL_CACHE}.
    * @param rawTable
    * @param codecRegistry
    * @param keyType
@@ -73,30 +73,30 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
       CodecRegistry codecRegistry, Class<KEY> keyType,
       Class<VALUE> valueType) throws IOException {
     this(rawTable, codecRegistry, keyType, valueType,
-        CacheCleanupPolicy.MANUAL);
+        CacheType.PARTIAL_CACHE);
   }
 
   /**
-   * Create an TypedTable from the raw table with specified cleanup policy
-   * for table cache.
+   * Create a TypedTable from the raw table with the specified cache type.
    * @param rawTable
    * @param codecRegistry
    * @param keyType
    * @param valueType
-   * @param cleanupPolicy
+   * @param cacheType
+   * @throws IOException
    */
   public TypedTable(
       Table<byte[], byte[]> rawTable,
       CodecRegistry codecRegistry, Class<KEY> keyType,
       Class<VALUE> valueType,
-      TableCacheImpl.CacheCleanupPolicy cleanupPolicy) throws IOException {
+      CacheType cacheType) throws IOException {
     this.rawTable = rawTable;
     this.codecRegistry = codecRegistry;
     this.keyType = keyType;
     this.valueType = valueType;
-    cache = new TableCacheImpl<>(cleanupPolicy);
 
-    if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
+    if (cacheType == CacheType.FULL_CACHE) {
+      cache = new FullTableCache<>();
       //fill cache
       try(TableIterator<KEY, ? extends KeyValue<KEY, VALUE>> tableIterator =
               iterator()) {
@@ -111,6 +111,8 @@ public class TypedTable<KEY, VALUE> implements Table<KEY, VALUE> {
               new CacheValue<>(Optional.of(kv.getValue()), EPOCH_DEFAULT));
         }
       }
+    } else {
+      cache = new PartialTableCache<>();
     }
   }
 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/FullTableCache.java
similarity index 53%
copy from hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java
copy to hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/FullTableCache.java
index d35522d..2754b59 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/FullTableCache.java
@@ -24,13 +24,13 @@ import java.util.List;
 import java.util.Map;
 import java.util.NavigableSet;
 import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -40,64 +40,74 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Cache implementation for the table. Depending on the cache clean up policy
- * this cache will be full cache or partial cache.
- *
- * If cache cleanup policy is set as {@link CacheCleanupPolicy#MANUAL},
- * this will be a partial cache.
- *
- * If cache cleanup policy is set as {@link CacheCleanupPolicy#NEVER},
- * this will be a full cache.
+ * Cache implementation for the table. Full table cache, where the DB state
+ * and cache state are kept the same for these tables.
  */
 @Private
 @Evolving
-public class TableCacheImpl<CACHEKEY extends CacheKey,
+public class FullTableCache<CACHEKEY extends CacheKey,
     CACHEVALUE extends CacheValue> implements TableCache<CACHEKEY, CACHEVALUE> {
 
   public static final Logger LOG =
-      LoggerFactory.getLogger(TableCacheImpl.class);
+      LoggerFactory.getLogger(FullTableCache.class);
 
   private final Map<CACHEKEY, CACHEVALUE> cache;
   private final NavigableSet<EpochEntry<CACHEKEY>> epochEntries;
   private ExecutorService executorService;
-  private CacheCleanupPolicy cleanupPolicy;
-
 
+  private final ReadWriteLock lock;
 
-  public TableCacheImpl(CacheCleanupPolicy cleanupPolicy) {
 
+  public FullTableCache() {
     // As for full table cache only we need elements to be inserted in sorted
-    // manner, so that list will be easy. For other we can go with Hash map.
-    if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-      cache = new ConcurrentSkipListMap<>();
-    } else {
-      cache = new ConcurrentHashMap<>();
-    }
+    // manner, so that listing is easy. Look-ups, however, have log(N) time
+    // complexity.
+
+    // A lock is required to protect the cache because cleanup does not run
+    // under any Ozone-level locks (bucket/volume), so a cleanup could race
+    // with a request-processing thread that is updating entries which have
+    // not yet been flushed to disk.
+    cache = new ConcurrentSkipListMap<>();
+
+    lock = new ReentrantReadWriteLock();
+
     epochEntries = new ConcurrentSkipListSet<>();
+
     // Created a singleThreadExecutor, so one cleanup will be running at a
     // time.
     ThreadFactory build = new ThreadFactoryBuilder().setDaemon(true)
-        .setNameFormat("PartialTableCache Cleanup Thread - %d").build();
+        .setNameFormat("FullTableCache Cleanup Thread - %d").build();
     executorService = Executors.newSingleThreadExecutor(build);
-    this.cleanupPolicy = cleanupPolicy;
   }
 
   @Override
   public CACHEVALUE get(CACHEKEY cachekey) {
-    return cache.get(cachekey);
+    try {
+      lock.readLock().lock();
+      return cache.get(cachekey);
+    } finally {
+      lock.readLock().unlock();
+    }
   }
 
   @Override
   public void loadInitial(CACHEKEY cacheKey, CACHEVALUE cacheValue) {
     // No need to add entry to epochEntries. Adding to cache is required during
     // normal put operation.
+    // No need to acquire the lock: this is performed only during startup,
+    // before any other operations are happening.
     cache.put(cacheKey, cacheValue);
   }
 
   @Override
   public void put(CACHEKEY cacheKey, CACHEVALUE value) {
-    cache.put(cacheKey, value);
-    epochEntries.add(new EpochEntry<>(value.getEpoch(), cacheKey));
+    try {
+      lock.writeLock().lock();
+      cache.put(cacheKey, value);
+      epochEntries.add(new EpochEntry<>(value.getEpoch(), cacheKey));
+    } finally {
+      lock.writeLock().unlock();
+    }
   }
 
   public void cleanup(List<Long> epochs) {
@@ -115,9 +125,8 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
   }
 
   @VisibleForTesting
-  protected void evictCache(List<Long> epochs) {
+  public void evictCache(List<Long> epochs) {
     EpochEntry<CACHEKEY> currentEntry;
-    final AtomicBoolean removed = new AtomicBoolean();
     CACHEKEY cachekey;
     long lastEpoch = epochs.get(epochs.size() - 1);
     for (Iterator<EpochEntry<CACHEKEY>> iterator = epochEntries.iterator();
@@ -125,44 +134,34 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
       currentEntry = iterator.next();
       cachekey = currentEntry.getCachekey();
       long currentEpoch = currentEntry.getEpoch();
-      CacheValue cacheValue = cache.computeIfPresent(cachekey, ((k, v) -> {
-        if (cleanupPolicy == CacheCleanupPolicy.MANUAL) {
-          if (v.getEpoch() == currentEpoch && epochs.contains(v.getEpoch())) {
-            LOG.debug("CacheKey {} with epoch {} is removed from cache",
-                k.getCacheKey(), currentEpoch);
-            iterator.remove();
-            removed.set(true);
-            return null;
-          }
-        } else if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-          // Remove only entries which are marked for delete.
-          if (v.getEpoch() == currentEpoch && epochs.contains(v.getEpoch())
-              && v.getCacheValue() == null) {
-            LOG.debug("CacheKey {} with epoch {} is removed from cache",
-                k.getCacheKey(), currentEpoch);
-            removed.set(true);
-            iterator.remove();
-            return null;
-          }
-        }
-        return v;
-      }));
-
-      // If override entries, then for those epoch entries, there will be no
-      // entry in cache. This can occur in the case we have cleaned up the
-      // override cache entry, but in epoch entry it is still lying around.
-      // This is done to cleanup epoch entries.
-      if (!removed.get() && cacheValue == null) {
-        LOG.debug("CacheKey {} with epoch {} is removed from epochEntry for " +
-                "a key not existing in cache", cachekey.getCacheKey(),
-            currentEpoch);
-        iterator.remove();
-      } else if (currentEpoch >= lastEpoch) {
-        // If currentEntry epoch is greater than last epoch provided, we have
-        // deleted all entries less than specified epoch. So, we can break.
+
+      // If the current entry's epoch is greater than the last epoch provided,
+      // all entries up to that epoch have been handled, so we can break.
+      if (currentEpoch > lastEpoch) {
         break;
       }
-      removed.set(false);
+
+      // Acquire the lock to avoid a race between cleanup and cache updates
+      // made by client requests.
+      try {
+        lock.writeLock().lock();
+        if (epochs.contains(currentEpoch)) {
+          // Remove the epoch entry, since it is present in the epoch list.
+          iterator.remove();
+          // Remove only entries which are marked for delete from the cache.
+          cache.computeIfPresent(cachekey, ((k, v) -> {
+            if (v.getCacheValue() == null && v.getEpoch() == currentEpoch) {
+              LOG.debug("CacheKey {} with epoch {} is removed from cache",
+                  k.getCacheKey(), currentEpoch);
+              return null;
+            }
+            return v;
+          }));
+        }
+      } finally {
+        lock.writeLock().unlock();
+      }
+
     }
   }
 
@@ -170,12 +169,7 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
 
     CACHEVALUE cachevalue = cache.get(cachekey);
     if (cachevalue == null) {
-      if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-        return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
-      } else {
-        return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST,
-            null);
-      }
+      return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
     } else {
       if (cachevalue.getCacheValue() != null) {
         return new CacheResult<>(CacheResult.CacheStatus.EXISTS, cachevalue);
@@ -193,13 +187,4 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
     return epochEntries;
   }
 
-  /**
-   * Cleanup policies for table cache.
-   */
-  public enum CacheCleanupPolicy {
-    NEVER, // Cache will not be cleaned up. This mean's the table maintains
-    // full cache.
-    MANUAL // Cache will be cleaned up, once after flushing to DB. It is
-    // caller's responsibility to flush to DB, before calling cleanup cache.
-  }
 }
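
In a full table cache, eviction after a flush only drops delete markers; live entries stay so the cache keeps mirroring the DB. A minimal sketch of that behaviour, using the constructors exercised by the test further below (keys, values, and epochs are illustrative):

    import com.google.common.base.Optional;
    import java.util.Arrays;
    import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
    import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
    import org.apache.hadoop.hdds.utils.db.cache.FullTableCache;

    final class FullCacheSketch {
      private FullCacheSketch() { }

      static void demo() {
        FullTableCache<CacheKey<String>, CacheValue<String>> cache =
            new FullTableCache<>();
        cache.put(new CacheKey<>("vol1"),
            new CacheValue<>(Optional.of("v1"), 1));  // live entry, epoch 1
        cache.put(new CacheKey<>("vol2"),
            new CacheValue<>(Optional.absent(), 2));  // delete marker, epoch 2
        // Once epochs 1 and 2 are flushed to the DB: "vol2" is evicted because
        // it is a delete marker, while "vol1" remains in the cache.
        cache.evictCache(Arrays.asList(1L, 2L));
      }
    }
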
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java
similarity index 52%
rename from hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java
rename to hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java
index d35522d..0bf03c5 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCacheImpl.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/PartialTableCache.java
@@ -25,12 +25,10 @@ import java.util.Map;
 import java.util.NavigableSet;
 import java.util.Set;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.ConcurrentSkipListSet;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadFactory;
-import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
@@ -40,46 +38,45 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Cache implementation for the table. Depending on the cache clean up policy
- * this cache will be full cache or partial cache.
- *
- * If cache cleanup policy is set as {@link CacheCleanupPolicy#MANUAL},
- * this will be a partial cache.
- *
- * If cache cleanup policy is set as {@link CacheCleanupPolicy#NEVER},
- * this will be a full cache.
+ * Cache implementation for the table. Partial table cache, where the DB state
+ * and cache state will not be the same. A partial table cache holds entries
+ * only until they are flushed to the DB.
  */
 @Private
 @Evolving
-public class TableCacheImpl<CACHEKEY extends CacheKey,
+public class PartialTableCache<CACHEKEY extends CacheKey,
     CACHEVALUE extends CacheValue> implements TableCache<CACHEKEY, CACHEVALUE> {
 
   public static final Logger LOG =
-      LoggerFactory.getLogger(TableCacheImpl.class);
+      LoggerFactory.getLogger(PartialTableCache.class);
 
   private final Map<CACHEKEY, CACHEVALUE> cache;
   private final NavigableSet<EpochEntry<CACHEKEY>> epochEntries;
   private ExecutorService executorService;
-  private CacheCleanupPolicy cleanupPolicy;
 
 
+  public PartialTableCache() {
+    // We use a ConcurrentHashMap for O(1) look-up in the get API.
+    // List operations on a partial cache merge the DB and cache state anyway,
+    // so entries in the cache do not need to be in sorted order.
 
-  public TableCacheImpl(CacheCleanupPolicy cleanupPolicy) {
+    // Because ConcurrentHashMap.computeIfPresent, which is used by cleanup,
+    // is an atomic operation, and Ozone-level locks such as bucket/volume
+    // locks protect concurrent updates to the same key, no cache-level lock
+    // is needed during update/cleanup operations.
+
+    // 1. During an update, it is the caller's responsibility to hold the
+    // volume/bucket locks.
+    // 2. A cleanup that removes an entry while a request is updating the
+    // cache is guarded by the ConcurrentHashMap guarantees.
+    cache = new ConcurrentHashMap<>();
 
-    // As for full table cache only we need elements to be inserted in sorted
-    // manner, so that list will be easy. For other we can go with Hash map.
-    if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-      cache = new ConcurrentSkipListMap<>();
-    } else {
-      cache = new ConcurrentHashMap<>();
-    }
     epochEntries = new ConcurrentSkipListSet<>();
     // Created a singleThreadExecutor, so one cleanup will be running at a
     // time.
     ThreadFactory build = new ThreadFactoryBuilder().setDaemon(true)
         .setNameFormat("PartialTableCache Cleanup Thread - %d").build();
     executorService = Executors.newSingleThreadExecutor(build);
-    this.cleanupPolicy = cleanupPolicy;
   }
 
   @Override
@@ -89,9 +86,7 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
 
   @Override
   public void loadInitial(CACHEKEY cacheKey, CACHEVALUE cacheValue) {
-    // No need to add entry to epochEntries. Adding to cache is required during
-    // normal put operation.
-    cache.put(cacheKey, cacheValue);
+    // Do nothing for partial table cache.
   }
 
   @Override
@@ -115,9 +110,8 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
   }
 
   @VisibleForTesting
-  protected void evictCache(List<Long> epochs) {
+  public void evictCache(List<Long> epochs) {
     EpochEntry<CACHEKEY> currentEntry;
-    final AtomicBoolean removed = new AtomicBoolean();
     CACHEKEY cachekey;
     long lastEpoch = epochs.get(epochs.size() - 1);
     for (Iterator<EpochEntry<CACHEKEY>> iterator = epochEntries.iterator();
@@ -125,44 +119,31 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
       currentEntry = iterator.next();
       cachekey = currentEntry.getCachekey();
       long currentEpoch = currentEntry.getEpoch();
-      CacheValue cacheValue = cache.computeIfPresent(cachekey, ((k, v) -> {
-        if (cleanupPolicy == CacheCleanupPolicy.MANUAL) {
-          if (v.getEpoch() == currentEpoch && epochs.contains(v.getEpoch())) {
-            LOG.debug("CacheKey {} with epoch {} is removed from cache",
-                k.getCacheKey(), currentEpoch);
-            iterator.remove();
-            removed.set(true);
-            return null;
-          }
-        } else if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-          // Remove only entries which are marked for delete.
-          if (v.getEpoch() == currentEpoch && epochs.contains(v.getEpoch())
-              && v.getCacheValue() == null) {
-            LOG.debug("CacheKey {} with epoch {} is removed from cache",
-                k.getCacheKey(), currentEpoch);
-            removed.set(true);
-            iterator.remove();
+
+      // If the current entry's epoch is greater than the last epoch provided,
+      // all entries up to that epoch have been handled, so we can break.
+      if (currentEpoch > lastEpoch) {
+        break;
+      }
+
+      // As ConcurrentHashMap.computeIfPresent is atomic, there is no race
+      // between cache cleanup and requests updating the same cache entry.
+      if (epochs.contains(currentEpoch)) {
+        // Remove the epoch entry, since it is present in the epoch list.
+        iterator.remove();
+        cache.computeIfPresent(cachekey, ((k, v) -> {
+          // If the cached entry's epoch matches the current epoch, remove the
+          // entry from the cache.
+          if (v.getEpoch() == currentEpoch) {
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("CacheKey {} with epoch {} is removed from cache",
+                  k.getCacheKey(), currentEpoch);
+            }
             return null;
           }
-        }
-        return v;
-      }));
-
-      // If override entries, then for those epoch entries, there will be no
-      // entry in cache. This can occur in the case we have cleaned up the
-      // override cache entry, but in epoch entry it is still lying around.
-      // This is done to cleanup epoch entries.
-      if (!removed.get() && cacheValue == null) {
-        LOG.debug("CacheKey {} with epoch {} is removed from epochEntry for " +
-                "a key not existing in cache", cachekey.getCacheKey(),
-            currentEpoch);
-        iterator.remove();
-      } else if (currentEpoch >= lastEpoch) {
-        // If currentEntry epoch is greater than last epoch provided, we have
-        // deleted all entries less than specified epoch. So, we can break.
-        break;
+          return v;
+        }));
       }
-      removed.set(false);
     }
   }
 
@@ -170,19 +151,13 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
 
     CACHEVALUE cachevalue = cache.get(cachekey);
     if (cachevalue == null) {
-      if (cleanupPolicy == CacheCleanupPolicy.NEVER) {
-        return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
-      } else {
-        return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST,
+      return new CacheResult<>(CacheResult.CacheStatus.MAY_EXIST,
             null);
-      }
     } else {
       if (cachevalue.getCacheValue() != null) {
         return new CacheResult<>(CacheResult.CacheStatus.EXISTS, cachevalue);
       } else {
         // When entity is marked for delete, cacheValue will be set to null.
-        // In that case we can return NOT_EXIST irrespective of cache cleanup
-        // policy.
         return new CacheResult<>(CacheResult.CacheStatus.NOT_EXIST, null);
       }
     }
@@ -193,13 +168,4 @@ public class TableCacheImpl<CACHEKEY extends CacheKey,
     return epochEntries;
   }
 
-  /**
-   * Cleanup policies for table cache.
-   */
-  public enum CacheCleanupPolicy {
-    NEVER, // Cache will not be cleaned up. This mean's the table maintains
-    // full cache.
-    MANUAL // Cache will be cleaned up, once after flushing to DB. It is
-    // caller's responsibility to flush to DB, before calling cleanup cache.
-  }
 }
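
A partial table cache, by contrast, holds entries only until the epochs they belong to are flushed, after which a miss merely means the key may still exist in RocksDB. A minimal sketch of that lifecycle (keys, values, and epochs are illustrative):

    import com.google.common.base.Optional;
    import java.util.Arrays;
    import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
    import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
    import org.apache.hadoop.hdds.utils.db.cache.PartialTableCache;

    final class PartialCacheSketch {
      private PartialCacheSketch() { }

      static void demo() {
        PartialTableCache<CacheKey<String>, CacheValue<String>> cache =
            new PartialTableCache<>();
        cache.put(new CacheKey<>("bucket1"),
            new CacheValue<>(Optional.of("v1"), 1));
        cache.put(new CacheKey<>("bucket2"),
            new CacheValue<>(Optional.of("v2"), 2));
        // Once epochs 1 and 2 are flushed, both entries are evicted; readers
        // that miss the cache fall through to the DB (MAY_EXIST semantics).
        cache.evictCache(Arrays.asList(1L, 2L));
      }
    }
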
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java
index 8acb708..ab4b73d 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/cache/TableCache.java
@@ -47,9 +47,9 @@ public interface TableCache<CACHEKEY extends CacheKey,
   CACHEVALUE get(CACHEKEY cacheKey);
 
   /**
-   * This method should be called for tables with cache cleanup policy
-   * {@link TableCacheImpl.CacheCleanupPolicy#NEVER} after system restart to
-   * fill up the cache.
+   * This method should be called after a system restart for tables with
+   * cache type {@link TableCache.CacheType#FULL_CACHE}
+   * in order to fill up the cache.
    * @param cacheKey
    * @param cacheValue
    */
@@ -73,6 +73,9 @@ public interface TableCache<CACHEKEY extends CacheKey,
    */
   void cleanup(List<Long> epochs);
 
+  @VisibleForTesting
+  void evictCache(List<Long> epochs);
+
   /**
    * Return the size of the cache.
    * @return size
@@ -92,15 +95,13 @@ public interface TableCache<CACHEKEY extends CacheKey,
    * {@link CacheResult.CacheStatus#EXISTS}
    *
    * If it does not exist:
-   *  If cache clean up policy is
-   *  {@link TableCacheImpl.CacheCleanupPolicy#NEVER} it means table cache is
-   *  full cache. It return's {@link CacheResult} with null
-   *  and status as {@link CacheResult.CacheStatus#NOT_EXIST}.
+   *  If cache type is
+   *  {@link TableCache.CacheType#FULL_CACHE}, it returns {@link CacheResult}
+   *  with null and status {@link CacheResult.CacheStatus#NOT_EXIST}.
    *
-   *  If cache clean up policy is
-   *  {@link TableCacheImpl.CacheCleanupPolicy#MANUAL} it means
-   *  table cache is partial cache. It return's {@link CacheResult} with
-   *  null and status as MAY_EXIST.
+   *  If cache type is
+   *  {@link TableCache.CacheType#PARTIAL_CACHE},
+   *  it returns {@link CacheResult} with null and status MAY_EXIST.
    *
    * @param cachekey
    */
@@ -109,4 +110,11 @@ public interface TableCache<CACHEKEY extends CacheKey,
 
   @VisibleForTesting
   Set<EpochEntry<CACHEKEY>> getEpochEntrySet();
+
+  enum CacheType {
+    FULL_CACHE, // The table maintains a full cache; cache and DB
+    // state are the same.
+    PARTIAL_CACHE // Partial table cache: the cache holds only a partial
+    // state compared to the DB.
+  }
 }
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/MockCAStore.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/MockCAStore.java
index 1dea512..633ae19 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/MockCAStore.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/authority/MockCAStore.java
@@ -19,9 +19,13 @@
 
 package org.apache.hadoop.hdds.security.x509.certificate.authority;
 
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+
 import java.io.IOException;
 import java.math.BigInteger;
 import java.security.cert.X509Certificate;
+import java.util.Collections;
+import java.util.List;
 
 /**
  *
@@ -51,4 +55,11 @@ public class MockCAStore implements CertificateStore {
       throws IOException {
     return null;
   }
+
+  @Override
+  public List<X509Certificate> listCertificate(HddsProtos.NodeType role,
+      BigInteger startSerialID, int count, CertType certType)
+      throws IOException {
+    return Collections.emptyList();
+  }
 }
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java
index f389cdb..053520a 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/security/x509/certificate/client/TestDefaultCertificateClient.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.security.ssl.KeyStoreTestUtil;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.LambdaTestUtils;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.*;
 import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_METADATA_DIR_NAME;
 import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_NAMES;
@@ -75,7 +76,6 @@ public class TestDefaultCertificateClient {
   private Path dnMetaDirPath;
   private SecurityConfig omSecurityConfig;
   private SecurityConfig dnSecurityConfig;
-  private final static String UTF = "UTF-8";
   private final static String DN_COMPONENT = DNCertificateClient.COMPONENT_NAME;
   private final static String OM_COMPONENT = OMCertificateClient.COMPONENT_NAME;
   private KeyCodec omKeyCodec;
@@ -201,7 +201,7 @@ public class TestDefaultCertificateClient {
 
   @Test
   public void testSignDataStream() throws Exception {
-    String data = RandomStringUtils.random(100, UTF);
+    String data = RandomStringUtils.random(100);
     FileUtils.deleteQuietly(Paths.get(
         omSecurityConfig.getKeyLocation(OM_COMPONENT).toString(),
         omSecurityConfig.getPrivateKeyFileName()).toFile());
@@ -212,13 +212,12 @@ public class TestDefaultCertificateClient {
     // Expect error when there is no private key to sign.
     LambdaTestUtils.intercept(IOException.class, "Error while " +
             "signing the stream",
-        () -> omCertClient.signDataStream(IOUtils.toInputStream(data,
-            UTF)));
+        () -> omCertClient.signDataStream(IOUtils.toInputStream(data, UTF_8)));
 
     generateKeyPairFiles();
     byte[] sign = omCertClient.signDataStream(IOUtils.toInputStream(data,
-        UTF));
-    validateHash(sign, data.getBytes());
+        UTF_8));
+    validateHash(sign, data.getBytes(UTF_8));
   }
 
   /**
@@ -239,21 +238,22 @@ public class TestDefaultCertificateClient {
    */
   @Test
   public void verifySignatureStream() throws Exception {
-    String data = RandomStringUtils.random(500, UTF);
+    String data = RandomStringUtils.random(500);
     byte[] sign = omCertClient.signDataStream(IOUtils.toInputStream(data,
-        UTF));
+        UTF_8));
 
     // Positive tests.
-    assertTrue(omCertClient.verifySignature(data.getBytes(), sign,
+    assertTrue(omCertClient.verifySignature(data.getBytes(UTF_8), sign,
         x509Certificate));
-    assertTrue(omCertClient.verifySignature(IOUtils.toInputStream(data, UTF),
+    assertTrue(omCertClient.verifySignature(
+        IOUtils.toInputStream(data, UTF_8),
         sign, x509Certificate));
 
     // Negative tests.
-    assertFalse(omCertClient.verifySignature(data.getBytes(),
-        "abc".getBytes(), x509Certificate));
+    assertFalse(omCertClient.verifySignature(data.getBytes(UTF_8),
+        "abc".getBytes(UTF_8), x509Certificate));
     assertFalse(omCertClient.verifySignature(IOUtils.toInputStream(data,
-        UTF), "abc".getBytes(), x509Certificate));
+        UTF_8), "abc".getBytes(UTF_8), x509Certificate));
 
   }
 
@@ -262,20 +262,21 @@ public class TestDefaultCertificateClient {
    */
   @Test
   public void verifySignatureDataArray() throws Exception {
-    String data = RandomStringUtils.random(500, UTF);
-    byte[] sign = omCertClient.signData(data.getBytes());
+    String data = RandomStringUtils.random(500);
+    byte[] sign = omCertClient.signData(data.getBytes(UTF_8));
 
     // Positive tests.
-    assertTrue(omCertClient.verifySignature(data.getBytes(), sign,
+    assertTrue(omCertClient.verifySignature(data.getBytes(UTF_8), sign,
         x509Certificate));
-    assertTrue(omCertClient.verifySignature(IOUtils.toInputStream(data, UTF),
+    assertTrue(omCertClient.verifySignature(
+        IOUtils.toInputStream(data, UTF_8),
         sign, x509Certificate));
 
     // Negative tests.
-    assertFalse(omCertClient.verifySignature(data.getBytes(),
-        "abc".getBytes(), x509Certificate));
+    assertFalse(omCertClient.verifySignature(data.getBytes(UTF_8),
+        "abc".getBytes(UTF_8), x509Certificate));
     assertFalse(omCertClient.verifySignature(IOUtils.toInputStream(data,
-        UTF), "abc".getBytes(), x509Certificate));
+        UTF_8), "abc".getBytes(UTF_8), x509Certificate));
 
   }
 
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/TestJsonUtils.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/TestJsonUtils.java
index b5452fb..c84eae5 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/TestJsonUtils.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/TestJsonUtils.java
@@ -38,7 +38,7 @@ public class TestJsonUtils {
 
     assertContains(result, "\"rawSize\" : 123");
     assertContains(result, "\"unit\" : \"MB\"");
-    assertContains(result, "\"quotaInCounts\" : 1000");
+    assertContains(result, "\"quotaInNamespace\" : 1000");
   }
 
   private static void assertContains(String str, String part) {
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestRatisDropwizardExports.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestRatisDropwizardExports.java
index 25f1cef..906ff55 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestRatisDropwizardExports.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/server/http/TestRatisDropwizardExports.java
@@ -24,7 +24,10 @@ import java.util.concurrent.TimeUnit;
 import com.codahale.metrics.MetricRegistry;
 import io.prometheus.client.CollectorRegistry;
 import io.prometheus.client.exporter.common.TextFormat;
-import org.apache.ratis.server.metrics.RaftLogMetrics;
+import org.apache.ratis.protocol.RaftGroupId;
+import org.apache.ratis.protocol.RaftGroupMemberId;
+import org.apache.ratis.protocol.RaftPeerId;
+import org.apache.ratis.server.metrics.SegmentedRaftLogMetrics;
 import org.junit.Assert;
 import org.junit.Test;
 
@@ -36,7 +39,9 @@ public class TestRatisDropwizardExports {
   @Test
   public void export() throws IOException {
     //create Ratis metrics
-    RaftLogMetrics instance = new RaftLogMetrics("instance");
+    SegmentedRaftLogMetrics instance = new SegmentedRaftLogMetrics(
+        RaftGroupMemberId.valueOf(
+            RaftPeerId.valueOf("peerId"), RaftGroupId.randomId()));
     instance.getRaftLogSyncTimer().update(10, TimeUnit.MILLISECONDS);
     MetricRegistry dropWizardMetricRegistry =
         instance.getRegistry().getDropWizardMetricRegistry();
diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCacheImpl.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java
similarity index 69%
rename from hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCacheImpl.java
rename to hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java
index 891c065..07ed307 100644
--- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCacheImpl.java
+++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/cache/TestTableCache.java
@@ -26,11 +26,13 @@ import java.util.List;
 import java.util.concurrent.CompletableFuture;
 
 import com.google.common.base.Optional;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
+import org.slf4j.event.Level;
 
 import static org.junit.Assert.fail;
 
@@ -38,31 +40,35 @@ import static org.junit.Assert.fail;
  * Class tests partial table cache.
  */
 @RunWith(value = Parameterized.class)
-public class TestTableCacheImpl {
-  private TableCacheImpl<CacheKey<String>, CacheValue<String>> tableCache;
+public class TestTableCache {
+  private TableCache<CacheKey<String>, CacheValue<String>> tableCache;
 
-  private final TableCacheImpl.CacheCleanupPolicy cacheCleanupPolicy;
+  private final TableCache.CacheType cacheType;
 
 
   @Parameterized.Parameters
   public static Collection<Object[]> policy() {
     Object[][] params = new Object[][] {
-        {TableCacheImpl.CacheCleanupPolicy.NEVER},
-        {TableCacheImpl.CacheCleanupPolicy.MANUAL}
+        {TableCache.CacheType.FULL_CACHE},
+        {TableCache.CacheType.PARTIAL_CACHE}
     };
     return Arrays.asList(params);
   }
 
-  public TestTableCacheImpl(
-      TableCacheImpl.CacheCleanupPolicy cacheCleanupPolicy) {
-    this.cacheCleanupPolicy = cacheCleanupPolicy;
+  public TestTableCache(
+      TableCache.CacheType cacheType) {
+    GenericTestUtils.setLogLevel(FullTableCache.LOG, Level.DEBUG);
+    this.cacheType = cacheType;
   }
 
 
   @Before
   public void create() {
-    tableCache =
-        new TableCacheImpl<>(cacheCleanupPolicy);
+    if (cacheType == TableCache.CacheType.FULL_CACHE) {
+      tableCache = new FullTableCache<>();
+    } else {
+      tableCache = new PartialTableCache<>();
+    }
   }
   @Test
   public void testPartialTableCache() {
@@ -119,7 +125,7 @@ public class TestTableCacheImpl {
     final int count = totalCount;
 
     // If cleanup policy is manual entries should have been removed.
-    if (cacheCleanupPolicy == TableCacheImpl.CacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
       Assert.assertEquals(count - epochs.size(), tableCache.size());
 
       // Check remaining entries exist or not and deleted entries does not
@@ -178,14 +184,13 @@ public class TestTableCacheImpl {
     epochs.add(3L);
     epochs.add(4L);
 
-    if (cacheCleanupPolicy == cacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
 
       tableCache.evictCache(epochs);
 
       Assert.assertEquals(0, tableCache.size());
 
-      // Epoch entries which are overrided still exist.
-      Assert.assertEquals(2, tableCache.getEpochEntrySet().size());
+      Assert.assertEquals(0, tableCache.getEpochEntrySet().size());
     }
 
     // Add a new entry.
@@ -194,7 +199,7 @@ public class TestTableCacheImpl {
 
     epochs = new ArrayList<>();
     epochs.add(5L);
-    if (cacheCleanupPolicy == cacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
       tableCache.evictCache(epochs);
 
       Assert.assertEquals(0, tableCache.size());
@@ -252,21 +257,19 @@ public class TestTableCacheImpl {
     epochs.add(6L);
 
 
-    if (cacheCleanupPolicy == cacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
       tableCache.evictCache(epochs);
 
       Assert.assertEquals(0, tableCache.size());
 
-      // Epoch entries which are overrided still exist.
-      Assert.assertEquals(4, tableCache.getEpochEntrySet().size());
+      Assert.assertEquals(0, tableCache.getEpochEntrySet().size());
     } else {
       tableCache.evictCache(epochs);
 
       Assert.assertEquals(1, tableCache.size());
 
-      // Epoch entries which are overrided still exist and one not deleted As
-      // this cache clean up policy is NEVER.
-      Assert.assertEquals(5, tableCache.getEpochEntrySet().size());
+      // Epoch entries which are overridden will also be cleaned up.
+      Assert.assertEquals(0, tableCache.getEpochEntrySet().size());
     }
 
     // Add a new entry, now old override entries will be cleaned up.
@@ -276,7 +279,7 @@ public class TestTableCacheImpl {
     epochs = new ArrayList<>();
     epochs.add(7L);
 
-    if (cacheCleanupPolicy == cacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
       tableCache.evictCache(epochs);
 
       Assert.assertEquals(0, tableCache.size());
@@ -289,9 +292,9 @@ public class TestTableCacheImpl {
       // 2 entries will be in cache, as 2 are not deleted.
       Assert.assertEquals(2, tableCache.size());
 
-      // Epoch entries which are not marked for delete will exist override
-      // entries will be cleaned up.
-      Assert.assertEquals(2, tableCache.getEpochEntrySet().size());
+      // Epoch entries which are not marked for delete will also be cleaned up,
+      // as they are overridden entries in the full cache.
+      Assert.assertEquals(0, tableCache.getEpochEntrySet().size());
     }
 
 
@@ -337,7 +340,7 @@ public class TestTableCacheImpl {
 
     totalCount += value;
 
-    if (cacheCleanupPolicy == TableCacheImpl.CacheCleanupPolicy.MANUAL) {
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
       int deleted = 5;
 
       // cleanup first 5 entires
@@ -380,6 +383,95 @@ public class TestTableCacheImpl {
 
   }
 
+  @Test
+  public void testTableCache() {
+
+    // In non-HA, epoch entries might be out of order.
+    // Scenario: create vol, set vol, set vol, delete vol.
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(0)), 0));
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(1)), 1));
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(2)), 3));
+
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.absent(), 2));
+
+    List<Long> epochs = new ArrayList<>();
+    epochs.add(0L);
+    epochs.add(1L);
+    epochs.add(2L);
+    epochs.add(3L);
+
+    tableCache.evictCache(epochs);
+
+    Assert.assertTrue(tableCache.size() == 0);
+    Assert.assertTrue(tableCache.getEpochEntrySet().size() == 0);
+  }
+
+
+  @Test
+  public void testTableCacheWithNonConsecutiveEpochList() {
+
+    // In non-HA, epoch entries might be out of order.
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(0)), 0));
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(1)), 1));
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+        new CacheValue<>(Optional.of(Long.toString(3)), 3));
+
+    tableCache.put(new CacheKey<>(Long.toString(0)),
+          new CacheValue<>(Optional.of(Long.toString(2)), 2));
+
+    tableCache.put(new CacheKey<>(Long.toString(1)),
+        new CacheValue<>(Optional.of(Long.toString(1)), 4));
+
+    List<Long> epochs = new ArrayList<>();
+    epochs.add(0L);
+    epochs.add(1L);
+    epochs.add(3L);
+
+    tableCache.evictCache(epochs);
+
+    Assert.assertTrue(tableCache.size() == 2);
+    Assert.assertTrue(tableCache.getEpochEntrySet().size() == 2);
+
+    Assert.assertNotNull(tableCache.get(new CacheKey<>(Long.toString(0))));
+    Assert.assertEquals(2,
+        tableCache.get(new CacheKey<>(Long.toString(0))).getEpoch());
+
+    Assert.assertNotNull(tableCache.get(new CacheKey<>(Long.toString(1))));
+    Assert.assertEquals(4,
+        tableCache.get(new CacheKey<>(Long.toString(1))).getEpoch());
+
+    // now evict 2,4
+    epochs = new ArrayList<>();
+    epochs.add(2L);
+    epochs.add(4L);
+
+    tableCache.evictCache(epochs);
+
+    if (cacheType == TableCache.CacheType.PARTIAL_CACHE) {
+      Assert.assertTrue(tableCache.size() == 0);
+      Assert.assertTrue(tableCache.getEpochEntrySet().size() == 0);
+    } else {
+      Assert.assertTrue(tableCache.size() == 2);
+      Assert.assertTrue(tableCache.getEpochEntrySet().size() == 0);
+
+      // Entries should exist, as they are not delete entries.
+      Assert.assertNotNull(tableCache.get(new CacheKey<>(Long.toString(0))));
+      Assert.assertEquals(2,
+          tableCache.get(new CacheKey<>(Long.toString(0))).getEpoch());
+
+      Assert.assertNotNull(tableCache.get(new CacheKey<>(Long.toString(1))));
+      Assert.assertEquals(4,
+          tableCache.get(new CacheKey<>(Long.toString(1))).getEpoch());
+    }
+
+  }
+
   private int writeToCache(int count, int startVal, long sleep)
       throws InterruptedException {
     int counter = 1;
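
The tests above call evictCache directly so the behaviour is deterministic. In a running service, the component that flushes batches to RocksDB is expected to hand the flushed epochs to cleanup(), with eviction handled by the cache's single cleanup thread created in the constructors above. A hedged sketch of that call pattern; the hook name is invented for illustration.

    import java.util.List;
    import org.apache.hadoop.hdds.utils.db.cache.CacheKey;
    import org.apache.hadoop.hdds.utils.db.cache.CacheValue;
    import org.apache.hadoop.hdds.utils.db.cache.TableCache;

    final class FlushHookSketch {
      private FlushHookSketch() { }

      static void afterDbFlush(
          TableCache<CacheKey<String>, CacheValue<String>> cache,
          List<Long> flushedEpochs) {
        // Evict entries belonging to epochs that are now safely on disk.
        cache.cleanup(flushedEpochs);
      }
    }
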
diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
index 886b43c..1a85ebb 100644
--- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
+++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
@@ -62,9 +62,12 @@ message ScmContainerLocationRequest {
   optional GetPipelineRequestProto getPipelineRequest = 24;
   optional GetContainerWithPipelineBatchRequestProto getContainerWithPipelineBatchRequest = 25;
   optional GetSafeModeRuleStatusesRequestProto getSafeModeRuleStatusesRequest = 26;
-  optional FinalizeScmUpgradeRequestProto finalizeScmUpgradeRequest = 27;
+  optional DecommissionNodesRequestProto decommissionNodesRequest = 27;
+  optional RecommissionNodesRequestProto recommissionNodesRequest = 28;
+  optional StartMaintenanceNodesRequestProto startMaintenanceNodesRequest = 29;
+  optional FinalizeScmUpgradeRequestProto finalizeScmUpgradeRequest = 30;
   optional QueryUpgradeFinalizationProgressRequestProto
-  queryUpgradeFinalizationProgressRequest = 28;
+  queryUpgradeFinalizationProgressRequest = 31;
 }
 
 message ScmContainerLocationResponse {
@@ -99,9 +102,12 @@ message ScmContainerLocationResponse {
   optional GetPipelineResponseProto getPipelineResponse = 24;
   optional GetContainerWithPipelineBatchResponseProto getContainerWithPipelineBatchResponse = 25;
   optional GetSafeModeRuleStatusesResponseProto getSafeModeRuleStatusesResponse = 26;
-  optional FinalizeScmUpgradeResponseProto finalizeScmUpgradeResponse = 27;
+  optional DecommissionNodesResponseProto decommissionNodesResponse = 27;
+  optional RecommissionNodesResponseProto recommissionNodesResponse = 28;
+  optional StartMaintenanceNodesResponseProto startMaintenanceNodesResponse = 29;
+  optional FinalizeScmUpgradeResponseProto finalizeScmUpgradeResponse = 30;
   optional QueryUpgradeFinalizationProgressResponseProto
-  queryUpgradeFinalizationProgressResponse = 28;
+  queryUpgradeFinalizationProgressResponse = 31;
   enum Status {
     OK = 1;
     CONTAINER_ALREADY_EXISTS = 2;
@@ -132,8 +138,11 @@ enum Type {
   GetPipeline = 19;
   GetContainerWithPipelineBatch = 20;
   GetSafeModeRuleStatuses = 21;
-  FinalizeScmUpgrade = 22;
-  QueryUpgradeFinalizationProgress = 23;
+  DecommissionNodes = 22;
+  RecommissionNodes = 23;
+  StartMaintenanceNodes = 24;
+  FinalizeScmUpgrade = 25;
+  QueryUpgradeFinalizationProgress = 26;
 }
 
 /**
@@ -237,16 +246,51 @@ message SCMCloseContainerResponseProto {
  match the NodeState that we are requesting.
 */
 message NodeQueryRequestProto {
-  required NodeState state = 1;
+  optional NodeState state = 1;
   required QueryScope scope = 2;
   optional string poolName = 3; // if scope is pool, then pool name is needed.
   optional string traceID = 4;
+  optional NodeOperationalState opState = 5;
 }
 
 message NodeQueryResponseProto {
   repeated Node datanodes = 1;
 }
 
+/*
+  Decommission a list of hosts
+*/
+message DecommissionNodesRequestProto {
+  repeated string hosts = 1;
+}
+
+message DecommissionNodesResponseProto {
+  // empty response
+}
+
+/*
+  Recommission a list of hosts in maintenance or decommission states
+*/
+message RecommissionNodesRequestProto {
+  repeated string hosts = 1;
+}
+
+message RecommissionNodesResponseProto {
+  // empty response
+}
+
+/*
+  Place a list of hosts into maintenance mode
+*/
+message StartMaintenanceNodesRequestProto {
+  repeated string hosts = 1;
+  optional int64 endInHours = 2;
+}
+
+message StartMaintenanceNodesResponseProto {
+  // empty response
+}
+
 /**
   Request to create a replication pipeline.
  */
@@ -371,5 +415,4 @@ message QueryUpgradeFinalizationProgressResponseProto {
  */
 service StorageContainerLocationProtocolService {
   rpc submitRequest (ScmContainerLocationRequest) returns (ScmContainerLocationResponse);
-
 }
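
For illustration, a hedged sketch (not taken from this patch) of how an admin
client might build the new node-state requests with the protobuf-generated
builders; the enclosing generated class and package names are assumptions:

    DecommissionNodesRequestProto decommission =
        DecommissionNodesRequestProto.newBuilder()
            .addHosts("dn1.example.com")
            .addHosts("dn2.example.com:9856")
            .build();

    StartMaintenanceNodesRequestProto maintenance =
        StartMaintenanceNodesRequestProto.newBuilder()
            .addHosts("dn3.example.com")
            .setEndInHours(24)   // optional end time for the maintenance window
            .build();
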
diff --git a/hadoop-hdds/interface-client/src/main/proto/hdds.proto b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
index 3517731..afe8f1f 100644
--- a/hadoop-hdds/interface-client/src/main/proto/hdds.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/hdds.proto
@@ -43,6 +43,8 @@ message DatanodeDetailsProto {
     // network name, can be Ip address or host name, depends
     optional string networkName = 6;
     optional string networkLocation = 7; // Network topology location
+    optional NodeOperationalState persistedOpState = 8; // The Operational state persisted in the datanode.id file
+    optional int64 persistedOpStateExpiry = 9; // The seconds after the epoch when the OpState should expire
     // TODO(runzhiwang): when uuid is gone, specify 1 as the index of uuid128 and mark as required
     optional UUID uuid128 = 100; // UUID with 128 bits assigned to the Datanode.
 }
@@ -129,9 +131,15 @@ enum NodeState {
     HEALTHY = 1;
     STALE = 2;
     DEAD = 3;
+    HEALTHY_READONLY = 6;
+}
+
+enum NodeOperationalState {
+    IN_SERVICE = 1;
+    ENTERING_MAINTENANCE = 2;
+    IN_MAINTENANCE = 3;
     DECOMMISSIONING = 4;
     DECOMMISSIONED = 5;
-    HEALTHY_READONLY = 6;
 }
 
 enum QueryScope {
@@ -142,6 +150,7 @@ enum QueryScope {
 message Node {
     required DatanodeDetailsProto nodeID = 1;
     repeated NodeState nodeStates = 2;
+    repeated NodeOperationalState nodeOperationalStates = 3;
 }
 
 message NodePool {
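
For illustration, a hedged sketch of reading the extended Node message from a
node query response; queryResponse is a hypothetical NodeQueryResponseProto and
the accessor names follow standard protobuf-generated conventions:

    for (HddsProtos.Node node : queryResponse.getDatanodesList()) {
      // Health (HEALTHY/STALE/DEAD/...) and operational state
      // (IN_SERVICE/DECOMMISSIONING/...) are now reported separately.
      System.out.println(node.getNodeID().getNetworkName()
          + " health=" + node.getNodeStatesList()
          + " opState=" + node.getNodeOperationalStatesList());
    }
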
diff --git a/hadoop-hdds/interface-client/src/main/resources/proto.lock b/hadoop-hdds/interface-client/src/main/resources/proto.lock
index 581ffaf..8bd3023 100644
--- a/hadoop-hdds/interface-client/src/main/resources/proto.lock
+++ b/hadoop-hdds/interface-client/src/main/resources/proto.lock
@@ -1292,14 +1292,6 @@
               {
                 "name": "DEAD",
                 "integer": 3
-              },
-              {
-                "name": "DECOMMISSIONING",
-                "integer": 4
-              },
-              {
-                "name": "DECOMMISSIONED",
-                "integer": 5
               }
             ]
           },
diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
index 9d0dbd2..f129c0d 100644
--- a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
+++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
@@ -304,7 +304,8 @@ message SCMCommandProto {
     replicateContainerCommand = 5;
     createPipelineCommand = 6;
     closePipelineCommand = 7;
-    finalizeNewLayoutVersionCommand = 8;
+    setNodeOperationalStateCommand = 8;
+    finalizeNewLayoutVersionCommand = 9;
   }
   // TODO: once we start using protoc 3.x, refactor this message using "oneof"
   required Type commandType = 1;
@@ -315,8 +316,9 @@ message SCMCommandProto {
   optional ReplicateContainerCommandProto replicateContainerCommandProto = 6;
   optional CreatePipelineCommandProto createPipelineCommandProto = 7;
   optional ClosePipelineCommandProto closePipelineCommandProto = 8;
+  optional SetNodeOperationalStateCommandProto setNodeOperationalStateCommandProto = 9;
   optional FinalizeNewLayoutVersionCommandProto
-  finalizeNewLayoutVersionCommandProto = 9;
+  finalizeNewLayoutVersionCommandProto = 10;
 }
 
 /**
@@ -405,6 +407,12 @@ message ClosePipelineCommandProto {
   required int64 cmdId = 2;
 }
 
+message SetNodeOperationalStateCommandProto {
+  required  int64 cmdId = 1;
+  required  NodeOperationalState nodeOperationalState = 2;
+  required  int64 stateExpiryEpochSeconds = 3;
+}
+
 /**
  * This command asks the DataNode to finalize a new layout version.
  */
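
A hedged sketch of how SCM might build the new command; the enclosing
StorageContainerDatanodeProtocolProtos and HddsProtos classes follow the
imports used elsewhere in this patch, but the surrounding wiring is assumed:

    SetNodeOperationalStateCommandProto cmd =
        SetNodeOperationalStateCommandProto.newBuilder()
            .setCmdId(1L)
            .setNodeOperationalState(
                HddsProtos.NodeOperationalState.IN_MAINTENANCE)
            .setStateExpiryEpochSeconds(
                System.currentTimeMillis() / 1000 + 24 * 3600)
            .build();
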
diff --git a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto
index 72e0e9f..114d215 100644
--- a/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto
+++ b/hadoop-hdds/interface-server/src/main/proto/ScmServerSecurityProtocol.proto
@@ -48,6 +48,7 @@ message SCMSecurityRequest {
     optional SCMGetOMCertRequestProto getOMCertRequest = 4;
     optional SCMGetCertificateRequestProto getCertificateRequest = 5;
     optional SCMGetCACertificateRequestProto getCACertificateRequest = 6;
+    optional SCMListCertificateRequestProto listCertificateRequest = 7;
 
 }
 
@@ -66,6 +67,8 @@ message SCMSecurityResponse {
 
     optional SCMGetCertResponseProto getCertResponseProto = 6;
 
+    optional SCMListCertificateResponseProto listCertificateResponseProto = 7;
+
 }
 
 enum Type {
@@ -73,6 +76,7 @@ enum Type {
     GetOMCertificate = 2;
     GetCertificate = 3;
     GetCACertificate = 4;
+    ListCertificate = 5;
 }
 
 enum Status {
@@ -110,6 +114,16 @@ message SCMGetCACertificateRequestProto {
 }
 
 /**
+* Proto request to list certificates by node type or all.
+*/
+message SCMListCertificateRequestProto {
+    optional NodeType role = 1;
+    optional int64 startCertId = 2;
+    required uint32 count = 3; // Maximum number of certificates to return
+    optional bool isRevoked = 4; // list revoked certs
+}
+
+/**
  * Returns a certificate signed by SCM.
  */
 message SCMGetCertResponseProto {
@@ -123,6 +137,18 @@ message SCMGetCertResponseProto {
     optional string x509CACertificate = 3; // Base64 encoded CA X509 certificate.
 }
 
+/**
+* Return a list of PEM encoded certificates.
+*/
+message SCMListCertificateResponseProto {
+    enum ResponseCode {
+        success = 1;
+        authenticationFailed = 2;
+    }
+    required ResponseCode responseCode = 1;
+    repeated string certificates = 2;
+}
+
 
 service SCMSecurityProtocolService {
     rpc submitRequest (SCMSecurityRequest) returns (SCMSecurityResponse);
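
A hedged sketch of the new list-certificates request; the enclosing generated
class name and the location of the NodeType enum are assumptions:

    SCMListCertificateRequestProto listReq =
        SCMListCertificateRequestProto.newBuilder()
            .setRole(HddsProtos.NodeType.OM)   // optional: filter by role
            .setCount(100)                     // required: max certs to return
            .setIsRevoked(false)               // optional: only revoked certs
            .build();
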
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
index dfacae0..91b5494 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java
@@ -24,12 +24,12 @@ import java.util.stream.Collectors;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.placement.algorithms.ContainerPlacementStatusDefault;
 import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
 import org.apache.hadoop.hdds.scm.exceptions.SCMException;
 import org.apache.hadoop.hdds.scm.net.NetworkTopology;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
 
 import com.google.common.annotations.VisibleForTesting;
 import org.slf4j.Logger;
@@ -122,7 +122,7 @@ public abstract class SCMCommonPlacementPolicy implements PlacementPolicy {
       List<DatanodeDetails> excludedNodes, List<DatanodeDetails> favoredNodes,
       int nodesRequired, final long sizeRequired) throws SCMException {
     List<DatanodeDetails> healthyNodes =
-        nodeManager.getNodes(HddsProtos.NodeState.HEALTHY);
+        nodeManager.getNodes(NodeStatus.inServiceHealthy());
     if (excludedNodes != null) {
       healthyNodes.removeAll(excludedNodes);
     }
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
index 014c76c..fae21b4 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/BlockManagerImpl.java
@@ -297,8 +297,10 @@ public class BlockManagerImpl implements BlockManager, BlockmanagerMXBean {
     // TODO: track the block size info so that we can reclaim the container
     // TODO: used space when the block is deleted.
     for (BlockGroup bg : keyBlocksInfoList) {
-      LOG.info("Deleting blocks {}",
-          StringUtils.join(",", bg.getBlockIDList()));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Deleting blocks {}",
+            StringUtils.join(",", bg.getBlockIDList()));
+      }
       for (BlockID block : bg.getBlockIDList()) {
         long containerID = block.getContainerID();
         if (containerBlocks.containsKey(containerID)) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
index aa55480..ac53f2c 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
@@ -18,11 +18,12 @@
 package org.apache.hadoop.hdds.scm.block;
 
 import java.io.IOException;
-import java.util.LinkedHashSet;
 import java.util.List;
-import java.util.Map;
-import java.util.Set;
 import java.util.UUID;
+import java.util.Set;
+import java.util.Map;
+import java.util.LinkedHashSet;
+import java.util.ArrayList;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.locks.Lock;
@@ -35,8 +36,9 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolPro
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerBlocksDeletionACKProto.DeleteBlockTransactionResult;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 import org.apache.hadoop.hdds.scm.command.CommandStatusReportHandler.DeleteBlockStatus;
-import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.metadata.SCMMetadataStore;
@@ -129,9 +131,12 @@ public class DeletedBlockLogImpl
         DeletedBlocksTransaction block =
             scmMetadataStore.getDeletedBlocksTXTable().get(txID);
         if (block == null) {
-          // Should we make this an error ? How can we not find the deleted
-          // TXID?
-          LOG.warn("Deleted TXID {} not found.", txID);
+          if (LOG.isDebugEnabled()) {
+            // This can occur due to a race between a retry and the old
+            // service task, where the old task removes the transaction
+            // while the new task is resending it.
+            LOG.debug("Deleted TXID {} not found.", txID);
+          }
           continue;
         }
         DeletedBlocksTransaction.Builder builder = block.toBuilder();
@@ -196,9 +201,12 @@ public class DeletedBlockLogImpl
               transactionResult.getContainerID());
           if (dnsWithCommittedTxn == null) {
             // Mostly likely it's a retried delete command response.
-            LOG.debug("Transaction txId={} commit by dnId={} for containerID={}"
-                    + " failed. Corresponding entry not found.", txID, dnID,
-                containerId);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug(
+                  "Transaction txId={} commit by dnId={} for containerID={}"
+                      + " failed. Corresponding entry not found.", txID, dnID,
+                  containerId);
+            }
             continue;
           }
 
@@ -218,12 +226,16 @@ public class DeletedBlockLogImpl
                 .collect(Collectors.toList());
             if (dnsWithCommittedTxn.containsAll(containerDns)) {
               transactionToDNsCommitMap.remove(txID);
-              LOG.debug("Purging txId={} from block deletion log", txID);
+              if (LOG.isDebugEnabled()) {
+                LOG.debug("Purging txId={} from block deletion log", txID);
+              }
               scmMetadataStore.getDeletedBlocksTXTable().delete(txID);
             }
           }
-          LOG.debug("Datanode txId={} containerId={} committed by dnId={}",
-              txID, containerId, dnID);
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Datanode txId={} containerId={} committed by dnId={}",
+                txID, containerId, dnID);
+          }
         } catch (IOException e) {
           LOG.warn("Could not commit delete block transaction: " +
               transactionResult.getTxID(), e);
@@ -354,19 +366,27 @@ public class DeletedBlockLogImpl
           ? extends Table.KeyValue<Long, DeletedBlocksTransaction>> iter =
                scmMetadataStore.getDeletedBlocksTXTable().iterator()) {
         int numBlocksAdded = 0;
+        List<DeletedBlocksTransaction> txnsToBePurged =
+            new ArrayList<>();
         while (iter.hasNext() && numBlocksAdded < blockDeletionLimit) {
-          Table.KeyValue<Long, DeletedBlocksTransaction> keyValue =
-              iter.next();
+          Table.KeyValue<Long, DeletedBlocksTransaction> keyValue = iter.next();
           DeletedBlocksTransaction txn = keyValue.getValue();
           final ContainerID id = ContainerID.valueof(txn.getContainerID());
-          if (txn.getCount() > -1 && txn.getCount() <= maxRetry
-              && !containerManager.getContainer(id).isOpen()) {
-            numBlocksAdded += txn.getLocalIDCount();
-            getTransaction(txn, transactions);
-            transactionToDNsCommitMap
-                .putIfAbsent(txn.getTxID(), new LinkedHashSet<>());
+          try {
+            if (txn.getCount() > -1 && txn.getCount() <= maxRetry
+                && !containerManager.getContainer(id).isOpen()) {
+              numBlocksAdded += txn.getLocalIDCount();
+              getTransaction(txn, transactions);
+              transactionToDNsCommitMap
+                  .putIfAbsent(txn.getTxID(), new LinkedHashSet<>());
+            }
+          } catch (ContainerNotFoundException ex) {
+            LOG.warn("Container: " + id + " was not found for the transaction: "
+                + txn);
+            txnsToBePurged.add(txn);
           }
         }
+        purgeTransactions(txnsToBePurged);
       }
       return transactions;
     } finally {
@@ -374,6 +394,18 @@ public class DeletedBlockLogImpl
     }
   }
 
+  public void purgeTransactions(List<DeletedBlocksTransaction> txnsToBePurged)
+      throws IOException {
+    try (BatchOperation batch = scmMetadataStore.getBatchHandler()
+        .initBatchOperation()) {
+      for (int i = 0; i < txnsToBePurged.size(); i++) {
+        scmMetadataStore.getDeletedBlocksTXTable()
+            .deleteWithBatch(batch, txnsToBePurged.get(i).getTxID());
+      }
+      scmMetadataStore.getBatchHandler().commitBatchOperation(batch);
+    }
+  }
+
   @Override
   public void onMessage(DeleteBlockStatus deleteBlockStatus,
                         EventPublisher publisher) {
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
index fbf5654..ceeaa10 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMBlockDeletingService.java
@@ -25,12 +25,12 @@ import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState;
 import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.DeletedBlocksTransaction;
 import org.apache.hadoop.hdds.scm.ScmConfig;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.hdds.utils.BackgroundService;
 import org.apache.hadoop.hdds.utils.BackgroundTask;
@@ -116,11 +116,14 @@ public class SCMBlockDeletingService extends BackgroundService {
       long startTime = Time.monotonicNow();
       // Scan SCM DB in HB interval and collect a throttled list of
       // to delete blocks.
+
       if (LOG.isDebugEnabled()) {
         LOG.debug("Running DeletedBlockTransactionScanner");
       }
-
-      List<DatanodeDetails> datanodes = nodeManager.getNodes(NodeState.HEALTHY);
+      // TODO - DECOMM - should we be deleting blocks from decom nodes
+      //        and what about entering maintenance.
+      List<DatanodeDetails> datanodes =
+          nodeManager.getNodes(NodeStatus.inServiceHealthy());
       if (datanodes != null) {
         try {
           DatanodeDeletedBlockTransactions transactions =
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java
new file mode 100644
index 0000000..bf8c3b9
--- /dev/null
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerReplicaCount.java
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdds.scm.container;
+
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import java.util.Set;
+
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONING;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
+import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
+
+/**
+ * Immutable object created with a set of ContainerReplica objects, the
+ * number of in-flight replica adds and deletes, the container replication
+ * factor and the minimum count which must remain available for maintenance.
+ * This information can be used to determine if the container is over- or
+ * under-replicated and how many additional replicas must be created or removed.
+ */
+public class ContainerReplicaCount {
+
+  private int healthyCount = 0;
+  private int decommissionCount = 0;
+  private int maintenanceCount = 0;
+  private int inFlightAdd = 0;
+  private int inFlightDel = 0;
+  private int repFactor;
+  private int minHealthyForMaintenance;
+  private ContainerInfo container;
+  private Set<ContainerReplica> replica;
+
+  public ContainerReplicaCount(ContainerInfo container,
+                               Set<ContainerReplica> replica, int inFlightAdd,
+                               int inFlightDelete, int replicationFactor,
+                               int minHealthyForMaintenance) {
+    this.healthyCount = 0;
+    this.decommissionCount = 0;
+    this.maintenanceCount = 0;
+    this.inFlightAdd = inFlightAdd;
+    this.inFlightDel = inFlightDelete;
+    this.repFactor = replicationFactor;
+    this.replica = replica;
+    this.minHealthyForMaintenance
+        = Math.min(this.repFactor, minHealthyForMaintenance);
+    this.container = container;
+
+    for (ContainerReplica cr : this.replica) {
+      HddsProtos.NodeOperationalState state =
+          cr.getDatanodeDetails().getPersistedOpState();
+      if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
+        decommissionCount++;
+      } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
+        maintenanceCount++;
+      } else {
+        healthyCount++;
+      }
+    }
+  }
+
+  public int getHealthyCount() {
+    return healthyCount;
+  }
+
+  public int getDecommissionCount() {
+    return decommissionCount;
+  }
+
+  public int getMaintenanceCount() {
+    return maintenanceCount;
+  }
+
+  public int getReplicationFactor() {
+    return repFactor;
+  }
+
+  public ContainerInfo getContainer() {
+    return container;
+  }
+
+  public Set<ContainerReplica> getReplica() {
+    return replica;
+  }
+
+  @Override
+  public String toString() {
+    return "Container State: " +container.getState()+
+        " Replica Count: "+replica.size()+
+        " Healthy Count: "+healthyCount+
+        " Decommission Count: "+decommissionCount+
+        " Maintenance Count: "+maintenanceCount+
+        " inFlightAdd Count: "+inFlightAdd+
+        " inFightDel Count: "+inFlightDel+
+        " ReplicationFactor: "+repFactor+
+        " minMaintenance Count: "+minHealthyForMaintenance;
+  }
+
+  /**
+   * Calculates the delta of replicas which need to be created or removed
+   * to ensure the container is correctly replicated, taking in-flight
+   * adds and deletes into account.
+   *
+   * When considering in-flight operations, we cannot know whether they will
+   * complete. To cover the worst case and avoid data loss, we always assume
+   * a delete will succeed and an add will fail. In this way, we avoid
+   * scheduling too many deletes, which could result in data loss.
+   *
+   * Decisions around over-replication are made only on healthy replicas,
+   * ignoring any in maintenance and also any inflight adds. InFlight adds are
+   * ignored, as they may not complete, so if we have:
+   *
+   *     H, H, H, IN_FLIGHT_ADD
+   *
+   * And then schedule a delete, we could end up under-replicated (add fails,
+   * delete completes). It is better to let the inflight operations complete
+   * and then deal with any further over or under replication.
+   *
+   * For maintenance replicas, assuming replication factor 3, and minHealthy
+   * 2, it is possible for all 3 hosts to be put into maintenance, leaving the
+   * following (H = healthy, M = maintenance):
+   *
+   *     H, H, M, M, M
+   *
+   * Even though we are tracking 5 replicas, this is not over replicated as we
+   * ignore the maintenance copies. Later, the replicas could look like:
+   *
+   *     H, H, H, H, M
+   *
+   * At this stage, the container is over replicated by 1, so one replica can be
+   * removed.
+   *
+   * For containers which already have replicationFactor healthy replicas, we
+   * ignore any in-flight adds or deletes, as they may fail. Instead, we wait
+   * for them to complete and then deal with any excess or deficit.
+   *
+   * For under-replicated containers we do consider in-flight adds and deletes
+   * to avoid scheduling more adds than needed. There is additional logic
+   * around containers with maintenance replicas to ensure
+   * minHealthyForMaintenance replicas are maintained.
+   *
+   * @return Delta of replicas needed. Negative indicates over-replication and
+   *         replicas should be removed. Positive indicates under-replication,
+   *         and zero indicates the container has replicationFactor healthy
+   *         replicas.
+   */
+  public int additionalReplicaNeeded() {
+    int delta = missingReplicas();
+
+    if (delta < 0) {
+      // Over replicated, so may need to remove a container. Do not consider
+      // inFlightAdds, as they may fail, but do consider inFlightDel which
+      // will reduce the over-replication if it completes.
+      // Note this could make the delta positive if there are too many in-flight
+      // deletes, which will result in an additional replica being scheduled.
+      return delta + inFlightDel;
+    } else {
+      // May be under or perfectly replicated.
+      // We must consider in flight add and delete when calculating the new
+      // containers needed, but we bound the lower limit at zero to allow
+      // inflight operations to complete before handling any potential over
+      // replication
+      return Math.max(0, delta - inFlightAdd + inFlightDel);
+    }
+  }
+
+  /**
+   * Returns the count of replicas which need to be created or removed to
+   * ensure the container is perfectly replicated. In-flight operations are not
+   * considered here, but the logic to determine the missing or excess counts
+   * for maintenance is present.
+   *
+   * Decisions around over-replication are made only on healthy replicas,
+   * ignoring any in maintenance. For example, if we have:
+   *
+   *     H, H, H, M, M
+   *
+   * This will not be considered over-replicated until one of the maintenance
+   * replicas moves to Healthy.
+   *
+   * If the container is perfectly replicated, zero will be returned.
+   *
+   * If it is under replicated a positive value will be returned, indicating
+   * how many replicas must be added.
+   *
+   * If it is over-replicated, a negative value will be returned, indicating
+   * how many replicas should be removed.
+   *
+   * @return Zero if the container is perfectly replicated, a positive value
+   *         for under replicated and a negative value for over replicated.
+   */
+  private int missingReplicas() {
+    int delta = repFactor - healthyCount;
+
+    if (delta < 0) {
+      // Over replicated, so may need to remove a container.
+      return delta;
+    } else if (delta > 0) {
+      // May be under-replicated, depending on maintenance.
+      delta = Math.max(0, delta - maintenanceCount);
+      int neededHealthy =
+          Math.max(0, minHealthyForMaintenance - healthyCount);
+      delta = Math.max(neededHealthy, delta);
+      return delta;
+    } else { // delta == 0
+      // We have exactly the number of healthy replicas needed.
+      return delta;
+    }
+  }
+
+  /**
+   * Return true if the container is sufficiently replicated. Decommissioning
+   * and decommissioned replicas are ignored in this check, assuming they will
+   * eventually be removed from the cluster.
+   * This check ignores inflight additions, as those replicas have not yet been
+   * created and the create could fail for some reason.
+   * The check does consider inflight deletes as there may be 3 healthy replicas
+   * now, but once the delete completes it will reduce to 2.
+   * We also assume a replica in Maintenance state cannot be removed, so the
+   * pending delete would affect only the healthy replica count.
+   *
+   * @return True if the container is sufficiently replicated and False
+   *         otherwise.
+   */
+  public boolean isSufficientlyReplicated() {
+    return missingReplicas() + inFlightDel <= 0;
+  }
+
+  /**
+   * Return true if the container is over-replicated. Decommission and
+   * maintenance replicas are ignored for this check.
+   * The check ignores inflight additions, as they may fail, but it does
+   * consider inflight deletes, as they would reduce the over replication when
+   * they complete.
+   *
+   * @return True if the container is over replicated, false otherwise.
+   */
+  public boolean isOverReplicated() {
+    return missingReplicas() + inFlightDel < 0;
+  }
+
+  /**
+   * Returns true if the container is healthy, meaning the container is in the
+   * QUASI_CLOSED or CLOSED state and all replicas which are not in a
+   * decommission or maintenance state match the container state.
+   *
+   * @return true if the container is healthy, false otherwise
+   */
+  public boolean isHealthy() {
+    return (container.getState() == HddsProtos.LifeCycleState.CLOSED
+        || container.getState() == HddsProtos.LifeCycleState.QUASI_CLOSED)
+        && replica.stream()
+        .filter(r -> r.getDatanodeDetails().getPersistedOpState() == IN_SERVICE)
+        .allMatch(r -> ReplicationManager.compareState(
+            container.getState(), r.getState()));
+  }
+}
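
To make the replica-count arithmetic above concrete, here is a standalone
method sketch (not part of the patch) that mirrors the documented
missingReplicas()/additionalReplicaNeeded() rules, followed by the worked
examples from the javadoc:

    // Given raw counts, return how many replicas still need to be created
    // (positive) or removed (negative), mirroring the logic described above.
    static int additionalReplicasNeeded(int healthy, int maintenance,
        int inFlightAdd, int inFlightDel, int repFactor,
        int minHealthyForMaintenance) {
      int missing = repFactor - healthy;
      if (missing > 0) {
        // Maintenance copies count towards replication, but at least
        // minHealthyForMaintenance replicas must remain healthy.
        missing = Math.max(0, missing - maintenance);
        missing = Math.max(minHealthyForMaintenance - healthy, missing);
      }
      if (missing < 0) {
        // Over replicated: assume in-flight deletes succeed, ignore adds.
        return missing + inFlightDel;
      }
      // Under or perfectly replicated: consider both, but never go below zero.
      return Math.max(0, missing - inFlightAdd + inFlightDel);
    }

    // Worked examples from the javadoc (H = healthy, M = maintenance):
    //   H, H, M, M, M with repFactor=3, minHealthy=2:
    //     additionalReplicasNeeded(2, 3, 0, 0, 3, 2) == 0  (not over replicated)
    //   H, H, H, H, M with repFactor=3, minHealthy=2:
    //     additionalReplicasNeeded(4, 1, 0, 0, 3, 2) == -1 (one can be removed)
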
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
index ed6924c..bde4c35 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManager.java
@@ -47,6 +47,8 @@ import org.apache.hadoop.hdds.scm.ContainerPlacementStatus;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.events.SCMEvents;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
+import org.apache.hadoop.hdds.scm.node.NodeStatus;
+import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
 import org.apache.hadoop.hdds.scm.safemode.SCMSafeModeManager.SafeModeStatus;
 import org.apache.hadoop.hdds.server.events.EventHandler;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
@@ -107,6 +109,11 @@ public class ReplicationManager
   private final LockManager<ContainerID> lockManager;
 
   /**
+   * Used to look up the health of a node or the node's operational state.
+   */
+  private final NodeManager nodeManager;
+
+  /**
    * This is used for tracking container replication commands which are issued
    * by ReplicationManager and not yet complete.
    */
@@ -136,9 +143,9 @@ public class ReplicationManager
   private volatile boolean running;
 
   /**
-   * Used for check datanode state.
+   * Minimum number of replicas in a healthy state for maintenance.
    */
-  private final NodeManager nodeManager;
+  private int minHealthyForMaintenance;
 
   /**
    * Constructs ReplicationManager instance with the given configuration.
@@ -158,11 +165,12 @@ public class ReplicationManager
     this.containerPlacement = containerPlacement;
     this.eventPublisher = eventPublisher;
     this.lockManager = lockManager;
+    this.nodeManager = nodeManager;
     this.conf = conf;
     this.running = false;
     this.inflightReplication = new ConcurrentHashMap<>();
     this.inflightDeletion = new ConcurrentHashMap<>();
-    this.nodeManager = nodeManager;
+    this.minHealthyForMaintenance = conf.getMaintenanceReplicaMinimum();
   }
 
   /**
@@ -258,7 +266,7 @@ public class ReplicationManager
    * @param id ContainerID
    */
   private void processContainer(ContainerID id) {
-    lockManager.lock(id);
+    lockManager.writeLock(id);
... 25684 lines suppressed ...


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@ozone.apache.org
For additional commands, e-mail: commits-help@ozone.apache.org