Posted to commits@mxnet.apache.org by bg...@apache.org on 2022/03/18 08:31:17 UTC

[incubator-mxnet] branch v1.x updated: [v1.x] Implemented MKLDNN Backward Adaptive Pooling kernel (#20855)

This is an automated email from the ASF dual-hosted git repository.

bgawrych pushed a commit to branch v1.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.x by this push:
     new 0715d2f  [v1.x] Implemented MKLDNN Backward Adaptive Pooling kernel (#20855)
0715d2f is described below

commit 0715d2fa85b31c717450ffc9648d6f2d0f1f8ee8
Author: PiotrWolinski - Intel <pi...@intel.com>
AuthorDate: Fri Mar 18 09:22:27 2022 +0100

    [v1.x] Implemented MKLDNN Backward Adaptive Pooling kernel (#20855)
    
    * Added adaptive backward pooling to v1.x
    
    * Sanity check fix
    
    * Sanity check fix
    
    * Restructured test to work without numpy
    
    * Changed PoolingParamParser to a template so it works for standard and adaptive pooling
    
    * Included comments from the review
---
 src/operator/contrib/adaptive_avg_pooling-inl.h    |   2 +-
 src/operator/contrib/adaptive_avg_pooling.cc       |  83 ++++++++++++--
 src/operator/nn/mkldnn/mkldnn_pooling-inl.h        |  81 +++++++++-----
 src/operator/nn/mkldnn/mkldnn_pooling.cc           | 121 +++++++++++++++------
 src/operator/nn/pooling-inl.h                      |  35 +++++-
 src/operator/nn/pooling.cc                         |  55 +---------
 .../mkldnn/mkldnn_quantized_pooling.cc             |   3 +-
 src/operator/quantization/quantized_pooling.cc     |   2 +-
 tests/python/mkl/test_mkldnn.py                    |  29 +++++
 9 files changed, 280 insertions(+), 131 deletions(-)

diff --git a/src/operator/contrib/adaptive_avg_pooling-inl.h b/src/operator/contrib/adaptive_avg_pooling-inl.h
index 118200a..0776dd9 100644
--- a/src/operator/contrib/adaptive_avg_pooling-inl.h
+++ b/src/operator/contrib/adaptive_avg_pooling-inl.h
@@ -123,7 +123,7 @@ static bool AdaptiveAvgPoolOpInferShape(const nnvm::NodeAttrs& attrs,
     return false;
   }
 
-  if (param.output_size.has_value()) {
+  if (param.IsAdaptivePooling()) {
     if (param.output_size.value().ndim() == 1) {
       dshape[2] = param.output_size.value()[0];
       dshape[3] = param.output_size.value()[0];
diff --git a/src/operator/contrib/adaptive_avg_pooling.cc b/src/operator/contrib/adaptive_avg_pooling.cc
index a11f88c..c640ccb 100644
--- a/src/operator/contrib/adaptive_avg_pooling.cc
+++ b/src/operator/contrib/adaptive_avg_pooling.cc
@@ -27,6 +27,7 @@
 #include "../operator_common.h"
 #include "adaptive_avg_pooling-inl.h"
 #if MXNET_USE_MKLDNN == 1
+#include "../nn/mkldnn/mkldnn_base-inl.h"
 #include "../nn/mkldnn/mkldnn_pooling-inl.h"
 #endif  // MXNET_USE_MKLDNN
 
@@ -219,12 +220,12 @@ bool SupportMKLDNNAveragePooling(const NDArray &in_data,
   const int OH = out_data.shape()[2];
   const int OW = out_data.shape()[3];
 
-  const int strides_H = floor((IH << 1) / OH) - floor(IH / OH);
-  const int strides_W = floor((IW << 1) / OW) - floor(IW / OW);
-  const int kernel_H = ceil((IH << 1) / OH) - floor(IH / OH);
-  const int kernel_W = ceil((IW << 1) / OW) - floor(IW / OW);
-  const int pad_l_top = (strides_H * (OH - 1) + kernel_H - IH) / 2;
-  const int pad_l_left = (strides_W * (OW - 1) + kernel_W - IW) / 2;
+  const int strides_H = ComputeStrides(IH, OH);
+  const int strides_W = ComputeStrides(IW, OW);
+  const int kernel_H = ComputeKernel(IH, OH);
+  const int kernel_W = ComputeKernel(IW, OW);
+  const int pad_l_top = ComputePadding(IH, OH, strides_H, kernel_H);
+  const int pad_l_left = ComputePadding(IW, OW, strides_W, kernel_W);
 
   return pad_l_top == 0 && pad_l_left == 0;
 }
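
A note on the hunk above: SupportMKLDNNAveragePooling decides whether an adaptive average pooling call can be lowered to a regular oneDNN pooling primitive. Stride, kernel, and padding are derived from the input (IH, IW) and output (OH, OW) spatial sizes, and the MKLDNN path is taken only when the derived top/left padding is zero. Below is a minimal standalone sketch of that arithmetic; it is not part of the commit, the helpers simply mirror the ComputeStrides/ComputeKernel/ComputePadding functions added to mkldnn_pooling-inl.h further down, written out with the floor/ceil intent of the replaced expressions and probed on sizes where the quotients are exact:

    #include <cassert>

    // stride = floor(2*I/O) - floor(I/O)
    // kernel = ceil(2*I/O)  - floor(I/O)
    // pad    = (stride*(O-1) + kernel - I) / 2
    // For non-negative ints, C++ division floors; ceil(a/b) is (a + b - 1) / b.
    static int ComputeStrides(int inner, int outer) {
      return (2 * inner / outer) - (inner / outer);
    }
    static int ComputeKernel(int inner, int outer) {
      return ((2 * inner + outer - 1) / outer) - (inner / outer);
    }
    static int ComputePadding(int inner, int outer, int strides, int kernel) {
      return (strides * (outer - 1) + kernel - inner) / 2;
    }

    int main() {
      // 32 -> 8: stride 4, kernel 4, padding 0, so the MKLDNN path is eligible.
      int s = ComputeStrides(32, 8);
      int k = ComputeKernel(32, 8);
      assert(s == 4 && k == 4 && ComputePadding(32, 8, s, k) == 0);

      // 10 -> 4: stride 3, kernel 3, padding 1, so it falls back to the CPU kernel.
      s = ComputeStrides(10, 4);
      k = ComputeKernel(10, 4);
      assert(s == 3 && k == 3 && ComputePadding(10, 4, s, k) == 1);
      return 0;
    }
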
@@ -244,16 +245,50 @@ void AdaptiveAvgPoolComputeExCPU(const nnvm::NodeAttrs &attrs,
   if (SupportMKLDNN(inputs[0]) &&
       SupportMKLDNNAveragePooling(inputs[0], outputs[0])) {
     const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
-
-    const NDArray *workspace = nullptr;
     MKLDNN_OPCHECK_INIT(false, 1, inputs, outputs);
-    MKLDNNPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, true);
+    MKLDNNRun(MKLDNNPoolingCompute, attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
   FallBackCompute(AdaptiveAvgPoolOpForward<cpu>, attrs, ctx, inputs, req,
                   outputs);
 }
+
+
+void AdaptiveAvgPoolOpBackwardExCPU(const nnvm::NodeAttrs& attrs,
+                                    const OpContext& ctx,
+                                    const std::vector<NDArray>& inputs,
+                                    const std::vector<OpReqType>& req,
+                                    const std::vector<NDArray>& outputs) {
+  // Pooling does not currently support working with views
+  if (inputs[0].IsView() || outputs[0].IsView()) {
+    FallBackCompute(AdaptiveAvgPoolOpBackward<cpu>, attrs, ctx, inputs, req, outputs);
+    return;
+  }
+
+  CHECK_EQ(inputs.size(), 1U);
+
+  if (SupportMKLDNNAveragePooling(outputs[0], inputs[0])) {
+    MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
+    MKLDNNRun(MKLDNNPoolingGradCompute, attrs, ctx, inputs, req, outputs);
+    MKLDNN_OPCHECK_RUN(AdaptiveAvgPoolOpBackward<cpu>, attrs, ctx, inputs, req, outputs);
+    return;
+  }
+  FallBackCompute(AdaptiveAvgPoolOpBackward<cpu>, attrs, ctx, inputs, req, outputs);
+}
+
+inline static bool BackwardAdaptivePoolingStorageType(const nnvm::NodeAttrs& attrs,
+                                                      const int dev_mask,
+                                                      DispatchMode* dispatch_mode,
+                                                      std::vector<int>* in_attrs,
+                                                      std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1);
+  CHECK_EQ(out_attrs->size(), 1);
+
+  // support_mkldnn is set to true because at this point there is no way
+  // to check whether MKLDNNAdaptivePooling is supported
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
+}
 #endif
 
 inline static bool AdaptivePoolingStorageType(const nnvm::NodeAttrs &attrs,
@@ -298,7 +333,7 @@ The pooling kernel and stride sizes are automatically chosen for desired output
   (N x C x height x width) for any input (NCHW).
 
 )code" ADD_FILELINE)
-.set_attr_parser(ParamParser<PoolingParam>)
+.set_attr_parser(PoolingParamParser<true>)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr<mxnet::FInferShape>("FInferShape", AdaptiveAvgPoolOpInferShape)
@@ -314,10 +349,36 @@ The pooling kernel and stride sizes are automatically chosen for desired output
 .add_arguments(PoolingParam::__FIELDS__());
 
 NNVM_REGISTER_OP(_backward_contrib_AdaptiveAvgPooling2D)
-.set_attr_parser(ParamParser<PoolingParam>)
+.set_attr_parser(PoolingParamParser<true>)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
+#if MXNET_USE_ONEDNN == 1
+    .set_attr<FInferStorageType>("FInferStorageType", BackwardAdaptivePoolingStorageType)
+    // Different backends require different FInplaceOption settings
+    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
+                                    [](const NodeAttrs& attrs) {
+                                      const PoolingParam& param =
+                                          nnvm::get<PoolingParam>(attrs.parsed);
+                                      if (MKLDNNRequireWorkspace(param) &&
+                                          param.IsAdaptivePooling())
+                                        return std::vector<std::pair<int, int>>{{1, 0}};
+                                      return std::vector<std::pair<int, int>>();
+                                    })
+    .set_attr<FResourceRequest>("FResourceRequest",
+                                [](const NodeAttrs& n) {
+                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+                                })
+#else
+    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
+                                    [](const NodeAttrs& attrs) {
+                                      return std::vector<std::pair<int, int>>();
+                                    })
+#endif
+#if MXNET_USE_ONEDNN == 1
+    .set_attr<bool>("TIsMKLDNN", true)
+    .set_attr<FComputeEx>("FComputeEx<cpu>", AdaptiveAvgPoolOpBackwardExCPU)
+#endif
 .set_attr<FCompute>("FCompute<cpu>", AdaptiveAvgPoolOpBackward<cpu>);
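
A note on the FInplaceOption attribute registered above: the lambda returns (input index, output index) pairs that the graph executor is allowed to alias, so {{1, 0}} permits output 0 (the computed input gradient) to share memory with input 1, while an empty vector forbids any reuse. A small illustration of how such a table could be consumed (generic, hypothetical executor logic, not MXNet's actual scheduler):

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // {{1, 0}} is what the MKLDNN branch above returns when a
      // workspace-backed adaptive kernel is in play: "output 0 may alias
      // input 1". The non-MKLDNN branch returns an empty list instead.
      const std::vector<std::pair<int, int>> inplace_option = {{1, 0}};
      const int num_outputs = 1;
      for (int out = 0; out < num_outputs; ++out) {
        int shared_input = -1;  // -1 means "allocate a fresh buffer"
        for (const auto& p : inplace_option)
          if (p.second == out) shared_input = p.first;
        if (shared_input >= 0)
          std::printf("output %d may reuse input %d\n", out, shared_input);
        else
          std::printf("output %d needs its own buffer\n", out);
      }
      return 0;
    }
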
 
 
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
index 775c6a9..236e3b9 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
@@ -28,10 +28,13 @@
 
 #include <mkldnn.hpp>
 #include <utility>
+#include <vector>
 
 #include "../pooling-inl.h"
 #include "./mkldnn_base-inl.h"
 
+#define DIV_ROUND_UP(a, b) (((a) + ((b) - 1)) / (b))
+
 namespace mxnet {
 namespace op {
 
@@ -54,7 +57,8 @@ class MKLDNNPoolingFwd {
   void Execute(const NDArray& in_data,
                const OpReqType req,
                const NDArray& out_data,
-               const NDArray* workspace);
+               const NDArray* workspace,
+               const bool use_adaptive_pooling);
 
  private:
   bool with_workspace_;
@@ -87,24 +91,37 @@ class MKLDNNPoolingBwd {
   const mkldnn::pooling_backward::primitive_desc& GetPd();
 };
 
+inline int ComputeStrides(const int inner, const int outer) {
+  return ((inner << 1) / outer) - (inner / outer);
+}
+
+inline int ComputeKernel(const int inner, const int outer) {
+  return DIV_ROUND_UP(inner << 1, outer) - (inner / outer);
+}
+
+inline int ComputePadding(const int inner, const int outer, const int strides,
+                          const int kernel) {
+  return (strides * (outer - 1) + kernel - inner) / 2;
+}
+
 template <typename T = mkldnn::memory::dims>
 void UseAdaptivePaddingKernel(T* kernel,
                               T* strides,
                               T* pad_l,
                               T* pad_r,
-                              const NDArray& in_data,
-                              const NDArray& out_data) {
-  const int IH = in_data.shape()[2];
-  const int IW = in_data.shape()[3];
-  const int OH = out_data.shape()[2];
-  const int OW = out_data.shape()[3];
-
-  strides->at(0) = floor((IH << 1) / OH) - floor(IH / OH);
-  strides->at(1) = floor((IW << 1) / OW) - floor(IW / OW);
-  kernel->at(0)  = ceil((IH << 1) / OH) - floor(IH / OH);
-  kernel->at(1)  = ceil((IW << 1) / OW) - floor(IW / OW);
-  pad_l->at(0)   = (strides->at(0) * (OH - 1) + kernel->at(0) - IH) >> 1;
-  pad_l->at(1)   = (strides->at(1) * (OW - 1) + kernel->at(1) - IW) >> 1;
+                              const mxnet::TShape& input_shape,
+                              const mxnet::TShape& output_shape) {
+  const int IH = input_shape[2];
+  const int IW = input_shape[3];
+  const int OH = output_shape[2];
+  const int OW = output_shape[3];
+
+  strides->at(0) = ComputeStrides(IH, OH);
+  strides->at(1) = ComputeStrides(IW, OW);
+  kernel->at(0) = ComputeKernel(IH, OH);
+  kernel->at(1) = ComputeKernel(IW, OW);
+  pad_l->at(0) = ComputePadding(IH, OH, strides->at(0), kernel->at(0));
+  pad_l->at(1) = ComputePadding(IW, OW, strides->at(1), kernel->at(1));
 }
 
 inline int GetPaddingSizeFull(dim_t x, int padl, int padr, int k, int s) {
@@ -169,30 +186,34 @@ inline bool SupportMKLDNNPooling(const PoolingParam& param, const NDArray& input
 }
 
 inline bool MKLDNNRequireWorkspace(const PoolingParam& param) {
-  return param.pool_type != pool_enum::kAvgPooling;
+  return param.pool_type != pool_enum::kAvgPooling && !param.IsAdaptivePooling();
 }
 
 typedef ParamOpSign<PoolingParam> MKLDNNPoolingSignature;
-void MKLDNNPoolingCompute(const OpContext& ctx,
-                          const PoolingParam& param,
-                          const NDArray& in_data,
-                          const OpReqType req,
-                          const NDArray& out_data,
-                          const NDArray* workspace,
-                          const bool use_adaptive_pooling);
-
-void MKLDNNPoolingGradCompute(const OpContext& ctx,
-                              const PoolingParam& param,
-                              const NDArray& out_grad,
-                              const NDArray& in_data,
-                              const NDArray* workspace,
-                              const OpReqType req,
-                              const NDArray& in_grad);
+
 MKLDNNPoolingFwd& GetPoolingFwd(const PoolingParam& param,
                                 const bool is_train,
                                 const NDArray& data,
                                 const NDArray& output,
                                 const bool use_adaptive_pooling);
+
+MKLDNNPoolingBwd& GetPoolingBwd(const PoolingParam& param,
+                                const NDArray& in_data,
+                                const NDArray& in_grad,
+                                const NDArray& out_grad,
+                                const bool use_adaptive_pooling);
+
+void MKLDNNPoolingCompute(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<NDArray>& in_data,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<NDArray>& out_data);
+
+void MKLDNNPoolingGradCompute(const nnvm::NodeAttrs &attrs,
+                              const OpContext &ctx,
+                              const std::vector<NDArray> &inputs,
+                              const std::vector<OpReqType> &req,
+                              const std::vector<NDArray> &outputs);
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_USE_MKLDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc
index d9500bc..63467f8 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling.cc
+++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc
@@ -71,7 +71,8 @@ void MKLDNNPoolingFwd::Init(const mxnet::NDArray& input,
 void MKLDNNPoolingFwd::Execute(const NDArray& in_data,
                                const OpReqType req,
                                const NDArray& out_data,
-                               const NDArray* workspace) {
+                               const NDArray* workspace,
+                               const bool use_adaptive_pooling) {
   NDArray in_buffer = in_data;
   if (in_data.IsView() && in_data.IsMKLDNNData())
     in_buffer = in_data.Reorder2Default();
@@ -84,7 +85,7 @@ void MKLDNNPoolingFwd::Execute(const NDArray& in_data,
       {MKLDNN_ARG_DST, *(output_mem_t_.second)},
   };
 
-  if (this->with_workspace_) {
+  if (this->with_workspace_ && !use_adaptive_pooling) {
     auto engine = CpuEngine::Get()->get_engine();
 
     if (workspace == nullptr) {
@@ -190,17 +191,30 @@ void InitPoolingPrimitiveParams(const PoolingParam& param,
 mkldnn::pooling_forward::primitive_desc GetPoolingFwdPdesc(const PoolingParam& param,
                                                            const bool is_train,
                                                            const mkldnn::memory::desc& data_md,
-                                                           const mkldnn::memory::desc& out_md) {
+                                                           const mkldnn::memory::desc& out_md,
+                                                           const bool use_adaptive_pooling) {
   CHECK(param.kernel.ndim() == 1 || param.kernel.ndim() == 2 || param.kernel.ndim() == 3)
       << "Not Implemented";
 
-  const int kernel_ndims = param.kernel.ndim();
+  const int kernel_ndims =
+      use_adaptive_pooling ? mxnet::TShape(data_md.dims()).ndim() : param.kernel.ndim();
   mkldnn::memory::dims kernel(kernel_ndims);
   mkldnn::memory::dims strides(kernel_ndims);
   mkldnn::memory::dims pad_l(kernel_ndims);
   mkldnn::memory::dims pad_r(kernel_ndims);
 
-  InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+  const mxnet::TShape input_shape = mxnet::TShape(data_md.dims());
+  const mxnet::TShape output_shape = mxnet::TShape(out_md.dims());
+
+  if (use_adaptive_pooling) {
+    UseAdaptivePaddingKernel(&kernel, &strides, &pad_l, &pad_r, input_shape, output_shape);
+    mkldnn::memory::validate_dims(kernel);
+    mkldnn::memory::validate_dims(strides);
+    mkldnn::memory::validate_dims(pad_l);
+    mkldnn::memory::validate_dims(pad_r);
+  } else {
+    InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+  }
 
   const mkldnn::algorithm alg = GetMKLDNNPoolingAlgorithm(param);
   mkldnn::prop_kind kind      = mkldnn::prop_kind::forward_scoring;
@@ -249,7 +263,7 @@ MKLDNNPoolingFwd& GetPoolingFwd(const PoolingParam& param,
     mkldnn::memory::dims pad_r(kernel_ndims);
 
     if (use_adaptive_pooling) {
-      UseAdaptivePaddingKernel(&kernel, &strides, &pad_l, &pad_r, data, output);
+      UseAdaptivePaddingKernel(&kernel, &strides, &pad_l, &pad_r, data.shape(), output.shape());
       mkldnn::memory::validate_dims(kernel);
       mkldnn::memory::validate_dims(strides);
       mkldnn::memory::validate_dims(pad_l);
@@ -268,17 +282,6 @@ MKLDNNPoolingFwd& GetPoolingFwd(const PoolingParam& param,
   return it->second;
 }
 
-void MKLDNNPoolingCompute(const OpContext& ctx,
-                          const PoolingParam& param,
-                          const NDArray& in_data,
-                          const OpReqType req,
-                          const NDArray& out_data,
-                          const NDArray* workspace,
-                          const bool use_adaptive_pooling) {
-  auto& fwd = GetPoolingFwd(param, ctx.is_train, in_data, out_data, use_adaptive_pooling);
-  fwd.Execute(in_data, req, out_data, workspace);
-}
-
 MKLDNNPoolingBwd::MKLDNNPoolingBwd(const mkldnn::pooling_backward::primitive_desc& pdesc,
                                    bool with_ws)
     : with_workspace(with_ws), pd(pdesc) {
@@ -292,7 +295,8 @@ const mkldnn::pooling_backward& MKLDNNPoolingBwd::GetBwd() {
 MKLDNNPoolingBwd& GetPoolingBwd(const PoolingParam& param,
                                 const NDArray& in_data,
                                 const NDArray& in_grad,
-                                const NDArray& out_grad) {
+                                const NDArray& out_grad,
+                                const bool use_adaptive_pooling) {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local std::unordered_map<MKLDNNPoolingSignature, MKLDNNPoolingBwd, OpHash>
       pooling_bwds;
@@ -301,11 +305,14 @@ MKLDNNPoolingBwd& GetPoolingBwd(const PoolingParam& param,
       pooling_bwds;
 #endif
 
-  bool with_workspace = MKLDNNRequireWorkspace(param);
+  const bool with_workspace = MKLDNNRequireWorkspace(param);
   MKLDNNPoolingSignature key(param);
   key.AddSign(in_data);
   key.AddSign(in_grad);
   key.AddSign(out_grad);
+  if (use_adaptive_pooling) {
+    key.AddSign(use_adaptive_pooling);
+  }
 
   auto it = pooling_bwds.find(key);
   if (it == pooling_bwds.end()) {
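
Context for the signature hunk above: oneDNN backward primitives are memoized in a thread-local map keyed by a digest of the pooling parameters and the participating arrays. Because an adaptive and a non-adaptive call over identical shapes would otherwise produce the same key, the use_adaptive_pooling flag is folded into the signature. A simplified, standard-library-only analogue of that memoization pattern (MKLDNNPoolingSignature itself is MXNet's ParamOpSign; the hash scheme here is illustrative):

    #include <functional>
    #include <string>
    #include <unordered_map>

    // Toy signature: hash-combine every "sign" added to the key.
    struct Signature {
      size_t hash = 0;
      void AddSign(size_t v) { hash = hash * 31 + std::hash<size_t>()(v); }
    };

    int main() {
      static thread_local std::unordered_map<size_t, std::string> cache;

      const size_t shape_digest = 12345;  // stands in for param/array signs
      Signature plain, adaptive;
      plain.AddSign(shape_digest);
      adaptive.AddSign(shape_digest);
      adaptive.AddSign(true);  // the extra sign added in the hunk above

      cache.emplace(plain.hash, "regular avg-pooling bwd primitive");
      cache.emplace(adaptive.hash, "adaptive avg-pooling bwd primitive");
      return cache.size() == 2 ? 0 : 1;  // distinct keys, distinct primitives
    }
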
@@ -317,21 +324,31 @@ MKLDNNPoolingBwd& GetPoolingBwd(const PoolingParam& param,
     auto dst_md   = mkldnn::memory::desc(dst_dims, get_data_type(data_md), any);
 
     // fwd hint
-    auto fwd_pd = GetPoolingFwdPdesc(param, true, data_md, dst_md);
+    auto fwd_pd = GetPoolingFwdPdesc(param, true, data_md, dst_md, use_adaptive_pooling);
 
-    // creat bwd desc
+    // create bwd desc
     auto diff_src_dims = mkldnn::memory::dims(in_grad.shape().begin(), in_grad.shape().end());
     auto diff_src_md   = mkldnn::memory::desc(diff_src_dims, get_data_type(data_md), any);
     auto cpu_engine    = CpuEngine::Get()->get_engine();
-    auto alg           = GetMKLDNNPoolingAlgorithm(param);
+    auto alg = use_adaptive_pooling ? mkldnn::algorithm::pooling_avg
+                                    : GetMKLDNNPoolingAlgorithm(param);
 
-    const int kernel_ndims = param.kernel.ndim();
+    const int kernel_ndims = use_adaptive_pooling ? in_grad.shape().ndim() : param.kernel.ndim();
     mkldnn::memory::dims kernel(kernel_ndims);
     mkldnn::memory::dims strides(kernel_ndims);
     mkldnn::memory::dims pad_l(kernel_ndims);
     mkldnn::memory::dims pad_r(kernel_ndims);
 
-    InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+    if (use_adaptive_pooling) {
+      UseAdaptivePaddingKernel(
+          &kernel, &strides, &pad_l, &pad_r, in_grad.shape(), out_grad.shape());
+      mkldnn::memory::validate_dims(kernel);
+      mkldnn::memory::validate_dims(strides);
+      mkldnn::memory::validate_dims(pad_l);
+      mkldnn::memory::validate_dims(pad_r);
+    } else {
+      InitPoolingPrimitiveParams(param, data_md, kernel, strides, pad_l, pad_r);
+    }
 
     // use dst_md as diff_dst_md with any format
     auto bwd_desc =
@@ -344,24 +361,44 @@ MKLDNNPoolingBwd& GetPoolingBwd(const PoolingParam& param,
   return it->second;
 }
 
-void MKLDNNPoolingGradCompute(const OpContext& ctx,
-                              const PoolingParam& param,
-                              const NDArray& out_grad,
-                              const NDArray& in_data,
-                              const NDArray* workspace,
-                              const OpReqType req,
-                              const NDArray& in_grad) {
-  if (req == kNullOp) {
+void MKLDNNPoolingGradCompute(const nnvm::NodeAttrs &attrs,
+                              const OpContext &ctx,
+                              const std::vector<NDArray> &inputs,
+                              const std::vector<OpReqType> &req,
+                              const std::vector<NDArray> &outputs) {
+  if (req[0] == kNullOp) {
     return;
   }
 
+  const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
+
+  const NDArray &out_grad = inputs[0];
+  const NDArray *workspace = nullptr;
+  const NDArray *in_data = nullptr;
+  if (MKLDNNRequireWorkspace(param)) {
+    // The first two elements are the gradients of the outputs in forward.
+    // The third is the input of forward.
+    // The fourth and the fifth are the outputs of forward.
+    CHECK_EQ(inputs.size(), 5U);
+    in_data = &inputs[2];
+    workspace = &inputs[4];
+  } else if (!param.IsAdaptivePooling()) {
+    CHECK_EQ(inputs.size(), 3U);
+    in_data = &inputs[1];
+  } else {
+    in_data = &inputs[0];
+  }
+  const NDArray &in_grad = outputs[0];
+
   TmpMemMgr::Get()->Init(ctx.requested[0]);
 
-  auto& bwd              = GetPoolingBwd(param, in_data, in_grad, out_grad);
+
+  auto &bwd = GetPoolingBwd(param, *in_data, in_grad, out_grad,
+                            param.IsAdaptivePooling());
   auto bwd_diff_dst_desc = bwd.pd.diff_dst_desc();
   auto diff_dst_mem =
       static_cast<const mkldnn::memory*>(out_grad.GetMKLDNNDataReorder(&bwd_diff_dst_desc));
-  auto diff_src_mem      = CreateMKLDNNMem(in_grad, bwd.pd.diff_src_desc(), req);
+  auto diff_src_mem = CreateMKLDNNMem(in_grad, bwd.pd.diff_src_desc(), req[0]);
   mkldnn_args_map_t args = {
       {MKLDNN_ARG_DIFF_DST, *diff_dst_mem},
       {MKLDNN_ARG_DIFF_SRC, *diff_src_mem.second},
@@ -375,6 +412,22 @@ void MKLDNNPoolingGradCompute(const OpContext& ctx,
   MKLDNNStream::Get()->Submit();
 }
 
+void MKLDNNPoolingCompute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
+                          const std::vector<NDArray> &in_data,
+                          const std::vector<OpReqType> &req,
+                          const std::vector<NDArray> &out_data) {
+  const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
+  const NDArray *workspace = nullptr;
+  const bool is_adaptive_pooling = param.IsAdaptivePooling();
+  if (MKLDNNRequireWorkspace(param) && !is_adaptive_pooling) {
+    CHECK_GT(out_data.size(), 1U);
+    workspace = &out_data[1];
+  }
+  auto &fwd = GetPoolingFwd(param, ctx.is_train, in_data[0], out_data[0],
+                            is_adaptive_pooling);
+  fwd.Execute(in_data[0], req[0], out_data[0], workspace, is_adaptive_pooling);
+}
+
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_USE_MKLDNN == 1
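
To make the backward path concrete, here is a hedged standalone sketch of the oneDNN calls that MKLDNNPoolingGradCompute ultimately issues for an eligible adaptive case: a forward primitive descriptor used only as a hint, a pooling_backward descriptor built from the derived window parameters, and an execute call with DIFF_DST/DIFF_SRC arguments. Shapes and values are illustrative and error handling is omitted; this assumes the mkldnn-namespaced 2.x-era C++ API used elsewhere in this file:

    #include <mkldnn.hpp>
    #include <vector>

    int main() {
      using namespace mkldnn;
      engine eng(engine::kind::cpu, 0);
      stream strm(eng);

      // 1x3x32x32 adaptively pooled to 1x3x8x8: kernel 4, stride 4, pad 0,
      // the configuration SupportMKLDNNAveragePooling accepts.
      memory::dims src_dims = {1, 3, 32, 32}, dst_dims = {1, 3, 8, 8};
      memory::dims kernel = {4, 4}, strides = {4, 4}, pad = {0, 0};

      auto src_md = memory::desc(src_dims, memory::data_type::f32,
                                 memory::format_tag::nchw);
      auto dst_md = memory::desc(dst_dims, memory::data_type::f32,
                                 memory::format_tag::nchw);

      // The forward pd only serves as a hint, as in GetPoolingBwd above.
      auto fwd_desc = pooling_forward::desc(prop_kind::forward_training,
                                            algorithm::pooling_avg, src_md,
                                            dst_md, strides, kernel, pad, pad);
      auto fwd_pd = pooling_forward::primitive_desc(fwd_desc, eng);

      auto bwd_desc = pooling_backward::desc(algorithm::pooling_avg, src_md,
                                             dst_md, strides, kernel, pad, pad);
      auto bwd_pd = pooling_backward::primitive_desc(bwd_desc, eng, fwd_pd);

      std::vector<float> grad_out(1 * 3 * 8 * 8, 1.0f);
      std::vector<float> grad_in(1 * 3 * 32 * 32, 0.0f);
      memory diff_dst(bwd_pd.diff_dst_desc(), eng, grad_out.data());
      memory diff_src(bwd_pd.diff_src_desc(), eng, grad_in.data());

      pooling_backward(bwd_pd).execute(strm, {{MKLDNN_ARG_DIFF_DST, diff_dst},
                                              {MKLDNN_ARG_DIFF_SRC, diff_src}});
      strm.wait();
      return 0;  // each grad_in element now holds 1/16 of its window's gradient
    }
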
diff --git a/src/operator/nn/pooling-inl.h b/src/operator/nn/pooling-inl.h
index 1c83ec5..7592fbf 100644
--- a/src/operator/nn/pooling-inl.h
+++ b/src/operator/nn/pooling-inl.h
@@ -41,7 +41,6 @@
 namespace mxnet {
 namespace op {
 
-void PoolingParamParser(nnvm::NodeAttrs *attrs);
 
 struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   mxnet::TShape kernel;
@@ -126,6 +125,10 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
            this->output_size        == other.output_size;
   }
 
+  bool IsAdaptivePooling() const {
+    return output_size.has_value();
+  }
+
   // Extract layout from param, or supply default layout based on provided input dimension.
   int GetLayout(int input_dim) const {
     int ret_val = mshadow::kNCW;
@@ -147,6 +150,33 @@ struct PoolingParam : public dmlc::Parameter<PoolingParam> {
   }
 };
 
+
+template<bool isAdaptive>
+void PoolingParamParser(nnvm::NodeAttrs *attrs) {
+  using namespace mshadow;
+  PoolingParam param;
+  param.Init(attrs->dict);
+  // Set default layout if it can be inferred from kernel shape.
+  if (param.kernel.ndim() > 0)
+    param.layout = param.GetLayout(param.kernel.ndim() + 2);
+  if (param.kernel.ndim() == 1) {
+    if (param.stride.ndim() == 0) param.stride = Shape1(1);
+    if (param.pad.ndim() == 0) param.pad = Shape1(0);
+  } else if (param.kernel.ndim() == 2) {
+    if (param.stride.ndim() == 0) param.stride = Shape2(1, 1);
+    if (param.pad.ndim() == 0) param.pad = Shape2(0, 0);
+  } else {
+      // the kernel-rank check only applies to non-global, non-adaptive pooling
+      if (param.global_pool == false && !isAdaptive) {
+        CHECK_EQ(param.kernel.ndim(), 3U) << param.kernel.ndim()
+            << "D pooling not supported";
+      }
+    if (param.stride.ndim() == 0) param.stride = Shape3(1, 1, 1);
+    if (param.pad.ndim() == 0) param.pad = Shape3(0, 0, 0);
+  }
+  attrs->parsed = std::move(param);
+}
+
 }  // namespace op
 }  // namespace mxnet
 
@@ -166,6 +196,9 @@ struct hash<mxnet::op::PoolingParam> {
     ret = dmlc::HashCombine(ret, val.count_include_pad);
     int val_layout = val.layout.has_value() ? val.layout.value() : -1;
     ret = dmlc::HashCombine(ret, val_layout);
+    mxnet::Tuple<int> val_out_size =
+        val.IsAdaptivePooling() ? val.output_size.value() : mxnet::Tuple<int>();
+    ret = dmlc::HashCombine(ret, val_out_size);
     return ret;
   }
 };
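
A note on the PoolingParamParser<bool> template introduced above: moving the parser into the header with a compile-time isAdaptive flag lets the adaptive operators reuse the standard stride/pad defaulting while skipping the strict kernel-rank check, since adaptive pooling derives its window from output_size rather than an explicit kernel. A hedged, simplified illustration of the pattern (plain types; not the MXNet parser itself):

    #include <cassert>

    // Compile-time variant of the check inside PoolingParamParser: regular
    // pooling insists on a 1-D, 2-D, or 3-D kernel unless global pooling is
    // on; the adaptive instantiation tolerates a missing kernel entirely.
    template <bool isAdaptive>
    bool KernelRankOk(int kernel_ndim, bool global_pool) {
      if (kernel_ndim == 1 || kernel_ndim == 2 || kernel_ndim == 3) return true;
      return global_pool || isAdaptive;  // kernel size is ignored here
    }

    int main() {
      assert(KernelRankOk<false>(2, false));   // plain 2-D pooling
      assert(!KernelRankOk<false>(0, false));  // no kernel given: rejected
      assert(KernelRankOk<true>(0, false));    // adaptive: window comes from
                                               // output_size, no kernel needed
      return 0;
    }
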
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 4a6d105..fd09ecf 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -35,31 +35,6 @@
 namespace mxnet {
 namespace op {
 
-void PoolingParamParser(nnvm::NodeAttrs *attrs) {
-  using namespace mshadow;
-  PoolingParam param;
-  param.Init(attrs->dict);
-  // Set default layout if it can be inferred from kernel shape.
-  if (param.kernel.ndim() > 0)
-    param.layout = param.GetLayout(param.kernel.ndim() + 2);
-  if (param.kernel.ndim() == 1) {
-    if (param.stride.ndim() == 0) param.stride = Shape1(1);
-    if (param.pad.ndim() == 0) param.pad = Shape1(0);
-  } else if (param.kernel.ndim() == 2) {
-    if (param.stride.ndim() == 0) param.stride = Shape2(1, 1);
-    if (param.pad.ndim() == 0) param.pad = Shape2(0, 0);
-  } else {
-      // ignore kernel size only if global_pool not assigned false
-      if (param.global_pool == false) {
-        CHECK_EQ(param.kernel.ndim(), 3U) << param.kernel.ndim()
-            << "D pooling not supported";
-      }
-    if (param.stride.ndim() == 0) param.stride = Shape3(1, 1, 1);
-    if (param.pad.ndim() == 0) param.pad = Shape3(0, 0, 0);
-  }
-  attrs->parsed = std::move(param);
-}
-
 int GetNumOutputs(const PoolingParam &param) {
 #if MXNET_USE_MKLDNN == 1
   return MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param) ? 2 : 1;
@@ -274,7 +249,6 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs,
                          const std::vector<OpReqType> &req,
                          const std::vector<NDArray> &outputs) {
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
-  const NDArray *workspace = nullptr;
 
   // Pooling does not currently support working with views
   if (inputs[0].IsView() || outputs[0].IsView()) {
@@ -283,12 +257,8 @@ void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs,
   }
 
   if (SupportMKLDNNPooling(param, inputs[0])) {
-    if (MKLDNNRequireWorkspace(param)) {
-      CHECK_GT(outputs.size(), 1U);
-      workspace = &outputs[1];
-    }
     MKLDNN_OPCHECK_INIT(false, 1, inputs, outputs);
-    MKLDNNPoolingCompute(ctx, param, inputs[0], req[0], outputs[0], workspace, false);
+    MKLDNNRun(MKLDNNPoolingCompute, attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(PoolingCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
@@ -307,26 +277,9 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
     return;
   }
 
-
   if (SupportMKLDNNPooling(param, inputs[0])) {
-    const NDArray &out_grad = inputs[0];
-    const NDArray *workspace = nullptr;
-    const NDArray *in_data = nullptr;
-    if (MKLDNNRequireWorkspace(param)) {
-      // The first two elements are the gradient of the outputs in forward.
-      // The third is the input of forward.
-      // The fourth and the fifth are the outputs of forward.
-      CHECK_EQ(inputs.size(), 5U);
-      in_data = &inputs[2];
-      workspace = &inputs[4];
-    } else {
-      CHECK_EQ(inputs.size(), 3U);
-      in_data = &inputs[1];
-    }
-    const NDArray &in_grad = outputs[0];
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
-    MKLDNNPoolingGradCompute(ctx, param, out_grad, *in_data, workspace,
-                             req[0], in_grad);
+    MKLDNNRun(MKLDNNPoolingGradCompute, attrs, ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(PoolingGradCompute<cpu>, attrs, ctx, inputs, req,
                        outputs);
     return;
@@ -437,7 +390,7 @@ For each window ``X``, the mathematical expression for Lp pooling is:
   else
     return std::vector<std::string>{"output"};
 })
-.set_attr_parser(PoolingParamParser)
+.set_attr_parser(PoolingParamParser<false>)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", PoolingStorageType)
 #endif
@@ -480,7 +433,7 @@ NNVM_REGISTER_OP(_backward_Pooling)
 .set_attr<FInferStorageType>("FInferStorageType",
                              BackwardPoolingStorageType)
 #endif
-.set_attr_parser(PoolingParamParser)
+.set_attr_parser(PoolingParamParser<false>)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", PoolingGradComputeExCPU)
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
index 98d1754..ce74c68 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
@@ -39,8 +39,7 @@ static void MKLDNNQuantizedPoolingForward(const nnvm::NodeAttrs& attrs,
       << "mkldnn_quantized_pooling op only supports uint8 and int8 as input "
          "type";
   const PoolingParam& param = nnvm::get<PoolingParam>(attrs.parsed);
-  MKLDNNPoolingCompute(
-      ctx, param, in_data[0], req[0], out_data[0], nullptr, /*use_adaptive*/ false);
+  MKLDNNRun(MKLDNNPoolingCompute, attrs, ctx, in_data, req, out_data);
   out_data[1].data().dptr<float>()[0] = in_data[1].data().dptr<float>()[0];
   out_data[2].data().dptr<float>()[0] = in_data[2].data().dptr<float>()[0];
 }
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index c35c7a4..b699a6d 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -187,7 +187,7 @@ the float32 data into int8.
     This operator only supports `pool_type` of `avg` or `max`.)code" ADD_FILELINE)
 .set_num_inputs(3)
 .set_num_outputs(3)
-.set_attr_parser(PoolingParamParser)
+.set_attr_parser(PoolingParamParser<false>)
 .set_attr<nnvm::FListInputNames>("FListInputNames",
   [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"data", "min_data", "max_data"};
diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py
index ec1cf96..dd785b2 100644
--- a/tests/python/mkl/test_mkldnn.py
+++ b/tests/python/mkl/test_mkldnn.py
@@ -422,6 +422,35 @@ def test_pooling():
         check_pooling_training(stype)
 
 
+def test_adaptive_pooling():
+    def test_case(num_filter, output_size, stype, shape):
+        data_tmp = mx.nd.random.uniform(shape=shape)
+        data = mx.sym.var('data', stype=stype)
+        in_channels = shape[1]
+
+        data = mx.sym.Convolution(data=data, kernel=(3, 3), pad=(1,1), num_filter=num_filter)
+        data = mx.sym.contrib.AdaptiveAvgPooling2D(data=data, output_size=output_size)
+
+        weight_tmp = np.random.normal(-0.1, 0.1, size=(num_filter, in_channels, 3, 3))
+        bias_tmp = np.random.normal(0.1, 0.1, size=(num_filter,))
+
+        in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype),
+                        mx.nd.array(bias_tmp).tostype(stype)]
+
+        check_numeric_gradient(data, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    num_filters = [4, 8, 16]
+    output_sizes = [4, 5, 8, 16]
+    stypes = ['row_sparse', 'default']
+    shapes = [(3, 3, 8, 8), (3, 4, 20, 20), (3, 6, 20, 20), (3, 3, 32, 32)]
+
+    for num_filter in num_filters:
+        for output_size in output_sizes:
+            for stype in stypes:
+                for shape in shapes:
+                    test_case(num_filter, output_size, stype, shape)
+
+
 @with_seed()
 def test_activation():
     def check_activation_training(stype):