Posted to commits@mxnet.apache.org by jx...@apache.org on 2018/08/24 23:50:44 UTC

[incubator-mxnet] branch master updated: Fall back when sparse arrays are passed to MKLDNN-enabled operators (#11664)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 15e43c0  Fall back when sparse arrays are passed to MKLDNN-enabled operators (#11664)
15e43c0 is described below

commit 15e43c096a54329006e5e44c3723b60694ff1239
Author: Luobao <lu...@intel.com>
AuthorDate: Sat Aug 25 07:50:35 2018 +0800

    Fall back when sparse arrays are passed to MKLDNN-enabled operators (#11664)
    
    * softmax_fallback
    
    * Fallback Amend
    This is the final fix for the fallback problem (function calls)
    
    * Lint amend
    
    * test_try
    
    * Patch for test fail
    
    * Pooling amend
    
    * Delete non_rectified_operation_test
    
    * fallback_normal
    
    * Fixed_dispatch
    
    * activation-amend
    
    * activation second
    
    * activation backward
    
    * activate_try
    
    * activation_debug
    
    * Act change.
    
    * test_random
    
    * mkldnn choice
    
    * format_modify
    
    * rebase
---
 src/operator/nn/activation.cc         |  57 ++++------------
 src/operator/nn/batch_norm.cc         |   3 +-
 src/operator/nn/convolution.cc        |  43 +++++-------
 src/operator/nn/deconvolution.cc      |  36 ++++------
 src/operator/nn/lrn.cc                |  36 ++++------
 src/operator/nn/mkldnn/mkldnn_base.cc |   4 +-
 src/operator/nn/pooling.cc            |  42 ++++--------
 src/operator/nn/softmax.cc            |  21 ++----
 tests/python/mkl/test_mkldnn.py       | 121 +++++++++++++++++++++++++++++++++-
 9 files changed, 198 insertions(+), 165 deletions(-)
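
In short, this patch makes every MKL-DNN-enabled operator's FInferStorageType hook delegate to the shared MKLDNNStorageType helper, which selects DispatchMode::kFComputeFallback whenever MKL-DNN cannot handle the call; sparse inputs are then densified and routed to the stock CPU kernels instead of reaching the MKL-DNN code path. A minimal sketch of the user-visible effect (illustrative only, not part of the patch; assumes an MKL-DNN build of MXNet):

    import mxnet as mx

    # A row_sparse input to an MKL-DNN-enabled operator now falls back to
    # the default dense CPU implementation instead of failing.
    data = mx.nd.random.normal(shape=(2, 3)).tostype('row_sparse')
    out = mx.nd.softmax(data, axis=-1)
    print(out.stype)  # 'default' -- the fallback produces a dense array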

diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 277ca8e..b8c2045 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -31,6 +31,8 @@
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
 #endif  // MXNET_USE_MKLDNN
+#include "../operator_common.h"
+#include "../../common/utils.h"
 
 namespace mxnet {
 namespace op {
@@ -101,6 +103,7 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
 }
 #endif
 
+#if MXNET_USE_MKLDNN == 1
 inline static bool ActivationStorageType(const nnvm::NodeAttrs& attrs,
                                          const int dev_mask,
                                          DispatchMode* dispatch_mode,
@@ -108,20 +111,9 @@ inline static bool ActivationStorageType(const nnvm::NodeAttrs& attrs,
                                          std::vector<int> *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1);
   CHECK_EQ(out_attrs->size(), 1);
-  bool ret = ElemwiseStorageType<1, 1, false, false, false>(attrs, dev_mask,
-                                                            dispatch_mode,
-                                                            in_attrs, out_attrs);
-#if MXNET_USE_MKLDNN == 1
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
-    *dispatch_mode = DispatchMode::kFComputeEx;
-  }
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    *dispatch_mode = DispatchMode::kFComputeFallback;
-    return ret;
-  }
-#endif
-  return ret;
+  return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNAct(param),
+                           dispatch_mode, in_attrs, out_attrs);
 }
 
 inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
@@ -129,46 +121,17 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                           DispatchMode* dispatch_mode,
                                           std::vector<int> *in_attrs,
                                           std::vector<int> *out_attrs) {
-  bool ret = false;
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
   if (param.act_type != activation::kReLU) {
     CHECK_EQ(in_attrs->size(), 3U);
-    ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
-                                                         dispatch_mode,
-                                                         in_attrs, out_attrs);
   } else {
     // for ReLU activation, the backward pass only needs ograd and output
     CHECK_EQ(in_attrs->size(), 2U);
-    ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
-                                                         dispatch_mode,
-                                                         in_attrs, out_attrs);
-  }
-#else
-  if (param.act_type == activation::kSoftSign) {
-    CHECK_EQ(in_attrs->size(), 3U);
-    ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask,
-                                                         dispatch_mode,
-                                                         in_attrs, out_attrs);
-  } else {
-    CHECK_EQ(in_attrs->size(), 2U);
-    ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask,
-                                                         dispatch_mode,
-                                                         in_attrs, out_attrs);
   }
-#endif
-  CHECK_EQ(out_attrs->size(), 1U);
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNAct(param)) {
-    *dispatch_mode = DispatchMode::kFComputeEx;
-  }
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    *dispatch_mode = DispatchMode::kFComputeFallback;
-    return ret;
-  }
-#endif
-  return ret;
+  return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNAct(param),
+                           dispatch_mode, in_attrs, out_attrs);
 }
+#endif
 
 MXNET_OPERATOR_REGISTER_UNARY(Activation)
 .describe(R"code(Applies an activation function element-wise to the input.
@@ -183,7 +146,9 @@ The following activation functions are supported:
 
 )code" ADD_FILELINE)
 .set_attr_parser(ParamParser<ActivationParam>)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", ActivationStorageType)
+#endif
 .set_attr<nnvm::FListOutputNames>("FListOutputNames",
     [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"output"};
@@ -204,7 +169,9 @@ NNVM_REGISTER_OP(_backward_Activation)
   })
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
+#endif
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index c7b1b60..b15f84e 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -27,6 +27,7 @@
 #include "batch_norm-inl.h"
 #include <nnvm/op_attr_types.h>
 #include "../elemwise_op_common.h"
+#include "../operator_common.h"
 #if MXNET_USE_MKLDNN == 1
 #include "./mkldnn/mkldnn_batch_norm-inl.h"
 #endif
@@ -544,7 +545,7 @@ Both *mean* and *var* returns a scalar by treating the input as a vector.
 
 Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
 have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
-the inverse of ``data_var``, which are needed for the backward pass. Note that gradient of these 
+the inverse of ``data_var``, which are needed for the backward pass. Note that gradient of these
 two outputs are blocked.
 
 Besides the inputs and the outputs, this operator accepts two auxiliary
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 18c0132..8f25cf0 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -26,11 +26,14 @@
 
 #include "./convolution-inl.h"
 #include "../elemwise_op_common.h"
-#include "./mkldnn/mkldnn_ops-inl.h"
-#include "./mkldnn/mkldnn_base-inl.h"
+#include "../operator_common.h"
 #if MXNET_USE_NNPACK == 1
 #include "../nnpack/nnpack_pooling-inl.h"
 #endif  // MXNET_USE_NNPACK
+#if MXNET_USE_MKLDNN == 1
+#include "./mkldnn/mkldnn_base-inl.h"
+#include "./mkldnn/mkldnn_ops-inl.h"
+#endif  // MXNET_USE_MKLDNN
 
 namespace mxnet {
 namespace op {
@@ -288,27 +291,19 @@ static bool ConvolutionType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+#if MXNET_USE_MKLDNN == 1
 inline static bool ConvStorageType(const nnvm::NodeAttrs& attrs,
                                    const int dev_mask,
                                    DispatchMode* dispatch_mode,
-                                   std::vector<int> *in_attrs,
-                                   std::vector<int> *out_attrs) {
+                                   std::vector<int>* in_attrs,
+                                   std::vector<int>* out_attrs) {
   const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
   uint32_t in_expected = param.no_bias ? 2 : 3;
   CHECK_EQ(in_attrs->size(), in_expected);
   CHECK_EQ(out_attrs->size(), 1);
 
-  DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
-    wanted_mode = DispatchMode::kFComputeFallback;
-  else if (dev_mask == mshadow::cpu::kDevMask)
-    wanted_mode = DispatchMode::kFComputeEx;
-  else
-#endif
-    wanted_mode = DispatchMode::kFCompute;
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, wanted_mode);
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
 
 inline static bool BackwardConvStorageType(const nnvm::NodeAttrs& attrs,
@@ -322,18 +317,10 @@ inline static bool BackwardConvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), in_expected);
   CHECK_EQ(out_attrs->size(), out_expected);
 
-  DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
-    wanted_mode = DispatchMode::kFComputeFallback;
-  else if (dev_mask == mshadow::cpu::kDevMask)
-    wanted_mode = DispatchMode::kFComputeEx;
-  else
-#endif
-    wanted_mode = DispatchMode::kFCompute;
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, wanted_mode);
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
+#endif
 
 void ConvolutionParamParser(nnvm::NodeAttrs* attrs) {
   using namespace mshadow;
@@ -492,7 +479,9 @@ There are other options to tune the performance.
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ConvolutionShape)
 .set_attr<nnvm::FInferType>("FInferType", ConvolutionType)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", ConvStorageType)
+#endif
 .set_attr<FCompute>("FCompute<cpu>", ConvolutionCompute<cpu>)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionComputeExCPU)
@@ -512,7 +501,9 @@ NNVM_REGISTER_OP(_backward_Convolution)
   return params.no_bias ? 2 : 3;
 })
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardConvStorageType)
+#endif
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
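
Convolution follows the same pattern as Activation: under an MKL-DNN build, ConvStorageType and BackwardConvStorageType unconditionally report MKL-DNN support (the true argument) and let MKLDNNStorageType pick the dispatch mode; without MKL-DNN the FInferStorageType attribute is not registered at all, so the framework's default dense inference applies. A hedged forward-only sketch of the fallback (illustrative, mirroring the new test_convolution added below):

    import mxnet as mx

    data = mx.nd.random.uniform(shape=(1, 3, 10, 10)).tostype('row_sparse')
    weight = mx.nd.random.uniform(shape=(4, 3, 3, 3))
    bias = mx.nd.zeros((4,))
    # Sparse data dispatches through FComputeEx, which detects the non-dense
    # input and calls FallBackCompute: inputs are densified and the reference
    # CPU convolution runs, producing a dense output.
    out = mx.nd.Convolution(data=data, weight=weight, bias=bias,
                            kernel=(3, 3), stride=(2, 2), num_filter=4)
    assert out.stype == 'default'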
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 54b77aa..a4be1a0 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -25,8 +25,12 @@
 */
 
 #include "./deconvolution-inl.h"
+#include "../operator_common.h"
+#include "../../common/utils.h"
+#if MXNET_USE_MKLDNN == 1
 #include "./mkldnn/mkldnn_ops-inl.h"
 #include "./mkldnn/mkldnn_base-inl.h"
+#endif
 
 namespace mxnet {
 namespace op {
@@ -256,6 +260,7 @@ static bool DeconvolutionType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+#if MXNET_USE_MKLDNN == 1
 inline static bool DeconvStorageType(const nnvm::NodeAttrs& attrs,
                                      const int dev_mask,
                                      DispatchMode* dispatch_mode,
@@ -266,17 +271,8 @@ inline static bool DeconvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), in_expected);
   CHECK_EQ(out_attrs->size(), 1);
 
-  DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
-    wanted_mode = DispatchMode::kFComputeFallback;
-  else if (dev_mask == mshadow::cpu::kDevMask)
-    wanted_mode = DispatchMode::kFComputeEx;
-  else
-#endif
-    wanted_mode = DispatchMode::kFCompute;
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, wanted_mode);
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
 
 inline static bool BackwardDeconvStorageType(const nnvm::NodeAttrs& attrs,
@@ -289,20 +285,10 @@ inline static bool BackwardDeconvStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), param.no_bias ? 3U : 4U);
   CHECK_EQ(out_attrs->size(), out_expected);
 
-  DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
-    wanted_mode = DispatchMode::kFComputeFallback;
-  else if (dev_mask == mshadow::cpu::kDevMask)
-    wanted_mode = DispatchMode::kFComputeEx;
-  else
-#endif
-    wanted_mode = DispatchMode::kFCompute;
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, wanted_mode);
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
 
-#if MXNET_USE_MKLDNN == 1
 static void DeconvolutionComputeExCPU(const nnvm::NodeAttrs& attrs,
                                       const OpContext& ctx,
                                       const std::vector<NDArray>& inputs,
@@ -419,7 +405,9 @@ NNVM_REGISTER_OP(Deconvolution)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", DeconvolutionShape)
 .set_attr<nnvm::FInferType>("FInferType", DeconvolutionType)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", DeconvStorageType)
+#endif
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
@@ -440,7 +428,9 @@ NNVM_REGISTER_OP(_backward_Deconvolution)
   return params.no_bias ? 2 : 3;
 })
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardDeconvStorageType)
+#endif
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 4433519..587cf93 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -28,6 +28,7 @@
 #include "../operator_common.h"
 #if MXNET_USE_MKLDNN == 1
 #include "./mkldnn/mkldnn_lrn-inl.h"
+#include "./mkldnn/mkldnn_base-inl.h"
 #endif
 
 namespace mxnet {
@@ -81,24 +82,16 @@ struct LRNGrad {
   }
 };
 
+#if MXNET_USE_MKLDNN == 1
 bool LRNForwardInferStorageType(const nnvm::NodeAttrs& attrs,
                                 const int dev_mask,
                                 DispatchMode* dispatch_mode,
                                 std::vector<int> *in_attrs,
                                 std::vector<int> *out_attrs) {
   CHECK(!in_attrs->empty());
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                        dispatch_mode, DispatchMode::kFComputeFallback);
-  } else if (dev_mask == mshadow::cpu::kDevMask) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                        dispatch_mode, DispatchMode::kFComputeEx);
-  }
-#endif
-  storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                      dispatch_mode, DispatchMode::kFCompute);
-  return true;
+
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
 
 bool LRNBackwardInferStorageType(const nnvm::NodeAttrs& attrs,
@@ -107,20 +100,11 @@ bool LRNBackwardInferStorageType(const nnvm::NodeAttrs& attrs,
                                  std::vector<int> *in_attrs,
                                  std::vector<int> *out_attrs) {
   CHECK(!in_attrs->empty());
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                        dispatch_mode, DispatchMode::kFComputeFallback);
-  } else if (dev_mask == mshadow::cpu::kDevMask) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                        dispatch_mode, DispatchMode::kFComputeEx);
-  }
-#endif
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                      dispatch_mode, DispatchMode::kFCompute);
+
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
 
-#if MXNET_USE_MKLDNN == 1
 void LRNComputeExCPU(const nnvm::NodeAttrs &attrs,
                      const OpContext &ctx,
                      const std::vector<NDArray> &inputs,
@@ -183,7 +167,9 @@ number of kernels in the layer.
 .set_attr_parser(ParamParser<LRNParam>)
 .set_attr<nnvm::FInferShape>("FInferShape", LRNShape)
 .set_attr<nnvm::FInferType>("FInferType", LRNType)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", LRNForwardInferStorageType)
+#endif
 .set_attr<nnvm::FListInputNames>("FListInputNames",
     [](const NodeAttrs& attrs) {
   return std::vector<std::string>{"data"};
@@ -203,7 +189,9 @@ number of kernels in the layer.
 NNVM_REGISTER_OP(_backward_LRN)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<LRNParam>)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", LRNBackwardInferStorageType)
+#endif
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", LRNGradComputeExCPU)
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index 27c574d..f3facd9 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -536,7 +536,9 @@ bool MKLDNNStorageType(const nnvm::NodeAttrs &attrs,
 
   DispatchMode wanted_mode;
 #if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && support_mkldnn)
+  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
+    wanted_mode = DispatchMode::kFComputeFallback;
+  else if (dev_mask == mshadow::cpu::kDevMask && support_mkldnn)
     wanted_mode = DispatchMode::kFComputeEx;
   else
 #endif
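
This hunk is the behavioral core of the patch: MKLDNNStorageType now checks MKLDNNEnvSet() before the support flag, so disabling MKL-DNN through the environment forces fallback dispatch even for operators that report MKL-DNN support. A minimal sketch, assuming MXNET_MKLDNN_ENABLED is the variable MKLDNNEnvSet() consults (true of the MXNet source of this period, but stated here as an assumption):

    import os
    # Must be set before MXNet initializes for the check to take effect.
    os.environ['MXNET_MKLDNN_ENABLED'] = '0'

    import mxnet as mx

    x = mx.nd.ones((2, 3, 8, 8))
    # With MKL-DNN disabled, storage-type inference selects
    # DispatchMode::kFComputeFallback and the stock CPU kernel runs.
    y = mx.nd.Pooling(x, kernel=(2, 2), stride=(2, 2), pool_type='max')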
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index 7cb1450..2d11814 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -30,8 +30,8 @@
 #endif  // MXNET_USE_NNPACK
 #if MXNET_USE_MKLDNN == 1
 #include "./mkldnn/mkldnn_pooling-inl.h"
+#include "./mkldnn/mkldnn_base-inl.h"
 #endif  // MXNET_USE_MKLDNN
-
 namespace mxnet {
 namespace op {
 
@@ -284,7 +284,6 @@ void PoolingGradComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   }
   FallBackCompute(PoolingGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
 }
-#endif
 
 inline static bool PoolingStorageType(const nnvm::NodeAttrs &attrs,
                                       const int dev_mask,
@@ -292,21 +291,11 @@ inline static bool PoolingStorageType(const nnvm::NodeAttrs &attrs,
                                       std::vector<int> *in_attrs,
                                       std::vector<int> *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1);
-
-#if MXNET_USE_MKLDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                        dispatch_mode, DispatchMode::kFComputeFallback);
-  } else if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                               dispatch_mode, DispatchMode::kFComputeEx);
-  }
-#else
-  CHECK_EQ(out_attrs->size(), 1);
-#endif
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, DispatchMode::kFCompute);
+  bool support_mkldnn_pool = SupportMKLDNNPooling(param);
+
+  return MKLDNNStorageType(attrs, dev_mask, support_mkldnn_pool,
+                           dispatch_mode, in_attrs, out_attrs);
 }
 
 inline static bool BackwardPoolingStorageType(const nnvm::NodeAttrs &attrs,
@@ -317,21 +306,12 @@ inline static bool BackwardPoolingStorageType(const nnvm::NodeAttrs &attrs,
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), GetNumBackInputs(param));
   CHECK_EQ(out_attrs->size(), 1);
+  bool support_mkldnn_pool = SupportMKLDNNPooling(param);
 
-#if MXNET_USE_MKLDNN == 1
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                               dispatch_mode, DispatchMode::kFComputeFallback);
-  } else if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)) {
-    return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                               dispatch_mode, DispatchMode::kFComputeEx);
-  }
-#else
-  CHECK_EQ(in_attrs->size(), 3);
-#endif
-  return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
-                             dispatch_mode, DispatchMode::kFCompute);
+  return MKLDNNStorageType(attrs, dev_mask, support_mkldnn_pool,
+                           dispatch_mode, in_attrs, out_attrs);
 }
+#endif
 
 DMLC_REGISTER_PARAMETER(PoolingParam);
 
@@ -408,7 +388,9 @@ For each window ``X``, the mathematical expression for Lp pooling is:
     return std::vector<std::string>{"output"};
 })
 .set_attr_parser(PoolingParamParser)
+#if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", PoolingStorageType)
+#endif
 .set_attr<nnvm::FInferType>("FInferType", PoolingType)
 .set_attr<nnvm::FInferShape>("FInferShape", PoolingShape)
 .set_attr<FCompute>("FCompute<cpu>", PoolingCompute<cpu>)
@@ -437,9 +419,9 @@ NNVM_REGISTER_OP(_backward_Pooling)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
-#endif
 .set_attr<FInferStorageType>("FInferStorageType",
                              BackwardPoolingStorageType)
+#endif
 .set_attr_parser(PoolingParamParser)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", PoolingGradComputeExCPU)
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index c58f382..0fad3d6 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -25,8 +25,11 @@
 #include "./softmax-inl.h"
 #include "../tensor/elemwise_unary_op.h"
 #include "../tensor/elemwise_binary_op.h"
+#include "../operator_common.h"
+#if MXNET_USE_MKLDNN == 1
 #include "mkldnn/mkldnn_base-inl.h"
 #include "mkldnn/mkldnn_ops-inl.h"
+#endif
 
 namespace mxnet {
 namespace op {
@@ -50,7 +53,6 @@ static void SoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
   FallBackCompute(SoftmaxCompute<cpu, mxnet_op::softmax_fwd>, attrs, ctx,
                   inputs, req, outputs);
 }
-#endif
 
 inline static bool SoftmaxStorageType(const nnvm::NodeAttrs& attrs,
                                       const int dev_mask,
@@ -60,19 +62,10 @@ inline static bool SoftmaxStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), 1);
   CHECK_EQ(out_attrs->size(), 1);
 
-  DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
-  // We only run MKLDNN op if it runs on CPU.
-  if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
-    wanted_mode = DispatchMode::kFComputeFallback;
-  else if (dev_mask == mshadow::cpu::kDevMask)
-    wanted_mode = DispatchMode::kFComputeEx;
-  else
-#endif
-    wanted_mode = DispatchMode::kFCompute;
-  return storage_type_assign(out_attrs, static_cast<NDArrayStorageType>((*in_attrs)[0]),
-                             dispatch_mode, wanted_mode);
+  return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
+                           out_attrs);
 }
+#endif
 
 MXNET_OPERATOR_REGISTER_UNARY(softmax)
 .describe(R"code(Applies the softmax function.
@@ -106,8 +99,8 @@ Example::
 .set_attr<FCompute>("FCompute<cpu>", SoftmaxCompute<cpu, mxnet_op::softmax_fwd>)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", SoftmaxComputeExCPU)
-#endif
 .set_attr<FInferStorageType>("FInferStorageType", SoftmaxStorageType)
+#endif
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_softmax"})
 .add_arguments(SoftmaxParam::__FIELDS__());
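
The tests below exercise exactly this fallback: each operator is run through check_numeric_gradient with row_sparse as well as default storage, covering both the forward and backward passes. A standalone version of the softmax case (hypothetical snippet mirroring the new tests):

    import numpy as np
    import mxnet as mx
    from mxnet.test_utils import check_numeric_gradient

    data = mx.symbol.Variable('data', stype='row_sparse')
    sym = mx.symbol.softmax(data, axis=-1)
    loc = [mx.nd.array(np.random.normal(size=(2, 3))).tostype('row_sparse')]
    # The numeric gradient check passes only if the sparse input correctly
    # falls back to the dense CPU implementation.
    check_numeric_gradient(sym, loc, numeric_eps=1e-2, rtol=0.16, atol=1e-4)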
 
diff --git a/tests/python/mkl/test_mkldnn.py b/tests/python/mkl/test_mkldnn.py
index 03f3c76..6287bfc 100644
--- a/tests/python/mkl/test_mkldnn.py
+++ b/tests/python/mkl/test_mkldnn.py
@@ -93,7 +93,7 @@ def test_mkldnn_engine_threading():
     # below line triggers different execution thread
     for _ in loader:
         y = net(mx.nd.array(np.ones(X))).asnumpy()
-        # output should be 016711406 (non-mkldnn mode output) 
+        # output should be 016711406 (non-mkldnn mode output)
         assert_almost_equal(y[0, 0, 0, 0], 0.016711406)
         break
 
@@ -243,6 +243,125 @@ def test_batchnorm():
 
 
 @with_seed()
+def test_softmax():
+    def check_softmax_training(stype):
+        for shape in [(2, 3), (2, 3, 2, 2)]:
+            data_tmp = np.random.normal(-0.1, 0.1, size=shape)
+
+            data = mx.symbol.Variable('data', stype=stype)
+            in_location = [mx.nd.array(data_tmp).tostype(stype)]
+
+            test = mx.symbol.softmax(data, axis=-1)
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_softmax_training(stype)
+
+
+@with_seed()
+def test_pooling():
+    def check_pooling_training(stype):
+        for shape in [(3, 3, 10), (3, 3, 20, 20)]:
+            data_tmp = np.random.normal(-0.1, 0.1, size=shape)
+            data = mx.symbol.Variable('data', stype=stype)
+            in_location = [mx.nd.array(data_tmp).tostype(stype)]
+
+            if np.array(shape).shape[0] == 3:
+                test = mx.symbol.Pooling(data=data, kernel=(3,), stride=(2), pool_type='avg')
+            elif np.array(shape).shape[0] == 4:
+                test = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pool_type='avg')
+            else:
+                return 0
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_pooling_training(stype)
+
+
+@with_seed()
+def test_activation():
+    def check_activation_training(stype):
+        for shape in [(2, 3, 3), (2, 3, 2, 2)]:
+            data_tmp = np.random.normal(-0.1, 1, size=shape)
+
+            data = mx.symbol.Variable('data', stype=stype)
+            in_location = [mx.nd.array(data_tmp).tostype(stype)]
+
+            test = mx.symbol.Activation(data, act_type="relu")
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_activation_training(stype)
+
+
+def test_convolution():
+    def check_convolution_training(stype):
+        for shape in [(3, 3, 10), (3, 3, 10, 10)]:
+            data_tmp = np.random.normal(-0.1, 1, size=shape)
+            data = mx.symbol.Variable('data', stype=stype)
+
+            if np.array(shape).shape[0] == 3:
+                test = mx.symbol.Convolution(data=data, kernel=(3,), stride=(2), num_filter=4)
+                weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3))
+            elif np.array(shape).shape[0] == 4:
+                test = mx.symbol.Convolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4)
+                weight_tmp = np.random.normal(-0.1, 0.1, size=(4, 3, 3, 3))
+            else:
+                return 0
+            bias_tmp = np.random.normal(0.1, 0.1, size=(4,))
+            in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype),
+                           mx.nd.array(bias_tmp).tostype(stype)]
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_convolution_training(stype)
+
+
+def test_Deconvolution():
+    def check_Deconvolution_training(stype):
+        for shape in [(3, 3, 10), (3, 3, 10, 10)]:
+            data_tmp = np.random.randint(256, size=shape)
+            data = mx.symbol.Variable('data', stype=stype)
+
+            if np.array(shape).shape[0] == 3:
+                test = mx.symbol.Deconvolution(data=data, kernel=(3,), stride=(2), num_filter=4)
+                weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3))
+            elif np.array(shape).shape[0] == 4:
+                test = mx.symbol.Deconvolution(data=data, kernel=(3, 3), stride=(2, 2), num_filter=4)
+                weight_tmp = np.random.normal(-0.1, 0.1, size=(3, 4, 3, 3))
+            else:
+                return 0
+            bias_tmp = np.random.normal(0.1, 0.1, size=(4,))
+            in_location = [mx.nd.array(data_tmp).tostype(stype), mx.nd.array(weight_tmp).tostype(stype),
+                           mx.nd.array(bias_tmp).tostype(stype)]
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_Deconvolution_training(stype)
+
+
+@with_seed()
+def test_LRN():
+    def check_LRN_training(stype):
+        for shape in [(3, 4, 5, 5)]:
+            data_tmp = np.random.normal(-0.1, 0.1, size=shape)
+            data = mx.symbol.Variable('data', stype=stype)
+            in_location = [mx.nd.array(data_tmp).tostype(stype)]
+
+            test = mx.symbol.LRN(data, nsize=3)
+            check_numeric_gradient(test, in_location, numeric_eps=1e-2, rtol=0.16, atol=1e-4)
+
+    stypes = ['row_sparse', 'default']
+    for stype in stypes:
+        check_LRN_training(stype)
+
+
+@with_seed()
 def test_fullyconnected():
     def check_fullyconnected_training(stype):
         data_shape = rand_shape_nd(2)