Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/12/04 17:48:42 UTC

[GitHub] eric-haibin-lin closed pull request #13409: [MXNET-1234] Fix shape inference problems in Activation backward

URL: https://github.com/apache/incubator-mxnet/pull/13409
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


diff --git a/src/operator/elemwise_op_common.h b/src/operator/elemwise_op_common.h
index 4b8663bba6e..e622ce216ad 100644
--- a/src/operator/elemwise_op_common.h
+++ b/src/operator/elemwise_op_common.h
@@ -128,29 +128,33 @@ inline bool ElemwiseAttr(const nnvm::NodeAttrs& attrs,
   if (n_out != -1)
     out_size = static_cast<size_t>(n_out);
 
-  auto deduce = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
+  CHECK_LE(in_size, in_attrs->size());
+  CHECK_LE(out_size, out_attrs->size());
+  auto deduce = [&](const std::vector<AttrType>& vec, size_t size, const char *name) {
       for (size_t i = 0; i < size; ++i) {
-        CHECK(assign(&dattr, (*vec)[i]))
+        CHECK(assign(&dattr, vec.at(i)))
           << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
           << name << ": " << "expected " << attr_string(dattr)
-          << ", got " << attr_string((*vec)[i]);
+          << ", got " << attr_string(vec.at(i));
       }
     };
-  deduce(in_attrs, in_size, "input");
-  if (reverse_infer) deduce(out_attrs, out_size, "output");
+  deduce(*in_attrs, in_size, "input");
+  if (reverse_infer)
+      deduce(*out_attrs, out_size, "output");
 
   auto write = [&](std::vector<AttrType> *vec, size_t size, const char *name) {
       for (size_t i = 0; i < size; ++i) {
-        CHECK(assign(&(*vec)[i], dattr))
+        CHECK(assign(&(vec->at(i)), dattr))
           << "Incompatible attr in node " << attrs.name << " at " << i << "-th "
           << name << ": " << "expected " << attr_string(dattr)
-          << ", got " << attr_string((*vec)[i]);
+          << ", got " << attr_string(vec->at(i));
       }
     };
   write(in_attrs, in_size, "input");
   write(out_attrs, out_size, "output");
 
-  if (is_none(dattr)) return false;
+  if (is_none(dattr))
+      return false;
   return true;
 }
 
diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h
index 2705177f951..1d8e4c2b6cd 100644
--- a/src/operator/nn/activation-inl.h
+++ b/src/operator/nn/activation-inl.h
@@ -48,6 +48,9 @@ enum ActivationOpInputs {kData};
 enum ActivationOpOutputs {kOut};
 enum ActivationOpResource {kTempSpace};
 enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign};
+
+// Get the number of inputs to the gradient depending on the activation type
+int GradNumInputs(int act_type);
 }  // activation
 
 struct ActivationParam : public dmlc::Parameter<ActivationParam> {
@@ -199,13 +202,8 @@ void ActivationGradCompute(const nnvm::NodeAttrs& attrs,
                            const std::vector<OpReqType>& req,
                            const std::vector<TBlob>& outputs) {
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
-  bool relu = param.act_type == activation::kReLU;
-  CHECK_EQ(inputs.size(), relu ? 2U : 3U);
-#else
-  bool softsign = param.act_type == activation::kSoftSign;
-  CHECK_EQ(inputs.size(), softsign ? 3U : 2U);
-#endif
+  const int act_type = param.act_type;
+  CHECK_EQ(inputs.size(), activation::GradNumInputs(act_type));
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
   ActivationGradComputeImpl<xpu>(attrs, ctx, inputs, req, outputs);
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index ba44ebd4ed4..305eeab2117 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -30,13 +30,34 @@
 #if MXNET_USE_MKLDNN == 1
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
-#endif  // MXNET_USE_MKLDNN
+#endif  // MXNET_USE_MKLDNN == 1
 #include "../operator_common.h"
 #include "../../common/utils.h"
 
 namespace mxnet {
 namespace op {
 
+namespace activation {
+
+int GradNumInputs(int act_type) {
+    // check activation.cu \sa ActivationGradCompute
+    switch (act_type) {
+        case kReLU:
+            return 2;
+        case kSoftReLU:
+        case kSoftSign:
+        case kTanh:
+        case kSigmoid:
+            return 3;
+        default:
+            CHECK(false) << "missing activation type";
+    }
+    // unreachable
+    return -1;
+}
+
+}  // namespace activation
+
 DMLC_REGISTER_PARAMETER(ActivationParam);
 
 // This will determine the order of the inputs for backward computation.
@@ -44,24 +65,28 @@ struct ActivationGrad {
   const char *op_name;
   std::vector<nnvm::NodeEntry> operator()(const nnvm::NodePtr& n,
                                           const std::vector<nnvm::NodeEntry>& ograds) const {
+    // ograds, output...
     std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
     heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0});
 
     const NodeAttrs& attrs = n->attrs;
+    using namespace activation;
     int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
-    if (act_type == activation::kSoftSign) {
-      // for softsign need the inputs to compute the activation.
-      heads.push_back(n->inputs[activation::kData]);
-    }
-
-#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1)
     // for ReLU, no need to pass input data. This enables inplace optimization during the
     // forward pass.
-    if (act_type != activation::kReLU &&
-        act_type != activation::kSoftSign) {
-      heads.push_back(n->inputs[activation::kData]);
+    // check activation.cu \sa ActivationGradCompute
+    switch (act_type) {
+        case kReLU:
+            break;
+        case kSoftReLU:
+        case kSoftSign:
+        case kTanh:
+        case kSigmoid:
+            heads.push_back(n->inputs[activation::kData]);
+            break;
+        default:
+            CHECK(false) << "missing activation type";
     }
-#endif
     return MakeGradNode(op_name, n, heads, n->attrs.dict);
   }
 };
@@ -89,21 +114,19 @@ void ActivationGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                 const std::vector<OpReqType>& req,
                                 const std::vector<NDArray>& outputs) {
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-  bool relu = param.act_type == activation::kReLU;
-  CHECK_EQ(inputs.size(), relu ? 2U : 3U);
+  CHECK_EQ(inputs.size(), activation::GradNumInputs(param.act_type));
   if (SupportMKLDNN(inputs[0])) {
     MKLDNN_OPCHECK_INIT(true, outputs.size(), inputs, outputs);
     // XXX: for y = relu(x), y is passed as "in_data" to Backward()
-    MKLDNNActivationBackward(attrs, ctx, inputs[0], relu ? inputs[1] : inputs[2], req[0],
+    const bool relu = param.act_type == activation::kReLU;
+    MKLDNNActivationBackward(attrs, ctx, inputs.at(0), relu ? inputs.at(1) : inputs.at(2), req[0],
                              outputs[0]);
-     MKLDNN_OPCHECK_RUN(ActivationGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
+    MKLDNN_OPCHECK_RUN(ActivationGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
   FallBackCompute(ActivationGradComputeImpl<cpu>, attrs, ctx, inputs, req, outputs);
 }
-#endif
 
-#if MXNET_USE_MKLDNN == 1
 inline static bool ActivationStorageType(const nnvm::NodeAttrs& attrs,
                                          const int dev_mask,
                                          DispatchMode* dispatch_mode,
@@ -122,16 +145,12 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
                                           std::vector<int> *in_attrs,
                                           std::vector<int> *out_attrs) {
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-  if (param.act_type != activation::kReLU) {
-    CHECK_EQ(in_attrs->size(), 3U);
-  } else {
-    // for ReLU activation, the backward pass only needs ograd and output
-    CHECK_EQ(in_attrs->size(), 2U);
-  }
+  CHECK_EQ(in_attrs->size(), activation::GradNumInputs(param.act_type));
   return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNAct(param),
                            dispatch_mode, in_attrs, out_attrs);
 }
-#endif
+#endif  // MXNET_USE_MKLDNN == 1
+
 
 MXNET_OPERATOR_REGISTER_UNARY(Activation)
 .describe(R"code(Applies an activation function element-wise to the input.
@@ -163,18 +182,16 @@ The following activation functions are supported:
 
 NNVM_REGISTER_OP(_backward_Activation)
 .set_num_inputs([](const nnvm::NodeAttrs& attrs) {
-    int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
-    // for ReLU activation, the backward pass only needs ograd and output
-    if (act_type == activation::kReLU) return 2;
-    return 3;
-  })
+    const int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type;
+    return activation::GradNumInputs(act_type);
+})
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 #if MXNET_USE_MKLDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
 #endif
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<3, 1>)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<3, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<-1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<-1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
   return std::vector<std::pair<int, int> >{{0, 0}};
 })
diff --git a/src/operator/nn/activation.cu b/src/operator/nn/activation.cu
index 8892cc34f71..ec7db844b10 100644
--- a/src/operator/nn/activation.cu
+++ b/src/operator/nn/activation.cu
@@ -54,12 +54,13 @@ void ActivationCompute<gpu>(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
+  const int act_type = param.act_type;
 
   // SoftReLU and kSoftSign are both not supported by CUDNN yet
-  if (param.act_type == activation::kSoftReLU) {
+  if (act_type == activation::kSoftReLU) {
     ActivationForward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(ctx,
       inputs[0], req[0], outputs[0]);
-  } else if (param.act_type == activation::kSoftSign) {
+  } else if (act_type == activation::kSoftSign) {
     ActivationForward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(ctx,
       inputs[0], req[0], outputs[0]);
   } else {
@@ -76,23 +77,28 @@ void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs,
                                 const std::vector<OpReqType>& req,
                                 const std::vector<TBlob>& outputs) {
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
-  bool relu = param.act_type == activation::kReLU;
-  CHECK_EQ(inputs.size(), relu ? 2U : 3U);
+  const int act_type = param.act_type;
+  CHECK_EQ(inputs.size(), activation::GradNumInputs(act_type));
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
 
   // both SoftReLU and SoftSign not supported by CUDNN yet
-  if (param.act_type == activation::kSoftReLU) {
+  if (act_type == activation::kSoftReLU) {
     ActivationBackward<gpu, mshadow_op::softrelu, mshadow_op::softrelu_grad>(
-      ctx, inputs[0], inputs[1], req[0], outputs[0]);
-  } else if (param.act_type == activation::kSoftSign) {
+      ctx, inputs.at(0), inputs.at(1), req[0], outputs[0]);
+  } else if (act_type == activation::kSoftSign) {
     ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>(
-      ctx, inputs[0], inputs[2], req[0], outputs[0]);
-  } else {
-    MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+      ctx, inputs.at(0), inputs.at(2), req[0], outputs[0]);
+  } else if (act_type == activation::kReLU) {
+    MSHADOW_REAL_TYPE_SWITCH(inputs.at(0).type_flag_, DType, {
       // XXX: for y = relu(x), y is passed as "in_data" to Backward()
-      get_cudnn_op<DType>(param).Backward(ctx, inputs[0], relu ? inputs[1] : inputs[2],
-                                          inputs[1], req[0], outputs[0]);
+      get_cudnn_op<DType>(param).Backward(ctx, inputs.at(0), inputs.at(1),
+                                          inputs.at(1), req[0], outputs[0]);
+    });
+  } else {
+    MSHADOW_REAL_TYPE_SWITCH(inputs.at(0).type_flag_, DType, {
+      get_cudnn_op<DType>(param).Backward(ctx, inputs.at(0), inputs.at(2),
+                                          inputs.at(1), req[0], outputs[0]);
     });
   }
 }
diff --git a/tests/cpp/operator/activation_perf.cc b/tests/cpp/operator/activation_perf.cc
index 1bd8ca89c9f..bba8a3ec572 100644
--- a/tests/cpp/operator/activation_perf.cc
+++ b/tests/cpp/operator/activation_perf.cc
@@ -38,13 +38,27 @@ const kwargs_t basic_activation_args = { };
  * \brief Generic bidirectional sanity test
  */
 TEST(ACTIVATION_PERF, ExecuteBidirectional) {
+  using namespace std;
   TShape shape({5, 5});
-  kwargs_t kwargs = basic_activation_args;
-  kwargs.push_back({"act_type", "tanh"});
-
-  test::op::CoreOperatorRunner<float> runner;
-  runner.RunBidirectional(false, { shape }, test::op::CoreOpExecutor<float>::ArgsWithOpName(
-          kwargs, "Activation", "_backward_Activation"), 1);
+  vector<string> activations = {
+    "relu",
+    "sigmoid",
+    "tanh",
+    "softrelu",
+    "softsign"
+  };
+  for (const string& activation : activations) {
+    kwargs_t activation_args = {{"act_type", activation}};
+    test::op::CoreOperatorRunner<float> runner;
+    runner.RunBidirectional(false, { shape }, test::op::CoreOpExecutor<float>::ArgsWithOpName(
+            activation_args, "Activation", "_backward_Activation"), 1);
+  }
+  for (const string& activation : activations) {
+    kwargs_t activation_args = {{"act_type", activation}};
+    test::op::CoreOperatorRunner<float> runner;
+    runner.RunBidirectional(true, { shape }, test::op::CoreOpExecutor<float>::ArgsWithOpName(
+            activation_args, "Activation", "_backward_Activation"), 1);
+  }
 }
 
 /*!
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 3049674821c..abe6b136fe0 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -2411,7 +2411,7 @@ def hybrid_forward(self, F, x):
             x_reshape = x.reshape(self.reshape)
             out = self.act(x_reshape)
             return out
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for act in acts:
         x = mx.nd.random.uniform(-1, 1, shape=(4, 16, 32, 32))
         shape = (4, 32, 32, -1)
@@ -2433,7 +2433,7 @@ def hybrid_forward(self, F, x):
             out = self.act(x_slice)
             return out
 
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for act in acts:
         x = mx.nd.random.uniform(-1, 1, shape=(8, 32, 64, 64))
         slice = [(0, 16, 32, 32), (4, 32, 64, 64)]
@@ -2457,7 +2457,7 @@ def hybrid_forward(self, F, x):
             y_reshape = y.reshape(self.reshape[1])
             out = self.act1(y_reshape)
             return out
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for idx0, act0 in enumerate(acts):
         for idx1, act1 in enumerate(acts):
             if idx1 == idx0:
@@ -2484,7 +2484,7 @@ def hybrid_forward(self, F, x):
             y_slice = y.slice(begin=self.slice[1][0], end=self.slice[1][1])
             out = self.act1(y_slice)
             return out
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for idx0, act0 in enumerate(acts):
         for idx1, act1 in enumerate(acts):
             if idx1 == idx0:
@@ -2512,7 +2512,7 @@ def hybrid_forward(self, F, x):
             y_slice = y.slice(begin=self.slice[0], end=self.slice[1])
             out = self.act1(y_slice)
             return out
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for idx0, act0 in enumerate(acts):
         for idx1, act1 in enumerate(acts):
             if idx1 == idx0:
@@ -2541,7 +2541,7 @@ def hybrid_forward(self, F, x):
             y_reshape = y.reshape(self.reshape)
             out = self.act1(y_reshape)
             return out
-    acts = ["relu", "sigmoid", "tanh", "softrelu"]
+    acts = ["relu", "sigmoid", "tanh", "softrelu", "softsign"]
     for idx0, act0 in enumerate(acts):
         for idx1, act1 in enumerate(acts):
             if idx1 == idx0:


 

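For context, the shape-inference fix can be exercised from Python much like the new Gluon tests
above. Below is a minimal sketch, assuming an MXNet build that includes this patch and the usual
`mxnet` import name; it runs a hybridized softsign Activation forward and backward, the case where
the previous fixed ElemwiseShape<3, 1>/ElemwiseType<3, 1> registration could trip up inference,
since the gradient's input count now varies with act_type:

    import mxnet as mx
    from mxnet import autograd, gluon

    # Build and hybridize a network with a softsign activation.
    net = gluon.nn.HybridSequential()
    net.add(gluon.nn.Activation('softsign'))
    net.initialize()
    net.hybridize()

    # Forward and backward; _backward_Activation now infers shapes/types
    # from a variable number of inputs (see GradNumInputs in the diff).
    x = mx.nd.random.uniform(-1, 1, shape=(4, 16, 32, 32))
    x.attach_grad()
    with autograd.record():
        y = net(x)
    y.backward()
    print(x.grad.shape)  # expected to match x.shape, i.e. (4, 16, 32, 32)
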
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services