Posted to commits@mxnet.apache.org by ha...@apache.org on 2018/03/03 12:13:57 UTC

[incubator-mxnet] branch master updated: sparse regression operators (#9625)

This is an automated email from the ASF dual-hosted git repository.

haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new dedfd2d  sparse regression operators (#9625)
dedfd2d is described below

commit dedfd2d60713319855c0b9df0aac57eee2d68f2d
Author: Ziyue Huang <zy...@gmail.com>
AuthorDate: Sat Mar 3 20:13:48 2018 +0800

    sparse regression operators (#9625)
    
    * sparse regression ops
    
    * add elemadd(dns, csr)
    
    * address comments and fix
    
    * replace copy with mshadow_op::identity
    
    * add kWriteInplace check
    
    * elemwise broadcast add
    
    * less template instantiation
    
    * not instantiate broadcast_add
    
    * remove DnsCsrOP instantiation in elemwise_binary
    
    * lint
    
    * remove two regression ops
    
    * enable binary op
    
    * disable binary broadcast
    
    * fix
    
    * duplicate some code in binary_broadcast
    
    * try to make names short
    
    * try to make names short for infer stype
    
    * disable sparse broadcast_add
    
    * revert binary broadcast
    
    * update
    
    * disable MAE
    
    * disable DnsCsrOp
    
    * remove IType
    
    * remove binary
    
    * update
    
    * address comments
    
    * update
    
    * try to fix R-test MF
    
    * Revert "try to fix R-test MF"
    
    This reverts commit f6d3e17ea7f5a71d23d81375bf345147a4373a93.
    
    * remove grad_req check for label
    
    * address comments
    
    * trigger CI
---
 docs/api/python/ndarray/sparse.md      |   2 +
 docs/api/python/symbol/sparse.md       |   2 +
 src/operator/regression_output-inl.h   | 179 +++++++++++++++++++++++++++++----
 src/operator/regression_output.cc      |  86 ++++++++++------
 src/operator/regression_output.cu      |  12 ++-
 tests/python/unittest/test_operator.py |  70 ++++++++-----
 6 files changed, 272 insertions(+), 79 deletions(-)
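
In short, the change lets the regression output operators take a ``csr`` label
while data, output and gradient stay dense. A minimal sketch of the usage this
enables (illustrative shapes and values, not code taken from the patch):

    import mxnet as mx
    import numpy as np

    shape = (4, 3)
    data = mx.symbol.Variable('data')
    label = mx.symbol.Variable('label', stype='csr')           # sparse label
    out = mx.symbol.LinearRegressionOutput(data=data, label=label)

    exe = out.simple_bind(mx.cpu(), data=shape, label=shape,
                          grad_req={'data': 'write', 'label': 'null'})
    exe.arg_dict['data'][:] = mx.nd.random.uniform(-1, 1, shape)
    exe.arg_dict['label'][:] = mx.nd.array(np.eye(*shape)).tostype('csr')

    exe.forward(is_train=True)    # forward output is dense (identity of data)
    exe.backward()                # grad = (data - label) * grad_scale / 3
    print(exe.outputs[0].stype, exe.grad_dict['data'].stype)   # default default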

diff --git a/docs/api/python/ndarray/sparse.md b/docs/api/python/ndarray/sparse.md
index df33570..b0cdd88 100644
--- a/docs/api/python/ndarray/sparse.md
+++ b/docs/api/python/ndarray/sparse.md
@@ -496,6 +496,8 @@ We summarize the interface for each class in the following sections.
     make_loss
     stop_gradient
     mxnet.ndarray.contrib.SparseEmbedding
+    LinearRegressionOutput
+    LogisticRegressionOutput
 ```
 
 ## API Reference
diff --git a/docs/api/python/symbol/sparse.md b/docs/api/python/symbol/sparse.md
index b40276b..a44ff15 100644
--- a/docs/api/python/symbol/sparse.md
+++ b/docs/api/python/symbol/sparse.md
@@ -194,6 +194,8 @@ In the rest of this document, we list sparse related routines provided by the
     make_loss
     stop_gradient
     mxnet.symbol.contrib.SparseEmbedding
+    LinearRegressionOutput
+    LogisticRegressionOutput
 ```
 
 ## API Reference
diff --git a/src/operator/regression_output-inl.h b/src/operator/regression_output-inl.h
index 4642f8d..59cbde3 100644
--- a/src/operator/regression_output-inl.h
+++ b/src/operator/regression_output-inl.h
@@ -31,6 +31,7 @@
 #include "./mxnet_op.h"
 #include "./operator_common.h"
 
+
 namespace mxnet {
 namespace op {
 
@@ -77,22 +78,103 @@ inline bool RegressionOpShape(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+template<bool is_forward>
+inline bool RegressionInferStorageType(const nnvm::NodeAttrs& attrs,
+                                       const int dev_mask,
+                                       DispatchMode* dispatch_mode,
+                                       std::vector<int>* in_attrs,
+                                       std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2U);
+  CHECK_EQ(out_attrs->size(), is_forward ? 1U : 2U);
+  const size_t label_pos = is_forward ? 1U : 0U;
+  const auto label_stype = in_attrs->at(label_pos);
+  const auto data_stype = in_attrs->at(1 - label_pos);
+  auto& out_stype = out_attrs->at(0);
+  bool dispatched = false;
+  if (!dispatched && data_stype == kDefaultStorage && label_stype == kDefaultStorage) {
+    dispatched = storage_type_assign(&out_stype, kDefaultStorage,
+                                     dispatch_mode, DispatchMode::kFCompute);
+  }
+
+  if (!dispatched && data_stype == kDefaultStorage && label_stype == kCSRStorage) {
+    dispatched = storage_type_assign(&out_stype, kDefaultStorage,
+                                     dispatch_mode, DispatchMode::kFComputeEx);
+  }
+
+  if (!dispatched) {
+    dispatched = dispatch_fallback(out_attrs, dispatch_mode);
+  }
+  // In the backward pass, although we don't care about the gradient of the label,
+  // a storage type still has to be assigned to it.
+  if (!is_forward) type_assign(&out_attrs->at(1), kDefaultStorage);
+
+  return dispatched;
+}
+
+/*!
+ * \brief Kernel for binary operator of dense -OP- csr ndarray.
+ * OP is applied only where the csr operand (the right-hand side) has stored
+ * entries; other output elements are left untouched. Parallelized over rows.
+ */
+template<typename OP, int req>
+struct DnsCsrSparseKernel {
+  template<typename DType, typename IType, typename RType>
+  MSHADOW_XINLINE static void Map(int i, DType* out_data,
+                                  const DType* dns_data,
+                                  const DType* csr_data,
+                                  const IType* csr_idx,
+                                  const RType* csr_indptr,
+                                  const nnvm::dim_t row_length) {
+    nnvm::dim_t row_i = i * row_length;
+    for (nnvm::dim_t j=csr_indptr[i]; j < csr_indptr[i+1]; j++) {
+      KERNEL_ASSIGN(out_data[row_i + csr_idx[j]], req,
+        OP::Map(dns_data[row_i + csr_idx[j]], csr_data[j]));
+    }
+  }
+};
+
+
+template<typename xpu, typename ForwardOp>
+inline void RegressionForwardImpl(mshadow::Stream<xpu> *s, const OpReqType req,
+                                  const TBlob &data, const TBlob &out) {
+  if (req == kNullOp) return;
+  MSHADOW_REAL_TYPE_SWITCH(data.type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req, Req, {
+      const DType* in_data = data.dptr<DType>();
+      DType* out_data = out.dptr<DType>();
+      using namespace mxnet_op;
+      Kernel<op_with_req<ForwardOp, Req>, xpu>::Launch(
+        s, out.Size(), out_data, in_data);
+    });
+  });
+}
+
 template<typename xpu, typename ForwardOp>
 void RegressionForward(const nnvm::NodeAttrs& attrs,
                        const OpContext& ctx,
                        const std::vector<TBlob>& inputs,
                        const std::vector<OpReqType>& req,
                        const std::vector<TBlob>& outputs) {
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 1U);
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  MSHADOW_REAL_TYPE_SWITCH(inputs[reg_enum::kData].type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[reg_enum::kOut], Req, {
-      const DType* in_data = inputs[reg_enum::kData].dptr<DType>();
-      DType* out_data = outputs[reg_enum::kOut].dptr<DType>();
-      using namespace mxnet_op;
-      Kernel<op_with_req<ForwardOp, Req>, xpu>::Launch(
-        s, outputs[reg_enum::kOut].Size(), out_data, in_data);
-    });
-  });
+  RegressionForwardImpl<xpu, ForwardOp>(s, req[reg_enum::kOut],
+    inputs[reg_enum::kData], outputs[reg_enum::kOut]);
+}
+
+template<typename xpu, typename ForwardOp>
+void RegressionForwardEx(const nnvm::NodeAttrs& attrs,
+                         const OpContext& ctx,
+                         const std::vector<NDArray>& inputs,
+                         const std::vector<OpReqType>& req,
+                         const std::vector<NDArray>& outputs) {
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 1U);
+  CHECK_EQ(inputs[reg_enum::kData].storage_type(), kDefaultStorage);
+  CHECK_EQ(inputs[reg_enum::kOut].storage_type(), kDefaultStorage);
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  RegressionForwardImpl<xpu, ForwardOp>(s, req[reg_enum::kOut],
+    inputs[reg_enum::kData].data(), outputs[reg_enum::kOut].data());
 }
 
 template<typename xpu, typename BackwardOp>
@@ -101,26 +183,89 @@ void RegressionBackward(const nnvm::NodeAttrs& attrs,
                         const std::vector<TBlob>& inputs,
                         const std::vector<OpReqType>& req,
                         const std::vector<TBlob>& outputs) {
+  CHECK_EQ(inputs.size(), 2);
+  CHECK_EQ(outputs.size(), 2);
+  if (req[reg_enum::kData] == kNullOp) return;
   const RegressionOutputParam& param = nnvm::get<RegressionOutputParam>(attrs.parsed);
   mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
   // inputs are in_label, out_data
   // outputs are data_grad, label_grad
-  MSHADOW_REAL_TYPE_SWITCH(inputs[1].type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-      const DType* in_label = inputs[0].dptr<DType>();
-      const DType* out_data = inputs[1].dptr<DType>();
-      DType* data_grad = outputs[0].dptr<DType>();
-      const real_t num_output = inputs[0].Size()/inputs[0].shape_[0];
+  const TBlob& in_label = inputs[0], out_data = inputs[1];
+  const TBlob& data_grad = outputs[0];
+  MSHADOW_REAL_TYPE_SWITCH(out_data.type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req[reg_enum::kData], Req, {
+      const DType* in_label_ptr = in_label.dptr<DType>();
+      const DType* out_data_ptr = out_data.dptr<DType>();
+      DType* data_grad_ptr = data_grad.dptr<DType>();
+      const real_t num_output = in_label.Size()/in_label.shape_[0];
       using namespace mxnet_op;
       Kernel<op_with_req<BackwardOp, Req>, xpu>::Launch(
-        s, outputs[0].Size(), data_grad, out_data, in_label);
+        s, data_grad.Size(), data_grad_ptr, out_data_ptr, in_label_ptr);
       Kernel<op_with_req<mshadow_op::mul, Req>, xpu>::Launch(
-        s, outputs[0].Size(), data_grad, data_grad,
+        s, data_grad.Size(), data_grad_ptr, data_grad_ptr,
         static_cast<DType>(param.grad_scale/num_output));
     });
   });
 }
 
+
+template<typename xpu, typename BackwardOp>
+inline void RegressionBackwardCSRImpl(mshadow::Stream<xpu> *s,
+                                      const RegressionOutputParam& param,
+                                      const OpReqType req,
+                                      const NDArray &data, const NDArray &label,
+                                      const NDArray &data_grad) {
+  if (req == kNullOp) return;
+  using namespace mshadow;
+  using namespace mxnet_op;
+  using namespace csr;
+  const TShape dshape = data.shape();
+  const nnvm::dim_t num_rows = dshape[0];
+  const nnvm::dim_t row_length = dshape[1];
+  CHECK_EQ(label.aux_type(kIndPtr), label.aux_type(kIdx))
+    << "Type of indices array and index pointer array of the label should be the same";
+  MSHADOW_IDX_TYPE_SWITCH(label.aux_type(kIdx), IType, {
+    MSHADOW_REAL_TYPE_SWITCH(label.dtype(), DType, {
+      MXNET_ASSIGN_REQ_SWITCH(req, Req, {
+        const IType* label_indptr = label.aux_data(kIndPtr).dptr<IType>();
+        const IType* label_idx = label.aux_data(kIdx).dptr<IType>();
+        const DType* label_data = label.data().dptr<DType>();
+        const DType* data_ptr = data.data().dptr<DType>();
+        DType* grad_ptr = data_grad.data().dptr<DType>();
+        if (req != kWriteInplace) {
+          Kernel<op_with_req<mshadow_op::identity, Req>, xpu>::Launch(s,
+            dshape.Size(), grad_ptr, data_ptr);
+        }
+        Kernel<DnsCsrSparseKernel<BackwardOp, Req>, xpu>::Launch(s, num_rows,
+          grad_ptr, data_ptr, label_data, label_idx, label_indptr, row_length);
+        Kernel<op_with_req<mshadow_op::mul, Req>, xpu>::Launch(s, dshape.Size(),
+          grad_ptr, grad_ptr, static_cast<DType>(param.grad_scale/row_length));
+      });
+    });
+  });
+}
+
+
+template<typename xpu, typename BackwardOP>
+void RegressionBackwardEx(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<NDArray>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<NDArray>& outputs) {
+  CHECK_EQ(inputs.size(), 2U);
+  CHECK_EQ(outputs.size(), 2U);
+  const RegressionOutputParam& param = nnvm::get<RegressionOutputParam>(attrs.parsed);
+  const auto label_stype = inputs[0].storage_type();
+  const auto data_stype = inputs[1].storage_type();
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  if (data_stype == kDefaultStorage && label_stype == kCSRStorage) {
+    RegressionBackwardCSRImpl<xpu, BackwardOP>(s, param, req[0], inputs[1],
+      inputs[0], outputs[0]);
+  } else {
+    LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
+  }
+}
+
 struct RegressionOpGrad {
   const char *op_name;
   std::vector<nnvm::NodeEntry> operator()(const nnvm::NodePtr& n,
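
The sparse backward path above follows a copy-then-scatter pattern: the gradient
buffer is first filled with the dense prediction (skipped for kWriteInplace),
DnsCsrSparseKernel then applies OP only at the columns stored in each csr row,
and a final kernel scales everything by grad_scale / row_length. A plain
NumPy/SciPy reference of those semantics, with OP = minus (a sketch for
illustration, not the kernel itself):

    import numpy as np
    import scipy.sparse as sp

    def backward_csr_reference(data, label_csr, grad_scale=1.0):
        num_rows, row_length = data.shape
        grad = data.copy()                       # identity copy (non-inplace case)
        indptr, idx, val = label_csr.indptr, label_csr.indices, label_csr.data
        for i in range(num_rows):                # DnsCsrSparseKernel: one row per i
            for j in range(indptr[i], indptr[i + 1]):
                grad[i, idx[j]] = data[i, idx[j]] - val[j]   # OP::Map(dns, csr)
        return grad * (grad_scale / row_length)  # final elementwise scale

    data = np.random.uniform(-1, 1, (5, 4)).astype(np.float32)
    label = sp.random(5, 4, density=0.3, format='csr', dtype=np.float32)
    assert np.allclose(backward_csr_reference(data, label),
                       (data - label.toarray()) / 4, atol=1e-6)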
diff --git a/src/operator/regression_output.cc b/src/operator/regression_output.cc
index 0b8ce69..9539a15 100644
--- a/src/operator/regression_output.cc
+++ b/src/operator/regression_output.cc
@@ -26,37 +26,38 @@
 #include "./elemwise_op_common.h"
 
 
-#define MXNET_OPERATOR_REGISTER_REGRESSION_FWD(__name$, __kernel$, __bwdop$)   \
-  NNVM_REGISTER_OP(__name$)                                                    \
-  .set_num_inputs(2)                                                           \
-  .set_num_outputs(1)                                                          \
-  .set_attr<nnvm::FListInputNames>("FListInputNames",                          \
-    [](const NodeAttrs& attrs) {                                               \
-      return std::vector<std::string>{"data", "label"};                        \
-    })                                                                         \
-  .set_attr<nnvm::FInferShape>("FInferShape", RegressionOpShape)               \
-  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)                \
-  .set_attr<nnvm::FGradient>("FGradient", RegressionOpGrad{__bwdop$})          \
-  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                            \
-  [](const NodeAttrs& attrs){                                                  \
-    return std::vector<std::pair<int, int> >{{0, 0}};                          \
-  })                                                                           \
-  .set_attr<FCompute>("FCompute<cpu>", RegressionForward<cpu, __kernel$>)      \
-  .add_argument("data", "NDArray-or-Symbol", "Input data to the function.")    \
-  .add_argument("label", "NDArray-or-Symbol", "Input label to the function.")  \
+#define MXNET_OPERATOR_REGISTER_REGRESSION_FWD(__name$, __kernel$, __bwdop$)           \
+  NNVM_REGISTER_OP(__name$)                                                            \
+  MXNET_ADD_SPARSE_OP_ALIAS(__name$)                                                   \
+  .set_num_inputs(2)                                                                   \
+  .set_num_outputs(1)                                                                  \
+  .set_attr<nnvm::FListInputNames>("FListInputNames",                                  \
+    [](const NodeAttrs& attrs) {                                                       \
+      return std::vector<std::string>{"data", "label"};                                \
+    })                                                                                 \
+  .set_attr<nnvm::FInferShape>("FInferShape", RegressionOpShape)                       \
+  .set_attr<nnvm::FGradient>("FGradient", RegressionOpGrad{__bwdop$})                  \
+  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 1>)                        \
+  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                                    \
+  [](const NodeAttrs& attrs){                                                          \
+    return std::vector<std::pair<int, int> >{{0, 0}};                                  \
+  })                                                                                   \
+  .set_attr<FCompute>("FCompute<cpu>", RegressionForward<cpu, __kernel$>)              \
+  .add_argument("data", "NDArray-or-Symbol", "Input data to the function.")            \
+  .add_argument("label", "NDArray-or-Symbol", "Input label to the function.")          \
   .add_arguments(RegressionOutputParam::__FIELDS__())
 
-#define MXNET_OPERATOR_REGISTER_REGRESSION_BWD(__name$, __kernel$)         \
-  NNVM_REGISTER_OP(__name$)                                                \
-  .set_num_inputs(2)                                                       \
-  .set_num_outputs(2)                                                      \
-  .set_attr_parser(ParamParser<RegressionOutputParam>)                     \
-  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 2>)            \
-  .set_attr<nnvm::TIsBackward>("TIsBackward", true)                        \
-  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                        \
-  [](const NodeAttrs& attrs){                                              \
-    return std::vector<std::pair<int, int> >{{1, 0}};                      \
-  })                                                                       \
+#define MXNET_OPERATOR_REGISTER_REGRESSION_BWD(__name$, __kernel$)                      \
+  NNVM_REGISTER_OP(__name$)                                                             \
+  .set_num_inputs(2)                                                                    \
+  .set_num_outputs(2)                                                                   \
+  .set_attr_parser(ParamParser<RegressionOutputParam>)                                  \
+  .set_attr<nnvm::TIsBackward>("TIsBackward", true)                                     \
+  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<2, 2>)                         \
+  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                                     \
+  [](const NodeAttrs& attrs){                                                           \
+    return std::vector<std::pair<int, int> >{{1, 0}};                                   \
+  })                                                                                    \
   .set_attr<FCompute>("FCompute<cpu>", RegressionBackward<cpu, __kernel$>)
 
 namespace mxnet {
@@ -67,6 +68,8 @@ DMLC_REGISTER_PARAMETER(RegressionOutputParam);
 
 MXNET_OPERATOR_REGISTER_REGRESSION_FWD(LinearRegressionOutput,
   mshadow_op::identity, "_backward_linear_reg_out")
+.set_attr<FInferStorageType>("FInferStorageType", RegressionInferStorageType<true>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", RegressionForwardEx<cpu, mshadow_op::identity>)
 .describe(R"code(Computes and optimizes for squared loss during backward propagation.
 Just outputs ``data`` during forward propagation.
 
@@ -78,12 +81,19 @@ then the squared loss estimated over :math:`n` samples is defined as
 .. note::
    Use the LinearRegressionOutput as the final output layer of a net.
 
+The storage type of ``label`` can be ``default`` or ``csr``
+
+- LinearRegressionOutput(default, default) = default
+- LinearRegressionOutput(default, csr) = default
+
 By default, gradients of this loss function are scaled by factor `1/m`, where m is the number of regression outputs of a training example.
 The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 
 )code" ADD_FILELINE);
 
-MXNET_OPERATOR_REGISTER_REGRESSION_BWD(_backward_linear_reg_out, mshadow_op::minus);
+MXNET_OPERATOR_REGISTER_REGRESSION_BWD(_backward_linear_reg_out, mshadow_op::minus)
+.set_attr<FInferStorageType>("FInferStorageType", RegressionInferStorageType<false>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", RegressionBackwardEx<cpu, mshadow_op::minus>);
 
 MXNET_OPERATOR_REGISTER_REGRESSION_FWD(MAERegressionOutput,
   mshadow_op::identity, "_backward_mae_reg_out")
@@ -99,6 +109,11 @@ then the mean absolute error (MAE) estimated over :math:`n` samples is defined a
 .. note::
    Use the MAERegressionOutput as the final output layer of a net.
 
+The storage type of ``label`` can be ``default`` or ``csr``
+
+- MAERegressionOutput(default, default) = default
+- MAERegressionOutput(default, csr) = default
+
 By default, gradients of this loss function are scaled by factor `1/m`, where m is the number of regression outputs of a training example.
 The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 
@@ -108,6 +123,8 @@ MXNET_OPERATOR_REGISTER_REGRESSION_BWD(_backward_mae_reg_out, mshadow_op::minus_
 
 MXNET_OPERATOR_REGISTER_REGRESSION_FWD(LogisticRegressionOutput,
   mshadow_op::sigmoid, "_backward_logistic_reg_out")
+.set_attr<FInferStorageType>("FInferStorageType", RegressionInferStorageType<true>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", RegressionForwardEx<cpu, mshadow_op::sigmoid>)
 .describe(R"code(Applies a logistic function to the input.
 
 The logistic function, also known as the sigmoid function, is computed as
@@ -120,12 +137,19 @@ It is suitable for binary classification or probability prediction tasks.
 .. note::
    Use the LogisticRegressionOutput as the final output layer of a net.
 
+The storage type of ``label`` can be ``default`` or ``csr``
+
+- LogisticRegressionOutput(default, default) = default
+- LogisticRegressionOutput(default, csr) = default
+
 By default, gradients of this loss function are scaled by factor `1/m`, where m is the number of regression outputs of a training example.
 The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.
 
 )code" ADD_FILELINE);
 
-MXNET_OPERATOR_REGISTER_REGRESSION_BWD(_backward_logistic_reg_out, mshadow_op::minus);
+MXNET_OPERATOR_REGISTER_REGRESSION_BWD(_backward_logistic_reg_out, mshadow_op::minus)
+.set_attr<FInferStorageType>("FInferStorageType", RegressionInferStorageType<false>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", RegressionBackwardEx<cpu, mshadow_op::minus>);
 
 }  // namespace op
 }  // namespace mxnet
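
The docstrings added here pin down two things worth reading together: dispatch
(the output stays ``default`` whether the label is ``default`` or ``csr``) and
the gradient scale `grad_scale/m`. A small worked example of that scale for
LinearRegressionOutput, whose backward op is `minus` (illustrative numbers,
not from the patch):

    import numpy as np

    pred  = np.array([[0.2, -0.4, 0.9]], dtype=np.float32)  # one example, m = 3 outputs
    label = np.array([[0.0,  0.0, 1.0]], dtype=np.float32)
    m, grad_scale = pred.shape[1], 1.0

    # RegressionBackward applies OP = minus, then multiplies by grad_scale/num_output
    grad = (pred - label) * grad_scale / m
    print(grad)    # [[ 0.0667 -0.1333 -0.0333]] (approx.)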
diff --git a/src/operator/regression_output.cu b/src/operator/regression_output.cu
index e3a2e7e..ca11b84 100644
--- a/src/operator/regression_output.cu
+++ b/src/operator/regression_output.cu
@@ -28,10 +28,12 @@ namespace mxnet {
 namespace op {
 
 NNVM_REGISTER_OP(LinearRegressionOutput)
-.set_attr<FCompute>("FCompute<gpu>", RegressionForward<gpu, mshadow_op::identity>);
+.set_attr<FCompute>("FCompute<gpu>", RegressionForward<gpu, mshadow_op::identity>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", RegressionForwardEx<gpu, mshadow_op::identity>);
 
 NNVM_REGISTER_OP(_backward_linear_reg_out)
-.set_attr<FCompute>("FCompute<gpu>", RegressionBackward<gpu, mshadow_op::minus>);
+.set_attr<FCompute>("FCompute<gpu>", RegressionBackward<gpu, mshadow_op::minus>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", RegressionBackwardEx<gpu, mshadow_op::minus>);
 
 NNVM_REGISTER_OP(MAERegressionOutput)
 .set_attr<FCompute>("FCompute<gpu>", RegressionForward<gpu, mshadow_op::identity>);
@@ -40,10 +42,12 @@ NNVM_REGISTER_OP(_backward_mae_reg_out)
 .set_attr<FCompute>("FCompute<gpu>", RegressionBackward<gpu, mshadow_op::minus_sign>);
 
 NNVM_REGISTER_OP(LogisticRegressionOutput)
-.set_attr<FCompute>("FCompute<gpu>", RegressionForward<gpu, mshadow_op::sigmoid>);
+.set_attr<FCompute>("FCompute<gpu>", RegressionForward<gpu, mshadow_op::sigmoid>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", RegressionForwardEx<gpu, mshadow_op::sigmoid>);
 
 NNVM_REGISTER_OP(_backward_logistic_reg_out)
-.set_attr<FCompute>("FCompute<gpu>", RegressionBackward<gpu, mshadow_op::minus>);
+.set_attr<FCompute>("FCompute<gpu>", RegressionBackward<gpu, mshadow_op::minus>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", RegressionBackwardEx<gpu, mshadow_op::minus>);
 
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 7889e08..1a04e8e 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -218,41 +218,57 @@ def test_slice_channel():
     check_slice_channel(data_ndim=3, axis=-1, num_outputs=2, squeeze_axis=False)
     check_slice_channel(data_ndim=5, axis=-2, num_outputs=3, squeeze_axis=True)
 
-
-def check_regression(symbol, forward, backward):
-    data = mx.symbol.Variable('data')
-    label = mx.symbol.Variable('label')
-    out = symbol(data, label)
-    shape = (3, 1)
-    arr_data = mx.random.uniform(-1, 1, shape, ctx=mx.cpu()).copyto(default_context())
-    arr_label = mx.random.uniform(0, 1, shape[0], ctx=mx.cpu()).copyto(default_context())
-    arr_grad = mx.nd.empty(shape)
-    exec1 = out.bind(default_context(),
-                     args=[arr_data, arr_label],
-                     args_grad={"data" : arr_grad})
-    exec1.forward(is_train=True)
-    out1 = exec1.outputs[0].asnumpy()
-    npout = forward(arr_data.asnumpy())
-    # Non-zero atol required by test_operator_gpu.py:test_regression with seed 651640549
-    atol = 1e-5
-    assert_almost_equal(npout, out1, atol=atol)
-
-    exec1.backward()
-    npout = backward(npout,  arr_label.asnumpy().reshape(npout.shape))
-    assert_almost_equal(npout, arr_grad.asnumpy(), atol=atol)
-
-
 @with_seed()
 def test_regression():
+    ''' test regression operator '''
+    def check_regression(symbol, forward, backward, shape, stype='default', densities=[0, 0.5, 1]):
+        # init executor
+        data = mx.symbol.Variable('data')
+        label = mx.symbol.Variable('label', stype=stype)
+        out = symbol(data, label)
+        grad_req = {'data': 'write', 'label': 'null'}
+        out_exec = out.simple_bind(default_context(), grad_req=grad_req,
+            data=shape, label=shape)
+        arg_map = dict(zip(out.list_arguments(), out_exec.arg_arrays))
+        grad_map = dict(zip(out.list_arguments(), out_exec.grad_arrays))
+        # init data
+        arr_data = mx.random.uniform(-1, 1, shape)
+        arg_map["data"][:] = arr_data
+        # init label based on density
+        arr_label = arg_map["label"]
+        atol = 1e-5
+        for density in densities:
+            arr_label[:] = rand_ndarray(shape, stype, density=density)
+            out_exec.forward(is_train=True)
+            out_exec.backward()
+            np_out = forward(arr_data.asnumpy())
+            out_grad = backward(np_out, arr_label.asnumpy().reshape(np_out.shape)) / shape[1]
+            assert_almost_equal(out_exec.outputs[0].asnumpy(), np_out, atol=atol)
+            assert_almost_equal(grad_map["data"].asnumpy(), out_grad, atol=atol)
+
+    shape = (50, 30)
+
     check_regression(mx.symbol.LogisticRegressionOutput,
                      lambda x: 1.0 / (1.0 + np.exp(-x)),
-                     lambda x, y : x - y)
+                     lambda x, y : x - y,
+                     shape)
     check_regression(mx.symbol.LinearRegressionOutput,
                      lambda x: x,
-                     lambda x, y : x - y)
+                     lambda x, y : x - y,
+                     shape)
     check_regression(mx.symbol.MAERegressionOutput,
                      lambda x: x,
-                     lambda x, y : np.where(x > y, np.ones(x.shape), -np.ones(x.shape)))
+                     lambda x, y : np.where(x > y, np.ones(x.shape), -np.ones(x.shape)),
+                     shape)
+    check_regression(mx.symbol.LogisticRegressionOutput,
+                     lambda x: 1.0 / (1.0 + np.exp(-x)),
+                     lambda x, y : x - y,
+                     shape, stype='csr')
+    check_regression(mx.symbol.LinearRegressionOutput,
+                     lambda x: x,
+                     lambda x, y : x - y,
+                     shape, stype='csr')
+   
 
 def check_softmax_grad(xpu):
     x = mx.sym.Variable('x')
