You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2019/10/23 18:40:19 UTC

[GitHub] [incubator-mxnet] eric-haibin-lin commented on a change in pull request #16586: [Numpy] Support N_D(N>=3) batch_dot

eric-haibin-lin commented on a change in pull request #16586: [Numpy] Support N_D(N>=3) batch_dot
URL: https://github.com/apache/incubator-mxnet/pull/16586#discussion_r338215360
 
 

 ##########
 File path: src/operator/tensor/dot.cc
 ##########
 @@ -138,21 +138,73 @@ which is computed by::
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
   })
 .set_attr<FCompute>("FCompute<cpu>", BatchDotForward_<cpu>)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_batch_dot"})
+.set_attr<nnvm::FGradient>("FGradient",
+    [](const nnvm::NodePtr& n,
+       const std::vector<nnvm::NodeEntry>& ograds) {
+  const DotParam& param = nnvm::get<DotParam>(n->attrs.parsed);
+  nnvm::NodePtr lhs_grad;
+  nnvm::NodePtr rhs_grad;
+  std::string lhs_gnode_name = n->attrs.name + "_backward_lhs";
+  std::string rhs_gnode_name = n->attrs.name + "_backward_rhs";
+  if (param.transpose_a && param.transpose_b) {
+    // Gradient of z = dot(x.T, y.T)
+    // dx = dot(dz, y).T = dot(y.T, dz.T)
+    // dy = dot(x, dz).T = dot(dz.T, x.T)
+    lhs_grad = MakeNode("batch_dot", lhs_gnode_name,
+                        {n->inputs[1], ograds[0]}, &(n->attrs.dict), &n);
+    rhs_grad = MakeNode("batch_dot", rhs_gnode_name,
+                        {ograds[0], n->inputs[0]}, &(n->attrs.dict), &n);
+  } else if (!param.transpose_a && param.transpose_b) {
+    // Gradient of z = dot(x, y.T)
+    // dx = dot(dz, y)
+    // dy = dot(x.T, dz).T = dot(dz.T, x)
+    auto lhs_attrs_dict = n->attrs.dict;
+    auto rhs_attrs_dict = n->attrs.dict;
+    lhs_attrs_dict["transpose_a"] = "false";
+    lhs_attrs_dict["transpose_b"] = "false";
+    rhs_attrs_dict["transpose_a"] = "true";
+    rhs_attrs_dict["transpose_b"] = "false";
+    lhs_grad = MakeNode("batch_dot", lhs_gnode_name,
+                        {ograds[0], n->inputs[1]}, &lhs_attrs_dict, &n);
+    rhs_grad = MakeNode("batch_dot", rhs_gnode_name,
+                        {ograds[0], n->inputs[0]}, &rhs_attrs_dict, &n);
+  } else if (param.transpose_a && !param.transpose_b) {
+    // Gradient of z = dot(x.T, y)
+    // dx = dot(dz, y.T).T = dot(y, dz.T)
+    // dy = dot(x, dz)
+    auto lhs_attrs_dict = n->attrs.dict;
+    auto rhs_attrs_dict = n->attrs.dict;
+    lhs_attrs_dict["transpose_a"] = "false";
+    lhs_attrs_dict["transpose_b"] = "true";
+    rhs_attrs_dict["transpose_a"] = "false";
+    rhs_attrs_dict["transpose_b"] = "false";
+    lhs_grad = MakeNode("batch_dot", lhs_gnode_name,
+                        {n->inputs[1], ograds[0]}, &lhs_attrs_dict, &n);
+    rhs_grad = MakeNode("batch_dot", rhs_gnode_name,
+                        {n->inputs[0], ograds[0]}, &rhs_attrs_dict, &n);
+  } else {
+    // Gradient of z = dot(x, y)
+    // dx = dot(dz, y.T)
+    // dy = dot(x.T, dz)
+    auto lhs_attrs_dict = n->attrs.dict;
+    auto rhs_attrs_dict = n->attrs.dict;
+    lhs_attrs_dict["transpose_a"] = "false";
+    lhs_attrs_dict["transpose_b"] = "true";
+    rhs_attrs_dict["transpose_a"] = "true";
+    rhs_attrs_dict["transpose_b"] = "false";
+    lhs_grad = MakeNode("batch_dot", lhs_gnode_name,
+                        {ograds[0], n->inputs[1]}, &lhs_attrs_dict, &n);
+    rhs_grad = MakeNode("batch_dot", rhs_gnode_name,
+                        {n->inputs[0], ograds[0]}, &rhs_attrs_dict, &n);
+  }
+  std::vector<nnvm::NodeEntry> ret;
+  ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0});
+  ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0});
+  return ret;
+})
 .add_argument("lhs", "NDArray-or-Symbol", "The first input")
 .add_argument("rhs", "NDArray-or-Symbol", "The second input")
 .add_arguments(DotParam::__FIELDS__());
 
 
 Review comment:
  Is there any performance difference with the new backward_batch_dot implementation?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services