You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/15 22:44:25 UTC

[GitHub] piiswrong closed pull request #10651: handle fallback correctly for write inplace when the array is MKLDNN.

piiswrong closed pull request #10651: handle fallback correctly for write inplace when the array is MKLDNN.
URL: https://github.com/apache/incubator-mxnet/pull/10651
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 3ac86fba684..b07f7d86dc5 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -76,8 +76,8 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
 }
 
 inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
-                                 const std::vector<OpReqType> &req,
                                  const std::vector<NDArray> *bufs,
+                                 std::vector<OpReqType> *req,
                                  std::vector<TBlob> *blobs,
                                  std::vector<NDArray> *temp_src,
                                  std::vector<NDArray> *temp_dst) {
@@ -86,6 +86,12 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
     auto& nd = src[i];
     bool is_default = nd.storage_type() == kDefaultStorage;
 #if MXNET_USE_MKLDNN == 1
+    if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
+      // If it's write inplace and the output array doesn't use the default
+      // layout, we'll generate a temporary output array below, which means
+      // the input array and the output array are no longer the same array.
+      // we should change the request type.
+      req->at(i) = kWriteTo;
     // We have to make sure it's default storage and default layout.
     is_default = nd.IsDefaultData();
 #endif
@@ -115,9 +121,9 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
  */
 inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
                                    const std::vector<NDArray> &ndoutputs,
-                                   const std::vector<OpReqType> &req,
                                    const std::vector<NDArray> *in_bufs,
                                    const std::vector<NDArray> *out_bufs,
+                                   std::vector<OpReqType> *req,
                                    std::vector<TBlob> *input_blobs,
                                    std::vector<TBlob> *output_blobs,
                                    std::vector<NDArray> *pre_temp_src,
@@ -130,7 +136,7 @@ inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
   SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
                       in_temp_idx_map);
   // populate output blobs
-  SetupDefaultBlobsOut(ndoutputs, req, out_bufs, output_blobs, post_temp_dst,
+  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
                        post_temp_src);
   // add mutable inputs to post temp list
   for (const auto idx : mutate_idx) {
diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc
index f7ac772ec76..697e4869a04 100644
--- a/src/executor/attach_op_execs_pass.cc
+++ b/src/executor/attach_op_execs_pass.cc
@@ -78,7 +78,8 @@ class StorageFallbackOpExecutor : public OpExecutor {
     pre_temp_src_.clear(); pre_temp_dst_.clear();
     post_temp_src_.clear(); post_temp_dst_.clear();
     in_temp_idx_map_.clear();
-    SetupDefaultBlobsInOut(in_array, out_array, req, &pre_temp_buf_, &post_temp_buf_,
+    tmp_req = req;
+    SetupDefaultBlobsInOut(in_array, out_array, &pre_temp_buf_, &post_temp_buf_, &req,
                            &in_data_, &out_data_,
                            &pre_temp_src_, &pre_temp_dst_,
                            &post_temp_src_, &post_temp_dst_,
@@ -89,8 +90,12 @@ class StorageFallbackOpExecutor : public OpExecutor {
   // storage fallback after fcompute is completed
   void PostFCompute(bool is_gpu) {
     common::CastNonDefaultStorage(post_temp_src_, post_temp_dst_, op_ctx, is_gpu);
+    req = tmp_req;
   }
 
+  // output requirement on each output array.
+  // This temporarily saves the original output requirements.
+  std::vector<OpReqType> tmp_req;
   // default storage tensor blobs for fcompute
   std::vector<TBlob> in_data_, out_data_;
   // These are NDArray buffers for cast storage.
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 10a011e88b3..d7bb37b7cfe 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -373,8 +373,9 @@ inline void PushFCompute(const FCompute& fn,
 #if MXNET_USE_MKLDNN == 1
       InvalidateOutputs(outputs, req);
 #endif
+      std::vector<OpReqType> tmp_req = req;
       // setup blobs
-      SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+      SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
                              &input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
                              &post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
       // setup context
@@ -382,7 +383,7 @@ inline void PushFCompute(const FCompute& fn,
       bool is_gpu = ctx.dev_mask() == gpu::kDevMask;
       // pre-fcompute fallback, cast to default storage type
       CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
-      fn(attrs, opctx, input_blobs, req, output_blobs);
+      fn(attrs, opctx, input_blobs, tmp_req, output_blobs);
       // post-fcompute fallback, cast to original storage type
       CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
       if (is_gpu) {
@@ -492,15 +493,16 @@ inline void PushOperator(const OpStatePtr& state,
 #if MXNET_USE_MKLDNN == 1
         InvalidateOutputs(outputs, req);
 #endif
+        std::vector<OpReqType> tmp_req = req;
         // populate input blobs and output blobs
-        SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+        SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
                                &input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
                                &post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
         // setup contexts
         bool is_gpu = rctx.get_ctx().dev_mask() == gpu::kDevMask;
         // pre-fcompute fallback
         CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
-        fcompute(state, opctx, input_blobs, req, output_blobs);
+        fcompute(state, opctx, input_blobs, tmp_req, output_blobs);
         // post-fcompute fallback, cast to original storage type, if necessary
         CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
         if (is_gpu && exec_type == ExecType::kSync) {
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 67b4c061eee..0cbfbd8f4eb 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1118,9 +1118,8 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
                              to_mem->get_primitive_desc().get_size());
       memcpy(to_mem->get_data_handle(), from_mem->get_data_handle(), size);
     } else {
-      std::vector<mkldnn::primitive> net;
-      net.push_back(mkldnn::reorder(*from_mem, *to_mem));
-      mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
+      const_cast<NDArray &>(to).CopyFrom(*from_mem);
+      MKLDNNStream::Get()->Submit();
     }
   } else {
     // In this case, one of the NDArray isn't supported by MKLDNN, we need
diff --git a/src/operator/nn/mkldnn/mkldnn_copy.cc b/src/operator/nn/mkldnn/mkldnn_copy.cc
index 4bfb7faad96..75e51aff006 100644
--- a/src/operator/nn/mkldnn/mkldnn_copy.cc
+++ b/src/operator/nn/mkldnn/mkldnn_copy.cc
@@ -35,7 +35,13 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
                 const NDArray &in_data, const OpReqType &req,
                 const NDArray &out_data) {
   TmpMemMgr::Get()->Init(ctx.requested[0]);
-  auto in_mem = in_data.GetMKLDNNData();
+
+  // If the input data is a view of an MKLDNN array, we should create a new
+  // NDArray with reordered data.
+  NDArray data = in_data;
+  if (data.IsMKLDNNData() && data.IsView())
+    data = data.Reorder2Default();
+  auto in_mem = data.GetMKLDNNData();
   if (req == kAddTo) {
     TmpMemMgr::Get()->Init(ctx.requested[0]);
     // We should try and force the output memory has the same format
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index c20cd75e07d..bbff53152ec 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -26,6 +26,7 @@
 #if MXNET_USE_MKLDNN == 1
 
 #include "gtest/gtest.h"
+#include "mxnet/imperative.h"
 #include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
 
 using namespace mxnet;
@@ -97,12 +98,18 @@ static void InitArray(NDArray *arr) {
 }
 
 // Init arrays with the specified layout.
-static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd) {
+static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd,
+                            bool is_rand = false) {
   const TBlob &blob = arr->data();
   mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
   size_t size = blob.Size();
-  for (size_t i = 0; i < size; i++)
-    data[i] = i;
+  if (is_rand) {
+    for (size_t i = 0; i < size; i++)
+      data[i] = std::rand();
+  } else {
+    for (size_t i = 0; i < size; i++)
+      data[i] = i;
+  }
   arr->MKLDNNDataReorderAsync(pd);
   arr->WaitToRead();
 }
@@ -206,7 +213,7 @@ static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, int
 }
 
 struct TestArrayShapes {
-  std::vector<TShape> shapes;
+  std::vector<nnvm::TShape> shapes;
   std::vector<mkldnn::memory::primitive_desc> pds;
 };
 
@@ -239,7 +246,7 @@ static TestArrayShapes GetTestArrayShapes() {
   {
     // 4D
     TShape s1(4);
-    s1[0] = 1; s1[1] = 96; s1[2] = 54; s1[3] = 54;
+    s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54;
     shapes.push_back(s1);
     pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
 
@@ -332,4 +339,179 @@ TEST(MKLDNN_NDArray, GetDataReorder) {
   }
 }
 
+struct OpAttrs {
+  nnvm::NodeAttrs attrs;
+  std::vector<DispatchMode> dispatches;
+};
+
+OpAttrs GetCopyOp() {
+  OpAttrs attrs;
+  attrs.attrs.op = Op::Get("_copy");
+  attrs.dispatches.resize(2);
+  attrs.dispatches[0] = DispatchMode::kFCompute;
+  attrs.dispatches[1] = DispatchMode::kFComputeEx;
+  return attrs;
+}
+
+OpAttrs GetLeakyReluOp() {
+  OpAttrs attrs;
+  attrs.attrs.op = Op::Get("LeakyReLU");
+  attrs.dispatches.resize(1);
+  attrs.dispatches[0] = DispatchMode::kFCompute;
+  return attrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ *    dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ *    type of NDArrays only exists for weight arrays. I don't think we should
+ *    pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ * 4. Reshaped/sliced NDArray
+ * 5. Reshaped/sliced NDArray with MKLDNN layout (reshape/slice from Normal NDArray
+ *    with MKLDNN layout)
+ * 6. Reshaped/sliced NDArray with MKLDNN layout whose MKLDNN memory may have
+ *    different dimensions from the NDArray (result of MKLDNNDataReorderAsync).
+ *    However, this type of NDArrays only exists for weight arrays. I don't think
+ *    we should pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ *
+ */
+std::vector<NDArray> GetTestInputArrays() {
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<nnvm::TShape> shapes = tas.shapes;
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+  std::vector<NDArray> in_arrs;
+  for (auto shape : shapes) {
+    in_arrs.emplace_back(shape, Context());
+    InitArray(&in_arrs.back());
+    for (auto pd : pds) {
+      if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+        continue;
+
+      in_arrs.emplace_back(shape, Context());
+      InitMKLDNNArray(&in_arrs.back(), pd);
+
+      // Get a sliced version.
+      NDArray arr(shape, Context());
+      InitMKLDNNArray(&arr, pd);
+      arr = arr.Slice(1, arr.shape()[0] - 1);
+      in_arrs.emplace_back(arr);
+    }
+  }
+  return in_arrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ *    dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ *    type of NDArrays only exists for weight arrays. I don't think we should
+ *    pass them to all operators.
+ *    In the inference mode, the MKLDNN memory in the weight array will be
+ *    reordered to 5 dimensions.
+ * 4. Reused NDArray (this is created by the MXNet executor). This type of
+ *    NDArrays can only be used as output arrays.
+ */
+std::vector<NDArray> GetTestOutputArrays(const TShape &shape,
+                                         const std::vector<mkldnn::memory::primitive_desc> &pds) {
+  std::vector<NDArray> in_arrs;
+  in_arrs.emplace_back(shape, Context());
+  InitArray(&in_arrs.back());
+
+  // Get a reused version.
+  nnvm::TShape s(1);
+  s[0] = shape.Size();
+  NDArray arr(s, Context());
+  arr = arr.AsArray(shape, arr.dtype());
+  InitArray(&arr);
+  in_arrs.emplace_back(arr);
+
+  for (auto pd : pds) {
+    if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+      continue;
+
+    in_arrs.emplace_back(shape, Context());
+    InitMKLDNNArray(&in_arrs.back(), pd, true);
+
+    // Get a reused version.
+    nnvm::TShape s(1);
+    s[0] = shape.Size();
+    arr = NDArray(s, Context());
+    arr = arr.AsArray(shape, arr.dtype());
+    InitMKLDNNArray(&arr, pd, true);
+    in_arrs.emplace_back(arr);
+  }
+  return in_arrs;
+}
+
+using VerifyFunc = std::function<void (const NDArray &in_arr, const NDArray &arr)>;
+
+void VerifyCopyResult(const NDArray &in_arr, const NDArray &arr) {
+  NDArray tmp1 = in_arr.Reorder2Default();
+  NDArray tmp2 = arr.Reorder2Default();
+  EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size());
+  TBlob d1 = tmp1.data();
+  TBlob d2 = tmp2.data();
+  EXPECT_EQ(memcmp(d1.dptr_, d2.dptr_,
+                   tmp1.shape().Size() * sizeof(mshadow::default_real_t)), 0);
+}
+
+void TestUnaryOp(const OpAttrs &attrs, VerifyFunc verify_fn) {
+  std::vector<NDArray*> inputs(1);
+  std::vector<NDArray*> outputs(1);
+  std::vector<OpReqType> req(1);
+  std::vector<DispatchMode> dispatches = attrs.dispatches;
+
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+  std::vector<NDArray> in_arrs = GetTestInputArrays();
+  for (auto in_arr : in_arrs) {
+    for (auto dispatch : dispatches) {
+      std::vector<NDArray> out_arrs = GetTestOutputArrays(in_arr.shape(), pds);
+      for (auto out_arr : out_arrs) {
+        req[0] = kWriteTo;
+        inputs[0] = &in_arr;
+        outputs[0] = &out_arr;
+        Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs,
+                                    outputs, req, dispatch, mxnet::OpStatePtr());
+        out_arr.WaitToRead();
+        verify_fn(in_arr, out_arr);
+      }
+    }
+  }
+
+  for (auto dispatch : dispatches) {
+    in_arrs = GetTestInputArrays();
+    for (auto arr : in_arrs) {
+      // If the array is a view, we shouldn't write data to it.
+      if (arr.IsView())
+        continue;
+
+      NDArray orig = arr.Copy(arr.ctx());
+      req[0] = kWriteInplace;
+      inputs[0] = &arr;
+      outputs[0] = &arr;
+      Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, outputs, req,
+                                  dispatch, mxnet::OpStatePtr());
+      arr.WaitToRead();
+      verify_fn(orig, arr);
+    }
+  }
+}
+
+TEST(IMPERATIVE, UnaryOp) {
+  OpAttrs attrs = GetCopyOp();
+  TestUnaryOp(attrs, VerifyCopyResult);
+}
+
 #endif


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services