Posted to commits@mxnet.apache.org by jx...@apache.org on 2018/06/26 18:00:53 UTC

[incubator-mxnet] branch master updated: [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new e4bf646  [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)
e4bf646 is described below

commit e4bf6465cf352507538a29e2aeb05d4772df9690
Author: Alexander Zai <az...@gmail.com>
AuthorDate: Tue Jun 26 11:00:47 2018 -0700

    [MXNET-551] Test CreateMKLDNNMem/CommitOutput (#11308)
    
    * refactor copyfrom
    
    * add boilerplate
    
    * rename to MKLDNNCopy
    
    * write to temp memory
    
    * reorder mkldnn / views
    
    * return memory from GetMKLDNNData
    
    * add kaddto to unit test
    
    * move orig output before creating new mem
    
    * coerce memory if shape does not fit
    
    * use MKLDNNCopy in commit
    
    * uncomment addto test
    
    * switch order of mkldnnsum params
    
    * improve logging
    
    * wait to read after copying arr
    
    * remove extra white spaces
    
    * remove extra white space
    
    * remove unused var
    
    * reorder output
    
    * do not write to views
    
    * remove shape check in test
    
    * use input pdesc
    
    * remove unused var
    
    * fix merge
    
    * put inplace in separate loop
    
    * use two mem
    
    * use sum_pd when calling CreateMKLDNNData
    
    * reorder sum shapes if needed
    
    * comment out getsumpd
    
    * use MKLDNNCopy helper to reshape mem
    
    * remove getsumpd
    
    * use output mem for createmem
    
    * remove todo
    
    * wait to read output
    
    * do not attempt to shape output
    
    * use correct arr as input
    
    * revert commit change to ps-lite
    
    * revert change to tvm
    
    * fix lint
    
    * add comment to test
    
    * reduce calls to get_primitive_desc
    
    * skip tests that reorder2default
    
    * push_back to inputs
    
    * skip if view/mkldnn
    
    * add noop test
    
    * pass input ptr for write in place
    
    * allow empty
---
 src/ndarray/ndarray.cc                   |  72 +---------------
 src/operator/nn/mkldnn/mkldnn_base-inl.h |   1 +
 src/operator/nn/mkldnn/mkldnn_base.cc    | 103 +++++++++++++++++++---
 src/operator/nn/mkldnn/mkldnn_sum.cc     |  20 ++++-
 tests/cpp/operator/mkldnn.cc             | 142 +++++++++++++++++++++++++++++--
 5 files changed, 242 insertions(+), 96 deletions(-)
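
For context, the pattern exercised by this change is the CreateMKLDNNMem/CommitOutput
protocol: an operator requests output memory appropriate to the write request type,
computes into whatever memory it is handed, and CommitOutput then copies or
accumulates the result back into the NDArray only if a temporary buffer was handed
out. A minimal sketch of the calling pattern, mirroring the new unit tests below
(the memories in1, in2 and the array out are illustrative):

    // Ask for output memory consistent with the request type.
    mkldnn_output_t out_mem = CreateMKLDNNMem(out.arr, pd, kWriteTo);
    // Compute into the returned memory; for kWriteTo this is either the
    // array's own memory (Noop) or a temp buffer (CopyBack).
    op::MKLDNNSum(*in1, *in2, *out_mem.second);
    // Copy back from the temp buffer if one was used.
    CommitOutput(out.arr, out_mem);
    MKLDNNStream::Get()->Submit();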

diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 94d3d90..e90fb63 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -482,7 +482,7 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
   if (mem->get_primitive_desc() == desc
       || (desc1.data.format == GetDefaultFormat(desc1)
         && desc2.data.format == GetDefaultFormat(desc2))) {
-    return GetMKLDNNExact(ptr_->mkl_mem_->GetRaw(), desc);
+    return GetMKLDNNExact(mem, desc);
   } else {
     return nullptr;
   }
@@ -638,7 +638,6 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {
 
   CHECK(mem.get_primitive_desc().get_size() == shape().Size() * GetTypeSize(dtype_))
       << "The size of NDArray doesn't match the requested MKLDNN memory desc";
-  MKLDNNStream *stream = MKLDNNStream::Get();
   // If this array uses MKLDNN layout, we have to make sure it's not a view.
   // Otherwise, we'll have to change the layout inside the array.
 
@@ -646,74 +645,7 @@ void NDArray::CopyFrom(const mkldnn::memory &mem) {
     ptr_->Reorder2Default();
 
   const mkldnn::memory *this_mem = GetMKLDNNData();
-  mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc();
-  mkldnn::memory::desc from_desc = from_pd.desc();
-  mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc();
-  mkldnn::memory::desc this_desc = this_pd.desc();
-  mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc);
-  mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc);
-  if (IsView()) {
-    // Sliced array must use the default layout.
-    CHECK_EQ(GetDefaultFormat(this_desc), this_desc.data.format);
-  }
-  // It's possible that the memory and the NDArray don't have the same shape.
-  if (!same_shape(this_desc, from_desc)
-      // If the source memory uses the default layout, we can reshape directly.
-      && from_def_format == from_desc.data.format) {
-    // In this case, we can simply create a new MKLDNN memory for the required
-    // shape.
-    mkldnn::memory::dims dims(this_desc.data.dims,
-                              this_desc.data.dims + this_desc.data.ndims);
-    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
-    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
-    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
-    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
-    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
-    stream->RegisterMem(tmp_mem);
-    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
-  } else if (!same_shape(this_desc, from_desc)) {
-    // In this case, the source memory stores data in a customized layout. We
-    // need to reorganize the data in memory before we can reshape.
-    mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format);
-    mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd);
-    stream->RegisterPrim(mkldnn::reorder(mem, *def_mem));
-    // Now we can reshape it
-    mkldnn::memory::dims dims(this_desc.data.dims,
-                              this_desc.data.dims + this_desc.data.ndims);
-    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
-    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
-    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
-    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
-    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle()));
-    stream->RegisterMem(tmp_mem);
-    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
-  } else if (from_pd == this_pd) {
-    // If the layout is the same, we can just copy data.
-    stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
-  } else {
-    // If both are not using the default layouts. There isn't much we can do,
-    // other than reorder data layout directly.
-    if (this_def_format != this_desc.data.format
-        && from_def_format != from_desc.data.format) {
-      stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
-    } else if (this_def_format == this_desc.data.format) {
-      // If the dest mem uses the default memory layout, we can simply use
-      // the default format of the source memory to improve perf of reorder.
-      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd,
-                                                           from_def_format);
-      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle()));
-      stream->RegisterMem(tmp_mem);
-      stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem));
-    } else {
-      // If the src mem uses the default memory layout, we can use
-      // the default format of the source memory to improve perf.
-      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd,
-                                                           this_def_format);
-      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
-      stream->RegisterMem(tmp_mem);
-      stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
-    }
-  }
+  MKLDNNCopy(mem, this_mem);
 }
 
 mkldnn::memory *NDArray::CreateMKLDNNData(const mkldnn::memory::primitive_desc &desc) {
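
With the copy logic extracted into MKLDNNCopy (see mkldnn_base.cc below),
NDArray::CopyFrom reduces to resolving the destination memory and delegating.
A minimal usage sketch, assuming a source memory src_mem of matching shape,
as in the CopyFrom test below:

    NDArray dst(shape, Context());
    // Queues reorder primitives on the MKLDNNStream; shape and layout
    // differences are reconciled inside MKLDNNCopy.
    dst.CopyFrom(*src_mem);
    MKLDNNStream::Get()->Submit();
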
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index 6a7c58f..c6e7f9b 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -318,6 +318,7 @@ enum OutDataOp {
 };
 
 typedef std::pair<OutDataOp, mkldnn::memory *> mkldnn_output_t;
+void MKLDNNCopy(const mkldnn::memory &mem, const mkldnn::memory* this_mem);
 
 /*
  * These two functions try to create MKLDNN memory in an NDArray based on `req'.
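
The newly exported MKLDNNCopy copies mem into this_mem, queuing whatever reorder
primitives are needed: if the shapes differ it reshapes through the default
layout, and if only the layouts differ it reorders directly. A hedged sketch of
standalone use, assuming a source memory src and a target primitive_desc pd are
already set up:

    // Allocate a destination from the temp pool and queue the copy.
    mkldnn::memory *dst = TmpMemMgr::Get()->Alloc(pd);
    MKLDNNCopy(*src, dst);
    MKLDNNStream::Get()->Submit();
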
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index b182aa0..858f8e3 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -77,6 +77,75 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
   }
 }
 
+void MKLDNNCopy(const mkldnn::memory &mem, const mkldnn::memory* this_mem) {
+  MKLDNNStream *stream = MKLDNNStream::Get();
+
+  mkldnn::memory::primitive_desc from_pd = mem.get_primitive_desc();
+  mkldnn::memory::desc from_desc = from_pd.desc();
+  mkldnn::memory::primitive_desc this_pd = this_mem->get_primitive_desc();
+  mkldnn::memory::desc this_desc = this_pd.desc();
+  mkldnn_memory_format_t from_def_format = GetDefaultFormat(from_desc);
+  mkldnn_memory_format_t this_def_format = GetDefaultFormat(this_desc);
+  // It's possible that the memory and the NDArray don't have the same shape.
+  if (!same_shape(this_desc, from_desc)
+      // If the source memory uses the default layout, we can reshape directly.
+      && from_def_format == from_desc.data.format) {
+    // In this case, we can simply create a new MKLDNN memory for the required
+    // shape.
+    mkldnn::memory::dims dims(this_desc.data.dims,
+                              this_desc.data.dims + this_desc.data.ndims);
+    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
+    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
+    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
+    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
+    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
+    stream->RegisterMem(tmp_mem);
+    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
+  } else if (!same_shape(this_desc, from_desc)) {
+    // In this case, the source memory stores data in a customized layout. We
+    // need to reorganize the data in memory before we can reshape.
+    mkldnn::memory::primitive_desc def_pd = GetPrimitiveDesc(from_pd, from_def_format);
+    mkldnn::memory *def_mem = TmpMemMgr::Get()->Alloc(def_pd);
+    stream->RegisterPrim(mkldnn::reorder(mem, *def_mem));
+    // Now we can reshape it
+    mkldnn::memory::dims dims(this_desc.data.dims,
+                              this_desc.data.dims + this_desc.data.ndims);
+    auto this_dtype = static_cast<mkldnn::memory::data_type>(this_desc.data.data_type);
+    auto this_format = static_cast<mkldnn::memory::format>(GetDefaultFormat(this_desc));
+    mkldnn::memory::desc data_md(dims, this_dtype, this_format);
+    mkldnn::memory::primitive_desc pd(data_md, from_pd.get_engine());
+    mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, def_mem->get_data_handle()));
+    stream->RegisterMem(tmp_mem);
+    stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
+  } else if (from_pd == this_pd) {
+    // If the layout is the same, we can just copy data.
+    stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
+  } else {
+    // If neither memory uses the default layout, there isn't much we can
+    // do other than reorder the data layout directly.
+    if (this_def_format != this_desc.data.format
+        && from_def_format != from_desc.data.format) {
+      stream->RegisterPrim(mkldnn::reorder(mem, *this_mem));
+    } else if (this_def_format == this_desc.data.format) {
+      // If the dest mem uses the default memory layout, we can simply use
+      // the default format of the source memory to improve perf of reorder.
+      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(from_pd,
+                                                           from_def_format);
+      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, this_mem->get_data_handle()));
+      stream->RegisterMem(tmp_mem);
+      stream->RegisterPrim(mkldnn::reorder(mem, *tmp_mem));
+    } else {
+      // If the src mem uses the default memory layout, we can use
+      // the default format of the source memory to improve perf.
+      mkldnn::memory::primitive_desc pd = GetPrimitiveDesc(this_pd,
+                                                           this_def_format);
+      mkldnn_mem_ptr tmp_mem(new mkldnn::memory(pd, mem.get_data_handle()));
+      stream->RegisterMem(tmp_mem);
+      stream->RegisterPrim(mkldnn::reorder(*tmp_mem, *this_mem));
+    }
+  }
+}
+
 bool CanWriteTo(const NDArray &out_arr,
                 const NDArray &in_arr,
                 const mkldnn::memory::primitive_desc &desc) {
@@ -94,22 +163,25 @@ mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
   if (kAddTo == req) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::AddBack, tmp);
-  } else if (req == kWriteInplace && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) {
+  } else if (kWriteInplace == req && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) {
     mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
     // mem is nullptr if out_arr is view and desc is MKLDNN format.
     // need to Reorder2Default before calling CreateMKLDNNMem
     CHECK(mem != nullptr);
     return mkldnn_output_t(OutDataOp::Noop, mem);
-  } else if (req == kWriteInplace) {
-    auto tmp = TmpMemMgr::Get()->Alloc(desc);
-    return mkldnn_output_t(OutDataOp::CopyBack, tmp);
-  }
-  mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
-  if (nullptr == mem) {
+  } else if (kWriteInplace == req) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::CopyBack, tmp);
+  } else if (kWriteTo == req) {
+    mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
+    if (nullptr == mem) {
+      auto tmp = TmpMemMgr::Get()->Alloc(desc);
+      return mkldnn_output_t(OutDataOp::CopyBack, tmp);
+    }
+    return mkldnn_output_t(OutDataOp::Noop, mem);
   }
-  return mkldnn_output_t(OutDataOp::Noop, mem);
+  auto tmp = TmpMemMgr::Get()->Alloc(desc);
+  return mkldnn_output_t(OutDataOp::Noop, tmp);
 }
 
 mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr,
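
Summarizing the rewritten dispatch, each request type now maps to an explicit
branch (the trailing fall-through covers kNullOp):

    // kAddTo        -> {AddBack,  temp buffer}  accumulated at commit
    // kWriteInplace -> {Noop,     array's mem}  when CanWriteTo holds
    // kWriteInplace -> {CopyBack, temp buffer}  otherwise
    // kWriteTo      -> {Noop,     array's mem}, or {CopyBack, temp}
    //                  when CreateMKLDNNData cannot satisfy the desc
    // kNullOp       -> {Noop,     temp buffer}  result is discarded
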
@@ -141,13 +213,16 @@ void CommitOutput(const NDArray &arr, const mkldnn_output_t &res) {
   if (res.first == CopyBack) {
     const_cast<NDArray &>(arr).CopyFrom(*res.second);
   } else if (res.first == AddBack) {
+    auto res_memory = res.second;
+    auto target_pd = arr.GetMKLDNNData()->get_primitive_desc();
     auto mem = arr.GetMKLDNNData(res.second->get_primitive_desc());
-    CHECK(mem != nullptr);
-    // We have to allocate new memory for the sum result.
-    auto sum_res = TmpMemMgr::Get()->Alloc(
-        res.second->get_primitive_desc());
-    op::MKLDNNSum(*res.second, *mem, *sum_res);
-    const_cast<NDArray &>(arr).CopyFrom(*sum_res);
+    if (mem == nullptr) {
+      auto tmp_memory = TmpMemMgr::Get()->Alloc(target_pd);
+      MKLDNNCopy(*res_memory, tmp_memory);
+      res_memory = tmp_memory;
+      mem = arr.GetMKLDNNData();
+    }
+    op::MKLDNNSum(*mem, *res_memory, *mem);
   }
 }
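
The new AddBack path accumulates in place: if the array cannot hand out memory
matching the temp result's primitive_desc, the result is first copied into a
buffer with the array's own layout via MKLDNNCopy, and MKLDNNSum then writes the
sum back into the array's memory (the output aliases the first input). A sketch
of the kAddTo round trip, mirroring the test below:

    auto out_mem_t = CreateMKLDNNMem(out.arr, pd, kAddTo);
    op::MKLDNNSum(*in1, *in2, *out_mem_t.second);  // sum into a temp
    CommitOutput(out.arr, out_mem_t);              // out += temp
    MKLDNNStream::Get()->Submit();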
 
diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc
index c51e108..dfb0e25 100644
--- a/src/operator/nn/mkldnn/mkldnn_sum.cc
+++ b/src/operator/nn/mkldnn/mkldnn_sum.cc
@@ -38,10 +38,22 @@ void MKLDNNSum(const mkldnn::memory &arr1, const mkldnn::memory &arr2,
   std::vector<mkldnn::primitive::at> inputs;
   input_pds[0] = arr1.get_primitive_desc();
   input_pds[1] = arr2.get_primitive_desc();
-  CHECK(input_pds[0] == input_pds[1]);
-  inputs.push_back(arr1);
-  inputs.push_back(arr2);
-  // TODO(zhengda) I need to reorder memory here.
+  CHECK(input_pds[0] == input_pds[1]);
+  const mkldnn::memory *in_mem1 = &arr1;
+  const mkldnn::memory *in_mem2 = &arr2;
+  auto output_pd = out.get_primitive_desc();
+  if (input_pds[0] != output_pd) {
+    auto tmp_memory1 = TmpMemMgr::Get()->Alloc(output_pd);
+    auto tmp_memory2 = TmpMemMgr::Get()->Alloc(output_pd);
+    mxnet::MKLDNNCopy(arr1, tmp_memory1);
+    mxnet::MKLDNNCopy(arr2, tmp_memory2);
+    input_pds[0] = tmp_memory1->get_primitive_desc();
+    input_pds[1] = tmp_memory2->get_primitive_desc();
+    in_mem1 = tmp_memory1;
+    in_mem2 = tmp_memory2;
+  }
+  inputs.push_back(*in_mem1);
+  inputs.push_back(*in_mem2);
   mkldnn::sum::primitive_desc sum_pd(scales, input_pds);
   MKLDNNStream::Get()->RegisterPrim(mkldnn::sum(sum_pd, inputs, out));
 }
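
MKLDNNSum no longer requires the inputs to share the output's primitive_desc:
mismatched inputs are copied into temporaries with the output's layout before
sum_pd is built, which is what lets CommitOutput call it on arbitrary output
layouts. A hedged sketch, assuming a_mem and b_mem may be in a blocked MKLDNN
layout while out_mem uses the default one:

    // Inputs are reordered internally to out_mem's layout if needed.
    op::MKLDNNSum(*a_mem, *b_mem, *out_mem);
    MKLDNNStream::Get()->Submit();
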
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index 6554351..e593d00 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -574,6 +574,7 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
       continue;
 
     // Type 2, 3.
+
     arr = NDArray(shape, Context());
     desc = "MKLDNN NDArray";
     if (shape.ndim() != pd.desc().data.ndims) {
@@ -688,6 +689,15 @@ void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) {
   printf(")\n");
 }
 
+void VerifyAddRequest(const std::vector<NDArray*> &in_arrs,
+                      const std::vector<NDArray*> &original_outputs,
+                      const std::vector<NDArray*> &new_outputs,
+                      VerifyFunc verify_fn) {
+  NDArray tmp = new_outputs[0]->Reorder2Default() - original_outputs[0]->Reorder2Default();
+  tmp.WaitToRead();
+  verify_fn(in_arrs, {&tmp});
+}
+
 TEST(MKLDNN_NDArray, CopyFrom) {
   TestArrayShapes tas = GetTestArrayShapes();
   std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
@@ -803,25 +813,34 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
   for (int i = 0; i < in_arrs.size(); i++) {
     auto in_arr = in_arrs[i];
     auto in_arr2 = in_arrs2[i];
-    std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds);
-    if (!SupportMKLDNN(in_arr.arr) || !in_arr.arr.IsMKLDNNData() || in_arr.arr.IsView())
+    if (!SupportMKLDNN(in_arr.arr))
       continue;
-
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
+    std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds);
     for (auto out_arr : out_arrs) {
       auto in_mem1 = in_arr.arr.GetMKLDNNData();
       auto in_mem2 = in_arr2.arr.GetMKLDNNData();
-      auto out_mem = out_arr.arr.GetMKLDNNData(in_mem1->get_primitive_desc());
-
-      // TODO(alexzai) : remove this noop when by reordering in MKLDNNSum
-      if (out_mem == nullptr)
+      if (out_arr.arr.IsView())
         continue;
+      auto out_mem = out_arr.arr.GetMKLDNNData();
       PrintVerifyMsg(in_arr, in_arr);
       op::MKLDNNSum(*in_mem1, *in_mem2, *out_mem);
       MKLDNNStream::Get()->Submit();
       VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr});
     }
+  }
 
-    // in place
+  // in place
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
+    if (!SupportMKLDNN(in_arr.arr))
+      continue;
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
     auto input_mem = in_arr.arr.GetMKLDNNData();
     auto input_mem2 = in_arr2.arr.GetMKLDNNData();
     NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
@@ -834,4 +853,111 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
   }
 }
 
+TEST(MKLDNN_BASE, CreateMKLDNNMem) {
+  std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays();
+  std::vector<NDArrayAttrs> in_arrs2 = GetTestInputArrays(true);
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+  MKLDNNStream *stream = MKLDNNStream::Get();
+
+  // kWriteTo
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
+    if (!SupportMKLDNN(in_arr.arr))
+      continue;
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
+    std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds);
+    for (auto out_arr : out_arrs) {
+      auto in_mem = in_arr.arr.GetMKLDNNData();
+      auto in_mem2 = in_arr2.arr.GetMKLDNNData();
+      NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx());
+      orig_output.WaitToRead();
+      PrintVerifyMsg(in_arr, out_arr);
+      auto out_mem = out_arr.arr.GetMKLDNNData();
+      auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kWriteTo);
+      op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second);
+      CommitOutput(out_arr.arr, output_mem_t);
+      stream->Submit();
+      VerifySumResult({&in_arr.arr, &in_arr2.arr}, {&out_arr.arr});
+    }
+  }
+
+  // kWriteInPlace
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
+    if (!SupportMKLDNN(in_arr.arr))
+      continue;
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
+    auto input_mem = in_arr.arr.GetMKLDNNData();
+    auto input_mem2 = in_arr2.arr.GetMKLDNNData();
+    NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
+    orig_arr.arr.WaitToRead();
+    PrintVerifyMsg(orig_arr, in_arr);
+    InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc());
+    orig_arr.arr.CopyFrom(*input_mem);
+    auto output_mem_t = CreateMKLDNNMem(in_arr.arr,
+        input_mem->get_primitive_desc(), kWriteInplace, &in_arr.arr);
+    op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second);
+    CommitOutput(in_arr.arr, output_mem_t);
+    stream->Submit();
+    VerifySumResult({&orig_arr.arr, &in_arr2.arr}, {&in_arr.arr});
+  }
+
+  // kAddTo
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
+    if (!SupportMKLDNN(in_arr.arr))
+      continue;
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
+    std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds);
+    for (auto out_arr : out_arrs) {
+      auto in_mem = in_arr.arr.GetMKLDNNData();
+      auto in_mem2 = in_arr2.arr.GetMKLDNNData();
+      NDArray orig_output = out_arr.arr.Copy(out_arr.arr.ctx());
+      orig_output.WaitToRead();
+      PrintVerifyMsg(in_arr, out_arr);
+      auto out_mem = out_arr.arr.GetMKLDNNData();
+      auto output_mem_t = CreateMKLDNNMem(out_arr.arr, out_mem->get_primitive_desc(), kAddTo);
+      op::MKLDNNSum(*in_mem, *in_mem2, *output_mem_t.second);
+      CommitOutput(out_arr.arr, output_mem_t);
+      stream->Submit();
+      VerifyAddRequest(
+          {&in_arr.arr, &in_arr2.arr}, {&orig_output}, {&out_arr.arr}, VerifySumResult);
+    }
+  }
+
+  // kNullOp
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
+    if (!SupportMKLDNN(in_arr.arr))
+      continue;
+    if (in_arr.arr.IsMKLDNNData() && in_arr.arr.IsView()) {
+      continue;
+    }
+    auto input_mem = in_arr.arr.GetMKLDNNData();
+    auto input_mem2 = in_arr2.arr.GetMKLDNNData();
+    NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
+    orig_arr.arr.WaitToRead();
+    PrintVerifyMsg(orig_arr, in_arr);
+    InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc());
+    orig_arr.arr.CopyFrom(*input_mem);
+    auto output_mem_t = CreateMKLDNNMem(in_arr.arr, input_mem->get_primitive_desc(), kNullOp);
+    op::MKLDNNSum(*input_mem, *input_mem2, *output_mem_t.second);
+    CommitOutput(in_arr.arr, output_mem_t);
+    stream->Submit();
+    // original and input should be identical since kNullOp commits nothing
+    VerifyCopyResult({&orig_arr.arr}, {&in_arr.arr});
+  }
+}
+
 #endif
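
A closing note on the kAddTo verification above: since the commit computes
new_out = orig_out + (in1 + in2), VerifyAddRequest checks the accumulation
algebraically by asserting that new_out - orig_out itself verifies as the sum
of the inputs. A condensed sketch of that check, assuming default-layout
copies of both outputs:

    NDArray tmp = new_out.Reorder2Default() - orig_out.Reorder2Default();
    tmp.WaitToRead();
    VerifySumResult({&in1, &in2}, {&tmp});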