Posted to commits@mxnet.apache.org by an...@apache.org on 2018/06/13 21:32:41 UTC
[incubator-mxnet] 11/12: handle fallback correctly for write inplace when the array is MKLDNN. (#10651)
This is an automated email from the ASF dual-hosted git repository.
anirudh2290 pushed a commit to branch v1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
commit dfc847f59197a8c4ce1a65806ec3226183cee4f8
Author: Da Zheng <zh...@gmail.com>
AuthorDate: Tue May 15 15:44:23 2018 -0700
handle fallback correctly for write inplace when the array is MKLDNN. (#10651)
* Handle write inplace correctly for MKLDNN arrays.
* Add unit tests.
* Fix a bug in mkldnn copy.
* Fix a bug in ndarray copy.
* Verify results.
---
src/common/exec_utils.h | 15 +--
src/executor/attach_op_execs_pass.cc | 7 +-
src/imperative/imperative_utils.h | 10 +-
src/ndarray/ndarray.cc | 5 +-
src/operator/nn/mkldnn/mkldnn_copy.cc | 8 +-
tests/cpp/operator/mkldnn.cc | 192 +++++++++++++++++++++++++++++++++-
6 files changed, 217 insertions(+), 20 deletions(-)
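
For context on the change: with kWriteInplace the executor promises the
operator that its output aliases an input, so the operator may skip the
write entirely. The fallback path below can materialize a temporary
default-layout output buffer for an MKLDNN-layout array, which silently
breaks that aliasing, so the request has to be downgraded to kWriteTo.
A minimal sketch of that rule, not part of the patch; DowngradeInplaceRequests
and out_is_mkldnn are hypothetical names, while OpReqType and its values are
real MXNet types from include/mxnet/op_attr_types.h:

    #include <cstddef>
    #include <vector>

    // Subset of mxnet::OpReqType relevant here:
    //   kWriteTo      - the output buffer may simply be overwritten.
    //   kWriteInplace - the output aliases an input; the op may skip the copy.
    enum OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo };

    // Hypothetical restatement of the new check in SetupDefaultBlobsOut:
    // once a temporary default-layout buffer replaces an MKLDNN-layout
    // output, the output no longer aliases the input, so kWriteInplace must
    // become kWriteTo or the operator could skip a write it now has to do.
    void DowngradeInplaceRequests(const std::vector<bool> &out_is_mkldnn,
                                  std::vector<OpReqType> *req) {
      for (std::size_t i = 0; i < req->size(); ++i) {
        if ((*req)[i] == kWriteInplace && out_is_mkldnn[i])
          (*req)[i] = kWriteTo;
      }
    }
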
diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 29537d3..b07f7d8 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -76,8 +76,8 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
}
inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
- const std::vector<OpReqType> &req,
const std::vector<NDArray> *bufs,
+ std::vector<OpReqType> *req,
std::vector<TBlob> *blobs,
std::vector<NDArray> *temp_src,
std::vector<NDArray> *temp_dst) {
@@ -86,9 +86,12 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
auto& nd = src[i];
bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
- // If it's writeTo, we don't need to worry whether it contains valid data.
- if (req[i] == kWriteTo && is_default)
- const_cast<NDArray &>(nd).InvalidateMKLDNNData();
+ if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
+ // If it's write inplace and the output array doesn't use the default
+ // layout, we'll generate a temporary output array below, which means
+ // the input array and the output array are no longer the same array.
+ // We should change the request type.
+ req->at(i) = kWriteTo;
// We have to make sure it's default storage and default layout.
is_default = nd.IsDefaultData();
#endif
@@ -118,9 +121,9 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
*/
inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
const std::vector<NDArray> &ndoutputs,
- const std::vector<OpReqType> &req,
const std::vector<NDArray> *in_bufs,
const std::vector<NDArray> *out_bufs,
+ std::vector<OpReqType> *req,
std::vector<TBlob> *input_blobs,
std::vector<TBlob> *output_blobs,
std::vector<NDArray> *pre_temp_src,
@@ -133,7 +136,7 @@ inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
in_temp_idx_map);
// populate output blobs
- SetupDefaultBlobsOut(ndoutputs, req, out_bufs, output_blobs, post_temp_dst,
+ SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
post_temp_src);
// add mutable inputs to post temp list
for (const auto idx : mutate_idx) {
diff --git a/src/executor/attach_op_execs_pass.cc b/src/executor/attach_op_execs_pass.cc
index 3c8fb83..1709965 100644
--- a/src/executor/attach_op_execs_pass.cc
+++ b/src/executor/attach_op_execs_pass.cc
@@ -78,7 +78,8 @@ class StorageFallbackOpExecutor : public OpExecutor {
pre_temp_src_.clear(); pre_temp_dst_.clear();
post_temp_src_.clear(); post_temp_dst_.clear();
in_temp_idx_map_.clear();
- SetupDefaultBlobsInOut(in_array, out_array, req, &pre_temp_buf_, &post_temp_buf_,
+ tmp_req = req;
+ SetupDefaultBlobsInOut(in_array, out_array, &pre_temp_buf_, &post_temp_buf_, &req,
&in_data_, &out_data_,
&pre_temp_src_, &pre_temp_dst_,
&post_temp_src_, &post_temp_dst_,
@@ -89,8 +90,12 @@ class StorageFallbackOpExecutor : public OpExecutor {
// storage fallback after fcompute is completed
void PostFCompute(bool is_gpu) {
common::CastNonDefaultStorage(post_temp_src_, post_temp_dst_, op_ctx, is_gpu);
+ req = tmp_req;
}
+ // Temporarily saves the original output requirement for each output array,
+ // so PostFCompute can restore it after the fallback completes.
+ std::vector<OpReqType> tmp_req;
// default storage tensor blobs for fcompute
std::vector<TBlob> in_data_, out_data_;
// These are NDArray buffers for cast storage.
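
Since SetupDefaultBlobsInOut may now rewrite req, the executor snapshots the
original vector before setup and restores it once the fallback completes, so
the graph-level requirements stay stable across runs. A condensed sketch of
that save/restore pattern; FallbackExecutorSketch is a hypothetical name, and
the real code is the StorageFallbackOpExecutor change above:

    #include <vector>

    enum OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo };

    struct FallbackExecutorSketch {
      std::vector<OpReqType> req;      // requirements the graph executor sees
      std::vector<OpReqType> tmp_req;  // snapshot of the original values

      void PreFCompute() {
        tmp_req = req;  // save before SetupDefaultBlobsInOut(..., &req, ...)
        // ... setup may rewrite kWriteInplace entries to kWriteTo ...
      }
      void PostFCompute() {
        req = tmp_req;  // restore so the next invocation starts clean
      }
    };
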
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 86683f9..0956deb 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -369,8 +369,9 @@ inline void PushFCompute(const FCompute& fn,
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(outputs, req);
#endif
+ std::vector<OpReqType> tmp_req = req;
// setup blobs
- SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+ SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
&input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
&post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
// setup context
@@ -378,7 +379,7 @@ inline void PushFCompute(const FCompute& fn,
bool is_gpu = ctx.dev_mask() == gpu::kDevMask;
// pre-fcompute fallback, cast to default storage type
CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
- fn(attrs, opctx, input_blobs, req, output_blobs);
+ fn(attrs, opctx, input_blobs, tmp_req, output_blobs);
// post-fcompute fallback, cast to original storage type
CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
if (is_gpu) {
@@ -488,15 +489,16 @@ inline void PushOperator(const OpStatePtr& state,
#if MXNET_USE_MKLDNN == 1
InvalidateOutputs(outputs, req);
#endif
+ std::vector<OpReqType> tmp_req = req;
// populate input blobs and output blobs
- SetupDefaultBlobsInOut(inputs, outputs, req, nullptr, nullptr,
+ SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &tmp_req,
&input_blobs, &output_blobs, &pre_temp_src, &pre_temp_dst,
&post_temp_src, &post_temp_dst, &in_temp_idx_map, mutate_idx);
// setup contexts
bool is_gpu = rctx.get_ctx().dev_mask() == gpu::kDevMask;
// pre-fcompute fallback
CastNonDefaultStorage(pre_temp_src, pre_temp_dst, opctx, is_gpu);
- fcompute(state, opctx, input_blobs, req, output_blobs);
+ fcompute(state, opctx, input_blobs, tmp_req, output_blobs);
// post-fcompute fallback, cast to original storage type, if necessary
CastNonDefaultStorage(post_temp_src, post_temp_dst, opctx, is_gpu);
if (is_gpu && exec_type == ExecType::kSync) {
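
Note that the imperative path takes the complementary approach to the
executor change above: instead of saving and restoring a member, it copies
req into a local tmp_req, lets SetupDefaultBlobsInOut rewrite the copy, and
passes the copy to the FCompute call, so the caller's req is never mutated
and nothing needs restoring afterwards.
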
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index 0175c5c..6a8bc9d 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -1111,9 +1111,8 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
to_mem->get_primitive_desc().get_size());
memcpy(to_mem->get_data_handle(), from_mem->get_data_handle(), size);
} else {
- std::vector<mkldnn::primitive> net;
- net.push_back(mkldnn::reorder(*from_mem, *to_mem));
- mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
+ const_cast<NDArray &>(to).CopyFrom(*from_mem);
+ MKLDNNStream::Get()->Submit();
}
} else {
// In this case, one of the NDArray isn't supported by MKLDNN, we need
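
The rewritten else branch replaces a per-copy eager mkldnn::stream with
MXNet's batched MKLDNNStream, which the CopyFrom call relies on (hence the
explicit Submit()). A minimal sketch of the batched pattern under the
MKLDNN v0.x API, assuming the RegisterPrim/Submit interface declared in
mkldnn_base-inl.h:

    // Sketch: queue a reorder on the thread-local MKLDNN stream and run all
    // queued primitives at once, instead of constructing an eager
    // mkldnn::stream per operation as the removed code did.
    #include "../operator/nn/mkldnn/mkldnn_base-inl.h"

    void ReorderBatched(const mkldnn::memory &from, const mkldnn::memory &to) {
      mxnet::MKLDNNStream *stream = mxnet::MKLDNNStream::Get();
      stream->RegisterPrim(mkldnn::reorder(from, to));  // queued, not yet run
      stream->Submit();  // executes everything queued and waits for completion
    }
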
diff --git a/src/operator/nn/mkldnn/mkldnn_copy.cc b/src/operator/nn/mkldnn/mkldnn_copy.cc
index 71d540c..9596739 100644
--- a/src/operator/nn/mkldnn/mkldnn_copy.cc
+++ b/src/operator/nn/mkldnn/mkldnn_copy.cc
@@ -35,7 +35,13 @@ void MKLDNNCopy(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
const NDArray &in_data, const OpReqType &req,
const NDArray &out_data) {
TmpMemMgr::Get()->Init(ctx.requested[0]);
- auto in_mem = in_data.GetMKLDNNData();
+
+ // If the input data is a view of an MKLDNN array, we should create a new
+ // NDArray with reordered data.
+ NDArray data = in_data;
+ if (data.IsMKLDNNData() && data.IsView())
+ data = data.Reorder2Default();
+ auto in_mem = data.GetMKLDNNData();
if (req == kAddTo) {
TmpMemMgr::Get()->Init(ctx.requested[0]);
// We should try to force the output memory to have the same format
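
The view guard exists because a sliced NDArray shares its parent's MKLDNN
memory, whose internal layout need not line up with the slice's offset and
shape; Reorder2Default first materializes a default-layout copy that owns its
data. An illustration of the case the guard handles, reusing calls that
appear elsewhere in this patch (Slice, IsView, IsMKLDNNData, Reorder2Default);
base is assumed to be an NDArray that acquired an MKLDNN layout as the output
of an MKLDNN operator:

    // Illustration: a slice of an MKLDNN-layout array is a view, so its raw
    // MKLDNN memory cannot be handed to the copy primitive directly.
    NDArray view = base.Slice(1, base.shape()[0] - 1);  // shares base's memory
    if (view.IsMKLDNNData() && view.IsView())
      view = view.Reorder2Default();  // default layout, owns its own buffer
    const mkldnn::memory *mem = view.GetMKLDNNData();  // now safe to use
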
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index 385e501..a6dd8ad 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -26,6 +26,7 @@
#if MXNET_USE_MKLDNN == 1
#include "gtest/gtest.h"
+#include "mxnet/imperative.h"
#include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
using namespace mxnet;
@@ -90,12 +91,18 @@ static void InitArray(NDArray *arr) {
}
// Init arrays with the specified layout.
-static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd) {
+static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd,
+ bool is_rand = false) {
const TBlob &blob = arr->data();
mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
size_t size = blob.Size();
- for (size_t i = 0; i < size; i++)
- data[i] = i;
+ if (is_rand) {
+ for (size_t i = 0; i < size; i++)
+ data[i] = std::rand();
+ } else {
+ for (size_t i = 0; i < size; i++)
+ data[i] = i;
+ }
arr->MKLDNNDataReorderAsync(pd);
arr->WaitToRead();
}
@@ -199,7 +206,7 @@ static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, int
}
struct TestArrayShapes {
- std::vector<TShape> shapes;
+ std::vector<nnvm::TShape> shapes;
std::vector<mkldnn::memory::primitive_desc> pds;
};
@@ -232,7 +239,7 @@ static TestArrayShapes GetTestArrayShapes() {
{
// 4D
TShape s1(4);
- s1[0] = 1; s1[1] = 96; s1[2] = 54; s1[3] = 54;
+ s1[0] = 10; s1[1] = 96; s1[2] = 54; s1[3] = 54;
shapes.push_back(s1);
pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
@@ -325,4 +332,179 @@ TEST(MKLDNN_NDArray, GetDataReorder) {
}
}
+struct OpAttrs {
+ nnvm::NodeAttrs attrs;
+ std::vector<DispatchMode> dispatches;
+};
+
+OpAttrs GetCopyOp() {
+ OpAttrs attrs;
+ attrs.attrs.op = Op::Get("_copy");
+ attrs.dispatches.resize(2);
+ attrs.dispatches[0] = DispatchMode::kFCompute;
+ attrs.dispatches[1] = DispatchMode::kFComputeEx;
+ return attrs;
+}
+
+OpAttrs GetLeakyReluOp() {
+ OpAttrs attrs;
+ attrs.attrs.op = Op::Get("LeakyReLU");
+ attrs.dispatches.resize(1);
+ attrs.dispatches[0] = DispatchMode::kFCompute;
+ return attrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ * dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ * type of NDArray only exists for weight arrays. I don't think we should
+ * pass them to all operators.
+ * In inference mode, the MKLDNN memory in the weight array will be
+ * reordered to 5 dimensions.
+ * 4. Reshaped/sliced NDArray
+ * 5. Reshaped/sliced NDArray with MKLDNN layout (reshape/slice from Normal NDArray
+ * with MKLDNN layout)
+ * 6. Reshaped/sliced NDArray with MKLDNN layout whose MKLDNN memory may have
+ * different dimensions from the NDArray (result of MKLDNNDataReorderAsync).
+ * However, this type of NDArray only exists for weight arrays. I don't think
+ * we should pass them to all operators.
+ * In inference mode, the MKLDNN memory in the weight array will be
+ * reordered to 5 dimensions.
+ *
+ */
+std::vector<NDArray> GetTestInputArrays() {
+ TestArrayShapes tas = GetTestArrayShapes();
+ std::vector<nnvm::TShape> shapes = tas.shapes;
+ std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+ std::vector<NDArray> in_arrs;
+ for (auto shape : shapes) {
+ in_arrs.emplace_back(shape, Context());
+ InitArray(&in_arrs.back());
+ for (auto pd : pds) {
+ if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+ continue;
+
+ in_arrs.emplace_back(shape, Context());
+ InitMKLDNNArray(&in_arrs.back(), pd);
+
+ // Get a sliced version.
+ NDArray arr(shape, Context());
+ InitMKLDNNArray(&arr, pd);
+ arr = arr.Slice(1, arr.shape()[0] - 1);
+ in_arrs.emplace_back(arr);
+ }
+ }
+ return in_arrs;
+}
+
+/*
+ * We want to get a few types of NDArrays for testing:
+ * 1. Normal NDArray
+ * 2. Normal NDArray with MKLDNN layout (output from an MKLDNN operator)
+ * 3. Normal NDArray with MKLDNN layout whose MKLDNN memory may have different
+ * dimensions from the NDArray (result of MKLDNNDataReorderAsync). However, this
+ * type of NDArray only exists for weight arrays. I don't think we should
+ * pass them to all operators.
+ * In inference mode, the MKLDNN memory in the weight array will be
+ * reordered to 5 dimensions.
+ * 4. Reused NDArray (this is created by the MXNet executor). This type of
+ * NDArray can only be used as an output array.
+ */
+std::vector<NDArray> GetTestOutputArrays(const TShape &shape,
+ const std::vector<mkldnn::memory::primitive_desc> &pds) {
+ std::vector<NDArray> in_arrs;
+ in_arrs.emplace_back(shape, Context());
+ InitArray(&in_arrs.back());
+
+ // Get a reused version.
+ nnvm::TShape s(1);
+ s[0] = shape.Size();
+ NDArray arr(s, Context());
+ arr = arr.AsArray(shape, arr.dtype());
+ InitArray(&arr);
+ in_arrs.emplace_back(arr);
+
+ for (auto pd : pds) {
+ if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
+ continue;
+
+ in_arrs.emplace_back(shape, Context());
+ InitMKLDNNArray(&in_arrs.back(), pd, true);
+
+ // Get a reused version.
+ nnvm::TShape s(1);
+ s[0] = shape.Size();
+ arr = NDArray(s, Context());
+ arr = arr.AsArray(shape, arr.dtype());
+ InitMKLDNNArray(&arr, pd, true);
+ in_arrs.emplace_back(arr);
+ }
+ return in_arrs;
+}
+
+using VerifyFunc = std::function<void (const NDArray &in_arr, const NDArray &arr)>;
+
+void VerifyCopyResult(const NDArray &in_arr, const NDArray &arr) {
+ NDArray tmp1 = in_arr.Reorder2Default();
+ NDArray tmp2 = arr.Reorder2Default();
+ EXPECT_EQ(tmp1.shape().Size(), tmp2.shape().Size());
+ TBlob d1 = tmp1.data();
+ TBlob d2 = tmp2.data();
+ EXPECT_EQ(memcmp(d1.dptr_, d2.dptr_,
+ tmp1.shape().Size() * sizeof(mshadow::default_real_t)), 0);
+}
+
+void TestUnaryOp(const OpAttrs &attrs, VerifyFunc verify_fn) {
+ std::vector<NDArray*> inputs(1);
+ std::vector<NDArray*> outputs(1);
+ std::vector<OpReqType> req(1);
+ std::vector<DispatchMode> dispatches = attrs.dispatches;
+
+ TestArrayShapes tas = GetTestArrayShapes();
+ std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+ std::vector<NDArray> in_arrs = GetTestInputArrays();
+ for (auto in_arr : in_arrs) {
+ for (auto dispatch : dispatches) {
+ std::vector<NDArray> out_arrs = GetTestOutputArrays(in_arr.shape(), pds);
+ for (auto out_arr : out_arrs) {
+ req[0] = kWriteTo;
+ inputs[0] = &in_arr;
+ outputs[0] = &out_arr;
+ Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs,
+ outputs, req, dispatch, mxnet::OpStatePtr());
+ out_arr.WaitToRead();
+ verify_fn(in_arr, out_arr);
+ }
+ }
+ }
+
+ for (auto dispatch : dispatches) {
+ in_arrs = GetTestInputArrays();
+ for (auto arr : in_arrs) {
+ // If the array is a view, we shouldn't write data to it.
+ if (arr.IsView())
+ continue;
+
+ NDArray orig = arr.Copy(arr.ctx());
+ req[0] = kWriteInplace;
+ inputs[0] = &arr;
+ outputs[0] = &arr;
+ Imperative::Get()->InvokeOp(Context(), attrs.attrs, inputs, outputs, req,
+ dispatch, mxnet::OpStatePtr());
+ arr.WaitToRead();
+ verify_fn(orig, arr);
+ }
+ }
+}
+
+TEST(IMPERATIVE, UnaryOp) {
+ OpAttrs attrs = GetCopyOp();
+ TestUnaryOp(attrs, VerifyCopyResult);
+}
+
#endif
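
Once the tests/cpp binary is built, the new TEST(IMPERATIVE, UnaryOp) case
can be run on its own with gtest's standard --gtest_filter flag (for example
--gtest_filter=IMPERATIVE.UnaryOp); the exact binary name and path depend on
the local build setup.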