You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by an...@apache.org on 2018/06/13 21:32:37 UTC
[incubator-mxnet] 07/12: Fix a bug in getting MKLDNN memory (#10731)
This is an automated email from the ASF dual-hosted git repository.
anirudh2290 pushed a commit to branch v1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
commit 0111a36793df919ae490299a0edaaddedc9f1287
Author: Da Zheng <zh...@gmail.com>
AuthorDate: Thu May 3 10:27:01 2018 -0700
Fix a bug in getting MKLDNN memory (#10731)
* test inference multiple times.
* Fix a bug in GetMKLDNNData().
* Update comments.
* Handle all cases for GetMKLDNNDataReorder
* avoid unnecessary message.
* Add C++ unit test for NDArray.
* Fix a minor bug.
* Unit tests on GetMKLDNNDataReorder.
* Fix lint error.
* Add more test cases.
* add comments for the test code.
* Reorganize test code.
* Fix cpp tests.
* test.
* Add a new Jenkins compile task.
* Update jenkins.
* update jenkins.
* Fix a Jenkins.
* Fix jenkins.
* Fix jenkins.
* Fix CMake for MKLDNN.
* Fix jenkins.
* update jenkins.
* update CMake.
* Fix cmake.
* update CI.
* add comment.
* add comments.
* cmake builds mkldnn with -mtune=generic by default.
* adjust comments.
remove unnecessary tests.
---
CMakeLists.txt | 8 +-
Jenkinsfile | 13 +-
ci/docker/runtime_functions.sh | 3 +
src/ndarray/ndarray.cc | 48 ++++--
src/operator/nn/mkldnn/mkldnn_base-inl.h | 36 +++-
src/operator/nn/mkldnn/mkldnn_base.cc | 12 +-
tests/cpp/include/test_core_op.h | 10 +-
tests/cpp/operator/mkldnn.cc | 248 +++++++++++++++++++++++++++
tests/python/gpu/test_gluon_model_zoo_gpu.py | 19 +-
9 files changed, 355 insertions(+), 42 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 05d8021..ed96a6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -187,8 +187,12 @@ endif()
if(USE_MKL_IF_AVAILABLE)
if(USE_MKLDNN)
+ # We need to use generic architecture. Otherwise, MKLDNN compiled in one
+ # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
+ set(ARCH_OPT_FLAGS "-mtune=generic")
add_subdirectory(3rdparty/mkldnn)
include_directories(3rdparty/mkldnn/include)
+ add_definitions(-DMXNET_USE_MKLDNN=1)
list(APPEND mxnet_LINKER_LIBS mkldnn)
endif()
find_package(MKL)
@@ -197,10 +201,6 @@ if(USE_MKL_IF_AVAILABLE)
include_directories(${MKL_INCLUDE_DIR})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl)
- if(USE_MKLDNN)
- add_definitions(-DMXNET_USE_MKLDNN=1)
- endif()
-
add_definitions(-DUSE_MKL=1)
add_definitions(-DCUB_MKL=1)
list(APPEND mxnet_LINKER_LIBS ${MKL_LIBRARIES})
diff --git a/Jenkinsfile b/Jenkinsfile
index eb2160f..84116e4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -26,7 +26,7 @@ mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdpart
mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a'
// mxnet cmake libraries, in cmake builds we do not produce a libnvvm static library by default.
mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so'
-mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
+mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, build/3rdparty/openmp/runtime/src/libomp.so, build/3rdparty/mkldnn/src/libmkldnn.so.0'
mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 3rdparty/nnvm/lib/libnnvm.a'
// command to start a docker container
docker_run = 'tests/ci_build/ci_build.sh'
@@ -534,6 +534,17 @@ try {
}
}
},
+ 'Cpp: MKLDNN+GPU': {
+ node('mxnetlinux-gpu') {
+ ws('workspace/ut-cpp-mkldnn-gpu') {
+ timeout(time: max_time, unit: 'MINUTES') {
+ init_git()
+ unpack_lib('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib)
+ sh "ci/build.py --nvidiadocker --platform ubuntu_gpu /work/runtime_functions.sh unittest_ubuntu_gpu_cpp"
+ }
+ }
+ }
+ },
'R: CPU': {
node('mxnetlinux-cpu') {
ws('workspace/ut-r-cpu') {
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 4d0f846..8ba6fa3 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -323,6 +323,9 @@ build_ubuntu_gpu_cmake_mkldnn() {
/work/mxnet
ninja -v
+ # libmkldnn.so.0 is a link file. We need an actual binary file named libmkldnn.so.0.
+ cp 3rdparty/mkldnn/src/libmkldnn.so.0 3rdparty/mkldnn/src/libmkldnn.so.0.tmp
+ mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 3rdparty/mkldnn/src/libmkldnn.so.0
}
build_ubuntu_gpu_cmake() {
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index d175a13..4b45969 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -485,8 +485,8 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
}
const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
- const mkldnn::memory::primitive_desc &desc) const {
- if (desc.get_size() != shape().Size() * GetTypeSize(dtype_)) {
+ const mkldnn::memory::primitive_desc &new_pd) const {
+ if (new_pd.get_size() != shape().Size() * GetTypeSize(dtype_)) {
LOG(FATAL) << "The size of NDArray doesn't match the requested MKLDNN memory desc";
return nullptr;
}
@@ -495,24 +495,41 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
const mkldnn::memory *mem = GetMKLDNNData();
// If the memory descriptor matches, it's easy.
MKLDNNStream *stream = MKLDNNStream::Get();
- if (mem->get_primitive_desc() == desc) {
- return GetMKLDNNExact(mem, desc);
+ if (mem->get_primitive_desc() == new_pd) {
+ return GetMKLDNNExact(mem, new_pd);
}
- mkldnn::memory::primitive_desc _desc = desc;
+ mkldnn::memory::primitive_desc _pd = new_pd;
+ mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
+ mkldnn::memory::desc desc2 = _pd.desc();
// Now we need to determine if we should reorder the memory.
// If both use the default formats, we think we don't need to reorder.
- mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
- mkldnn::memory::desc desc2 = _desc.desc();
if (desc1.data.format == GetDefaultFormat(desc1) &&
desc2.data.format == GetDefaultFormat(desc2)) {
- mkldnn_mem_ptr ret(new mkldnn::memory(desc, mem->get_data_handle()));
+ mkldnn_mem_ptr ret(new mkldnn::memory(new_pd, mem->get_data_handle()));
stream->RegisterMem(ret);
return ret.get();
- } else {
- mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(desc);
+ } else if (same_shape(desc1, desc2)) {
+ // If they have the same shape, we can reorder data directly.
+ mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(new_pd);
stream->RegisterPrim(mkldnn::reorder(*mem, *ret));
return ret;
+ } else {
+ // If they have different shapes, we need to reshape the array first.
+ // Since this method will only be used inside an operator, we can call
+ // MKLDNNDataReshape to reshape an array.
+ TShape required_shape(desc2.data.ndims);
+ for (int i = 0; i < desc2.data.ndims; i++)
+ required_shape[i] = desc2.data.dims[i];
+ NDArray reshaped = MKLDNNDataReshape(required_shape);
+ const mkldnn::memory *ret = reshaped.GetMKLDNNData();
+ if (ret->get_primitive_desc() == new_pd) {
+ return GetMKLDNNExact(ret, new_pd);
+ } else {
+ mkldnn::memory *ret2 = TmpMemMgr::Get()->Alloc(new_pd);
+ stream->RegisterPrim(mkldnn::reorder(*ret, *ret2));
+ return ret2;
+ }
}
}
@@ -559,10 +576,15 @@ void NDArray::MKLDNNDataReorderAsync(const mkldnn::memory::primitive_desc &desc)
const mkldnn::memory *NDArray::GetMKLDNNData() const {
CHECK(storage_type() == kDefaultStorage);
- // If this array uses MKLDNN layout, we have to make sure it's not a view.
- // Otherwise, we'll have to change the layout inside the array.
- if (IsMKLDNNData())
+ if (IsMKLDNNData()) {
+ // If this array uses MKLDNN layout, we have to make sure it's not a view.
+ // Otherwise, we'll have to change the layout inside the array.
CHECK(!IsView());
+ MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
+ // If this array uses MKLDNN format, we should return now. Otherwise,
+ // SetMKLMem may mess up mkl_mem_.
+ return ptr_->mkl_mem_->GetRaw();
+ }
ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
if (IsView()) {
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index 16e5605..48a0298 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -273,12 +273,11 @@ class MKLDNNStream {
std::vector<std::shared_ptr<const mkldnn::memory> > mem_holder;
public:
- static MKLDNNStream *Get() {
- static thread_local MKLDNNStream stream;
- return &stream;
- }
+ static MKLDNNStream *Get();
- void RegisterPrim(const mkldnn::primitive &prim) { net.push_back(prim); }
+ void RegisterPrim(const mkldnn::primitive &prim) {
+ net.push_back(prim);
+ }
void RegisterMem(std::shared_ptr<const mkldnn::memory> mem) {
mem_holder.push_back(mem);
@@ -288,10 +287,21 @@ class MKLDNNStream {
return !net.empty();
}
- void Submit() {
- if (!net.empty())
+ /*
+ * After submitting mkldnn operations for execution, we need to
+ * clean up memory held by the stream. However, sometimes users
+ * might want to separate mkldnn execution and memory cleanup.
+ */
+ void Submit(bool cleanup = true) {
+ if (!net.empty()) {
mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
- net.clear();
+ net.clear();
+ }
+ if (cleanup)
+ Cleanup();
+ }
+
+ void Cleanup() {
mem_holder.clear();
TmpMemMgr::Get()->Reset();
}
@@ -349,6 +359,16 @@ inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims)
return true;
}
+inline bool same_shape(const mkldnn::memory::desc &desc1,
+ const mkldnn::memory::desc &desc2) {
+ if (desc1.data.ndims != desc2.data.ndims)
+ return false;
+ for (int i = 0; i < desc1.data.ndims; i++)
+ if (desc1.data.dims[i] != desc2.data.dims[i])
+ return false;
+ return true;
+}
+
inline bool same_shape(const TShape &shape, int dtype,
const mkldnn::memory::desc &desc) {
return same_shape(shape, desc.data.dims, desc.data.ndims)
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index 8792cbc..9083216 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -25,6 +25,11 @@
namespace mxnet {
+MKLDNNStream *MKLDNNStream::Get() {
+ static thread_local MKLDNNStream stream;
+ return &stream;
+}
+
void *AlignMem(void *mem, size_t size, size_t alignment, size_t *space) {
if (size > *space)
return nullptr;
@@ -57,8 +62,11 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
this->curr_mem = static_cast<char *>(mem) + pd.get_size();
return ret.get();
} else {
- LOG(WARNING) << "Allocate " << pd.get_size()
- << " bytes with malloc directly";
+ // If curr_mem has been initialized and we still reach here, it means
+ // the currently allocated memory isn't enough.
+ if (this->curr_mem)
+ LOG(WARNING) << "Allocate " << pd.get_size()
+ << " bytes with malloc directly";
mkldnn_mem_ptr ret(new mkldnn::memory(pd));
MKLDNNStream::Get()->RegisterMem(ret);
return ret.get();
diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h
index 7dc05fd..c39373b 100644
--- a/tests/cpp/include/test_core_op.h
+++ b/tests/cpp/include/test_core_op.h
@@ -410,7 +410,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
input_types.resize(bwd_node_ptr->inputs.size(), -1);
- for (size_t i = 0; i < num_inputs; ++i) {
+ for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const int dtype = index2array[map_key]->dtype();
@@ -421,7 +421,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
output_types.emplace_back(dtype);
}
} else {
- for (size_t x = 0; x < num_inputs; ++x) {
+ for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
for (const auto &fwd_inp : backward_for_op->inputs()) {
@@ -431,10 +431,10 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
}
} else {
CHECK(false); // above always true?
- for (size_t x = 0; x < num_inputs; ++x) {
+ for (int x = 0; x < num_inputs; ++x) {
input_types.emplace_back(default_dtype());
}
- for (size_t x = 0; x < inferred_num_outputs; ++x) {
+ for (int x = 0; x < inferred_num_outputs; ++x) {
output_types.emplace_back(default_dtype());
}
}
@@ -455,7 +455,7 @@ class CoreOpExecutor : public test::op::OperatorDataInitializer<DType>
if (bwd_node_ptr) {
input_shapes.clear();
CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
- for (size_t i = 0; i < num_inputs; ++i) {
+ for (int i = 0; i < num_inputs; ++i) {
const int map_key = bwd_node_ptr->inputs[i].index;
CHECK(index2array.find(map_key) != index2array.end());
const nnvm::TShape &shp = index2array[map_key]->shape();
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index c3e03df..385e501 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -28,6 +28,8 @@
#include "gtest/gtest.h"
#include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
+using namespace mxnet;
+
#if __GNUC__ >= 5
bool test_mem_align(void *mem, size_t size, size_t alignment, size_t space) {
void *ret1, *ret2;
@@ -77,4 +79,250 @@ TEST(MKLDNN_UTIL_FUNC, AlignMem) {
LOG(INFO) << "Skipped for GCC " << __GNUC__ << "." << __GNUC_MINOR__;
#endif
}
+
+// Init arrays with the default layout.
+static void InitArray(NDArray *arr) {
+ const TBlob &blob = arr->data();
+ mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
+ size_t size = blob.Size();
+ for (size_t i = 0; i < size; i++)
+ data[i] = i;
+}
+
+// Init arrays with the specified layout.
+static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc &pd) {
+ const TBlob &blob = arr->data();
+ mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
+ size_t size = blob.Size();
+ for (size_t i = 0; i < size; i++)
+ data[i] = i;
+ arr->MKLDNNDataReorderAsync(pd);
+ arr->WaitToRead();
+}
+
+static void VerifyDefMem(const mkldnn::memory &mem) {
+ mkldnn::memory::primitive_desc pd = mem.get_primitive_desc();
+ mshadow::default_real_t *data
+ = static_cast<mshadow::default_real_t *>(mem.get_data_handle());
+ size_t size = pd.get_size() / sizeof(mshadow::default_real_t);
+ size_t num_same = 0;
+ for (size_t i = 0; i < size; i++)
+ num_same += data[i] == static_cast<mshadow::default_real_t>(i);
+ EXPECT_EQ(num_same, size);
+}
+
+static void VerifyMem(const mkldnn::memory &mem) {
+ mkldnn::memory::primitive_desc pd = mem.get_primitive_desc();
+
+ if (pd.desc().data.format == GetDefaultFormat(pd.desc())) {
+ VerifyDefMem(mem);
+ } else {
+ mkldnn::memory::dims dims(pd.desc().data.ndims);
+ for (size_t i = 0; i < dims.size(); i++)
+ dims[i] = pd.desc().data.dims[i];
+ mkldnn::memory::desc desc{dims,
+ static_cast<mkldnn::memory::data_type>(pd.desc().data.data_type),
+ static_cast<mkldnn::memory::format>(GetDefaultFormat(pd.desc()))};
+ mkldnn::memory::primitive_desc new_pd(desc, CpuEngine::Get()->get_engine());
+ mkldnn::memory new_mem(new_pd);
+
+ std::vector<mkldnn::primitive> net;
+ net.push_back(mkldnn::reorder(mem, new_mem));
+ mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
+ VerifyDefMem(new_mem);
+ }
+}
+
+static mkldnn::memory::primitive_desc GetMemPD(const TShape s, int dtype,
+ mkldnn::memory::format format) {
+ mkldnn::memory::dims dims(s.ndim());
+ for (size_t i = 0; i < dims.size(); i++)
+ dims[i] = s[i];
+ mkldnn::memory::desc desc{dims, get_mkldnn_type(dtype), format};
+ return mkldnn::memory::primitive_desc(desc, CpuEngine::Get()->get_engine());
+}
+
+// This function gets special MKLDNN formats without knowing the specific
+// hardware configuration. Admittedly, it may miss formats that are
+// specific to certain array shapes. It covers at least one special format
+// for each of the formats: nchw, oihw, goihw.
+// To test the logic of the code in NDArray, these formats should be enough.
+static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, int dtype) {
+ if (num_dims == 4) {
+ mkldnn::memory::dims data_dims{1, 3, 224, 224};
+ mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims weight_dims{96, 3, 11, 11};
+ mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims output_dims{1, 96, 54, 54};
+ mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims strides{4, 4};
+ mkldnn::memory::dims padding{0, 0};
+
+ mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
+ mkldnn::algorithm::convolution_direct,
+ data_md, weight_md, out_md, strides,
+ padding, padding, mkldnn::padding_kind::zero);
+ mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine());
+ std::vector<mkldnn::memory::format> ret(2);
+ ret[0] = static_cast<mkldnn::memory::format>(pd.dst_primitive_desc().desc().data.format);
+ ret[1] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
+ printf("format: %d, %d\n", ret[0], ret[1]);
+ return ret;
+ } else if (num_dims == 5) {
+ mkldnn::memory::dims data_dims{1, 32, 112, 112};
+ mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims weight_dims{32, 1, 1, 3, 3};
+ mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims output_dims{1, 32, 112, 112};
+ mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype),
+ mkldnn::memory::format::any};
+ mkldnn::memory::dims strides{1, 1};
+ mkldnn::memory::dims padding{1, 1};
+
+ mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
+ mkldnn::algorithm::convolution_direct,
+ data_md, weight_md, out_md, strides,
+ padding, padding, mkldnn::padding_kind::zero);
+ mkldnn::convolution_forward::primitive_desc pd(desc, CpuEngine::Get()->get_engine());
+ std::vector<mkldnn::memory::format> ret(1);
+ ret[0] = static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
+ printf("format: %d\n", ret[0]);
+ return ret;
+ } else {
+ return std::vector<mkldnn::memory::format>();
+ }
+}
+
+struct TestArrayShapes {
+ std::vector<TShape> shapes;
+ std::vector<mkldnn::memory::primitive_desc> pds;
+};
+
+static TestArrayShapes GetTestArrayShapes() {
+ int dtype = mshadow::DataType<mshadow::default_real_t>::kFlag;
+ std::vector<TShape> shapes;
+ std::vector<mkldnn::memory::primitive_desc> pds;
+ {
+ // 1D
+ TShape s(1);
+ s[0] = 279936;
+ shapes.push_back(s);
+ pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x));
+ s[0] = 34848;
+ shapes.push_back(s);
+ pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x));
+ }
+ {
+ // 2D
+ TShape s(2);
+ s[0] = 96;
+ s[1] = 2916;
+ shapes.push_back(s);
+ pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc));
+ s[0] = 96;
+ s[1] = 363;
+ shapes.push_back(s);
+ pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc));
+ }
+ {
+ // 4D
+ TShape s1(4);
+ s1[0] = 1; s1[1] = 96; s1[2] = 54; s1[3] = 54;
+ shapes.push_back(s1);
+ pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
+
+ TShape s2(4);
+ s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11;
+ shapes.push_back(s2);
+ pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw));
+
+ std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(4, dtype);
+ pds.push_back(GetMemPD(s1, dtype, formats[0]));
+ pds.push_back(GetMemPD(s2, dtype, formats[1]));
+ }
+ {
+ // 5D
+ TShape s(5);
+ s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11;
+ shapes.push_back(s);
+ pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw));
+
+ std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(5, dtype);
+ pds.push_back(GetMemPD(s, dtype, formats[0]));
+ }
+
+ TestArrayShapes ret;
+ ret.shapes = shapes;
+ ret.pds = pds;
+ return ret;
+}
+
+TEST(MKLDNN_NDArray, GetDataReorder) {
+ TestArrayShapes tas = GetTestArrayShapes();
+ std::vector<TShape> shapes = tas.shapes;
+ std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+
+ // Reorder from the default to any other layout.
+ for (auto s : shapes) {
+ NDArray arr(s, Context());
+ InitArray(&arr);
+ for (auto pd : pds) {
+ if (s.Size() == pd.get_size() / sizeof(mshadow::default_real_t)) {
+ const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(pd);
+ printf("reorder from (");
+ for (size_t i = 0; i < s.ndim(); i++)
+ printf("%ld, ", s[i]);
+ printf(") to (");
+ for (int i = 0; i < pd.desc().data.ndims; i++)
+ printf("%d, ", pd.desc().data.dims[i]);
+ printf("), format: %d\n", pd.desc().data.format);
+ MKLDNNStream::Get()->Submit(false);
+ VerifyMem(*mem);
+ MKLDNNStream::Get()->Cleanup();
+ }
+ }
+ }
+
+ // Reorder from a special layout to another layout.
+ for (auto s : shapes) {
+ for (auto from_pd : pds) {
+ if (from_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) {
+ NDArray arr(s, Context());
+ // There is a possibility that the dimensions of an NDArray don't match
+ // those of the MKLDNN memory inside.
+ printf("Init array (");
+ for (size_t i = 0; i < s.ndim(); i++)
+ printf("%ld, ", s[i]);
+ printf(") with MKLDNN memory (");
+ for (int i = 0; i < from_pd.desc().data.ndims; i++)
+ printf("%d, ", from_pd.desc().data.dims[i]);
+ printf("), format: %d\n", from_pd.desc().data.format);
+ InitMKLDNNArray(&arr, from_pd);
+ for (auto to_pd : pds) {
+ if (to_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) {
+ const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(to_pd);
+ printf("reorder from (");
+ for (size_t i = 0; i < s.ndim(); i++)
+ printf("%ld, ", s[i]);
+ printf("), format: %d to (",
+ arr.GetMKLDNNData()->get_primitive_desc().desc().data.format);
+ for (int i = 0; i < to_pd.desc().data.ndims; i++)
+ printf("%d, ", to_pd.desc().data.dims[i]);
+ printf("), format: %d\n", to_pd.desc().data.format);
+ MKLDNNStream::Get()->Submit(false);
+ VerifyMem(*mem);
+ MKLDNNStream::Get()->Cleanup();
+ }
+ }
+ }
+ }
+ }
+}
+
#endif
diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py b/tests/python/gpu/test_gluon_model_zoo_gpu.py
index 378a822..273ad3d 100644
--- a/tests/python/gpu/test_gluon_model_zoo_gpu.py
+++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py
@@ -81,15 +81,16 @@ def test_inference():
gpu_param = gpu_params.get(k)
gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))
- # Run inference.
- with autograd.record(train_mode=False):
- cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
- gpu_out = gpu_model(gpu_data)
- out = cpu_out.asnumpy()
- max_val = np.max(np.abs(out))
- gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
- eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val))
- assert_almost_equal(out / max_val, gpu_out.asnumpy() / max_val, rtol=1e-3, atol=1e-3)
+ for i in range(5):
+ # Run inference.
+ with autograd.record(train_mode=False):
+ cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
+ gpu_out = gpu_model(gpu_data)
+ out = cpu_out.asnumpy()
+ max_val = np.max(np.abs(out))
+ gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
+ eprint(model_name + ": CPU " + str(max_val) + ", GPU " + str(gpu_max_val))
+ assert_almost_equal(out / max_val, gpu_out.asnumpy() / max_val, rtol=1e-3, atol=1e-3)
def get_nn_model(name):
if "densenet" in name:
--
To stop receiving notification emails like this one, please contact
anirudh2290@apache.org.