Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/03/15 16:35:19 UTC

[incubator-mxnet] branch master updated: Change inner mxnet flags nomenclature for oneDNN library (#19944)

This is an automated email from the ASF dual-hosted git repository.

zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new aa92db0  Change inner mxnet flags nomenclature for oneDNN library (#19944)
aa92db0 is described below

commit aa92db02201799841bf52db5a0b8507719f04e61
Author: bartekkuncer <ba...@intel.com>
AuthorDate: Mon Mar 15 17:32:37 2021 +0100

    Change inner mxnet flags nomenclature for oneDNN library (#19944)
    
    This change includes:
    * changing MXNET_USE_MKLDNN flag name to MXNET_USE_ONEDNN
    * changing USE_MKLDNN flag name to USE_ONEDNN
    * changing 3rdparty/mkldnn folder name to 3rdparty/onednn
    * changing include/mkldnn folder name to include/onednn
    * changing MKLDNN occurrences in build and documentation files to ONEDNN
    * adding Bartosz Kuncer to contributors list
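
    For readers tracking the rename downstream, here is a minimal sketch (not part of this commit) of checking at runtime whether a given MXNet binary was built with the oneDNN backend. It assumes the `mxnet.runtime` feature API of MXNet 1.x/2.x; note that, as of this commit, the feature is still reported under the legacy name MKLDNN (see cd/utils/artifact_repository.py in the diff below), so the sketch accepts either name.

```python
# Minimal sketch: detect whether this MXNet build has the oneDNN backend
# enabled. Assumes mxnet.runtime.Features from MXNet 1.x/2.x; the feature is
# still reported as "MKLDNN" at this point, so both names are checked.
import mxnet.runtime

features = mxnet.runtime.Features()
enabled = any(name in features and features[name].enabled
              for name in ("ONEDNN", "MKLDNN"))
print("oneDNN backend enabled:", enabled)
```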
---
 .gitignore                                         |  4 +-
 .gitmodules                                        |  6 +--
 3rdparty/{mkldnn => onednn}                        |  0
 CMakeLists.txt                                     | 26 +++++------
 CONTRIBUTORS.md                                    |  1 +
 LICENSE                                            | 16 +++----
 NEWS.md                                            |  2 +-
 README.md                                          |  2 +-
 benchmark/opperf/README.md                         |  2 +-
 cd/README.md                                       |  8 ++--
 cd/mxnet_lib/Jenkins_pipeline.groovy               |  2 +-
 cd/python/pypi/pypi_package.sh                     |  8 ++--
 cd/utils/artifact_repository.md                    |  4 +-
 cd/utils/artifact_repository.py                    |  2 +-
 ci/build_windows.py                                |  8 ++--
 ci/docker/runtime_functions.sh                     | 52 +++++++++++-----------
 ci/jenkins/Jenkins_steps.groovy                    |  2 +-
 config/darwin.cmake                                |  2 +-
 config/distribution/darwin_cpu.cmake               |  2 +-
 config/distribution/darwin_native.cmake            |  2 +-
 config/distribution/linux_cpu.cmake                |  2 +-
 config/distribution/linux_cu100.cmake              |  2 +-
 config/distribution/linux_cu101.cmake              |  2 +-
 config/distribution/linux_cu102.cmake              |  2 +-
 config/distribution/linux_cu110.cmake              |  2 +-
 config/distribution/linux_cu112.cmake              |  2 +-
 config/distribution/linux_cu92.cmake               |  2 +-
 config/distribution/linux_native.cmake             |  2 +-
 config/linux.cmake                                 |  2 +-
 config/linux_gpu.cmake                             |  2 +-
 .../performance/backend/mkldnn/mkldnn_readme.md    | 52 +++++++++++-----------
 .../tutorials/performance/backend/profiler.md      |  6 +--
 .../src/_includes/get_started/cloud/cpu.md         |  2 +-
 .../src/_includes/get_started/cloud/gpu.md         |  2 +-
 .../cpp/docs/tutorials/multi_threaded_inference.md |  4 +-
 docs/static_site/src/pages/api/faq/cloud.md        |  2 +-
 docs/static_site/src/pages/api/faq/env_var.md      | 14 +++---
 .../src/pages/api/faq/large_tensor_support.md      |  4 +-
 docs/static_site/src/pages/api/faq/perf.md         |  4 +-
 .../src/pages/api/faq/tensor_inspector_tutorial.md |  2 +-
 example/README.md                                  |  2 +-
 example/multi_threaded_inference/Makefile          | 10 ++---
 include/mkldnn/dnnl.h                              |  1 -
 include/mkldnn/dnnl.hpp                            |  1 -
 include/mkldnn/dnnl_config.h                       |  1 -
 include/mkldnn/dnnl_debug.h                        |  1 -
 include/mkldnn/dnnl_ocl.h                          |  1 -
 include/mkldnn/dnnl_ocl.hpp                        |  1 -
 include/mkldnn/dnnl_sycl.h                         |  1 -
 include/mkldnn/dnnl_sycl.hpp                       |  1 -
 include/mkldnn/dnnl_sycl_types.h                   |  1 -
 include/mkldnn/dnnl_threadpool.h                   |  1 -
 include/mkldnn/dnnl_threadpool.hpp                 |  1 -
 include/mkldnn/dnnl_threadpool_iface.hpp           |  1 -
 include/mkldnn/dnnl_types.h                        |  1 -
 include/mkldnn/dnnl_version.h                      |  1 -
 include/mkldnn/mkldnn.h                            |  1 -
 include/mkldnn/mkldnn.hpp                          |  1 -
 include/mkldnn/mkldnn_config.h                     |  1 -
 include/mkldnn/mkldnn_debug.h                      |  1 -
 include/mkldnn/mkldnn_dnnl_mangling.h              |  1 -
 include/mkldnn/mkldnn_types.h                      |  1 -
 include/mkldnn/mkldnn_version.h                    |  1 -
 include/mkldnn/oneapi/dnnl/dnnl.h                  |  1 -
 include/mkldnn/oneapi/dnnl/dnnl.hpp                |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_debug.h            |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_ocl.h              |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_ocl.hpp            |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_sycl.h             |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_sycl.hpp           |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_sycl_types.h       |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_threadpool.h       |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_threadpool.hpp     |  1 -
 .../mkldnn/oneapi/dnnl/dnnl_threadpool_iface.hpp   |  1 -
 include/mkldnn/oneapi/dnnl/dnnl_types.h            |  1 -
 include/mxnet/base.h                               |  2 +-
 include/mxnet/libinfo.h                            |  4 +-
 include/mxnet/ndarray.h                            | 14 +++---
 include/onednn/dnnl.h                              |  1 +
 include/onednn/dnnl.hpp                            |  1 +
 include/onednn/dnnl_config.h                       |  1 +
 include/onednn/dnnl_debug.h                        |  1 +
 include/onednn/dnnl_ocl.h                          |  1 +
 include/onednn/dnnl_ocl.hpp                        |  1 +
 include/onednn/dnnl_sycl.h                         |  1 +
 include/onednn/dnnl_sycl.hpp                       |  1 +
 include/onednn/dnnl_sycl_types.h                   |  1 +
 include/onednn/dnnl_threadpool.h                   |  1 +
 include/onednn/dnnl_threadpool.hpp                 |  1 +
 include/onednn/dnnl_threadpool_iface.hpp           |  1 +
 include/onednn/dnnl_types.h                        |  1 +
 include/onednn/dnnl_version.h                      |  1 +
 include/onednn/mkldnn.h                            |  1 +
 include/onednn/mkldnn.hpp                          |  1 +
 include/onednn/mkldnn_config.h                     |  1 +
 include/onednn/mkldnn_debug.h                      |  1 +
 include/onednn/mkldnn_dnnl_mangling.h              |  1 +
 include/onednn/mkldnn_types.h                      |  1 +
 include/onednn/mkldnn_version.h                    |  1 +
 include/onednn/oneapi/dnnl/dnnl.h                  |  1 +
 include/onednn/oneapi/dnnl/dnnl.hpp                |  1 +
 include/onednn/oneapi/dnnl/dnnl_debug.h            |  1 +
 include/onednn/oneapi/dnnl/dnnl_ocl.h              |  1 +
 include/onednn/oneapi/dnnl/dnnl_ocl.hpp            |  1 +
 include/onednn/oneapi/dnnl/dnnl_sycl.h             |  1 +
 include/onednn/oneapi/dnnl/dnnl_sycl.hpp           |  1 +
 include/onednn/oneapi/dnnl/dnnl_sycl_types.h       |  1 +
 include/onednn/oneapi/dnnl/dnnl_threadpool.h       |  1 +
 include/onednn/oneapi/dnnl/dnnl_threadpool.hpp     |  1 +
 .../onednn/oneapi/dnnl/dnnl_threadpool_iface.hpp   |  1 +
 include/onednn/oneapi/dnnl/dnnl_types.h            |  1 +
 src/c_api/c_api.cc                                 |  8 ++--
 src/common/exec_utils.h                            |  8 ++--
 src/common/utils.h                                 |  4 +-
 src/imperative/attach_op_execs_pass.cc             |  2 +-
 src/imperative/imperative_utils.h                  |  8 ++--
 src/libinfo.cc                                     |  2 +-
 src/ndarray/ndarray.cc                             | 22 ++++-----
 src/operator/contrib/batch_norm_relu.cc            | 12 ++---
 src/operator/leaky_relu.cc                         | 16 +++----
 src/operator/nn/activation.cc                      | 18 ++++----
 src/operator/nn/batch_norm.cc                      | 12 ++---
 src/operator/nn/concat.cc                          | 50 ++++++++++-----------
 src/operator/nn/convolution.cc                     | 16 +++----
 src/operator/nn/deconvolution.cc                   | 10 ++---
 src/operator/nn/fully_connected.cc                 | 12 ++---
 src/operator/nn/log_softmax.cc                     |  8 ++--
 src/operator/nn/lrn.cc                             | 12 ++---
 src/operator/nn/mkldnn/mkldnn_act-inl.h            |  4 +-
 src/operator/nn/mkldnn/mkldnn_act.cc               |  2 +-
 src/operator/nn/mkldnn/mkldnn_base-inl.h           |  2 +-
 src/operator/nn/mkldnn/mkldnn_base.cc              |  4 +-
 src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h     |  4 +-
 src/operator/nn/mkldnn/mkldnn_concat-inl.h         |  4 +-
 src/operator/nn/mkldnn/mkldnn_concat.cc            |  4 +-
 src/operator/nn/mkldnn/mkldnn_convolution-inl.h    |  4 +-
 src/operator/nn/mkldnn/mkldnn_convolution.cc       |  4 +-
 src/operator/nn/mkldnn/mkldnn_copy.cc              |  2 +-
 src/operator/nn/mkldnn/mkldnn_deconvolution.cc     |  4 +-
 .../nn/mkldnn/mkldnn_fully_connected-inl.h         |  4 +-
 src/operator/nn/mkldnn/mkldnn_fully_connected.cc   |  4 +-
 src/operator/nn/mkldnn/mkldnn_log_softmax.cc       |  2 +-
 src/operator/nn/mkldnn/mkldnn_lrn-inl.h            |  4 +-
 src/operator/nn/mkldnn/mkldnn_ops-inl.h            |  4 +-
 src/operator/nn/mkldnn/mkldnn_pooling-inl.h        |  4 +-
 src/operator/nn/mkldnn/mkldnn_pooling.cc           |  4 +-
 src/operator/nn/mkldnn/mkldnn_reshape-inl.h        |  4 +-
 src/operator/nn/mkldnn/mkldnn_reshape.cc           |  2 +-
 src/operator/nn/mkldnn/mkldnn_rnn-inl.h            |  6 +--
 src/operator/nn/mkldnn/mkldnn_rnn.cc               |  4 +-
 src/operator/nn/mkldnn/mkldnn_slice-inl.h          |  4 +-
 src/operator/nn/mkldnn/mkldnn_slice.cc             |  4 +-
 src/operator/nn/mkldnn/mkldnn_softmax.cc           |  2 +-
 src/operator/nn/mkldnn/mkldnn_sum.cc               |  2 +-
 src/operator/nn/mkldnn/mkldnn_transpose.cc         |  2 +-
 src/operator/nn/pooling.cc                         | 32 ++++++-------
 src/operator/nn/softmax.cc                         |  8 ++--
 src/operator/operator_common.h                     |  6 +--
 src/operator/quantization/dequantize.cc            |  8 ++--
 .../quantization/mkldnn/mkldnn_dequantize-inl.h    |  4 +-
 .../quantization/mkldnn/mkldnn_quantize-inl.h      |  4 +-
 .../quantization/mkldnn/mkldnn_quantize_v2-inl.h   |  4 +-
 .../quantization/mkldnn/mkldnn_quantized_act.cc    |  4 +-
 .../mkldnn/mkldnn_quantized_batch_norm.cc          |  4 +-
 .../quantization/mkldnn/mkldnn_quantized_concat.cc |  4 +-
 .../quantization/mkldnn/mkldnn_quantized_conv.cc   |  4 +-
 .../mkldnn/mkldnn_quantized_elemwise_add.cc        |  4 +-
 .../mkldnn/mkldnn_quantized_flatten.cc             |  4 +-
 .../mkldnn/mkldnn_quantized_fully_connected.cc     |  4 +-
 .../quantization/mkldnn/mkldnn_quantized_ops-inl.h |  4 +-
 .../mkldnn/mkldnn_quantized_pooling.cc             |  4 +-
 .../quantization/mkldnn/mkldnn_requantize-inl.h    |  4 +-
 src/operator/quantization/quantize.cc              |  6 +--
 src/operator/quantization/quantize_v2.cc           |  8 ++--
 src/operator/quantization/quantized_activation.cc  |  2 +-
 src/operator/quantization/quantized_batch_norm.cc  |  4 +-
 src/operator/quantization/quantized_conv.cc        | 12 ++---
 .../quantization/quantized_fully_connected.cc      | 10 ++---
 src/operator/quantization/quantized_pooling.cc     |  8 ++--
 src/operator/quantization/requantize.cc            |  6 +--
 src/operator/rnn.cc                                | 24 +++++-----
 .../subgraph/mkldnn/mkldnn_bn_relu_property.h      |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_common.h       |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_conv-inl.h     |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_conv.cc        |  4 +-
 .../subgraph/mkldnn/mkldnn_conv_property.h         |  4 +-
 .../mkldnn_elemwisemul_post_quantize_property.h    |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_fc-inl.h       |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_fc.cc          |  4 +-
 .../mkldnn/mkldnn_fc_post_quantize_property.h      |  4 +-
 src/operator/subgraph/mkldnn/mkldnn_fc_property.h  |  4 +-
 .../mkldnn_post_quantize_align_scale_property.h    |  4 +-
 .../mkldnn/mkldnn_post_quantize_property.h         |  4 +-
 .../subgraph/mkldnn/mkldnn_subgraph_base-inl.h     |  4 +-
 .../subgraph/mkldnn/mkldnn_subgraph_property.cc    |  4 +-
 .../partitioner/custom_subgraph_property.h         |  4 +-
 src/operator/tensor/amp_cast.cc                    | 12 ++---
 src/operator/tensor/cast_storage-inl.h             |  6 +--
 src/operator/tensor/elemwise_binary_op_basic.cc    | 12 ++---
 src/operator/tensor/elemwise_sum.cc                |  8 ++--
 src/operator/tensor/elemwise_unary_op.h            |  2 +-
 src/operator/tensor/elemwise_unary_op_basic.cc     | 16 +++----
 src/operator/tensor/matrix_op-inl.h                |  2 +-
 src/operator/tensor/matrix_op.cc                   | 22 ++++-----
 src/serialization/cnpy.cc                          |  4 +-
 src/storage/cpu_device_storage.h                   |  2 +-
 src/storage/storage_manager_helpers.h              |  2 +-
 tests/README.md                                    |  4 +-
 tests/cpp/include/test_mkldnn.h                    |  6 +--
 tests/cpp/include/test_util.h                      |  2 +-
 tests/cpp/operator/batchnorm_test.cc               |  4 +-
 tests/cpp/operator/mkldnn_operator_test.cc         |  4 +-
 tests/cpp/operator/mkldnn_test.cc                  |  4 +-
 tests/cpp/storage/storage_test.cc                  |  2 +-
 tools/license_header.py                            |  4 +-
 tools/pip/doc/CPU_ADDITIONAL.md                    |  2 +-
 tools/pip/doc/CU101_ADDITIONAL.md                  |  2 +-
 tools/pip/doc/CU102_ADDITIONAL.md                  |  2 +-
 tools/pip/doc/CU110_ADDITIONAL.md                  |  2 +-
 tools/pip/doc/CU112_ADDITIONAL.md                  |  2 +-
 tools/pip/doc/NATIVE_ADDITIONAL.md                 |  2 +-
 tools/pip/setup.py                                 |  4 +-
 tools/source-exclude-artifacts.txt                 |  2 +-
 tools/staticbuild/README.md                        |  4 +-
 224 files changed, 531 insertions(+), 530 deletions(-)

diff --git a/.gitignore b/.gitignore
index 648d59c..c35b5a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,5 +171,5 @@ coverage.xml
 cmake_options.yml
 
 # header file generated at compile time
-include/mkldnn/oneapi/dnnl/dnnl_version.h
-include/mkldnn/oneapi/dnnl/dnnl_config.h
+include/onednn/oneapi/dnnl/dnnl_version.h
+include/onednn/oneapi/dnnl/dnnl_config.h
diff --git a/.gitmodules b/.gitmodules
index 85246d6..f3b95da 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -10,9 +10,6 @@
 [submodule "3rdparty/googletest"]
 	path = 3rdparty/googletest
 	url = https://github.com/google/googletest.git
-[submodule "3rdparty/mkldnn"]
-	path = 3rdparty/mkldnn
-	url = https://github.com/oneapi-src/oneDNN.git
 [submodule "3rdparty/tvm"]
 	path = 3rdparty/tvm
 	url = https://github.com/apache/incubator-tvm.git
@@ -28,3 +25,6 @@
 [submodule "3rdparty/intgemm"]
 	path = 3rdparty/intgemm
 	url = https://github.com/kpu/intgemm
+[submodule "3rdparty/onednn"]
+	path = 3rdparty/onednn
+	url = https://github.com/oneapi-src/oneDNN
diff --git a/3rdparty/mkldnn b/3rdparty/onednn
similarity index 100%
rename from 3rdparty/mkldnn
rename to 3rdparty/onednn
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3534897..5c8865a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -62,9 +62,9 @@ option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects supp
 option(USE_LAPACK "Build with lapack support" ON)
 option(USE_MKL_LAYERNORM "Use layer normalization from MKL, which is currently slower than internal. No effect unless USE_BLAS=MKL (or mkl)." OFF)
 if((NOT APPLE) AND (NOT MSVC) AND (CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "x86_64") AND (NOT CMAKE_CROSSCOMPILING))
-  option(USE_MKLDNN "Build with MKL-DNN support" ON)
+  option(USE_ONEDNN "Build with ONEDNN support" ON)
 else()
-  option(USE_MKLDNN "Build with MKL-DNN support" OFF)
+  option(USE_ONEDNN "Build with ONEDNN support" OFF)
 endif()
 cmake_dependent_option(USE_INTGEMM "Build with x86_64 intgemm library for low-precision multiplication" ON "CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64" OFF)
 if(NOT MSVC)
@@ -257,7 +257,7 @@ endif()
 if(USE_MKL_LAYERNORM)
   add_definitions(-DMXNET_USE_MKL_LAYERNORM=1)
 endif()
-if(USE_MKLDNN)
+if(USE_ONEDNN)
   # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
   if(MSVC)
     set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /EHsc")
@@ -272,7 +272,7 @@ if(USE_MKLDNN)
     endif()
   endif()
 
-  function(load_mkldnn)
+  function(load_onednn)
     set(MKLDNN_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
     set(MKLDNN_BUILD_EXAMPLES OFF CACHE INTERNAL "" FORCE)
     set(MKLDNN_ARCH_OPT_FLAGS "" CACHE INTERNAL "" FORCE)
@@ -285,13 +285,13 @@ if(USE_MKLDNN)
       set(MKLDNN_CPU_RUNTIME SEQ CACHE INTERNAL "" FORCE)
     endif()
 
-    set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/mkldnn")
-    add_subdirectory(3rdparty/mkldnn)
+    set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/onednn")
+    add_subdirectory(3rdparty/onednn)
   endfunction()
-  load_mkldnn()
-  include_directories(3rdparty/mkldnn/include)
-  include_directories(${PROJECT_BINARY_DIR}/3rdparty/mkldnn/include)
-  add_definitions(-DMXNET_USE_MKLDNN=1)
+  load_onednn()
+  include_directories(3rdparty/onednn/include)
+  include_directories(${PROJECT_BINARY_DIR}/3rdparty/onednn/include)
+  add_definitions(-DMXNET_USE_ONEDNN=1)
   list(APPEND mxnet_LINKER_LIBS dnnl)
   set_target_properties(dnnl PROPERTIES CXX_CLANG_TIDY "")  # don't lint 3rdparty dependency
 endif()
@@ -836,12 +836,12 @@ if(USE_DIST_KVSTORE)
   set_target_properties(pslite PROPERTIES CXX_CLANG_TIDY "")  # don't lint 3rdparty dependency
 endif()
 
-if(USE_MKLDNN)
+if(USE_ONEDNN)
     add_custom_command(TARGET mxnet POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy
-      ${CMAKE_BINARY_DIR}/3rdparty/mkldnn/include/oneapi/dnnl/dnnl_config.h  ${CMAKE_SOURCE_DIR}/include/mkldnn/oneapi/dnnl/
+      ${CMAKE_BINARY_DIR}/3rdparty/onednn/include/oneapi/dnnl/dnnl_config.h  ${CMAKE_SOURCE_DIR}/include/onednn/oneapi/dnnl/
       COMMAND ${CMAKE_COMMAND} -E copy
-      ${CMAKE_BINARY_DIR}/3rdparty/mkldnn/include/oneapi/dnnl/dnnl_version.h  ${CMAKE_SOURCE_DIR}/include/mkldnn/oneapi/dnnl/)
+      ${CMAKE_BINARY_DIR}/3rdparty/onednn/include/oneapi/dnnl/dnnl_version.h  ${CMAKE_SOURCE_DIR}/include/onednn/oneapi/dnnl/)
 endif()
 
 if(USE_INTGEMM)
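
The renamed option flows straight through to the CI scripts: ci/build_windows.py (changed later in this diff) passes it to CMake as a plain `-DUSE_ONEDNN=...` definition. Below is a hypothetical sketch in the style of that script, with illustrative paths and a reduced flag set.

```python
# Hypothetical sketch of the configure step with the renamed flag, in the
# style of ci/build_windows.py; source and build paths are assumptions.
import subprocess

cmake_flags = [
    "-DUSE_CUDA=OFF",
    "-DUSE_ONEDNN=ON",  # was -DUSE_MKLDNN=ON before this commit
    "-DCMAKE_BUILD_TYPE=Release",
]
subprocess.check_call(["cmake", *cmake_flags, "/work/mxnet"], cwd="/work/build")
```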
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 702daff..b3a71e2 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -290,6 +290,7 @@ List of Contributors
 * [Paweł Głomski](https://github.com/PawelGlomski-Intel)
 * [Andrzej Kotlowski](https://github.com/anko-intel)
 * [Yingxiao Du](https://github.com/Duconnor)
+* [Bartosz Kuncer](https://github.com/bartekkuncer)
 
 Label Bot
 ---------
diff --git a/LICENSE b/LICENSE
index f5c68dd..13c9371 100644
--- a/LICENSE
+++ b/LICENSE
@@ -226,12 +226,12 @@
     3rdparty/tvm/3rdparty/dmlc-core
     3rdparty/tvm/3rdparty/dlpack
     3rdparty/ps-lite
-    3rdparty/mkldnn
+    3rdparty/onednn
     3rdparty/googletest/googlemock/scripts/generator
     3rdparty/onnx-tensorrt/third_party/onnx/third_party/benchmark
-    3rdparty/mkldnn/tests/benchdnn (Copy of the License available at top of current file)
+    3rdparty/onednn/tests/benchdnn (Copy of the License available at top of current file)
     src/operator/special_functions-inl.h Cephes Library Functions (Copy of the License available at top of current file)
-    3rdparty/mkldnn/doc/assets/mathjax (Copy of the License available at top of current file)
+    3rdparty/onednn/doc/assets/mathjax (Copy of the License available at top of current file)
     docs/python_docs/themes/mx-theme/mxtheme/static/material-design-icons-3.0.1 (Copy of the License available at top of current file)
     docs/python_docs/themes/mx-theme/mxtheme/static/font/Roboto (Copy of the License available at top of current file)
     3rdparty/tvm/3rdparty/bfloat16/bfloat16.cc (Copy of the License available at top of current file)
@@ -256,10 +256,10 @@
     3-clause BSD license
     =======================================================================================
 
-    3rdparty/mkldnn/src/cpu/x64/xbyak
-    3rdparty/mkldnn/tests/gtests/gtest
-    3rdparty/mkldnn/cmake/FindOpenCL.cmake (Copy of the License available at licenses/BSD3-cmake)
-    3rdparty/mkldnn/src/cpu/x64/jit_utils/jitprofiling/
+    3rdparty/onednn/src/cpu/x64/xbyak
+    3rdparty/onednn/tests/gtests/gtest
+    3rdparty/onednn/cmake/FindOpenCL.cmake (Copy of the License available at licenses/BSD3-cmake)
+    3rdparty/onednn/src/cpu/x64/jit_utils/jitprofiling/
     3rdparty/onnx-tensorrt/third_party/onnx/third_party/pybind11/tools/FindPythonLibsNew.cmake
     3rdparty/ctc_include/contrib/moderngpu
     3rdparty/nvidia_cub
@@ -333,7 +333,7 @@
     =======================================================================================
 
     3rdparty/intgemm/test/3rd_party/catch.hpp  (Copy of the License available at licenses/BOOST1_0)
-    3rdparty/mkldnn/src/common/primitive_hashing.hpp
+    3rdparty/onednn/src/common/primitive_hashing.hpp
 
     =======================================================================================
     LLVM Release License
diff --git a/NEWS.md b/NEWS.md
index 0ba22152..d63ee0e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1973,7 +1973,7 @@ Note: this feature is still experimental, for more details, refer to [design doc
 * Add back R tests and fix typo around R and perl tests (#13940)
 * Fix document build (#13927)
 * Temporarily disables windows pipeline to unblock PRs (#14261)
-* Fix USE_MKLDNN check in Makefile (#13775)
+* Fix USE_ONEDNN check in Makefile (#13775)
 * Fix spelling in threaded_engine_test (#14709)
 * Fix cmake options parsing in dev_menu (#13458)
 * Add Local test stage and option to jump directly to menu item from commandline (#13809)
diff --git a/README.md b/README.md
index e37f41c..8374b40 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ What's New
 
 ### Ecosystem News
 
-* [MKLDNN for Faster CPU Performance](docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md)
+* [ONEDNN for Faster CPU Performance](docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md)
 * [MXNet Memory Monger, Training Deeper Nets with Sublinear Memory Cost](https://github.com/dmlc/mxnet-memonger)
 * [Tutorial for NVidia GTC 2016](https://github.com/dmlc/mxnet-gtc-tutorial)
 * [MXNet.js: Javascript Package for Deep Learning in Browser (without server)](https://github.com/dmlc/mxnet.js/)
diff --git a/benchmark/opperf/README.md b/benchmark/opperf/README.md
index 4935ea7..bb3fb8e 100644
--- a/benchmark/opperf/README.md
+++ b/benchmark/opperf/README.md
@@ -37,7 +37,7 @@ Benchmarks are usually done end-to-end for a given Network Architecture. For exa
 2. A standard Network Architecture like ResNet-50 is made up of many operators Ex: Convolution2D, Softmax, Dense and more. Consider the following scenarios:
    1. We improved the performance of the Convolution2D operator, but due to a bug, Softmax performance went down. Overall, end to end benchmarks may appear to run fine while we miss the performance degradation of a single operator, which can accumulate and become untraceable.
     2. You need to see in a given network, which operator is taking maximum time and plan optimization work. With end to end benchmarks, it is hard to get more fine grained numbers at operator level.
-3. We need to know on different hardware infrastructure (Ex: CPU with MKLDNN, GPU with NVIDIA CUDA and cuDNN) how different operators performs. With these details, we can plan the optimization work at operator level, which could exponentially boost up end to end performance.
+3. We need to know how different operators perform on different hardware infrastructure (Ex: CPU with ONEDNN, GPU with NVIDIA CUDA and cuDNN). With these details, we can plan the optimization work at operator level, which could exponentially boost end to end performance.
 4. You want to have nightly performance tests across all operators in a deep learning framework to catch regressions early. 
 5. We can integrate this framework with a CI/CD system to run per operator performance tests for PRs. Example: When a PR modifies the kernel of TransposeConv2D, we can run benchmarks of TransposeConv2D operator to verify performance.
 
diff --git a/cd/README.md b/cd/README.md
index 356b7b6d..b060314 100644
--- a/cd/README.md
+++ b/cd/README.md
@@ -19,18 +19,18 @@
 
 ## Introduction
 
-MXNet aims to support a variety of frontends, e.g. Python, Java, Perl, R, etc. as well as environments (Windows, Linux, Mac, with or without GPU, with or without MKL-DNN support, etc.). This package contains a small continuous delivery (CD) framework used to automate the delivery nightly and release builds across our delivery channels.
+MXNet aims to support a variety of frontends, e.g. Python, Java, Perl, R, etc., as well as environments (Windows, Linux, Mac, with or without GPU, with or without ONEDNN support, etc.). This package contains a small continuous delivery (CD) framework used to automate the delivery of nightly and release builds across our delivery channels.
 
 <!-- TODO: Add links to the actual jobs, once this is live on PROD -->
 
 The CD process is driven by the [CD pipeline job](Jenkinsfile_cd_pipeline), which orchestrates the order in which the artifacts are delivered. For instance, first publish the libmxnet library before publishing the pip package. It does this by triggering the [release job](Jenkinsfile_release_job) with a specific set of parameters for each delivery channel. The release job executes the specific release pipeline for a delivery channel across all MXNet *variants*.
 
-A variant is a specific environment or features for which MXNet is compiled. For instance CPU, GPU with CUDA v10.1, CUDA v10.2 with MKL-DNN support, etc.
+A variant is a specific environment or features for which MXNet is compiled. For instance CPU, GPU with CUDA v10.1, CUDA v10.2 with ONEDNN support, etc.
 
-Currently, below variants are supported. All of these variants except native have MKL-DNN backend enabled.
+Currently, the variants below are supported. All of these variants except native have the ONEDNN backend enabled.
 
 * *cpu*: CPU
-* *native*: CPU without MKL-DNN
+* *native*: CPU without ONEDNN
 * *cu101*: CUDA 10.1
 * *cu102*: CUDA 10.2
 * *cu110*: CUDA 11.0
diff --git a/cd/mxnet_lib/Jenkins_pipeline.groovy b/cd/mxnet_lib/Jenkins_pipeline.groovy
index 73fdfc6..d38971b 100644
--- a/cd/mxnet_lib/Jenkins_pipeline.groovy
+++ b/cd/mxnet_lib/Jenkins_pipeline.groovy
@@ -33,7 +33,7 @@ licenses = 'licenses/*'
 
 // libmxnet dependencies
 mx_native_deps = 'lib/libgfortran.so.*, lib/libopenblas.so.0'
-mx_deps = 'lib/libgfortran.so.*, lib/libopenblas.so.0, include/mkldnn/oneapi/dnnl/dnnl_version.h, include/mkldnn/oneapi/dnnl/dnnl_config.h'
+mx_deps = 'lib/libgfortran.so.*, lib/libopenblas.so.0, include/onednn/oneapi/dnnl/dnnl_version.h, include/onednn/oneapi/dnnl/dnnl_config.h'
 
 // library type
 // either static or dynamic - depending on how it links to its dependencies
diff --git a/cd/python/pypi/pypi_package.sh b/cd/python/pypi/pypi_package.sh
index 3f9908a..076f85a 100755
--- a/cd/python/pypi/pypi_package.sh
+++ b/cd/python/pypi/pypi_package.sh
@@ -23,14 +23,14 @@ export mxnet_variant=${1:?"Please specify the mxnet variant"}
 
 # Due to this PR: https://github.com/apache/incubator-mxnet/pull/14899
 # The setup.py expects that mkldnn_version.h be present in
-# mxnet-build/3rdparty/mkldnn/build/install/include
+# mxnet-build/3rdparty/onednn/build/install/include
 # The artifact repository stores this file in the dependencies
 # and CD unpacks it to a directory called cd_misc
# Nov. 2019 Update: With v1.1, MKL-DNN was renamed to DNNL, hence the change in the file name prefix.
 if [ -f "cd_misc/dnnl_version.h" ]; then
-  mkdir -p 3rdparty/mkldnn/include/oneapi/dnnl
-  cp cd_misc/dnnl_version.h 3rdparty/mkldnn/include/oneapi/dnnl/.
-  cp cd_misc/dnnl_config.h 3rdparty/mkldnn/include/oneapi/dnnl/.
+  mkdir -p 3rdparty/onednn/include/oneapi/dnnl
+  cp cd_misc/dnnl_version.h 3rdparty/onednn/include/oneapi/dnnl/.
+  cp cd_misc/dnnl_config.h 3rdparty/onednn/include/oneapi/dnnl/.
 fi
 
 # Create wheel workspace
diff --git a/cd/utils/artifact_repository.md b/cd/utils/artifact_repository.md
index c37646b..46a97d3 100644
--- a/cd/utils/artifact_repository.md
+++ b/cd/utils/artifact_repository.md
@@ -55,11 +55,11 @@ If not set, derived through the value of sys.platform (https://docs.python.org/3
 
 Manually configured through the --variant argument. The current variants are: cpu, native, cu101, cu102, cu110, cu112.
 
-As long as the tool is being run from the MXNet code base, the runtime feature detection tool (https://github.com/larroy/mxnet/blob/dd432b7f241c9da2c96bcb877c2dc84e6a1f74d4/docs/api/python/libinfo/libinfo.md) can be used to detect whether the library has been compiled with MKL (library has MKL-DNN feature enabled) and/or CUDA support (compiled with CUDA feature enabled).
+As long as the tool is being run from the MXNet code base, the runtime feature detection tool (https://github.com/larroy/mxnet/blob/dd432b7f241c9da2c96bcb877c2dc84e6a1f74d4/docs/api/python/libinfo/libinfo.md) can be used to detect whether the library has been compiled with MKL (library has ONEDNN feature enabled) and/or CUDA support (compiled with CUDA feature enabled).
 
 If it has been compiled with CUDA support, the output of /usr/local/cuda/bin/nvcc --version can be mined for the exact CUDA version (eg. 8.0, 9.0, etc.).
 
-By knowing which features are enabled on the binary, and if necessary, which CUDA version is installed on the machine, the value for the variant argument can be calculated. Eg. if CUDA features are enabled, and nvcc reports cuda version 10.2, then the variant would be cu102. If neither MKL-DNN nor CUDA features are enabled, the variant would be native. 
+By knowing which features are enabled on the binary, and if necessary, which CUDA version is installed on the machine, the value for the variant argument can be calculated. Eg. if CUDA features are enabled, and nvcc reports cuda version 10.2, then the variant would be cu102. If neither ONEDNN nor CUDA features are enabled, the variant would be native. 
 
 **Dependency Linking**
 
diff --git a/cd/utils/artifact_repository.py b/cd/utils/artifact_repository.py
index 41893d9..dd10a4b 100644
--- a/cd/utils/artifact_repository.py
+++ b/cd/utils/artifact_repository.py
@@ -313,7 +313,7 @@ def probe_gpu_variant(mxnet_features: Dict[str, bool]) -> Optional[str]:
     if cuda_version:
         variant = 'cu{}'.format(cuda_version)
         if not mxnet_features['MKLDNN']:
-            RuntimeError('Error determining mxnet variant: MKL-DNN should be enabled for cuda variants')
+            RuntimeError('Error determining mxnet variant: ONEDNN should be enabled for cuda variants')
         logger.debug('variant is: {}'.format(variant))
         return variant
 
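
As an aside, the line touched above constructs a `RuntimeError` without raising it, so the sanity check never fires; that behavior is unchanged by this commit. Here is a hedged sketch of the variant derivation described in cd/utils/artifact_repository.md, with the error actually raised; `derive_variant` is an illustrative name, and the feature key stays `'MKLDNN'` because this commit does not rename it.

```python
# Sketch of the variant derivation from artifact_repository.md; the function
# name is illustrative, not part of the tool. Unlike the snippet above, the
# RuntimeError is actually raised here.
from typing import Dict, Optional

def derive_variant(mxnet_features: Dict[str, bool],
                   cuda_version: Optional[str] = None) -> str:
    """Return e.g. 'cu102', 'cpu' or 'native' (cuda_version like '102')."""
    if cuda_version:
        if not mxnet_features['MKLDNN']:
            raise RuntimeError('Error determining mxnet variant: ONEDNN '
                               'should be enabled for cuda variants')
        return 'cu{}'.format(cuda_version)
    return 'cpu' if mxnet_features['MKLDNN'] else 'native'
```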
diff --git a/ci/build_windows.py b/ci/build_windows.py
index 899d1dd..2035e4c 100755
--- a/ci/build_windows.py
+++ b/ci/build_windows.py
@@ -79,7 +79,7 @@ CMAKE_FLAGS = {
         '-DUSE_BLAS=open '
         '-DUSE_LAPACK=ON '
         '-DUSE_DIST_KVSTORE=OFF '
-        '-DUSE_MKLDNN=ON '
+        '-DUSE_ONEDNN=ON '
         '-DCMAKE_BUILD_TYPE=Release')
 
     , 'WIN_CPU_MKLDNN_MKL': (
@@ -92,7 +92,7 @@ CMAKE_FLAGS = {
         '-DUSE_BLAS=mkl '
         '-DUSE_LAPACK=ON '
         '-DUSE_DIST_KVSTORE=OFF '
-        '-DUSE_MKLDNN=ON '
+        '-DUSE_ONEDNN=ON '
         '-DCMAKE_BUILD_TYPE=Release')
 
     , 'WIN_CPU_MKL': (
@@ -105,7 +105,7 @@ CMAKE_FLAGS = {
         '-DUSE_BLAS=mkl '
         '-DUSE_LAPACK=ON '
         '-DUSE_DIST_KVSTORE=OFF '
-        '-DUSE_MKLDNN=OFF '
+        '-DUSE_ONEDNN=OFF '
         '-DCMAKE_BUILD_TYPE=Release')
 
     , 'WIN_GPU': (
@@ -132,7 +132,7 @@ CMAKE_FLAGS = {
         '-DUSE_LAPACK=ON '
         '-DUSE_DIST_KVSTORE=OFF '
         '-DMXNET_CUDA_ARCH="5.2" '
-        '-DUSE_MKLDNN=ON '
+        '-DUSE_ONEDNN=ON '
         '-DCMAKE_BUILD_TYPE=Release')
 
 }
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index c66f72d..4a94449 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -116,17 +116,17 @@ build_dynamic_libmxnet() {
     export CXXFLAGS="-fabi-version=11 -fabi-compat-version=7"
     if [[ ${mxnet_variant} = "cpu" ]]; then
         cmake -DUSE_BLAS=Open \
-            -DUSE_MKLDNN=ON \
+            -DUSE_ONEDNN=ON \
             -DUSE_CUDA=OFF \
             -G Ninja /work/mxnet
     elif [[ ${mxnet_variant} = "native" ]]; then
         cmake -DUSE_BLAS=Open \
-            -DUSE_MKLDNN=OFF \
+            -DUSE_ONEDNN=OFF \
             -DUSE_CUDA=OFF \
             -G Ninja /work/mxnet
     elif [[ ${mxnet_variant} =~ cu[0-9]+$ ]]; then
         cmake -DUSE_BLAS=Open \
-            -DUSE_MKLDNN=ON \
+            -DUSE_ONEDNN=ON \
             -DUSE_DIST_KVSTORE=ON \
             -DUSE_CUDA=ON \
             -G Ninja /work/mxnet
@@ -263,7 +263,7 @@ build_centos7_cpu() {
     export CXXFLAGS="-fabi-version=11 -fabi-compat-version=7"
     cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_DIST_KVSTORE=ON \
         -DUSE_CUDA=OFF \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
@@ -280,7 +280,7 @@ build_centos7_mkldnn() {
     # Opt in to newer GCC C++ ABI. devtoolset defaults to ABI Version 2.
     export CXXFLAGS="-fabi-version=11 -fabi-compat-version=7"
     cmake -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=ON \
+        -DUSE_ONEDNN=ON \
         -DUSE_CUDA=OFF \
         -DUSE_INT64_TENSOR_SIZE=OFF \
         -G Ninja /work/mxnet
@@ -296,7 +296,7 @@ build_centos7_gpu() {
     cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=ON \
+        -DUSE_ONEDNN=ON \
         -DUSE_CUDA=ON \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_DIST_KVSTORE=ON \
@@ -318,7 +318,7 @@ build_ubuntu_cpu_openblas() {
         -DENABLE_TESTCOVERAGE=ON \
         -DUSE_TVM_OP=ON \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_CUDA=OFF \
         -DUSE_DIST_KVSTORE=ON \
         -DBUILD_CYTHON_MODULES=ON \
@@ -333,7 +333,7 @@ build_ubuntu_cpu_mkl() {
     CC=gcc-7 CXX=g++-7 cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=OFF \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_CUDA=OFF \
         -DUSE_TVM_OP=ON \
         -DUSE_MKL_LAYERNORM=ON \
@@ -385,7 +385,7 @@ build_ubuntu_cpu_cmake_asan() {
     cmake \
         -DUSE_CUDA=OFF \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_OPENMP=OFF \
         -DUSE_OPENCV=OFF \
         -DCMAKE_BUILD_TYPE=Debug \
@@ -444,7 +444,7 @@ build_ubuntu_cpu_clang6() {
     export OpenBLAS_HOME=/usr/local/openblas-clang/
     CXX=clang++-6.0 CC=clang-6.0 cmake \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_CUDA=OFF \
         -DUSE_OPENMP=OFF \
         -DUSE_DIST_KVSTORE=ON \
@@ -458,7 +458,7 @@ build_ubuntu_cpu_clang100() {
     export OpenBLAS_HOME=/usr/local/openblas-clang/
     CXX=clang++-10 CC=clang-10 cmake \
        -DUSE_BLAS=Open \
-       -DUSE_MKLDNN=OFF \
+       -DUSE_ONEDNN=OFF \
        -DUSE_CUDA=OFF \
        -DUSE_OPENMP=ON \
        -DUSE_DIST_KVSTORE=ON \
@@ -473,7 +473,7 @@ build_ubuntu_cpu_clang_tidy() {
     # TODO(leezu) USE_OPENMP=OFF 3rdparty/dmlc-core/CMakeLists.txt:79 broken?
     CXX=clang++-10 CC=clang-10 cmake \
        -DUSE_BLAS=Open \
-       -DUSE_MKLDNN=OFF \
+       -DUSE_ONEDNN=OFF \
        -DUSE_CUDA=OFF \
        -DUSE_OPENMP=OFF \
        -DCMAKE_BUILD_TYPE=Debug \
@@ -489,7 +489,7 @@ build_ubuntu_cpu_clang6_mkldnn() {
     export OpenBLAS_HOME=/usr/local/openblas-clang/
     CXX=clang++-6.0 CC=clang-6.0 cmake \
        -DUSE_BLAS=Open \
-       -DUSE_MKLDNN=ON \
+       -DUSE_ONEDNN=ON \
        -DUSE_CUDA=OFF \
        -DUSE_OPENMP=OFF \
        -G Ninja /work/mxnet
@@ -502,7 +502,7 @@ build_ubuntu_cpu_clang100_mkldnn() {
     export OpenBLAS_HOME=/usr/local/openblas-clang/
     CXX=clang++-10 CC=clang-10 cmake \
        -DUSE_BLAS=Open \
-       -DUSE_MKLDNN=ON \
+       -DUSE_ONEDNN=ON \
        -DUSE_CUDA=OFF \
        -G Ninja /work/mxnet
     ninja
@@ -516,7 +516,7 @@ build_ubuntu_cpu_mkldnn() {
         -DENABLE_TESTCOVERAGE=ON \
         -DUSE_TVM_OP=ON \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=ON \
+        -DUSE_ONEDNN=ON \
         -DUSE_CUDA=OFF \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
         -G Ninja /work/mxnet
@@ -529,7 +529,7 @@ build_ubuntu_cpu_mkldnn_mkl() {
     CC=gcc-7 CXX=g++-7 cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=OFF \
-        -DUSE_MKLDNN=ON \
+        -DUSE_ONEDNN=ON \
         -DUSE_CUDA=OFF \
         -DUSE_TVM_OP=ON \
         -DUSE_BLAS=MKL \
@@ -584,7 +584,7 @@ build_ubuntu_gpu_tensorrt() {
           -DUSE_TENSORRT=1                        \
           -DUSE_OPENMP=0                          \
           -DUSE_BLAS=Open                         \
-          -DUSE_MKLDNN=0                          \
+          -DUSE_ONEDNN=0                          \
           -DUSE_NVML=OFF                          \
           -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
           -G Ninja                                \
@@ -632,7 +632,7 @@ build_ubuntu_gpu() {
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_CUDNN=ON \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_DIST_KVSTORE=ON \
         -DBUILD_CYTHON_MODULES=ON \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
@@ -650,7 +650,7 @@ build_ubuntu_gpu_debug() {
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_CUDNN=ON \
         -DUSE_BLAS=Open \
-        -DUSE_MKLDNN=OFF \
+        -DUSE_ONEDNN=OFF \
         -DUSE_DIST_KVSTORE=ON \
         -DBUILD_CYTHON_MODULES=ON \
         -G Ninja /work/mxnet
@@ -665,7 +665,7 @@ build_ubuntu_cpu_large_tensor() {
         -DUSE_CUDA=OFF                          \
         -DUSE_CUDNN=OFF                         \
         -DUSE_BLAS=Open                         \
-        -DUSE_MKLDNN=ON                         \
+        -DUSE_ONEDNN=ON                         \
         -G Ninja                                \
         /work/mxnet
 
@@ -681,7 +681,7 @@ build_ubuntu_gpu_large_tensor() {
         -DUSE_CUDNN=ON                          \
         -DUSE_NVML=OFF                          \
         -DUSE_BLAS=Open                         \
-        -DUSE_MKLDNN=ON                         \
+        -DUSE_ONEDNN=ON                         \
         -DUSE_DIST_KVSTORE=ON                   \
         -DCMAKE_BUILD_TYPE=Release              \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
@@ -707,7 +707,7 @@ sanity_license() {
 
 sanity_cpp() {
     set -ex
-    3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin tests --exclude_path src/operator/contrib/ctc_include include/mkldnn
+    3rdparty/dmlc-core/scripts/lint.py mxnet cpp include src plugin tests --exclude_path src/operator/contrib/ctc_include include/onednn
 }
 
 sanity_python() {
@@ -1291,10 +1291,10 @@ build_static_libmxnet() {
 # Tests CD PyPI packaging in CI
 ci_package_pypi() {
     set -ex
-    # copies mkldnn header files to 3rdparty/mkldnn/include/oneapi/dnnl/ as in CD
-    mkdir -p 3rdparty/mkldnn/include/oneapi/dnnl
-    cp include/mkldnn/oneapi/dnnl/dnnl_version.h 3rdparty/mkldnn/include/oneapi/dnnl/.
-    cp include/mkldnn/oneapi/dnnl/dnnl_config.h 3rdparty/mkldnn/include/oneapi/dnnl/.
+    # copies mkldnn header files to 3rdparty/onednn/include/oneapi/dnnl/ as in CD
+    mkdir -p 3rdparty/onednn/include/oneapi/dnnl
+    cp include/onednn/oneapi/dnnl/dnnl_version.h 3rdparty/onednn/include/oneapi/dnnl/.
+    cp include/onednn/oneapi/dnnl/dnnl_config.h 3rdparty/onednn/include/oneapi/dnnl/.
     local mxnet_variant=${1:?"This function requires a python command as the first argument"}
     cd_package_pypi ${mxnet_variant}
     cd_integration_test_pypi
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index ba7d052..ac30fff 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -37,7 +37,7 @@ mx_tensorrt_lib = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, buil
 mx_lib_cpp_examples = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, example/extensions/lib_external_ops/build/libexternal_lib.so, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so'
 mx_lib_cpp_examples_no_tvm_op = 'build/libmxnet.so, build/libcustomop_lib.so, build/libcustomop_gpu_lib.so, build/libsubgraph_lib.so, python/mxnet/_cy3/*.so, python/mxnet/_ffi/_cy3/*.so'
 mx_lib_cpp_examples_cpu = 'build/libmxnet.so, build/3rdparty/tvm/libtvm_runtime.so, build/libtvmop.so, build/tvmop.conf'
-mx_cd_lib = 'lib/libmxnet.so, licenses/*, lib/libgfortran.so.*, lib/libopenblas.so.0, include/mkldnn/oneapi/dnnl/dnnl_version.h, include/mkldnn/oneapi/dnnl/dnnl_config.h'
+mx_cd_lib = 'lib/libmxnet.so, licenses/*, lib/libgfortran.so.*, lib/libopenblas.so.0, include/onednn/oneapi/dnnl/dnnl_version.h, include/onednn/oneapi/dnnl/dnnl_config.h'
 
 
 // Python unittest for CPU
diff --git a/config/darwin.cmake b/config/darwin.cmake
index 2311da9..1015a2f 100644
--- a/config/darwin.cmake
+++ b/config/darwin.cmake
@@ -45,7 +45,7 @@ set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")
 
 set(USE_OPENMP OFF CACHE BOOL "Build with Openmp support")
 
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 
diff --git a/config/distribution/darwin_cpu.cmake b/config/distribution/darwin_cpu.cmake
index baff103..ddda2ca 100644
--- a/config/distribution/darwin_cpu.cmake
+++ b/config/distribution/darwin_cpu.cmake
@@ -24,7 +24,7 @@ set(USE_BLAS "apple" CACHE STRING "BLAS Vendor")
 set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP OFF CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/darwin_native.cmake b/config/distribution/darwin_native.cmake
index c9d8fa1..4b256c6 100644
--- a/config/distribution/darwin_native.cmake
+++ b/config/distribution/darwin_native.cmake
@@ -24,7 +24,7 @@ set(USE_BLAS "apple" CACHE STRING "BLAS Vendor")
 set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP OFF CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN OFF CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN OFF CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cpu.cmake b/config/distribution/linux_cpu.cmake
index 9f0885d..9b8a979 100644
--- a/config/distribution/linux_cpu.cmake
+++ b/config/distribution/linux_cpu.cmake
@@ -23,7 +23,7 @@ set(USE_BLAS "open" CACHE STRING "BLAS Vendor")
 set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu100.cmake b/config/distribution/linux_cu100.cmake
index a328040..35ec5a3 100644
--- a/config/distribution/linux_cu100.cmake
+++ b/config/distribution/linux_cu100.cmake
@@ -25,7 +25,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu101.cmake b/config/distribution/linux_cu101.cmake
index 210e07e..80f522d 100644
--- a/config/distribution/linux_cu101.cmake
+++ b/config/distribution/linux_cu101.cmake
@@ -27,7 +27,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu102.cmake b/config/distribution/linux_cu102.cmake
index 1bfedb5..d580354 100644
--- a/config/distribution/linux_cu102.cmake
+++ b/config/distribution/linux_cu102.cmake
@@ -25,7 +25,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu110.cmake b/config/distribution/linux_cu110.cmake
index b3cd6a7..0c239cb 100644
--- a/config/distribution/linux_cu110.cmake
+++ b/config/distribution/linux_cu110.cmake
@@ -25,7 +25,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu112.cmake b/config/distribution/linux_cu112.cmake
index 8c1e74a..031d129 100644
--- a/config/distribution/linux_cu112.cmake
+++ b/config/distribution/linux_cu112.cmake
@@ -25,7 +25,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_cu92.cmake b/config/distribution/linux_cu92.cmake
index 7af9ded..9466a52 100644
--- a/config/distribution/linux_cu92.cmake
+++ b/config/distribution/linux_cu92.cmake
@@ -25,7 +25,7 @@ set(USE_CUDNN ON CACHE BOOL "Build with CUDNN support")
 set(USE_NCCL ON CACHE BOOL "Build with NCCL support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/distribution/linux_native.cmake b/config/distribution/linux_native.cmake
index 6d74b12..a0900f3 100644
--- a/config/distribution/linux_native.cmake
+++ b/config/distribution/linux_native.cmake
@@ -23,7 +23,7 @@ set(USE_BLAS "open" CACHE STRING "BLAS Vendor")
 set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
 set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
-set(USE_MKLDNN OFF CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN OFF CACHE BOOL "Build with ONEDNN support")
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
 set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
diff --git a/config/linux.cmake b/config/linux.cmake
index 5d0e0a1..0a0f2d9 100644
--- a/config/linux.cmake
+++ b/config/linux.cmake
@@ -62,7 +62,7 @@ set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")
 
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
 
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 
diff --git a/config/linux_gpu.cmake b/config/linux_gpu.cmake
index 56b00d0..42ebc11 100644
--- a/config/linux_gpu.cmake
+++ b/config/linux_gpu.cmake
@@ -66,7 +66,7 @@ set(OPENCV_ROOT "" CACHE BOOL "OpenCV install path. Supports autodetection.")
 
 set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
 
-set(USE_MKLDNN ON CACHE BOOL "Build with MKL-DNN support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
 
 set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
 
diff --git a/docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md b/docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md
index dfe61cb..6438a5c 100644
--- a/docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md
+++ b/docs/python_docs/python/tutorials/performance/backend/mkldnn/mkldnn_readme.md
@@ -15,14 +15,14 @@
 <!--- specific language governing permissions and limitations -->
 <!--- under the License. -->
 
-# Install MXNet with MKL-DNN
+# Install MXNet with ONEDNN
 
-A better training and inference performance is expected to be achieved on Intel-Architecture CPUs with MXNet built with [Intel MKL-DNN](https://github.com/intel/mkl-dnn) on multiple operating system, including Linux, Windows and MacOS.
-In the following sections, you will find build instructions for MXNet with Intel MKL-DNN on Linux, MacOS and Windows.
+Better training and inference performance can be achieved on Intel-Architecture CPUs with MXNet built with [Intel ONEDNN](https://github.com/oneapi-src/oneDNN) on multiple operating systems, including Linux, Windows and MacOS.
+In the following sections, you will find build instructions for MXNet with Intel ONEDNN on Linux, MacOS and Windows.
 
-Please find MKL-DNN optimized operators and other features in the [MKL-DNN operator list](https://github.com/apache/incubator-mxnet/blob/v1.5.x/docs/tutorials/mkldnn/operator_list.md).
+Please find ONEDNN optimized operators and other features in the [ONEDNN operator list](https://github.com/apache/incubator-mxnet/blob/v1.5.x/docs/tutorials/mkldnn/operator_list.md).
 
-The detailed performance data collected on Intel Xeon CPU with MXNet built with Intel MKL-DNN can be found [here](https://mxnet.apache.org/api/faq/perf#intel-cpu).
+The detailed performance data collected on Intel Xeon CPU with MXNet built with Intel ONEDNN can be found [here](https://mxnet.apache.org/api/faq/perf#intel-cpu).
 
 
 <h2 id="0">Contents</h2>
@@ -55,25 +55,25 @@ git clone --recursive https://github.com/apache/incubator-mxnet.git
 cd incubator-mxnet
 ```
 
-### Build MXNet with MKL-DNN
+### Build MXNet with ONEDNN
 
 To achieve better performance, Intel OpenMP and llvm OpenMP are recommended, as in the instructions below. Otherwise, the default GNU OpenMP will be used and you may get sub-optimal performance. If you don't have the full [MKL](https://software.intel.com/en-us/intel-mkl) library installation, you might use OpenBLAS as the blas library, by setting USE_BLAS=openblas.
 
 ```
 # build with llvm OpenMP and Intel MKL/openblas
 mkdir build && cd build
-cmake -DUSE_CUDA=OFF -DUSE_MKLDNN=ON -DUSE_OPENMP=ON -DUSE_OPENCV=ON ..
+cmake -DUSE_CUDA=OFF -DUSE_ONEDNN=ON -DUSE_OPENMP=ON -DUSE_OPENCV=ON ..
 make -j $(nproc)
 ```
 
 ```
 # build with Intel MKL and Intel OpenMP
-make -j $(nproc) USE_OPENCV=1 USE_MKLDNN=1 USE_BLAS=mkl USE_INTEL_PATH=/opt/intel
+make -j $(nproc) USE_OPENCV=1 USE_ONEDNN=1 USE_BLAS=mkl USE_INTEL_PATH=/opt/intel
 ```
 
 ```
 # build with openblas and GNU OpenMP(sub-optimal performance)
-make -j $(nproc) USE_OPENCV=1 USE_MKLDNN=1 USE_BLAS=openblas
+make -j $(nproc) USE_OPENCV=1 USE_ONEDNN=1 USE_BLAS=openblas
 ```
 
 <h2 id="2">MacOS</h2>
@@ -107,15 +107,15 @@ git clone --recursive https://github.com/apache/incubator-mxnet.git
 cd incubator-mxnet
 ```
 
-### Build MXNet with MKL-DNN
+### Build MXNet with ONEDNN
 
 ```
-LIBRARY_PATH=$(brew --prefix llvm)/lib/ make -j $(sysctl -n hw.ncpu) CC=$(brew --prefix llvm)/bin/clang CXX=$(brew --prefix llvm)/bin/clang++ USE_OPENCV=1 USE_OPENMP=1 USE_MKLDNN=1 USE_BLAS=apple
+LIBRARY_PATH=$(brew --prefix llvm)/lib/ make -j $(sysctl -n hw.ncpu) CC=$(brew --prefix llvm)/bin/clang CXX=$(brew --prefix llvm)/bin/clang++ USE_OPENCV=1 USE_OPENMP=1 USE_ONEDNN=1 USE_BLAS=apple
 ```
 
 <h2 id="3">Windows</h2>
 
-On Windows, you can use [Micrsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) and [Microsoft Visual Studio 2017](https://www.visualstudio.com/downloads/) to compile MXNet with Intel MKL-DNN.
+On Windows, you can use [Microsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) and [Microsoft Visual Studio 2017](https://www.visualstudio.com/downloads/) to compile MXNet with Intel ONEDNN.
[Microsoft Visual Studio 2015](https://www.visualstudio.com/vs/older-downloads/) is recommended.
 
 **Visual Studio 2015**
@@ -136,32 +136,32 @@ After you have installed all of the required dependencies, build the MXNet sourc
 git clone --recursive https://github.com/apache/incubator-mxnet.git
 cd C:\incubator-mxent
 ```
-2. Enable Intel MKL-DNN by -DUSE_MKLDNN=1. Use [CMake 3](https://cmake.org/) to create a Visual Studio solution in ```./build```. Make sure to specify the architecture in the
+2. Enable Intel ONEDNN by -DUSE_ONEDNN=1. Use [CMake 3](https://cmake.org/) to create a Visual Studio solution in ```./build```. Make sure to specify the architecture in the
 command:
 ```
 >mkdir build
 >cd build
->cmake -G "Visual Studio 14 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_MKLDNN=1 -DCMAKE_BUILD_TYPE=Release
+>cmake -G "Visual Studio 14 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=open -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_ONEDNN=1 -DCMAKE_BUILD_TYPE=Release
 ```
-3. Enable Intel MKL-DNN and Intel MKL as BLAS library by the command:
+3. Enable Intel ONEDNN and Intel MKL as the BLAS library with the following commands:
 ```
 >"C:\Program Files (x86)\IntelSWTools\compilers_and_libraries\windows\mkl\bin\mklvars.bat" intel64
->cmake -G "Visual Studio 14 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=mkl -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_MKLDNN=1 -DCMAKE_BUILD_TYPE=Release
+>cmake -G "Visual Studio 14 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=mkl -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_ONEDNN=1 -DCMAKE_BUILD_TYPE=Release
 ```
 4. After CMake has completed successfully, open the solution file ```.sln``` in Visual Studio and compile it, or compile the MXNet source code using the following command:
 ```r
 msbuild mxnet.sln /p:Configuration=Release;Platform=x64 /maxcpucount
 ```
-   These commands produce mxnet library called ```libmxnet.dll``` in the ```./build/Release/``` or ```./build/Debug``` folder. Also ```libmkldnn.dll``` with be in the ```./build/3rdparty/mkldnn/src/Release/```
+   These commands produce an MXNet library called ```libmxnet.dll``` in the ```./build/Release/``` or ```./build/Debug``` folder, and ```libmkldnn.dll``` in the ```./build/3rdparty/onednn/src/Release/``` folder.
 
 5. Make sure that all the DLL files used above (such as `libmkldnn.dll`, `libmklml*.dll`, `libiomp5.dll`, `libopenblas*.dll`, etc.) are added to the system PATH. For convenience, you can copy all of them to ```\windows\system32```. Otherwise, you will come across `Not Found Dependencies` errors when loading MXNet.
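
A quick way to check that the DLLs resolve from the system PATH is to probe them with `ctypes` before importing MXNet. This is a hypothetical helper, not part of the official build steps; the DLL names are the ones listed above:

```python
import ctypes

# If a dependency is missing from PATH, ctypes.CDLL raises OSError with details.
for dll in ("libmkldnn.dll", "libiomp5.dll", "libopenblas.dll"):
    try:
        ctypes.CDLL(dll)
        print(dll, "loaded")
    except OSError as err:
        print(dll, "failed to load:", err)
```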
 
 **Visual Studio 2017**
 
-User can follow the same steps of Visual Studio 2015 to build MXNET with MKL-DNN, but change the version related command, for example,```C:\opencv\build\x64\vc15\bin``` and build command is as below:
+You can follow the same steps as for Visual Studio 2015 to build MXNet with ONEDNN, but change the version-related paths (for example, ```C:\opencv\build\x64\vc15\bin```); the build command is as below:
 
 ```
->cmake -G "Visual Studio 15 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=mkl -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_MKLDNN=1 -DCMAKE_BUILD_TYPE=Release
+>cmake -G "Visual Studio 15 Win64" .. -DUSE_CUDA=0 -DUSE_CUDNN=0 -DUSE_NVRTC=0 -DUSE_OPENCV=1 -DUSE_OPENMP=1 -DUSE_PROFILER=1 -DUSE_BLAS=mkl -DUSE_LAPACK=1 -DUSE_DIST_KVSTORE=0 -DCUDA_ARCH_NAME=All -DUSE_ONEDNN=1 -DCMAKE_BUILD_TYPE=Release
 
 ```
 
@@ -183,9 +183,9 @@ Expected Output:
 [[ 2.  2.  2.]
  [ 2.  2.  2.]]
 ```
-### Verify whether MKL-DNN works
+### Verify whether ONEDNN works
 
-After MXNet is installed, you can verify if MKL-DNN backend works well with a single Convolution layer.
+After MXNet is installed, you can verify whether the ONEDNN backend works well with a single Convolution layer.
 ```
 import mxnet as mx
 import numpy as np
@@ -212,7 +212,7 @@ More detailed debugging and profiling information can be logged by setting the e
 ```
 export MKLDNN_VERBOSE=1
 ```
-For example, by running above code snippet, the following debugging logs providing more insights on MKL-DNN primitives `convolution` and `reorder`. That includes: Memory layout, infer shape and the time cost of primitive execution.
+For example, running the above code snippet produces the following debugging logs, which provide more insight into the ONEDNN primitives `convolution` and `reorder`, including the memory layout, inferred shape, and time cost of primitive execution.
 ```
 dnnl_verbose,info,DNNL v1.1.2 (commit cb2cc7ac17ff4e2ef50805c7048d33256d82be4d)
 dnnl_verbose,info,Detected ISA is Intel AVX-512 with Intel DL Boost
@@ -223,7 +223,7 @@ dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:abcd:f0 dst_f32::bl
 dnnl_verbose,exec,cpu,reorder,jit:uni,undef,src_f32::blocked:aBcd16b:f0 dst_f32::blocked:abcd:f0,,,32x32x256x256,35.9771
 ```
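
For reference, a self-contained convolution check along these lines might look as follows. This is a sketch using the NDArray API (`mx.nd.Convolution`), with shapes chosen to match the `32x32x256x256` tensors in the logs above; run it with `MKLDNN_VERBOSE=1` to reproduce similar output:

```python
import mxnet as mx

num_filter, kernel = 32, (3, 3)
data = mx.nd.random.uniform(shape=(32, 32, 256, 256))
weight = mx.nd.random.uniform(shape=(num_filter, 32) + kernel)
bias = mx.nd.random.uniform(shape=(num_filter,))

# One forward pass on CPU; with an ONEDNN build this triggers the
# convolution primitive plus the reorders shown in the log above.
out = mx.nd.Convolution(data=data, weight=weight, bias=bias,
                        kernel=kernel, num_filter=num_filter)
out.wait_to_read()
print(out.shape)
```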
 
-You can find step-by-step guidance to do profiling for MKLDNN primitives in [Profiling MKLDNN Operators](https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/profiler.html#Profiling-MKLDNN-Operators).
+You can find step-by-step guidance on profiling ONEDNN primitives in [Profiling ONEDNN Operators](https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/profiler.html#Profiling-MKLDNN-Operators).
 
 <h2 id="5">Enable MKL BLAS</h2>
 
@@ -293,7 +293,7 @@ This limitations of this experimental feature are:
 
 <h2 id="7">Quantization and Inference with INT8</h2>
 
-Benefiting from Intel MKL-DNN, MXNet built with Intel MKL-DNN brings outstanding performance improvement on quantization and inference with INT8 Intel CPU Platform on Intel Xeon Scalable Platform.
+Benefiting from Intel ONEDNN, MXNet built with Intel ONEDNN brings outstanding performance improvements for quantization and INT8 inference on the Intel Xeon Scalable platform.
 
 - [CNN Quantization Examples](https://github.com/apache/incubator-mxnet/tree/master/example/quantization).
 
@@ -303,6 +303,6 @@ Benefiting from Intel MKL-DNN, MXNet built with Intel MKL-DNN brings outstanding
 
 - For questions or support specific to MKL, visit the [Intel MKL](https://software.intel.com/en-us/mkl) website.
 
-- For questions or support specific to MKL, visit the [Intel MKLDNN](https://github.com/intel/mkl-dnn) website.
+- For questions or support specific to ONEDNN, visit the [Intel ONEDNN](https://github.com/oneapi-src/oneDNN) website.
 
-- If you find bugs, please open an issue on GitHub for [MXNet with MKL](https://github.com/apache/incubator-mxnet/labels/MKL) or [MXNet with MKLDNN](https://github.com/apache/incubator-mxnet/labels/MKLDNN).
+- If you find bugs, please open an issue on GitHub for [MXNet with MKL](https://github.com/apache/incubator-mxnet/labels/MKL) or [MXNet with ONEDNN](https://github.com/apache/incubator-mxnet/labels/MKLDNN).
diff --git a/docs/python_docs/python/tutorials/performance/backend/profiler.md b/docs/python_docs/python/tutorials/performance/backend/profiler.md
index ecd9fc8..5585ccd 100644
--- a/docs/python_docs/python/tutorials/performance/backend/profiler.md
+++ b/docs/python_docs/python/tutorials/performance/backend/profiler.md
@@ -210,12 +210,12 @@ Let's zoom in to check the time taken by operators
 
 The above picture visualizes the sequence in which the operators were executed and the time taken by each operator.
 
-### Profiling MKLDNN Operators
-Reagrding MKLDNN operators, the library has already provided the internal profiling tool. Firstly, you need set `MKLDNN_VERBOSE=1` to enable internal profiler.
+### Profiling ONEDNN Operators
+Regarding ONEDNN operators, the library already provides an internal profiling tool. First, set `MKLDNN_VERBOSE=1` to enable the internal profiler.
 
 `$ MKLDNN_VERBOSE=1 python my_script.py > mkldnn_verbose.log`
 
-Now, the detailed profiling insights of each mkldnn prmitive are saved into `mkldnn_verbose.log` (like below).
+Now, the detailed profiling insights for each ONEDNN primitive are saved into `mkldnn_verbose.log` (as below).
 
 ```
 dnnl_verbose,info,DNNL v1.1.2 (commit cb2cc7ac17ff4e2ef50805c7048d33256d82be4d)
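
To turn the raw log into a per-primitive time summary, a small parser is enough. This is a sketch assuming the CSV layout shown in the log lines, where the fourth field is the primitive name and the last field is the execution time in milliseconds:

```python
from collections import defaultdict

totals = defaultdict(float)
with open("mkldnn_verbose.log") as log:
    for line in log:
        parts = line.strip().split(",")
        # Only "exec" records carry a trailing time-in-ms field.
        if len(parts) > 4 and parts[:2] == ["dnnl_verbose", "exec"]:
            try:
                totals[parts[3]] += float(parts[-1])
            except ValueError:
                continue  # skip malformed or truncated lines

for primitive, ms in sorted(totals.items(), key=lambda kv: -kv[1]):
    print(f"{primitive:20s} {ms:10.3f} ms")
```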
diff --git a/docs/static_site/src/_includes/get_started/cloud/cpu.md b/docs/static_site/src/_includes/get_started/cloud/cpu.md
index 4405827..8dfcbb4 100644
--- a/docs/static_site/src/_includes/get_started/cloud/cpu.md
+++ b/docs/static_site/src/_includes/get_started/cloud/cpu.md
@@ -13,4 +13,4 @@ the [Download page](https://mxnet.apache.org/get_started/download).
 * **Amazon Web Services**
 - [AWS Deep Learning AMI](https://aws.amazon.com/machine-learning/amis/) - Preinstalled
 Conda environments
-for Python 2 or 3 with MXNet and MKL-DNN.
+for Python 2 or 3 with MXNet and ONEDNN.
diff --git a/docs/static_site/src/_includes/get_started/cloud/gpu.md b/docs/static_site/src/_includes/get_started/cloud/gpu.md
index 8f64a3a..3bdf006 100644
--- a/docs/static_site/src/_includes/get_started/cloud/gpu.md
+++ b/docs/static_site/src/_includes/get_started/cloud/gpu.md
@@ -18,7 +18,7 @@ VM](https://docs.nvidia.com/ngc/ngc-alibaba-setup-guide/launching-nv-cloud-vm-co
 MXNet models
 - [AWS Deep Learning AMI](https://aws.amazon.com/machine-learning/amis/) - Preinstalled
 Conda environments
-for Python 2 or 3 with MXNet, CUDA, cuDNN, MKL-DNN, and AWS Elastic Inference
+for Python 2 or 3 with MXNet, CUDA, cuDNN, ONEDNN, and AWS Elastic Inference
 - [Dynamic Training on
 AWS](https://github.com/awslabs/dynamic-training-with-apache-mxnet-on-aws) -
 experimental manual EC2 setup or semi-automated CloudFormation setup
diff --git a/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md b/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md
index 08cfea1..ea9091a 100644
--- a/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md
+++ b/docs/static_site/src/pages/api/cpp/docs/tutorials/multi_threaded_inference.md
@@ -79,7 +79,7 @@ $ cd example/multi_threaded_inference
 $ make
 ```
 
-If you have built mxnet from source with cmake, please uncomment the specific lines for cmake build or set the following environment variables: `MKLDNN_BUILD_DIR (default is $(MXNET_ROOT)/3rdparty/mkldnn/build)`, `MKLDNN_INCLUDE_DIR (default is $(MXNET_ROOT)/3rdparty/mkldnn/include)`, `MXNET_LIB_DIR (default is $(MXNET_ROOT)/lib)`.
+If you have built MXNet from source with CMake, please uncomment the specific lines for the CMake build or set the following environment variables: `MKLDNN_BUILD_DIR (default is $(MXNET_ROOT)/3rdparty/onednn/build)`, `MKLDNN_INCLUDE_DIR (default is $(MXNET_ROOT)/3rdparty/onednn/include)`, `MXNET_LIB_DIR (default is $(MXNET_ROOT)/lib)`.
 
 ### Run multi threaded inference example
 The example is tested with models such as `imagenet1k-inception-bn`, `imagenet1k-resnet-50`,
@@ -166,7 +166,7 @@ The above code outputs results for different threads and cleans up the thread sa
 
 1. Only operators tested with the existing model coverage are supported. Other operators and operator types (stateful operators, custom operators) are not supported. Existing model coverage is as follows (this list will keep growing as we test more models with different model types):
 
-|Models Tested|MKLDNN|CUDNN|NO-CUDNN|
+|Models Tested|ONEDNN|CUDNN|NO-CUDNN|
 | --- | --- | --- | --- |
 | imagenet1k-resnet-18 | Yes | Yes | Yes |
 | imagenet1k-resnet-152 | Yes | Yes | Yes |
diff --git a/docs/static_site/src/pages/api/faq/cloud.md b/docs/static_site/src/pages/api/faq/cloud.md
index dd1643c..894b83e 100644
--- a/docs/static_site/src/pages/api/faq/cloud.md
+++ b/docs/static_site/src/pages/api/faq/cloud.md
@@ -55,7 +55,7 @@ on how to connect to a Jupyter notebook running on an EC2 instance.
 
 [Deep Learning Base AMIs](https://aws.amazon.com/marketplace/search/results?x=0&y=0&searchTerms=Deep+Learning+Base+AMI)
 provide a foundational image with NVIDIA CUDA, cuDNN, GPU drivers, Intel
-MKL-DNN, Docker and Nvidia-Docker, etc. for deploying your own custom deep
+ONEDNN, Docker and Nvidia-Docker, etc. for deploying your own custom deep
 learning environment. You may follow the [MXNet Build From Source
 instructions](https://mxnet.apache.org/get_started/build_from_source) easily on
 the Deep Learning Base AMIs.
diff --git a/docs/static_site/src/pages/api/faq/env_var.md b/docs/static_site/src/pages/api/faq/env_var.md
index b28e27b..eaead19 100644
--- a/docs/static_site/src/pages/api/faq/env_var.md
+++ b/docs/static_site/src/pages/api/faq/env_var.md
@@ -326,12 +326,12 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
 
 * MXNET_MKLDNN_ENABLED
   - Values: 0, 1 ```(default=1)```
-  - Flag to enable or disable MKLDNN accelerator. On by default.
-  - Only applies to mxnet that has been compiled with MKLDNN (```pip install mxnet-mkl``` or built from source with ```USE_MKLDNN=1```)
+  - Flag to enable or disable the ONEDNN accelerator. On by default.
+  - Only applies to mxnet that has been compiled with ONEDNN (```pip install mxnet``` or built from source with ```USE_ONEDNN=1```)
 
 * MXNET_MKLDNN_CACHE_NUM
   - Values: Int ```(default=-1)```
-  - Flag to set num of elements that MKLDNN cache can hold. Default is -1 which means cache size is unbounded. Should only be set if your model has variable input shapes, as cache size may grow unbounded. The number represents the number of items in the cache and is proportional to the number of layers that use MKLDNN and different input shape.
+  - Sets the number of elements the ONEDNN cache can hold. The default is -1, which means the cache size is unbounded. Set this only if your model has variable input shapes, as the cache may otherwise grow unbounded. The number represents the number of items in the cache and is proportional to the number of layers that use ONEDNN with distinct input shapes.
 
 * MXNET_ENFORCE_DETERMINISM
   - Values: 0(false) or 1(true) ```(default=0)```
@@ -371,9 +371,9 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
   - This variable controls how many CuDNN dropout state resources to create for each GPU context for use in operator.
 
 * MXNET_SUBGRAPH_BACKEND
-  - Values: String ```(default="MKLDNN")``` if MKLDNN is avaliable, otherwise ```(default="")```
+  - Values: String ```(default="MKLDNN")``` if ONEDNN is available, otherwise ```(default="")```
   - This variable controls the subgraph partitioning in MXNet.
-  - This variable is used to perform MKL-DNN FP32 operator fusion and quantization. Please refer to the [MKL-DNN operator list](https://github.com/apache/incubator-mxnet/blob/v1.5.x/docs/tutorials/mkldnn/operator_list.md) for how this variable is used and the list of fusion passes.
+  - This variable is used to perform ONEDNN FP32 operator fusion and quantization. Please refer to the [ONEDNN operator list](https://github.com/apache/incubator-mxnet/blob/v1.5.x/docs/tutorials/mkldnn/operator_list.md) for how this variable is used and the list of fusion passes.
   - Set ```MXNET_SUBGRAPH_BACKEND=NONE``` to disable subgraph backend.
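
  These flags can be exported in the shell or, as a sketch, set from Python before `mxnet` is imported so that the library sees them at startup (values as documented in the entries above):

  ```python
  import os

  # Set before `import mxnet` so the backend picks the values up.
  os.environ["MXNET_MKLDNN_ENABLED"] = "1"       # keep the ONEDNN accelerator on
  os.environ["MXNET_MKLDNN_CACHE_NUM"] = "-1"    # unbounded primitive cache (the default)
  os.environ["MXNET_SUBGRAPH_BACKEND"] = "NONE"  # disable subgraph partitioning

  import mxnet as mx
  ```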
 
 * MXNET_SAFE_ACCUMULATION
@@ -399,9 +399,9 @@ If ctypes is used, it must be `mxnet._ctypes.ndarray.NDArrayBase`.
   - Values: 0(false) or 1(true) ```(default=1)```
   - If this variable is set, MXNet will simplify the computation graph, eliminating duplicated operations on the same inputs.
 
-* MXNET_USE_MKLDNN_RNN
+* MXNET_USE_ONEDNN_RNN
   - Values: 0(false) or 1(true) ```(default=1)```
-  - This variable controls whether to use the MKL-DNN backend in fused RNN operator for CPU context. There are two fusion implementations of RNN operator in MXNet. The MKL-DNN implementation has a better performance than the naive one, but the latter is more stable in the backward operation currently.
+  - This variable controls whether to use the ONEDNN backend in the fused RNN operator for the CPU context. There are two fused implementations of the RNN operator in MXNet. The ONEDNN implementation has better performance than the naive one, but the latter is currently more stable in the backward pass.
 
 * MXNET_FC_TRUE_FP16
   - Values: 0(false) or 1(true) ```(default=0)```
diff --git a/docs/static_site/src/pages/api/faq/large_tensor_support.md b/docs/static_site/src/pages/api/faq/large_tensor_support.md
index ab251a7..247720f 100644
--- a/docs/static_site/src/pages/api/faq/large_tensor_support.md
+++ b/docs/static_site/src/pages/api/faq/large_tensor_support.md
@@ -141,9 +141,9 @@ Backward pass is partially supported and not completely tested, so it is conside
 
 Not supported:
 
-* GPU and MKLDNN. 
+* GPU and ONEDNN. 
 * Windows, ARM, or any operating system other than Ubuntu.
-* Any permutation of MXNet wheel that contains MKLDNN. 
+* Any permutation of MXNet wheel that contains ONEDNN. 
 * Other language bindings like Scala, Java, R, and Julia.
 
 
diff --git a/docs/static_site/src/pages/api/faq/perf.md b/docs/static_site/src/pages/api/faq/perf.md
index c20d8da..d085fc0 100644
--- a/docs/static_site/src/pages/api/faq/perf.md
+++ b/docs/static_site/src/pages/api/faq/perf.md
@@ -49,7 +49,7 @@ When using Intel Xeon CPUs for training and inference, the `mxnet-mkl` package i
 $ pip install mxnet-mkl [--pre]
 ```
 
-Or build MXNet from source code with `USE_MKLDNN=1`. For Linux users, `USE_MKLDNN=1` will be turned on by default.
+Or build MXNet from source code with `USE_ONEDNN=1`. For Linux users, `USE_ONEDNN=1` is turned on by default.
 
 We also find that setting the following environment variables can help:
 
@@ -58,7 +58,7 @@ We also find that setting the following environment variables can help:
 | :-------- | :---------- |
 | `OMP_NUM_THREADS`            | Suggested value: `vCPUs / 2` in which `vCPUs` is the number of virtual CPUs. For more information, please see the guide for [setting the number of threads using an OpenMP environment variable](https://software.intel.com/en-us/mkl-windows-developer-guide-setting-the-number-of-threads-using-an-openmp-environment-variable) |
 | `KMP_AFFINITY`               | Suggested value: `granularity=fine,compact,1,0`.  For more information, please see the guide for [Thread Affinity Interface (Linux* and Windows*)](https://software.intel.com/en-us/node/522691). |
-| `MXNET_SUBGRAPH_BACKEND` | Set to MKLDNN to enable the [subgraph feature](https://cwiki.apache.org/confluence/display/MXNET/MXNet+Graph+Optimization+and+Quantization+based+on+subgraph+and+MKL-DNN) for better performance. For more information please see [Build/Install MXNet with MKL-DNN](https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/mkldnn/mkldnn_readme.html)|
+| `MXNET_SUBGRAPH_BACKEND` | Set to ONEDNN to enable the [subgraph feature](https://cwiki.apache.org/confluence/display/MXNET/MXNet+Graph+Optimization+and+Quantization+based+on+subgraph+and+MKL-DNN) for better performance. For more information, please see [Build/Install MXNet with ONEDNN](https://mxnet.apache.org/api/python/docs/tutorials/performance/backend/mkldnn/mkldnn_readme.html)|
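
Applied from Python, the suggested settings might look like the sketch below; `multiprocessing.cpu_count()` stands in for the vCPU count, and `"MKLDNN"` is the default backend string documented for `MXNET_SUBGRAPH_BACKEND`:

```python
import multiprocessing
import os

# Suggested value: half of the virtual CPUs.
vcpus = multiprocessing.cpu_count()
os.environ["OMP_NUM_THREADS"] = str(max(1, vcpus // 2))
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
os.environ["MXNET_SUBGRAPH_BACKEND"] = "MKLDNN"

import mxnet as mx  # import after the environment is configured
```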
 
 Note that _MXNet_ treats all CPUs on a single machine as a single device.
 So whether you specify `cpu(0)` or `cpu()`, _MXNet_ will use all CPU cores on the machine.
diff --git a/docs/static_site/src/pages/api/faq/tensor_inspector_tutorial.md b/docs/static_site/src/pages/api/faq/tensor_inspector_tutorial.md
index 8d6838e..1212524 100644
--- a/docs/static_site/src/pages/api/faq/tensor_inspector_tutorial.md
+++ b/docs/static_site/src/pages/api/faq/tensor_inspector_tutorial.md
@@ -168,7 +168,7 @@ Notice: in `interactive_print()`, you could also do value dumping with command "
 
 ### Test Coverage and Limitations
 
-This utility has been tested on Mac and Ubuntu with and without CUDNN and MKLDNN. Supports for `Tensor`, `TBlob`, and `NDArray`, as well as for CPU and GPU have been manually tested. 
+This utility has been tested on Mac and Ubuntu with and without CUDNN and ONEDNN. Support for `Tensor`, `TBlob`, and `NDArray`, as well as for CPU and GPU, has been manually tested.
 
 Currently, this utility only supports non-empty tensors and tensors with known shapes i.e. `tb_.ndim() > 0`. Also, this utility only supports dense `NDArray` objects, i.e. when the type is `kDefaultStorage`. 
 
diff --git a/example/README.md b/example/README.md
index d0eafa7..f145600 100644
--- a/example/README.md
+++ b/example/README.md
@@ -106,7 +106,7 @@ If your tutorial depends on specific packages, simply add them to this provision
 * [Kaggle 2nd national data science bowl](kaggle-ndsb2) - a tutorial for Kaggle Second Nation Data Science Bowl
 * [Multi-task Learning](multi-task) - how to use MXNet for multi-task learning
 * [Profiling](profiler) - generate profiling results in json files
-* [Quantization and Calibration Examples](quantization) - examples of quantizing a FP32 model to INT8 and performing low-precision inference with Intel MKL-DNN on CPU or cuDNN on GPU
+* [Quantization and Calibration Examples](quantization) - examples of quantizing a FP32 model to INT8 and performing low-precision inference with Intel ONEDNN on CPU or cuDNN on GPU
 * [Recommender Systems](recommenders) - examples of how to build various kinds of recommender systems
 * [Restricted Boltzmann Machine](restricted-boltzmann-machine) - an example of the binary restricted Boltzmann machine learning MNIST
 * [Single Shot MultiBox Detector](ssd) - SSD object recognition example
diff --git a/example/multi_threaded_inference/Makefile b/example/multi_threaded_inference/Makefile
index 6dba117..49403b5 100644
--- a/example/multi_threaded_inference/Makefile
+++ b/example/multi_threaded_inference/Makefile
@@ -16,7 +16,7 @@
 # under the License.
 
 
-CFLAGS=-std=c++17 -g -Wno-unknown-pragmas -Wall -DMXNET_USE_CUDA=1 -DMXNET_USE_CUDNN=1 -DMXNET_USE_MKLDNN=1
+CFLAGS=-std=c++17 -g -Wno-unknown-pragmas -Wall -DMXNET_USE_CUDA=1 -DMXNET_USE_CUDNN=1 -DMXNET_USE_ONEDNN=1
 
 export MXNET_ROOT = `pwd`/../..
 
@@ -28,17 +28,17 @@ ifndef USE_CUDA_PATH
 endif
 
 ifndef MKLDNN_BUILD_DIR
-    export MKLDNN_BUILD_DIR = $(MXNET_ROOT)/3rdparty/mkldnn/build
+    export MKLDNN_BUILD_DIR = $(MXNET_ROOT)/3rdparty/onednn/build
     # Cmake build path by default
     # Uncomment below line for CMake build
-    #export MKLDNN_BUILD_DIR = $(MXNET_ROOT)/build/3rdparty/mkldnn
+    #export MKLDNN_BUILD_DIR = $(MXNET_ROOT)/build/3rdparty/onednn
 endif
 
 ifndef MKLDNN_INCLUDE_DIR
-    export MKLDNN_INCLUDE_DIR = $(MXNET_ROOT)/3rdparty/mkldnn/include
+    export MKLDNN_INCLUDE_DIR = $(MXNET_ROOT)/3rdparty/onednn/include
     # Cmake build path by default
     # Uncomment below line for CMake build
-    #export MKLDNN_INCLUDE_DIR = $(MXNET_ROOT)/3rdparty/mkldnn/include
+    #export MKLDNN_INCLUDE_DIR = $(MXNET_ROOT)/3rdparty/onednn/include
 endif
 
 CFLAGS += -I$(MXNET_ROOT)/include -I$(USE_CUDA_PATH)/include -I$(MKLDNN_INCLUDE_DIR) -I$(MKLDNN_BUILD_DIR)/include
diff --git a/include/mkldnn/dnnl.h b/include/mkldnn/dnnl.h
deleted file mode 120000
index 44625f5..0000000
--- a/include/mkldnn/dnnl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl.hpp b/include/mkldnn/dnnl.hpp
deleted file mode 120000
index 4dfc038..0000000
--- a/include/mkldnn/dnnl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_config.h b/include/mkldnn/dnnl_config.h
deleted file mode 120000
index ff3719d..0000000
--- a/include/mkldnn/dnnl_config.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_config.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_debug.h b/include/mkldnn/dnnl_debug.h
deleted file mode 120000
index db549ed..0000000
--- a/include/mkldnn/dnnl_debug.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_debug.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_ocl.h b/include/mkldnn/dnnl_ocl.h
deleted file mode 120000
index ecc7f34..0000000
--- a/include/mkldnn/dnnl_ocl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_ocl.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_ocl.hpp b/include/mkldnn/dnnl_ocl.hpp
deleted file mode 120000
index 3f4fec4..0000000
--- a/include/mkldnn/dnnl_ocl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_ocl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_sycl.h b/include/mkldnn/dnnl_sycl.h
deleted file mode 120000
index 4c1bfe5..0000000
--- a/include/mkldnn/dnnl_sycl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_sycl.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_sycl.hpp b/include/mkldnn/dnnl_sycl.hpp
deleted file mode 120000
index 8837231..0000000
--- a/include/mkldnn/dnnl_sycl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_sycl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_sycl_types.h b/include/mkldnn/dnnl_sycl_types.h
deleted file mode 120000
index 94461bc..0000000
--- a/include/mkldnn/dnnl_sycl_types.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_sycl_types.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_threadpool.h b/include/mkldnn/dnnl_threadpool.h
deleted file mode 120000
index 5ed7f64..0000000
--- a/include/mkldnn/dnnl_threadpool.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_threadpool.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_threadpool.hpp b/include/mkldnn/dnnl_threadpool.hpp
deleted file mode 120000
index dff43b0..0000000
--- a/include/mkldnn/dnnl_threadpool.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_threadpool.hpp
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_threadpool_iface.hpp b/include/mkldnn/dnnl_threadpool_iface.hpp
deleted file mode 120000
index f651ff1..0000000
--- a/include/mkldnn/dnnl_threadpool_iface.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_threadpool_iface.hpp
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_types.h b/include/mkldnn/dnnl_types.h
deleted file mode 120000
index 750b64c..0000000
--- a/include/mkldnn/dnnl_types.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_types.h
\ No newline at end of file
diff --git a/include/mkldnn/dnnl_version.h b/include/mkldnn/dnnl_version.h
deleted file mode 120000
index a4fde02..0000000
--- a/include/mkldnn/dnnl_version.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/dnnl_version.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn.h b/include/mkldnn/mkldnn.h
deleted file mode 120000
index 873c515..0000000
--- a/include/mkldnn/mkldnn.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn.hpp b/include/mkldnn/mkldnn.hpp
deleted file mode 120000
index 2cb212a..0000000
--- a/include/mkldnn/mkldnn.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn.hpp
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn_config.h b/include/mkldnn/mkldnn_config.h
deleted file mode 120000
index 8f5259e..0000000
--- a/include/mkldnn/mkldnn_config.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn_config.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn_debug.h b/include/mkldnn/mkldnn_debug.h
deleted file mode 120000
index a67617c..0000000
--- a/include/mkldnn/mkldnn_debug.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn_debug.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn_dnnl_mangling.h b/include/mkldnn/mkldnn_dnnl_mangling.h
deleted file mode 120000
index 876ad64..0000000
--- a/include/mkldnn/mkldnn_dnnl_mangling.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn_dnnl_mangling.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn_types.h b/include/mkldnn/mkldnn_types.h
deleted file mode 120000
index 548b884..0000000
--- a/include/mkldnn/mkldnn_types.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn_types.h
\ No newline at end of file
diff --git a/include/mkldnn/mkldnn_version.h b/include/mkldnn/mkldnn_version.h
deleted file mode 120000
index 76927f2..0000000
--- a/include/mkldnn/mkldnn_version.h
+++ /dev/null
@@ -1 +0,0 @@
-../../3rdparty/mkldnn/include/mkldnn_version.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl.h b/include/mkldnn/oneapi/dnnl/dnnl.h
deleted file mode 120000
index ee11f50..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl.hpp b/include/mkldnn/oneapi/dnnl/dnnl.hpp
deleted file mode 120000
index 22635d6..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_debug.h b/include/mkldnn/oneapi/dnnl/dnnl_debug.h
deleted file mode 120000
index 982bc21..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_debug.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_debug.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_ocl.h b/include/mkldnn/oneapi/dnnl/dnnl_ocl.h
deleted file mode 120000
index 85970c1..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_ocl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_ocl.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_ocl.hpp b/include/mkldnn/oneapi/dnnl/dnnl_ocl.hpp
deleted file mode 120000
index 4fcef2c..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_ocl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_ocl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_sycl.h b/include/mkldnn/oneapi/dnnl/dnnl_sycl.h
deleted file mode 120000
index e39828a..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_sycl.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_sycl.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_sycl.hpp b/include/mkldnn/oneapi/dnnl/dnnl_sycl.hpp
deleted file mode 120000
index 240f3ce..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_sycl.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_sycl.hpp
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_sycl_types.h b/include/mkldnn/oneapi/dnnl/dnnl_sycl_types.h
deleted file mode 120000
index 57e21d0..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_sycl_types.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_sycl_types.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_threadpool.h b/include/mkldnn/oneapi/dnnl/dnnl_threadpool.h
deleted file mode 120000
index 025a0d3..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_threadpool.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_threadpool.h
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_threadpool.hpp b/include/mkldnn/oneapi/dnnl/dnnl_threadpool.hpp
deleted file mode 120000
index f0d9325..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_threadpool.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_threadpool.hpp
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_threadpool_iface.hpp b/include/mkldnn/oneapi/dnnl/dnnl_threadpool_iface.hpp
deleted file mode 120000
index e650ecb..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_threadpool_iface.hpp
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_threadpool_iface.hpp
\ No newline at end of file
diff --git a/include/mkldnn/oneapi/dnnl/dnnl_types.h b/include/mkldnn/oneapi/dnnl/dnnl_types.h
deleted file mode 120000
index 88baa15..0000000
--- a/include/mkldnn/oneapi/dnnl/dnnl_types.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../3rdparty/mkldnn/include/oneapi/dnnl/dnnl_types.h
\ No newline at end of file
diff --git a/include/mxnet/base.h b/include/mxnet/base.h
index 6d30f98..e610e2c 100644
--- a/include/mxnet/base.h
+++ b/include/mxnet/base.h
@@ -541,7 +541,7 @@ inline std::ostream& operator<<(std::ostream &out, const Context &ctx) {
 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__)
 
 
-#if MXNET_USE_MKLDNN == 1 || MXNET_USE_INTGEMM == 1
+#if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1
 constexpr size_t kMKLDNNAlign = 64;
 #endif
 
diff --git a/include/mxnet/libinfo.h b/include/mxnet/libinfo.h
index 9f640d7..6eee0a9 100644
--- a/include/mxnet/libinfo.h
+++ b/include/mxnet/libinfo.h
@@ -103,8 +103,8 @@
 #define MXNET_USE_LAPACK 0
 #endif
 
-#ifndef MXNET_USE_MKLDNN
-#define MXNET_USE_MKLDNN 0
+#ifndef MXNET_USE_ONEDNN
+#define MXNET_USE_ONEDNN 0
 #endif
 
 #ifndef MXNET_USE_OPENMP
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index 7c406f4..f01c67f 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -37,7 +37,7 @@
 #include <algorithm>
 #include <memory>
 #include <algorithm>
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <mkldnn.hpp>
 #endif
 #include "./base.h"
@@ -727,7 +727,7 @@ class NDArray {
     ptr_->CheckAndAllocAuxData(i, aux_shape);
   }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   /*
    * Create NDArray from mkldnn memory.
    * mkldnn_mem The mkldnn memory to be managed.
@@ -859,7 +859,7 @@ class NDArray {
     */
     std::vector<Storage::Handle> aux_handles;
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     /*! This is created when data is stored in MKLDNN format.
      */
     std::shared_ptr<MKLDNNMemory> mkl_mem_;
@@ -1018,7 +1018,7 @@ class NDArray {
     inline void CheckAndAlloc(void) {
       if (delay_alloc) {
         Storage::Get()->Alloc(&shandle);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         mkl_mem_ = nullptr;
 #endif
         delay_alloc = false;
@@ -1034,7 +1034,7 @@ class NDArray {
       if (delay_alloc) {
         shandle.size = dbytes;
         Storage::Get()->Alloc(&shandle);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         mkl_mem_ = nullptr;
 #endif
         delay_alloc = false;
@@ -1044,7 +1044,7 @@ class NDArray {
         // init storage
         shandle.size = dbytes;
         Storage::Get()->Alloc(&shandle);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         mkl_mem_ = nullptr;
 #endif
       }
@@ -1080,7 +1080,7 @@ class NDArray {
     // and allocate new storage
     void CheckAndAllocData(const mxnet::TShape &shape, int dtype);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     // Have MKL memory reference to the data in the default storage
     // or create memory for MKLDNN.
     void SetMKLMem(const mxnet::TShape &shape, int dtype);
diff --git a/include/onednn/dnnl.h b/include/onednn/dnnl.h
new file mode 120000
index 0000000..bc5443e
--- /dev/null
+++ b/include/onednn/dnnl.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl.h
\ No newline at end of file
diff --git a/include/onednn/dnnl.hpp b/include/onednn/dnnl.hpp
new file mode 120000
index 0000000..db10025
--- /dev/null
+++ b/include/onednn/dnnl.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl.hpp
\ No newline at end of file
diff --git a/include/onednn/dnnl_config.h b/include/onednn/dnnl_config.h
new file mode 120000
index 0000000..03a1060
--- /dev/null
+++ b/include/onednn/dnnl_config.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_config.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_debug.h b/include/onednn/dnnl_debug.h
new file mode 120000
index 0000000..489991d
--- /dev/null
+++ b/include/onednn/dnnl_debug.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_debug.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_ocl.h b/include/onednn/dnnl_ocl.h
new file mode 120000
index 0000000..09509a3
--- /dev/null
+++ b/include/onednn/dnnl_ocl.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_ocl.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_ocl.hpp b/include/onednn/dnnl_ocl.hpp
new file mode 120000
index 0000000..2ca3edc
--- /dev/null
+++ b/include/onednn/dnnl_ocl.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_ocl.hpp
\ No newline at end of file
diff --git a/include/onednn/dnnl_sycl.h b/include/onednn/dnnl_sycl.h
new file mode 120000
index 0000000..b998f38
--- /dev/null
+++ b/include/onednn/dnnl_sycl.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_sycl.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_sycl.hpp b/include/onednn/dnnl_sycl.hpp
new file mode 120000
index 0000000..348184f
--- /dev/null
+++ b/include/onednn/dnnl_sycl.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_sycl.hpp
\ No newline at end of file
diff --git a/include/onednn/dnnl_sycl_types.h b/include/onednn/dnnl_sycl_types.h
new file mode 120000
index 0000000..a710056
--- /dev/null
+++ b/include/onednn/dnnl_sycl_types.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_sycl_types.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_threadpool.h b/include/onednn/dnnl_threadpool.h
new file mode 120000
index 0000000..ee586c8
--- /dev/null
+++ b/include/onednn/dnnl_threadpool.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_threadpool.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_threadpool.hpp b/include/onednn/dnnl_threadpool.hpp
new file mode 120000
index 0000000..54e0af6
--- /dev/null
+++ b/include/onednn/dnnl_threadpool.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_threadpool.hpp
\ No newline at end of file
diff --git a/include/onednn/dnnl_threadpool_iface.hpp b/include/onednn/dnnl_threadpool_iface.hpp
new file mode 120000
index 0000000..1b05134
--- /dev/null
+++ b/include/onednn/dnnl_threadpool_iface.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_threadpool_iface.hpp
\ No newline at end of file
diff --git a/include/onednn/dnnl_types.h b/include/onednn/dnnl_types.h
new file mode 120000
index 0000000..31bbbc9
--- /dev/null
+++ b/include/onednn/dnnl_types.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_types.h
\ No newline at end of file
diff --git a/include/onednn/dnnl_version.h b/include/onednn/dnnl_version.h
new file mode 120000
index 0000000..bb789df
--- /dev/null
+++ b/include/onednn/dnnl_version.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/dnnl_version.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn.h b/include/onednn/mkldnn.h
new file mode 120000
index 0000000..ef19407
--- /dev/null
+++ b/include/onednn/mkldnn.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn.hpp b/include/onednn/mkldnn.hpp
new file mode 120000
index 0000000..e7f56e9
--- /dev/null
+++ b/include/onednn/mkldnn.hpp
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn.hpp
\ No newline at end of file
diff --git a/include/onednn/mkldnn_config.h b/include/onednn/mkldnn_config.h
new file mode 120000
index 0000000..714a586
--- /dev/null
+++ b/include/onednn/mkldnn_config.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn_config.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn_debug.h b/include/onednn/mkldnn_debug.h
new file mode 120000
index 0000000..ca0e6b9
--- /dev/null
+++ b/include/onednn/mkldnn_debug.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn_debug.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn_dnnl_mangling.h b/include/onednn/mkldnn_dnnl_mangling.h
new file mode 120000
index 0000000..67bf8d0
--- /dev/null
+++ b/include/onednn/mkldnn_dnnl_mangling.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn_dnnl_mangling.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn_types.h b/include/onednn/mkldnn_types.h
new file mode 120000
index 0000000..334078b
--- /dev/null
+++ b/include/onednn/mkldnn_types.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn_types.h
\ No newline at end of file
diff --git a/include/onednn/mkldnn_version.h b/include/onednn/mkldnn_version.h
new file mode 120000
index 0000000..ed35758
--- /dev/null
+++ b/include/onednn/mkldnn_version.h
@@ -0,0 +1 @@
+../../3rdparty/onednn/include/mkldnn_version.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl.h b/include/onednn/oneapi/dnnl/dnnl.h
new file mode 120000
index 0000000..863d538
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl.hpp b/include/onednn/oneapi/dnnl/dnnl.hpp
new file mode 120000
index 0000000..4337527
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl.hpp
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl.hpp
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_debug.h b/include/onednn/oneapi/dnnl/dnnl_debug.h
new file mode 120000
index 0000000..aa05682
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_debug.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_debug.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_ocl.h b/include/onednn/oneapi/dnnl/dnnl_ocl.h
new file mode 120000
index 0000000..4652f65
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_ocl.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_ocl.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_ocl.hpp b/include/onednn/oneapi/dnnl/dnnl_ocl.hpp
new file mode 120000
index 0000000..a2f0eb9
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_ocl.hpp
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_ocl.hpp
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_sycl.h b/include/onednn/oneapi/dnnl/dnnl_sycl.h
new file mode 120000
index 0000000..2cc2245
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_sycl.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_sycl.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_sycl.hpp b/include/onednn/oneapi/dnnl/dnnl_sycl.hpp
new file mode 120000
index 0000000..2a58d8d
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_sycl.hpp
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_sycl.hpp
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_sycl_types.h b/include/onednn/oneapi/dnnl/dnnl_sycl_types.h
new file mode 120000
index 0000000..5ac056d
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_sycl_types.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_sycl_types.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_threadpool.h b/include/onednn/oneapi/dnnl/dnnl_threadpool.h
new file mode 120000
index 0000000..86e888d
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_threadpool.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_threadpool.h
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_threadpool.hpp b/include/onednn/oneapi/dnnl/dnnl_threadpool.hpp
new file mode 120000
index 0000000..0a579db
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_threadpool.hpp
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_threadpool.hpp
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_threadpool_iface.hpp b/include/onednn/oneapi/dnnl/dnnl_threadpool_iface.hpp
new file mode 120000
index 0000000..a6b52b5
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_threadpool_iface.hpp
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_threadpool_iface.hpp
\ No newline at end of file
diff --git a/include/onednn/oneapi/dnnl/dnnl_types.h b/include/onednn/oneapi/dnnl/dnnl_types.h
new file mode 120000
index 0000000..c28dbed
--- /dev/null
+++ b/include/onednn/oneapi/dnnl/dnnl_types.h
@@ -0,0 +1 @@
+../../../../3rdparty/onednn/include/oneapi/dnnl/dnnl_types.h
\ No newline at end of file
diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc
index c8c8139..e35f0f4 100644
--- a/src/c_api/c_api.cc
+++ b/src/c_api/c_api.cc
@@ -155,7 +155,7 @@ void CustomFComputeDispatcher(const std::string op_name,
   // convert inputs/outpus NDArray to C types to be passed to lib_api.h
   for (size_t i = 0; i < inputs.size(); i++) {
     NDArray const* in_nd = &(inputs[i]);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     // reorder data if in MKLDNN format
     if (in_nd->IsMKLDNNData()) {
       // convert from MKLDNN
@@ -1392,7 +1392,7 @@ void registerPasses(void *lib, int verbose, mxnet::ext::msgSize_t msgSize,
           arg_names.push_back(in_arg_names[i].c_str());
           const NDArray &in_arg = *(in_args_ptr[i]);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
           // reorder data if in MKLDNN format
           if (in_arg.IsMKLDNNData()) {
             in_arg.Reorder2DefaultAsync();
@@ -1418,7 +1418,7 @@ void registerPasses(void *lib, int verbose, mxnet::ext::msgSize_t msgSize,
           aux_names.push_back(in_aux_names[i].c_str());
           const auto &in_aux = *(in_aux_ptr[i]);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
           // reorder data if in MKLDNN format
           if (in_aux.IsMKLDNNData()) {
             in_aux.Reorder2DefaultAsync();
@@ -2237,7 +2237,7 @@ int MXNDArrayGetData(NDArrayHandle handle,
                      void **out_pdata) {
   API_BEGIN();
   NDArray *arr = static_cast<NDArray*>(handle);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (arr->IsMKLDNNData()) {
     arr->Reorder2DefaultAsync();
     arr->WaitToRead();
diff --git a/src/common/exec_utils.h b/src/common/exec_utils.h
index 87cfde3..80936a9 100644
--- a/src/common/exec_utils.h
+++ b/src/common/exec_utils.h
@@ -36,7 +36,7 @@
 namespace mxnet {
 namespace common {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
      // We have to make sure it's default storage and default layout.
 #define DEFAULT_DATA(x)    x.IsDefaultData()
 #else
@@ -69,7 +69,7 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
       (*idx_map)[i] = temp_dst->size();
       NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                              true, nd.dtype());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
       CHECK(temp.IsDefaultData());
 #endif
       temp_src->emplace_back(nd);
@@ -93,7 +93,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
   for (size_t i = 0; i < src.size(); i++) {
     const auto& nd = src[i];
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
       // If it's write inplace and the output array doesn't use the default
       // layout, we'll generate a temporary output array below, which means
@@ -103,7 +103,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
     // We have to make sure it's default storage and default layout.
 #endif
     if (!DEFAULT_DATA(nd)) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
       NDArray temp;
       if (bufs != nullptr) {
         temp = bufs->at(i);
diff --git a/src/common/utils.h b/src/common/utils.h
index aa36dc1..2d01e6b 100644
--- a/src/common/utils.h
+++ b/src/common/utils.h
@@ -49,7 +49,7 @@
 #include <limits>
 
 #include "../operator/mxnet_op.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../operator/nn/mkldnn/mkldnn_base-inl.h"
 #endif
 
@@ -495,7 +495,7 @@ inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
     "0 to suppress this warning.";
   os << "\nStorage type fallback detected:\n" << op_str << warning;
   LogOnce(os.str());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!MKLDNNEnvSet()) common::LogOnce("MXNET_MKLDNN_ENABLED flag is off. "
                                        "You can re-enable by setting MXNET_MKLDNN_ENABLED=1");
   if (GetMKLDNNCacheSize() != -1) common::LogOnce("MXNET_MKLDNN_CACHE_NUM is set."
diff --git a/src/imperative/attach_op_execs_pass.cc b/src/imperative/attach_op_execs_pass.cc
index c102f34..30e67f4 100644
--- a/src/imperative/attach_op_execs_pass.cc
+++ b/src/imperative/attach_op_execs_pass.cc
@@ -36,7 +36,7 @@ namespace mxnet {
 
 namespace exec {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #define CREATE_DEFAULT_INPUTS_MKLDNN(in_array, in_array_fallback, attrs)  \
         CREATE_DEFAULT_INPUTS(true, attrs, CreateDefaultInputs(in_array, in_array_fallback))
 #else
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 2ad98b2..f53b0db 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -36,7 +36,7 @@
 
 namespace mxnet {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 template<typename T>T *pntr(T &obj)           { return &obj; }  // NOLINT
 template<typename T>T *pntr(T *obj)           { return obj; }
 
@@ -534,7 +534,7 @@ inline bool SetupDefaultBlobsIn(const std::vector<NDArray *>& src,
       (*idx_map)[i] = temp_dst->size();
       NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                              true, nd.dtype());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
       CHECK(temp.IsDefaultData());
 #endif
       temp_src->emplace_back(nd);
@@ -558,7 +558,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray *>& src,
   for (size_t i = 0; i < src.size(); i++) {
     const auto& nd = *src[i];
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
       // If it's write inplace and the output array doesn't use the default
       // layout, we'll generate a temporary output array below, which means
@@ -567,7 +567,7 @@ inline bool SetupDefaultBlobsOut(const std::vector<NDArray *>& src,
       req->at(i) = kWriteTo;
 #endif
     if (!DEFAULT_DATA(nd)) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
       NDArray temp;
       if (bufs != nullptr) {
         temp = bufs->at(i);
diff --git a/src/libinfo.cc b/src/libinfo.cc
index 9348b72..ae25146 100644
--- a/src/libinfo.cc
+++ b/src/libinfo.cc
@@ -78,7 +78,7 @@ class FeatureSet {
     feature_bits.set(BLAS_MKL, MXNET_USE_BLAS_MKL);
     feature_bits.set(BLAS_APPLE, MXNET_USE_BLAS_APPLE);
     feature_bits.set(LAPACK, MXNET_USE_LAPACK);
-    feature_bits.set(MKLDNN, MXNET_USE_MKLDNN);
+    feature_bits.set(MKLDNN, MXNET_USE_ONEDNN);
 
     // Image
     feature_bits.set(OPENCV, MXNET_USE_OPENCV);
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index f52fa25..bbce020 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -110,7 +110,7 @@ void NDArray::SetShapeFromChunk() const {
 struct ChunkMem {
   Storage::Handle h;
   std::vector<Storage::Handle> aux_h;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   std::shared_ptr<MKLDNNMemory> mem;
 #endif
 };
@@ -120,14 +120,14 @@ NDArray::Chunk::~Chunk() {
   ChunkMem mem;
   mem.h = this->shandle;
   mem.aux_h = this->aux_handles;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   // We want to delete mkldnn memory after deleting the variable.
   mem.mem = this->mkl_mem_;
 #endif
   if (auto engine = engine_ref_.lock()) {
     engine->DeleteVariable([mem, skip_free](RunContext s) {
       if (skip_free == false) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         if (mem.mem) {
           CHECK_LE(mem.mem->GetSize(), mem.h.size);
           CHECK_EQ(mem.mem->GetDataHandle(), mem.h.dptr);
@@ -157,7 +157,7 @@ void NDArray::Chunk::CheckAndAllocData(const mxnet::TShape &shape, int dtype) {
     // init storage
     shandle.size = dbytes;
     Storage::Get()->Alloc(&shandle);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     mkl_mem_ = nullptr;
 #endif
   }
@@ -185,7 +185,7 @@ nnvm::Symbol NDArray::get_autograd_symbol() const {
   return ret;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 NDArray::NDArray(const mkldnn::memory::desc &md)
     : storage_type_(kDefaultStorage), autograd_entry_(nullptr) {
@@ -489,7 +489,7 @@ void NDArray::set_fresh_out_grad(bool state) const {
   info.fresh_out_grad = state;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 bool NDArray::Chunk::IsMKLDNN() const {
   if (storage_type != kDefaultStorage)
@@ -886,7 +886,7 @@ void NDArray::SetTBlob() const {
   char *dptr = static_cast<char*>(ptr_->shandle.dptr);
   auto stype = storage_type();
   if (stype == kDefaultStorage) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     CHECK(!IsMKLDNNData()) << "We can't generate TBlob for MKLDNN data. "
         << "Please use Reorder2Default() to generate a new NDArray first";
 #endif
@@ -1228,7 +1228,7 @@ inline void CopyFromToRspImpl(const NDArray& from, const NDArray& to, RunContext
 // Make a copy of a dense NDArray
 template<typename from_xpu, typename to_xpu>
 inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext ctx) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   // If neither is MKLDNN, we can copy data normally.
   if (!from.IsMKLDNNData() && !to.IsMKLDNNData()) {
 #endif
@@ -1237,7 +1237,7 @@ inline void CopyFromToDnsImpl(const NDArray& from, const NDArray& to, RunContext
     TBlob tmp = to.data();
     ndarray::Copy<from_xpu, to_xpu>(from.data(), &tmp,
                                     from.ctx(), to.ctx(), ctx);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   } else if (SupportMKLDNN(from.dtype(), from.shape())
              && SupportMKLDNN(to.dtype(), to.shape())
              && from.ctx().dev_mask() == cpu::kDevMask
@@ -1763,7 +1763,7 @@ void NDArray::Save(dmlc::Stream *strm) const {
   } else {
     this->WaitToRead();
     nd_cpu = *this;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (nd_cpu.IsMKLDNNData())
       nd_cpu = nd_cpu.Reorder2Default();
 #endif
@@ -2164,7 +2164,7 @@ void NDArray::SyncCopyToCPU(void *data, size_t size) const {
   if (this->ctx().dev_mask() == cpu::kDevMask) {
     RunContext rctx{this->ctx(), nullptr, nullptr, false};
     NDArray src = *this;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (src.IsMKLDNNData())
       src = this->Reorder2Default();
 #endif
diff --git a/src/operator/contrib/batch_norm_relu.cc b/src/operator/contrib/batch_norm_relu.cc
index 890239d..52671a0 100644
--- a/src/operator/contrib/batch_norm_relu.cc
+++ b/src/operator/contrib/batch_norm_relu.cc
@@ -28,7 +28,7 @@
 #include <nnvm/op_attr_types.h>
 #include "../elemwise_op_common.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_batch_norm-inl.h"
 #endif
 
@@ -125,7 +125,7 @@ static bool BatchNormWithReLUType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static inline bool SupportMKLDNNBNReLU(const NDArray &input, const BatchNormParam &param) {
   if (mxnet::op::batchnorm::disable_mkl) return false;
   const mxnet::TShape shape = input.shape();
@@ -181,7 +181,7 @@ static inline bool BatchNormWithReLUStorageType(const nnvm::NodeAttrs &attrs,
   const BatchNormParam &param = nnvm::get<BatchNormParam>(attrs.parsed);
 
   bool dispatched = false;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!dispatched) {
     dispatched = MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode,
                                    in_attrs, out_attrs);
@@ -273,11 +273,11 @@ An extented operator of Batch normalization which can fuse ReLU activation.
 .set_attr<mxnet::FInferShape>("FInferShape", BatchNormWithReLUShape)
 .set_attr<nnvm::FInferType>("FInferType", BatchNormWithReLUType)
 .set_attr<FInferStorageType>("FInferStorageType", BatchNormWithReLUStorageType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", BatchNormWithReLUComputeExCPU)
 #endif
 .set_attr<nnvm::FGradient>("FGradient", BatchNormWithReLUGrad)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
@@ -305,7 +305,7 @@ NNVM_REGISTER_OP(_backward_contrib_BatchNormWithReLU)
 .set_num_outputs(3)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FInferStorageType>("FInferStorageType", BatchNormWithReLUStorageType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
diff --git a/src/operator/leaky_relu.cc b/src/operator/leaky_relu.cc
index 6690834..95a8255 100644
--- a/src/operator/leaky_relu.cc
+++ b/src/operator/leaky_relu.cc
@@ -25,10 +25,10 @@
 */
 
 #include "./leaky_relu-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./nn/mkldnn/mkldnn_base-inl.h"
 #include "./nn/mkldnn/mkldnn_ops-inl.h"
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 #include <nnvm/op_attr_types.h>
 namespace mxnet {
@@ -84,7 +84,7 @@ static bool LeakyReLUShape(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void LeakyReLUComputeExCPU(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
@@ -141,7 +141,7 @@ inline static bool BackwardLeakyReLUStorageType(const nnvm::NodeAttrs& attrs,
   return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNLeakyRelu(param),
                            dispatch_mode, in_attrs, out_attrs);
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 NNVM_REGISTER_OP(LeakyReLU)
 .describe(R"code(Applies Leaky rectified linear unit activation element-wise to the input.
@@ -172,7 +172,7 @@ The following modified ReLU Activation functions are supported:
   return param.act_type == leakyrelu::kRReLU ? 2 : 1;
 })
 .set_attr_parser(ParamParser<LeakyReLUParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", LeakyReLUStorageType)
 #endif
 .set_attr<nnvm::FListInputNames>("FListInputNames",
@@ -190,7 +190,7 @@ The following modified ReLU Activation functions are supported:
 .set_attr<mxnet::FInferShape>("FInferShape", LeakyReLUShape)
 .set_attr<nnvm::FInferType>("FInferType", LeakyReLUType)
 .set_attr<FCompute>("FCompute<cpu>", LeakyReLUCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LeakyReLUComputeExCPU)
 #endif
@@ -227,7 +227,7 @@ NNVM_REGISTER_OP(_backward_LeakyReLU)
   return param.act_type == leakyrelu::kPReLU ? 2 : 1;
 })
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardLeakyReLUStorageType)
 #endif
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
@@ -237,7 +237,7 @@ NNVM_REGISTER_OP(_backward_LeakyReLU)
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr_parser(ParamParser<LeakyReLUParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LeakyReLUGradComputeExCPU)
 #endif
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 622d346..e9c5251 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -27,10 +27,10 @@
 #include "./activation-inl.h"
 #include "../mshadow_op.h"
 #include "../tensor/elemwise_unary_op.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #include "../operator_common.h"
 #include "../../common/utils.h"
 
@@ -101,7 +101,7 @@ struct ActivationGrad {
   }
 };
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void ActivationComputeExCPU(const nnvm::NodeAttrs& attrs,
                                    const OpContext& ctx,
                                    const std::vector<NDArray>& inputs,
@@ -157,7 +157,7 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs,
   return MKLDNNStorageType(attrs, dev_mask, SupportMKLDNNAct(param),
                            dispatch_mode, in_attrs, out_attrs);
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 
 MXNET_OPERATOR_REGISTER_UNARY(Activation)
@@ -174,7 +174,7 @@ The following activation functions are supported:
 
 )code" ADD_FILELINE)
 .set_attr_parser(ParamParser<ActivationParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", ActivationStorageType)
 #endif
 .set_attr<nnvm::FListOutputNames>("FListOutputNames",
@@ -182,7 +182,7 @@ The following activation functions are supported:
     return std::vector<std::string>{"output"};
 })
 .set_attr<FCompute>("FCompute<cpu>", ActivationCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ActivationComputeExCPU)
 #endif
@@ -196,7 +196,7 @@ NNVM_REGISTER_OP(_backward_Activation)
 })
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardActStorageType)
 #endif
 .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<-1, 1>)
@@ -204,13 +204,13 @@ NNVM_REGISTER_OP(_backward_Activation)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", [](const NodeAttrs& attrs){
   return std::vector<std::pair<int, int> >{{0, 0}};
 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 #endif
 .set_attr_parser(ParamParser<ActivationParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ActivationGradComputeExCPU)
 #endif
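
The registration hunks above show the second recurring shape of this patch: under the renamed flag an operator additionally advertises TIsMKLDNN and registers an FComputeEx kernel alongside the unconditional FCompute one. A condensed sketch of that pattern (operator and kernel names are hypothetical):

    NNVM_REGISTER_OP(MyOp)
    .set_attr<FCompute>("FCompute<cpu>", MyOpCompute<cpu>)      // always registered
    #if MXNET_USE_ONEDNN == 1
    .set_attr<bool>("TIsMKLDNN", true)                          // marks oneDNN support
    .set_attr<FComputeEx>("FComputeEx<cpu>", MyOpComputeExCPU)  // oneDNN-backed kernel
    #endif
    .add_argument("data", "NDArray-or-Symbol", "Input data");
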
diff --git a/src/operator/nn/batch_norm.cc b/src/operator/nn/batch_norm.cc
index 2a91a37..87456dd 100644
--- a/src/operator/nn/batch_norm.cc
+++ b/src/operator/nn/batch_norm.cc
@@ -28,7 +28,7 @@
 #include <nnvm/op_attr_types.h>
 #include "../elemwise_op_common.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_batch_norm-inl.h"
 #endif
 
@@ -433,7 +433,7 @@ static bool BatchNormType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static inline bool SupportMKLDNNBN(const NDArray &input, const BatchNormParam &param) {
   if (mxnet::op::batchnorm::disable_mkl) return false;
   const mxnet::TShape shape = input.shape();
@@ -489,7 +489,7 @@ static inline bool BatchNormStorageType(const nnvm::NodeAttrs &attrs,
   const BatchNormParam &param = nnvm::get<BatchNormParam>(attrs.parsed);
 
   bool dispatched = false;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!dispatched) {
     dispatched = MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode,
                                    in_attrs, out_attrs);
@@ -627,11 +627,11 @@ then set ``gamma`` to 1 and its gradient to 0.
 .set_attr<nnvm::FInferType>("FInferType", BatchNormType)
 .set_attr<FInferStorageType>("FInferStorageType", BatchNormStorageType)
 .set_attr<FCompute>("FCompute<cpu>", BatchNormCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FComputeEx>("FComputeEx<cpu>", BatchNormComputeExCPU)
 #endif
 .set_attr<nnvm::FGradient>("FGradient", BatchNormGrad)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
@@ -666,7 +666,7 @@ NNVM_REGISTER_OP(_backward_BatchNorm)
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr_parser(ParamParser<BatchNormParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", BatchNormGradComputeExCPU)
 #endif
diff --git a/src/operator/nn/concat.cc b/src/operator/nn/concat.cc
index 2328e0a..c50af8f 100644
--- a/src/operator/nn/concat.cc
+++ b/src/operator/nn/concat.cc
@@ -198,14 +198,14 @@ inline static bool ConcatForwardInferStorageType(const nnvm::NodeAttrs& attrs,
     dispatched = storage_type_assign(&out_stype, kCSRStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
   }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!dispatched && dev_mask == mshadow::cpu::kDevMask
       && common::ContainsOnlyStorage(*in_attrs, kDefaultStorage)
       && param.dim > 0) {
     dispatched = storage_type_assign(&out_stype, kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
   }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   if (!dispatched && common::ContainsOnlyStorage(*in_attrs, kDefaultStorage)) {
     dispatched = storage_type_assign(&out_stype, kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFCompute);
@@ -213,10 +213,10 @@ inline static bool ConcatForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   if (!dispatched) {
     dispatched = dispatch_fallback(out_attrs, dispatch_mode);
   }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!MKLDNNEnvSet())
     *dispatch_mode = DispatchMode::kFComputeFallback;
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   return dispatched;
 }
 
@@ -226,7 +226,7 @@ inline static bool BackwardConcatStorageType(const nnvm::NodeAttrs& attrs,
                                              std::vector<int> *in_attrs,
                                              std::vector<int> *out_attrs) {
   DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const ConcatParam& param = nnvm::get<ConcatParam>(attrs.parsed);
   CHECK_EQ(out_attrs->size(), in_attrs->size() - 1);
   if (dev_mask == mshadow::cpu::kDevMask
@@ -234,16 +234,16 @@ inline static bool BackwardConcatStorageType(const nnvm::NodeAttrs& attrs,
       && param.dim > 0)
     wanted_mode = DispatchMode::kFComputeEx;
   else
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
     wanted_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!MKLDNNEnvSet())
     wanted_mode = DispatchMode::kFComputeFallback;
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   return storage_type_assign(out_attrs, mxnet::kDefaultStorage,
                              dispatch_mode, wanted_mode);
 }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 bool SupportMKLDNNConcat(const std::vector<NDArray> &arrs) {
   for (auto &arr : arrs) {
     if (arr.IsView()) return false;
@@ -256,7 +256,7 @@ bool SupportMKLDNNConcat(const std::vector<NDArray> &arrs) {
   }
   return true;
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 static void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs,
                                const OpContext& op_ctx,
                                const std::vector<NDArray>& inputs,
@@ -269,20 +269,20 @@ static void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs,
   if (common::ContainsOnlyStorage(inputs, kCSRStorage) &&
       outputs[0].storage_type() == kCSRStorage) {
     ConcatCSRImpl<cpu>(attrs, op_ctx, inputs, req, outputs);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   } else if (SupportMKLDNNConcat(inputs)) {
     MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
     MKLDNNRun(MKLDNNConcatForward, attrs, op_ctx, inputs, req, outputs);
     MKLDNN_OPCHECK_RUN(ConcatCompute<cpu>, attrs, op_ctx, inputs, req, outputs);
   } else if (common::ContainsOnlyStorage(inputs, kDefaultStorage)) {
     FallBackCompute(ConcatCompute<cpu>, attrs, op_ctx, inputs, req, outputs);
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   } else {
     LogUnimplementedOp(attrs, op_ctx, inputs, req, outputs);
   }
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                    const OpContext& ctx,
                                    const std::vector<NDArray>& inputs,
@@ -296,7 +296,7 @@ static void ConcatGradComputeExCPU(const nnvm::NodeAttrs& attrs,
   }
   FallBackCompute(ConcatGradCompute<cpu>, attrs, ctx, inputs, req, outputs);
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 struct ConcatGrad {
   const char *op_name;
@@ -304,11 +304,11 @@ struct ConcatGrad {
                                           const std::vector<nnvm::NodeEntry>& ograds) const {
     CHECK_EQ(ograds.size(), 1);
     std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     for (size_t i = 0; i < n->inputs.size(); i++) {
       heads.push_back(n->inputs[i]);
     }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
     return MakeGradNode(op_name, n, heads, n->attrs.dict);
   }
 };
@@ -384,13 +384,13 @@ Example::
                          [ 5.,  5.,  8.,  8.]]
 
 )code" ADD_FILELINE)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
 .set_attr<bool>("TIsMKLDNN", true)
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 CONCAT_FORWARD_ATTRS
 .set_attr<mxnet::FInferShape>("FInferShape", ConcatShape)
 .add_argument("data", "NDArray-or-Symbol[]", "List of arrays to concatenate")
@@ -398,7 +398,7 @@ CONCAT_FORWARD_ATTRS
 
 NNVM_REGISTER_OP(_backward_Concat)
 .set_num_inputs([](const NodeAttrs& attrs) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const ConcatParam& params = nnvm::get<ConcatParam>(attrs.parsed);
   return 1 + params.num_args;
 #else
@@ -410,17 +410,17 @@ NNVM_REGISTER_OP(_backward_Concat)
   return params.num_args;
 })
 .set_attr_parser(ParamParser<ConcatParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FInferStorageType>("FInferStorageType", BackwardConcatStorageType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ConcatGradComputeExCPU)
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 .set_attr<FCompute>("FCompute<cpu>", ConcatGradCompute<cpu>);
 
 // _rnn_param_concat is a custom concat op with specialized infer_shape,
@@ -428,11 +428,11 @@ NNVM_REGISTER_OP(_backward_Concat)
 // unknown shape that can be inferred from output shape.
 NNVM_REGISTER_OP(_rnn_param_concat)
 .add_alias("_npi_rnn_param_concat")
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 CONCAT_FORWARD_ATTRS
 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
 .set_attr<mxnet::FInferShape>("FInferShape", RNNParamConcatShape)
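
ConcatComputeExCPU above is representative of how the FComputeEx kernels dispatch at runtime: a support predicate selects the oneDNN path, the MKLDNN_OPCHECK_* macros hook in a debug-mode comparison against the native kernel, and unsupported layouts fall back to the plain FCompute implementation. Condensed from the hunk above (the CSR branch is elided):

    static void ConcatComputeExCPU(const nnvm::NodeAttrs& attrs,
                                   const OpContext& op_ctx,
                                   const std::vector<NDArray>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<NDArray>& outputs) {
      if (SupportMKLDNNConcat(inputs)) {             // oneDNN fast path
        MKLDNN_OPCHECK_INIT(false, outputs.size(), inputs, outputs);
        MKLDNNRun(MKLDNNConcatForward, attrs, op_ctx, inputs, req, outputs);
        MKLDNN_OPCHECK_RUN(ConcatCompute<cpu>, attrs, op_ctx, inputs, req, outputs);
      } else if (common::ContainsOnlyStorage(inputs, kDefaultStorage)) {
        FallBackCompute(ConcatCompute<cpu>, attrs, op_ctx, inputs, req, outputs);
      } else {
        LogUnimplementedOp(attrs, op_ctx, inputs, req, outputs);
      }
    }
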
diff --git a/src/operator/nn/convolution.cc b/src/operator/nn/convolution.cc
index 05d4cb7..556918a 100644
--- a/src/operator/nn/convolution.cc
+++ b/src/operator/nn/convolution.cc
@@ -27,10 +27,10 @@
 #include "./convolution-inl.h"
 #include "../elemwise_op_common.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
-#endif  // MXNET_USE_MKLDNN
+#endif  // MXNET_USE_ONEDNN
 
 namespace mxnet {
 namespace op {
@@ -48,7 +48,7 @@ static inline std::vector<std::string> ListArguments(const ConvolutionParam& par
   }
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void ConvolutionComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const OpContext& ctx,
                                     const std::vector<NDArray>& inputs,
@@ -302,7 +302,7 @@ static bool ConvolutionType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 inline static bool ConvStorageType(const nnvm::NodeAttrs& attrs,
                                    const int dev_mask,
                                    DispatchMode* dispatch_mode,
@@ -491,11 +491,11 @@ There are other options to tune the performance.
 })
 .set_attr<mxnet::FInferShape>("FInferShape", ConvolutionShape)
 .set_attr<nnvm::FInferType>("FInferType", ConvolutionType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", ConvStorageType)
 #endif
 .set_attr<FCompute>("FCompute<cpu>", ConvolutionCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionComputeExCPU)
 #endif
@@ -519,14 +519,14 @@ NNVM_REGISTER_OP(_backward_Convolution)
   return params.no_bias ? 2 : 3;
 })
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", BackwardConvStorageType)
 #endif
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr_parser(ConvolutionParamParser)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ConvolutionGradComputeExCPU)
 #endif
diff --git a/src/operator/nn/deconvolution.cc b/src/operator/nn/deconvolution.cc
index 08d6306..6dc8b31 100644
--- a/src/operator/nn/deconvolution.cc
+++ b/src/operator/nn/deconvolution.cc
@@ -27,15 +27,15 @@
 #include "./deconvolution-inl.h"
 #include "../operator_common.h"
 #include "../../common/utils.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_base-inl.h"
 #include "./mkldnn/mkldnn_ops-inl.h"
-#endif  // MXNET_USE_MKLDNN
+#endif  // MXNET_USE_ONEDNN
 
 namespace mxnet {
 namespace op {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void DeconvolutionComputeExCPU(const nnvm::NodeAttrs& attrs,
                                       const OpContext& ctx,
                                       const std::vector<NDArray>& inputs,
@@ -445,7 +445,7 @@ NNVM_REGISTER_OP(Deconvolution)
 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
 .set_attr<FCompute>("FCompute<cpu>", DeconvolutionCompute<cpu>)
 .set_attr<nnvm::FGradient>("FGradient", DeconvolutionGrad{"_backward_Deconvolution"})
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", DeconvStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionComputeExCPU)
@@ -470,7 +470,7 @@ NNVM_REGISTER_OP(_backward_Deconvolution)
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr_parser(DeconvolutionParamParser)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", BackwardDeconvStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", DeconvolutionGradComputeExCPU)
diff --git a/src/operator/nn/fully_connected.cc b/src/operator/nn/fully_connected.cc
index 7b243f1..32bddf2 100644
--- a/src/operator/nn/fully_connected.cc
+++ b/src/operator/nn/fully_connected.cc
@@ -95,7 +95,7 @@ void FullyConnectedComputeExCPU(const nnvm::NodeAttrs& attrs,
     valid_bias = inputs[2].storage_type() == kDefaultStorage ||
                  inputs[2].storage_type() == kRowSparseStorage;
   }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (common::ContainsOnlyStorage(inputs, kDefaultStorage) &&
       common::ContainsOnlyStorage(outputs, kDefaultStorage)) {
     if (SupportMKLDNNFC(inputs[0])) {
@@ -139,7 +139,7 @@ void FullyConnectedComputeExCPU(const nnvm::NodeAttrs& attrs,
 #endif
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 void FullyConnectedGradComputeExCPU(const nnvm::NodeAttrs& attrs,
                                     const OpContext &ctx,
                                     const std::vector<NDArray> &inputs,
@@ -207,7 +207,7 @@ static bool FCStorageType(const nnvm::NodeAttrs& attrs,
     dispatched = storage_type_assign(out_attrs, mxnet::kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFComputeEx);
   }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!MKLDNNEnvSet())
     *dispatch_mode = DispatchMode::kFComputeFallback;
 #endif
@@ -239,7 +239,7 @@ static bool BackwardFCStorageType(const nnvm::NodeAttrs& attrs,
     dispatched = storage_type_assign(out_attrs, mxnet::kDefaultStorage,
                                      dispatch_mode, DispatchMode::kFCompute);
   }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (!MKLDNNEnvSet())
     *dispatch_mode = DispatchMode::kFComputeFallback;
 #endif
@@ -301,7 +301,7 @@ If ``no_bias`` is set to be true, then the ``bias`` term is ignored.
     [](const NodeAttrs& attrs) {
     return std::vector<std::string>{"output"};
 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
@@ -334,7 +334,7 @@ NNVM_REGISTER_OP(_backward_FullyConnected)
 .set_attr<nnvm::FGradient>("FGradient", FullyConnectedGradGrad{"_backward_backward_FullyConnected"})
 .set_attr<FInferStorageType>("FInferStorageType", BackwardFCStorageType)
 .set_attr_parser(ParamParser<FullyConnectedParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", FullyConnectedGradComputeExCPU)
 #endif
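
FCStorageType and BackwardFCStorageType above illustrate the runtime escape hatch layered on top of the compile-time flag: even in a oneDNN-enabled build, MKLDNNEnvSet() can veto the optimized dispatch and force the fallback mode. A simplified sketch of that tail logic, shared by several FInferStorageType functions in this patch:

    // Tail of a FInferStorageType implementation (simplified):
    #if MXNET_USE_ONEDNN == 1
      if (!MKLDNNEnvSet())                                // runtime kill switch
        *dispatch_mode = DispatchMode::kFComputeFallback;
    #endif
      return dispatched;
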
diff --git a/src/operator/nn/log_softmax.cc b/src/operator/nn/log_softmax.cc
index 2a1d1b3..6275d54 100644
--- a/src/operator/nn/log_softmax.cc
+++ b/src/operator/nn/log_softmax.cc
@@ -26,7 +26,7 @@
 #include "../tensor/elemwise_unary_op.h"
 #include "../tensor/elemwise_binary_op.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "mkldnn/mkldnn_base-inl.h"
 #include "mkldnn/mkldnn_ops-inl.h"
 #endif
@@ -34,7 +34,7 @@
 namespace mxnet {
 namespace op {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void LogSoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                    const OpContext& ctx,
                                    const std::vector<NDArray>& inputs,
@@ -125,7 +125,7 @@ Examples::
       return std::vector<std::string>{"data"};
 })
 .set_attr<FCompute>("FCompute<cpu>", SoftmaxCompute<cpu, mxnet_op::log_softmax_fwd>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LogSoftmaxComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", LogSoftmaxStorageType)
@@ -151,7 +151,7 @@ NNVM_REGISTER_OP(_backward_log_softmax)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", SoftmaxGradOpInplaceOption)
 .add_argument("args", "NDArray-or-Symbol[]", "Positional input arguments")
 .set_attr_parser(ParamParser<SoftmaxParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LogSoftmaxGradComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", LogSoftmaxGradStorageType)
diff --git a/src/operator/nn/lrn.cc b/src/operator/nn/lrn.cc
index 1ac3840..920f34a 100644
--- a/src/operator/nn/lrn.cc
+++ b/src/operator/nn/lrn.cc
@@ -26,7 +26,7 @@
 
 #include "./lrn-inl.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_lrn-inl.h"
 #include "./mkldnn/mkldnn_base-inl.h"
 #endif
@@ -82,7 +82,7 @@ struct LRNGrad {
   }
 };
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 bool LRNForwardInferStorageType(const nnvm::NodeAttrs& attrs,
                                 const int dev_mask,
                                 DispatchMode* dispatch_mode,
@@ -163,7 +163,7 @@ number of kernels in the layer.
 .set_attr_parser(ParamParser<LRNParam>)
 .set_attr<mxnet::FInferShape>("FInferShape", LRNShape)
 .set_attr<nnvm::FInferType>("FInferType", LRNType)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", LRNForwardInferStorageType)
 #endif
 .set_attr<nnvm::FListInputNames>("FListInputNames",
@@ -175,7 +175,7 @@ number of kernels in the layer.
   return std::vector<std::string>{"output", "tmp_norm"};
 })
 .set_attr<FCompute>("FCompute<cpu>", LRNCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LRNComputeExCPU)
 #endif
@@ -187,11 +187,11 @@ NNVM_REGISTER_OP(_backward_LRN)
 .set_num_inputs(3)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<LRNParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", LRNBackwardInferStorageType)
 #endif
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", LRNGradComputeExCPU)
 // Native compute requires norm while MKLDNN does not so cannot be compared in debug mode
diff --git a/src/operator/nn/mkldnn/mkldnn_act-inl.h b/src/operator/nn/mkldnn/mkldnn_act-inl.h
index 70bf16a..0c6e856 100644
--- a/src/operator/nn/mkldnn/mkldnn_act-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_act-inl.h
@@ -28,7 +28,7 @@
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_ACT_INL_H_
 
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <vector>
 #include <utility>
 #include "../activation-inl.h"
@@ -108,5 +108,5 @@ struct hash<mxnet::op::MKLDNNActParam> {
 };
 }  // namespace std
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_ACT_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc b/src/operator/nn/mkldnn/mkldnn_act.cc
index e76a062..761ab86 100644
--- a/src/operator/nn/mkldnn/mkldnn_act.cc
+++ b/src/operator/nn/mkldnn/mkldnn_act.cc
@@ -23,7 +23,7 @@
  * \author Da Zheng
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <dmlc/logging.h>
 #include <dmlc/parameter.h>
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index 25cd251..9f8e4eb 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -46,7 +46,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BASE_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BASE_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <algorithm>
 #include <iterator>
 #include <memory>
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index 7aeb21b..d4cb978 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <atomic>
 #include "./mkldnn_base-inl.h"
@@ -605,7 +605,7 @@ bool MKLDNNStorageType(const nnvm::NodeAttrs &attrs,
     if (v == - 1) v = kDefaultStorage;
 
   DispatchMode wanted_mode;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet())
     wanted_mode = DispatchMode::kFComputeFallback;
   else if (dev_mask == mshadow::cpu::kDevMask && support_mkldnn)
diff --git a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
index 75c7c4d..963ed2c 100644
--- a/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
@@ -26,7 +26,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BATCH_NORM_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BATCH_NORM_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <vector>
 #include <utility>
 #include <mkldnn.hpp>
@@ -479,5 +479,5 @@ void MKLDNNBatchNormBackward(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
 }
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN
+#endif  // MXNET_USE_ONEDNN
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_BATCH_NORM_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_concat-inl.h b/src/operator/nn/mkldnn/mkldnn_concat-inl.h
index ff47ef3..cd1a559 100644
--- a/src/operator/nn/mkldnn/mkldnn_concat-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_concat-inl.h
@@ -26,7 +26,7 @@
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONCAT_INL_H_
 
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <vector>
 #include <utility>
 #include "../concat-inl.h"
@@ -74,5 +74,5 @@ static MKLDNNConcatFwd &GetConcatForward(
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONCAT_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_concat.cc b/src/operator/nn/mkldnn/mkldnn_concat.cc
index aa30ffc..946008b 100644
--- a/src/operator/nn/mkldnn/mkldnn_concat.cc
+++ b/src/operator/nn/mkldnn/mkldnn_concat.cc
@@ -23,7 +23,7 @@
  * \author
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "mkldnn_concat-inl.h"
 
 namespace mxnet {
@@ -101,4 +101,4 @@ void MKLDNNConcatBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_convolution-inl.h b/src/operator/nn/mkldnn/mkldnn_convolution-inl.h
index 84299ee..dfa365f 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_convolution-inl.h
@@ -25,7 +25,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <vector>
 #include <utility>
@@ -146,5 +146,5 @@ class MKLDNNConvBackward {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_CONVOLUTION_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_convolution.cc b/src/operator/nn/mkldnn/mkldnn_convolution.cc
index eca7be2..325dfbe 100644
--- a/src/operator/nn/mkldnn/mkldnn_convolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_convolution.cc
@@ -23,7 +23,7 @@
  * \author Da Zheng
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "../convolution-inl.h"
 #include "./mkldnn_ops-inl.h"
@@ -556,4 +556,4 @@ void MKLDNNConvolutionBackward(const nnvm::NodeAttrs& attrs, const OpContext &ct
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_copy.cc b/src/operator/nn/mkldnn/mkldnn_copy.cc
index a67847f..8f8ee66 100644
--- a/src/operator/nn/mkldnn/mkldnn_copy.cc
+++ b/src/operator/nn/mkldnn/mkldnn_copy.cc
@@ -26,7 +26,7 @@
 #include "./mkldnn_ops-inl.h"
 #include "./mkldnn_base-inl.h"
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 namespace mxnet {
 namespace op {
 
diff --git a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
index cdf3639..01d7c3b 100644
--- a/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
+++ b/src/operator/nn/mkldnn/mkldnn_deconvolution.cc
@@ -22,7 +22,7 @@
  * \brief
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "../deconvolution-inl.h"
 #include "./mkldnn_base-inl.h"
@@ -525,4 +525,4 @@ void MKLDNNDeconvolutionBackward(const nnvm::NodeAttrs &attrs,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected-inl.h b/src/operator/nn/mkldnn/mkldnn_fully_connected-inl.h
index 1c9396e..a91a4f6 100644
--- a/src/operator/nn/mkldnn/mkldnn_fully_connected-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_fully_connected-inl.h
@@ -27,7 +27,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_FULLY_CONNECTED_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_FULLY_CONNECTED_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <vector>
 #include <string>
@@ -127,5 +127,5 @@ void MKLDNNFCForwardFullFeature(const MKLDNNFCFullParam &param,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_FULLY_CONNECTED_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
index 6e8a150..8e0a6e6 100644
--- a/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
+++ b/src/operator/nn/mkldnn/mkldnn_fully_connected.cc
@@ -24,7 +24,7 @@
  * \author Da Zheng, Ciyong Chen
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "mkldnn_fully_connected-inl.h"
 
 namespace mxnet {
@@ -326,4 +326,4 @@ void MKLDNNFCBackward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_log_softmax.cc b/src/operator/nn/mkldnn/mkldnn_log_softmax.cc
index 0d992b2..eb0ff37 100644
--- a/src/operator/nn/mkldnn/mkldnn_log_softmax.cc
+++ b/src/operator/nn/mkldnn/mkldnn_log_softmax.cc
@@ -26,7 +26,7 @@
 #include "./mkldnn_ops-inl.h"
 #include "./mkldnn_base-inl.h"
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 namespace mxnet {
 namespace op {
 
diff --git a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
index 6f7a1d9..fa08c52 100644
--- a/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_lrn-inl.h
@@ -25,7 +25,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_LRN_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_LRN_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <utility>
 #include <vector>
 #include <mkldnn.hpp>
@@ -266,5 +266,5 @@ void MKLDNNLRNBackward(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
 }
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_LRN_INL_H__
diff --git a/src/operator/nn/mkldnn/mkldnn_ops-inl.h b/src/operator/nn/mkldnn/mkldnn_ops-inl.h
index 15c2040..890e111 100644
--- a/src/operator/nn/mkldnn/mkldnn_ops-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_ops-inl.h
@@ -36,7 +36,7 @@
 #include <dmlc/optional.h>
 #include <vector>
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <mkldnn.hpp>
 
 namespace mxnet {
@@ -145,5 +145,5 @@ void MKLDNNReshapeForward(const nnvm::NodeAttrs& attrs,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_OPS_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
index ae1e23e..b475ba1 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_pooling-inl.h
@@ -24,7 +24,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <utility>
 #include <mkldnn.hpp>
@@ -160,5 +160,5 @@ MKLDNNPoolingFwd &GetPoolingFwd(const PoolingParam &param,
                                 const NDArray &output);
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_POOLING_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_pooling.cc b/src/operator/nn/mkldnn/mkldnn_pooling.cc
index bea09bf..b1f8fd3 100644
--- a/src/operator/nn/mkldnn/mkldnn_pooling.cc
+++ b/src/operator/nn/mkldnn/mkldnn_pooling.cc
@@ -23,7 +23,7 @@
  * \author Tao Lv
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "./mkldnn_pooling-inl.h"
 
@@ -399,4 +399,4 @@ void MKLDNNPoolingGradCompute(const OpContext &ctx, const PoolingParam &param,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_reshape-inl.h b/src/operator/nn/mkldnn/mkldnn_reshape-inl.h
index c89e458..48dd934 100644
--- a/src/operator/nn/mkldnn/mkldnn_reshape-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_reshape-inl.h
@@ -26,7 +26,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RESHAPE_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RESHAPE_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <vector>
 #include "mkldnn_base-inl.h"
 #include "../../tensor/matrix_op-inl.h"
@@ -57,5 +57,5 @@ MKLDNNReshapeFwd &GetReshapeForward(const OpReqType &req, const NDArray &input,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RESHAPE_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_reshape.cc b/src/operator/nn/mkldnn/mkldnn_reshape.cc
index 81d96bb..45e5bcb 100644
--- a/src/operator/nn/mkldnn/mkldnn_reshape.cc
+++ b/src/operator/nn/mkldnn/mkldnn_reshape.cc
@@ -23,7 +23,7 @@
  * \author Tao Lv
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../tensor/elemwise_unary_op.h"
 #include "./mkldnn_ops-inl.h"
 #include "./mkldnn_base-inl.h"
diff --git a/src/operator/nn/mkldnn/mkldnn_rnn-inl.h b/src/operator/nn/mkldnn/mkldnn_rnn-inl.h
index a3c7440..8652982 100644
--- a/src/operator/nn/mkldnn/mkldnn_rnn-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_rnn-inl.h
@@ -27,7 +27,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RNN_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RNN_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <vector>
 #include "../../rnn-inl.h"
@@ -467,7 +467,7 @@ class MKLDNNRnnOp {
 };
 
 inline bool SupportMKLDNNRnn(const int input_dtype) {
-  if (input_dtype == mshadow::kFloat32 && dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1)) {
+  if (input_dtype == mshadow::kFloat32 && dmlc::GetEnv("MXNET_USE_ONEDNN_RNN", 1)) {
     return true;
   }
   return false;
@@ -481,5 +481,5 @@ inline bool SupportMKLDNNRnn(const RNNParam &param, const int input_dtype) {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_RNN_INL_H_
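
This is the one renamed runtime switch in the patch: the environment variable read through dmlc::GetEnv changes from MXNET_USE_MKLDNN_RNN to MXNET_USE_ONEDNN_RNN, with the default of 1 (enabled) preserved. Condensed from the hunk above:

    inline bool SupportMKLDNNRnn(const int input_dtype) {
      // oneDNN RNN kernels only cover float32; users can opt out at
      // runtime by exporting MXNET_USE_ONEDNN_RNN=0 (default: 1 = enabled).
      return input_dtype == mshadow::kFloat32 &&
             dmlc::GetEnv("MXNET_USE_ONEDNN_RNN", 1);
    }
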
diff --git a/src/operator/nn/mkldnn/mkldnn_rnn.cc b/src/operator/nn/mkldnn/mkldnn_rnn.cc
index c33ad48..d6de8b2 100644
--- a/src/operator/nn/mkldnn/mkldnn_rnn.cc
+++ b/src/operator/nn/mkldnn/mkldnn_rnn.cc
@@ -24,7 +24,7 @@
  * \author Zixuan Wei
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <numeric>
 #include "./mkldnn_rnn-inl.h"
@@ -1252,4 +1252,4 @@ void MKLDNNRnnOp::Backward(const OpContext& ctx,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_slice-inl.h b/src/operator/nn/mkldnn/mkldnn_slice-inl.h
index 0bb432d..0cc8257 100644
--- a/src/operator/nn/mkldnn/mkldnn_slice-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_slice-inl.h
@@ -26,7 +26,7 @@
 #ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SLICE_INL_H_
 #define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SLICE_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <dmlc/logging.h>
 #include <dmlc/parameter.h>
@@ -62,5 +62,5 @@ void MKLDNNSlice(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SLICE_INL_H_
diff --git a/src/operator/nn/mkldnn/mkldnn_slice.cc b/src/operator/nn/mkldnn/mkldnn_slice.cc
index 26d4f09..0efecc8 100644
--- a/src/operator/nn/mkldnn/mkldnn_slice.cc
+++ b/src/operator/nn/mkldnn/mkldnn_slice.cc
@@ -23,7 +23,7 @@
  * \author Zhiyuan Huang
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "./mkldnn_ops-inl.h"
 #include "./mkldnn_base-inl.h"
@@ -105,4 +105,4 @@ void MKLDNNSlice(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/nn/mkldnn/mkldnn_softmax.cc b/src/operator/nn/mkldnn/mkldnn_softmax.cc
index e96ab6c..f7a0330 100644
--- a/src/operator/nn/mkldnn/mkldnn_softmax.cc
+++ b/src/operator/nn/mkldnn/mkldnn_softmax.cc
@@ -27,7 +27,7 @@
 #include "./mkldnn_ops-inl.h"
 #include "./mkldnn_base-inl.h"
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 namespace mxnet {
 namespace op {
 
diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc
index 747dde6..e76f845 100644
--- a/src/operator/nn/mkldnn/mkldnn_sum.cc
+++ b/src/operator/nn/mkldnn/mkldnn_sum.cc
@@ -31,7 +31,7 @@
 namespace mxnet {
 namespace op {
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 void MKLDNNSum(const mkldnn::memory &arr1,
                const mkldnn::memory &arr2,
                const mkldnn::memory &out) {
diff --git a/src/operator/nn/mkldnn/mkldnn_transpose.cc b/src/operator/nn/mkldnn/mkldnn_transpose.cc
index 23e385d..9b3c5de 100644
--- a/src/operator/nn/mkldnn/mkldnn_transpose.cc
+++ b/src/operator/nn/mkldnn/mkldnn_transpose.cc
@@ -23,7 +23,7 @@
  * \author Tao Lv
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <mkldnn.hpp>
 #include "../../tensor/matrix_op-inl.h"
diff --git a/src/operator/nn/pooling.cc b/src/operator/nn/pooling.cc
index d2edcc5..39bd32b 100644
--- a/src/operator/nn/pooling.cc
+++ b/src/operator/nn/pooling.cc
@@ -25,10 +25,10 @@
 */
 #include "../elemwise_op_common.h"
 #include "./pooling-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_pooling-inl.h"
 #include "./mkldnn/mkldnn_base-inl.h"
-#endif  // MXNET_USE_MKLDNN
+#endif  // MXNET_USE_ONEDNN
 namespace mxnet {
 namespace op {
 
@@ -58,7 +58,7 @@ void PoolingParamParser(nnvm::NodeAttrs *attrs) {
 }
 
 int GetNumOutputs(const PoolingParam &param) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   return MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param) ? 2 : 1;
 #else
   return 1;
@@ -66,7 +66,7 @@ int GetNumOutputs(const PoolingParam &param) {
 }
 
 int GetNumBackInputs(const PoolingParam &param) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   return MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param) ? 5 : 3;
 #else
   return 3;
@@ -77,7 +77,7 @@ static bool PoolingType(const nnvm::NodeAttrs& attrs,
                         std::vector<int> *in_attrs,
                         std::vector<int> *out_attrs) {
   out_attrs->at(0) = in_attrs->at(0);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param)) {
     CHECK_GT(out_attrs->size(), 1U);
@@ -145,7 +145,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
         oshape[i] = 1;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param))
         out_shape->push_back(oshape);   // for workspace
 #endif
@@ -182,7 +182,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                     ConvertLayout(oshape_ncw, mshadow::kNCW, mshadow::kNWC) : oshape_ncw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param))
       out_shape->push_back(oshape);   // for workspace
 #endif
@@ -220,7 +220,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                     ConvertLayout(oshape_nchw, mshadow::kNCHW, mshadow::kNHWC) : oshape_nchw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param))
       out_shape->push_back(oshape);   // for workspace
 #endif
@@ -262,7 +262,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
                     ConvertLayout(oshape_ncdhw, mshadow::kNCDHW, mshadow::kNDHWC) : oshape_ncdhw;
     out_shape->clear();
     out_shape->push_back(oshape);  // save output shape
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param))
       out_shape->push_back(oshape);   // for workspace
 #endif
@@ -271,7 +271,7 @@ static bool PoolingShape(const nnvm::NodeAttrs &attrs,
   return true;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 void PoolingComputeExCPU(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
                          const std::vector<NDArray> &inputs,
                          const std::vector<OpReqType> &req,
@@ -424,7 +424,7 @@ For each window ``X``, the mathematical expression for Lp pooling is:
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   return GetNumOutputs(param);
 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<nnvm::FNumVisibleOutputs>("FNumVisibleOutputs",
                                     [](const NodeAttrs& attrs) { return 1; })
 #endif
@@ -441,13 +441,13 @@ For each window ``X``, the mathematical expression for Lp pooling is:
     return std::vector<std::string>{"output"};
 })
 .set_attr_parser(PoolingParamParser)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", PoolingStorageType)
 #endif
 .set_attr<nnvm::FInferType>("FInferType", PoolingType)
 .set_attr<mxnet::FInferShape>("FInferShape", PoolingShape)
 .set_attr<FCompute>("FCompute<cpu>", PoolingCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", PoolingComputeExCPU)
 #endif
@@ -469,14 +469,14 @@ NNVM_REGISTER_OP(_backward_Pooling)
     "FInplaceOption",
     [](const NodeAttrs &attrs) {
 // Different backend requires different FInplaceOption
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   if (MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param))
     return std::vector<std::pair<int, int> >{{1, 0}};
 #endif
   return std::vector<std::pair<int, int> >();
 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
@@ -484,7 +484,7 @@ NNVM_REGISTER_OP(_backward_Pooling)
                              BackwardPoolingStorageType)
 #endif
 .set_attr_parser(PoolingParamParser)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", PoolingGradComputeExCPU)
 #endif
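
pooling.cc shows that the flag can change an operator's arity, not just its kernels: when oneDNN requires a workspace, Pooling gains a second output (produced by forward, consumed by backward) and FNumVisibleOutputs keeps that extra tensor out of user-facing APIs. The shape of that logic, condensed from the hunks above:

    int GetNumOutputs(const PoolingParam &param) {
    #if MXNET_USE_ONEDNN == 1
      // Second output carries the oneDNN workspace for the backward pass.
      return MKLDNNRequireWorkspace(param) && SupportMKLDNNPooling(param) ? 2 : 1;
    #else
      return 1;
    #endif
    }
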
diff --git a/src/operator/nn/softmax.cc b/src/operator/nn/softmax.cc
index b3ffd42..6e0e07a 100644
--- a/src/operator/nn/softmax.cc
+++ b/src/operator/nn/softmax.cc
@@ -26,7 +26,7 @@
 #include "../tensor/elemwise_unary_op.h"
 #include "../tensor/elemwise_binary_op.h"
 #include "../operator_common.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "mkldnn/mkldnn_base-inl.h"
 #include "mkldnn/mkldnn_ops-inl.h"
 #endif
@@ -36,7 +36,7 @@ namespace op {
 DMLC_REGISTER_PARAMETER(SoftmaxParam);
 DMLC_REGISTER_PARAMETER(MaskedSoftmaxParam);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void SoftmaxComputeExCPU(const nnvm::NodeAttrs& attrs,
                                 const OpContext& ctx,
                                 const std::vector<NDArray>& inputs,
@@ -148,7 +148,7 @@ Example::
     return std::vector<std::string>{"output"};
 })
 .set_attr<FCompute>("FCompute<cpu>", SoftmaxCompute<cpu, mxnet_op::softmax_fwd>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SoftmaxComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", SoftmaxStorageType)
@@ -181,7 +181,7 @@ NNVM_REGISTER_OP(_backward_softmax)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption", SoftmaxGradOpInplaceOption)
 .add_argument("args", "NDArray-or-Symbol[]", "Positional input arguments")
 .set_attr_parser(ParamParser<SoftmaxParam>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SoftmaxGradComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", SoftmaxGradStorageType)
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index 31c6663..3f82a14 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -550,7 +550,7 @@ class OpSignature {
    * and the layout to sign the op.
    */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   void AddSign(const mkldnn::memory &mem) {
     auto desc = mem.get_desc();
     hash = hash * 2 + desc.data.format_kind;
@@ -618,7 +618,7 @@ class OpSignature {
   }
 
   void AddSign(const NDArray &arr) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (arr.IsMKLDNNData()) {
       AddSign(*(arr.GetMKLDNNData()));
     } else {
@@ -626,7 +626,7 @@ class OpSignature {
       hash = hash * 2 + arr.dtype();
       eles.push_back(arr.dtype());
       AddSign(arr.shape());
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     }
 #endif
   }
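
OpSignature::AddSign is the cache-key side of the same conditional compilation: in a oneDNN build, an NDArray currently holding oneDNN-formatted data is signed through its mkldnn::memory descriptor, while plain arrays are signed by dtype and shape only. Condensed from the hunk above:

    void AddSign(const NDArray &arr) {
    #if MXNET_USE_ONEDNN == 1
      if (arr.IsMKLDNNData()) {
        AddSign(*(arr.GetMKLDNNData()));  // signs the mkldnn::memory desc
      } else {
    #endif
        hash = hash * 2 + arr.dtype();    // plain layout: dtype + shape suffice
        eles.push_back(arr.dtype());
        AddSign(arr.shape());
    #if MXNET_USE_ONEDNN == 1
      }
    #endif
    }
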
diff --git a/src/operator/quantization/dequantize.cc b/src/operator/quantization/dequantize.cc
index 9ce1350..a12903d 100644
--- a/src/operator/quantization/dequantize.cc
+++ b/src/operator/quantization/dequantize.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 #include "./dequantize-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_dequantize-inl.h"
 #endif
 
@@ -37,7 +37,7 @@ bool DequantizeStorageType(const nnvm::NodeAttrs& attrs,
                            std::vector<int> *in_attrs,
                            std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
@@ -53,7 +53,7 @@ static OpStatePtr CreateDequantizeState(const nnvm::NodeAttrs &attrs, Context ct
   if (ctx.dev_type == kGPU) {
     state = OpStatePtr::Create<DequantizeOperator<gpu>>(attrs);
   } else {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     state = OpStatePtr::Create<SgMKLDNNDequantizeOperator>(attrs);
 #else
     state = OpStatePtr::Create<DequantizeOperator<cpu>>(attrs);
@@ -93,7 +93,7 @@ by keep zero centered for the quantized value:
 // will be reverted after the improvement of CachedOP is done.
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
 .set_attr<FCreateOpState>("FCreateOpState", CreateDequantizeState)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx<cpu>", SgMKLDNNDequantizeForward)
 #endif
diff --git a/src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
index 7ad7aeb..08e9ce5 100644
--- a/src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
@@ -25,7 +25,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <algorithm>
 #include <string>
 #include <vector>
@@ -120,5 +120,5 @@ static void SgMKLDNNDequantizeForward(const OpStatePtr &state_ptr, const OpConte
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
index 07e2820..3bae329 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
@@ -25,7 +25,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <string>
 #include <algorithm>
 #include <vector>
@@ -105,5 +105,5 @@ static void MKLDNNQuantizeCompute(const nnvm::NodeAttrs& attrs, const OpContext
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_INL_H_
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h b/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
index 6e10efa..b738f29 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
@@ -24,7 +24,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_V2_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_V2_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <algorithm>
 #include <string>
 #include <vector>
@@ -167,5 +167,5 @@ static void SgMKLDNNQuantizeForward(const OpStatePtr &state_ptr, const OpContext
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZE_V2_INL_H_
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_act.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
index f7520d5..f4e824c 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_act.cc
@@ -22,7 +22,7 @@
  * \brief MKLDNN(Quantized) Activation operator based on subgraph
  * /author Zhiyuan Huang
 */
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "../../nn/mkldnn/mkldnn_ops-inl.h"
 #include "../quantization_utils.h"
@@ -52,4 +52,4 @@ NNVM_REGISTER_OP(_contrib_quantized_act)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
index 4723ea4..6a1c6b7 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_batch_norm.cc
@@ -23,7 +23,7 @@
  * \author Yixin Bao
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../nn/mkldnn/mkldnn_batch_norm-inl.h"
 #include "../quantization_utils.h"
 
@@ -147,4 +147,4 @@ NNVM_REGISTER_OP(_contrib_quantized_batch_norm)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_concat.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_concat.cc
index 619e8bf..5f68147 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_concat.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_concat.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../nn/mkldnn/mkldnn_concat-inl.h"
 #include "../quantization_utils.h"
 
@@ -127,4 +127,4 @@ NNVM_REGISTER_OP(_contrib_quantized_concat)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
index 6ac2250..e1d6a80 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_conv.cc
@@ -23,7 +23,7 @@
  * \author Wenting Jiang, Xinyu Chen
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../nn/mkldnn/mkldnn_base-inl.h"
 #include "../../nn/mkldnn/mkldnn_convolution-inl.h"
 #include "../../nn/convolution-inl.h"
@@ -91,4 +91,4 @@ NNVM_REGISTER_OP(_contrib_quantized_conv)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
index 917dffa..e66c682 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../quantized_elemwise_add-inl.h"
 #include "../../nn/mkldnn/mkldnn_ops-inl.h"
 #include "../../nn/mkldnn/mkldnn_base-inl.h"
@@ -255,4 +255,4 @@ NNVM_REGISTER_OP(_contrib_quantized_elemwise_add)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_flatten.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_flatten.cc
index 11a960e..233ef3c 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_flatten.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_flatten.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../nn/mkldnn/mkldnn_ops-inl.h"
 #include "../quantization_utils.h"
 
@@ -58,4 +58,4 @@ NNVM_REGISTER_OP(_contrib_quantized_flatten)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
index 3e21564..cea2c51 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_fully_connected.cc
@@ -24,7 +24,7 @@
  * \author Ciyong Chen
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../../nn/mkldnn/mkldnn_fully_connected-inl.h"
 #include "../quantization_utils.h"
 
@@ -130,4 +130,4 @@ void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs &attrs,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_ops-inl.h b/src/operator/quantization/mkldnn/mkldnn_quantized_ops-inl.h
index 88d77c8..136aebe 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_ops-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_ops-inl.h
@@ -27,7 +27,7 @@
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_OPS_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_OPS_INL_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <mxnet/ndarray.h>
 #include <vector>
@@ -44,5 +44,5 @@ void MKLDNNQuantizedFullyConnectedForward(const nnvm::NodeAttrs &attrs,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_QUANTIZED_OPS_INL_H_
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
index 740c5f9..bd3ffcd 100644
--- a/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_pooling.cc
@@ -23,7 +23,7 @@
  * \author Tao Lv, Xinyu Chen
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "../../nn/mkldnn/mkldnn_pooling-inl.h"
 
@@ -50,4 +50,4 @@ NNVM_REGISTER_OP(_contrib_quantized_pooling)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
index a80b855..ef56616 100644
--- a/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
+++ b/src/operator/quantization/mkldnn/mkldnn_requantize-inl.h
@@ -24,7 +24,7 @@
 
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <string>
 #include <algorithm>
 #include <vector>
@@ -147,5 +147,5 @@ static void MKLDNNRequantizeForward(const nnvm::NodeAttrs& attrs,
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_REQUANTIZE_INL_H_
diff --git a/src/operator/quantization/quantize.cc b/src/operator/quantization/quantize.cc
index a778328..c271bba 100644
--- a/src/operator/quantization/quantize.cc
+++ b/src/operator/quantization/quantize.cc
@@ -23,7 +23,7 @@
  * \brief
  */
 #include "./quantize-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_quantize-inl.h"
 #endif
 
@@ -37,7 +37,7 @@ bool QuantizeStorageType(const nnvm::NodeAttrs& attrs,
                          std::vector<int> *in_attrs,
                          std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
@@ -86,7 +86,7 @@ where
 // TODO(Xinyu): a temp solution to enable GluonCV INT8 flow,
 // will be reverted after the improvement of CachedOP is done.
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizeCompute)
 #endif
diff --git a/src/operator/quantization/quantize_v2.cc b/src/operator/quantization/quantize_v2.cc
index d8b3559..dde1ed9 100644
--- a/src/operator/quantization/quantize_v2.cc
+++ b/src/operator/quantization/quantize_v2.cc
@@ -24,7 +24,7 @@
  */
 
 #include "./quantize_v2-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_quantize_v2-inl.h"
 #endif
 
@@ -36,7 +36,7 @@ static bool QuantizeV2StorageType(const nnvm::NodeAttrs& attrs, const int dev_ma
                                   DispatchMode* dispatch_mode, std::vector<int>* in_attrs,
                                   std::vector<int>* out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
@@ -54,7 +54,7 @@ static OpStatePtr CreateQuantizeV2State(const nnvm::NodeAttrs& attrs, Context ct
   if (ctx.dev_type == kGPU) {
     state = OpStatePtr::Create<QuantizeV2Operator<gpu>>(attrs);
   } else {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     state = OpStatePtr::Create<SgMKLDNNQuantizeOperator>(attrs);
 #else
     state = OpStatePtr::Create<QuantizeV2Operator<cpu>>(attrs);
@@ -104,7 +104,7 @@ If min_calib_range isn't present, the output type will be int8.
 // will be reverted after the improvement of CachedOP is done.
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
 .set_attr<FCreateOpState>("FCreateOpState", CreateQuantizeV2State)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx<cpu>", SgMKLDNNQuantizeForward)
 #endif
diff --git a/src/operator/quantization/quantized_activation.cc b/src/operator/quantization/quantized_activation.cc
index b4ef03b..e2c302b 100644
--- a/src/operator/quantization/quantized_activation.cc
+++ b/src/operator/quantization/quantized_activation.cc
@@ -68,7 +68,7 @@ inline static bool QuantizedActivationStorageType(const nnvm::NodeAttrs &attrs,
   CHECK_EQ(in_attrs->size(), 3);
 
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const ActivationParam &param = nnvm::get<ActivationParam>(attrs.parsed);
   if (dev_mask == mshadow::cpu::kDevMask && param.act_type == activation::kReLU) {
     *dispatch_mode = DispatchMode::kFComputeEx;
diff --git a/src/operator/quantization/quantized_batch_norm.cc b/src/operator/quantization/quantized_batch_norm.cc
index f197ebd..0be997c 100644
--- a/src/operator/quantization/quantized_batch_norm.cc
+++ b/src/operator/quantization/quantized_batch_norm.cc
@@ -25,7 +25,7 @@
 */
 #include <mxnet/op_attr_types.h>
 #include "../nn/batch_norm-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_batch_norm-inl.h"
 #endif
 
@@ -67,7 +67,7 @@ bool QuantizedBatchNormType(const nnvm::NodeAttrs& attrs, std::vector<int>* in_t
   CHECK_EQ(in_type->size(), 7U);
   CHECK_EQ(out_type->size(), 3U);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   CHECK(in_type->at(0) == mshadow::kInt8 || in_type->at(0) == mshadow::kUint8)
       << "QuantizedBatchNorm with MKLDNN backend only supports int8/uint8 input, while "
       << in_type->at(0) << " is given.";
diff --git a/src/operator/quantization/quantized_conv.cc b/src/operator/quantization/quantized_conv.cc
index 1d380e4..4d7178b 100644
--- a/src/operator/quantization/quantized_conv.cc
+++ b/src/operator/quantization/quantized_conv.cc
@@ -24,7 +24,7 @@
  * \author Ziheng Jiang, Jun Wu
 */
 #include "../nn/convolution-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_ops-inl.h"
 #endif
 
@@ -40,7 +40,7 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_shape->size(), param.no_bias? 6U : 9U);
   CHECK_EQ(out_shape->size(), 3U);
   if (param.layout.has_value()) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     CHECK(param.layout.value() == mshadow::kNCHW || param.layout.value() == mshadow::kNCDHW)
           << "mkldnn quantized_conv now supports NCHW or NCDHW for now";
 #else
@@ -53,7 +53,7 @@ bool QuantizedConvShape(const nnvm::NodeAttrs& attrs,
   const int kernel_ndims = param.kernel.ndim();
   if (data_ndims == 0U) return false;
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   CHECK(kernel_ndims == 2U || kernel_ndims == 3U)
         << "mkldnn quantized_conv only supports 2d or 3d kernel for now";
   CHECK(data_ndims == 4U || data_ndims == 5U)
@@ -94,7 +94,7 @@ if (data_ndims == 4) {
     SHAPE_ASSIGN_CHECK(*out_shape, 0, oshape);
     SHAPE_ASSIGN_CHECK(*out_shape, 1, mxnet::TShape(1, 1));
     SHAPE_ASSIGN_CHECK(*out_shape, 2, mxnet::TShape(1, 1));
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   } else {
     // conv 3d
     mxnet::TShape wshape(data_ndims, 0);
@@ -140,7 +140,7 @@ bool QuantizedConvType(const nnvm::NodeAttrs& attrs,
   const ConvolutionParam& param = nnvm::get<ConvolutionParam>(attrs.parsed);
   CHECK_EQ(in_type->size(), param.no_bias? 6U : 9U);
   CHECK_EQ(out_type->size(), 3U);
-#ifndef MXNET_USE_MKLDNN
+#ifndef MXNET_USE_ONEDNN
   TYPE_ASSIGN_CHECK(*in_type, 0, mshadow::kInt8);
 #endif
   TYPE_ASSIGN_CHECK(*in_type, 1, mshadow::kInt8);
@@ -165,7 +165,7 @@ bool QuantizedConvStorageType(const nnvm::NodeAttrs& attrs,
                               std::vector<int> *in_attrs,
                               std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
diff --git a/src/operator/quantization/quantized_fully_connected.cc b/src/operator/quantization/quantized_fully_connected.cc
index e8caf79..0158ebd 100644
--- a/src/operator/quantization/quantized_fully_connected.cc
+++ b/src/operator/quantization/quantized_fully_connected.cc
@@ -26,7 +26,7 @@
 #include <vector>
 #include "quantization_utils.h"
 #include "../nn/fully_connected-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_fully_connected-inl.h"
 #include "mkldnn/mkldnn_quantized_ops-inl.h"
 #endif
@@ -94,7 +94,7 @@ bool QuantizedFullyConnectedType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_type->size(), num_inputs * 3);
   CHECK_EQ(out_type->size(), 3U);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   CHECK(in_type->at(0) == mshadow::kInt8 || in_type->at(0) == mshadow::kUint8)
       << "QuantizedFullyConnected only supports int8/uint8 input, while "
       << in_type->at(0) << " is given.";
@@ -124,7 +124,7 @@ bool QuantizedFullyConnectedStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_attrs->size(), num_inputs * 3);
   CHECK_EQ(out_attrs->size(), 3U);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   return MKLDNNStorageType(attrs, dev_mask, true,
                            dispatch_mode, in_attrs, out_attrs);
 #else
@@ -292,7 +292,7 @@ void QuantizedFullyConnectedForwardCPU(const nnvm::NodeAttrs& attrs,
 #endif
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 void QuantizedFullyConnectedForwardExCPU(const nnvm::NodeAttrs &attrs,
                                          const OpContext &ctx,
                                          const std::vector<NDArray> &in_data,
@@ -341,7 +341,7 @@ and max thresholds representing the thresholds for quantizing the float32 output
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
 .set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) { return true; })
 .set_attr<FCompute>("FCompute<cpu>", QuantizedFullyConnectedForwardCPU)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", QuantizedFullyConnectedForwardExCPU)
 #endif
diff --git a/src/operator/quantization/quantized_pooling.cc b/src/operator/quantization/quantized_pooling.cc
index c35c7a4..f23ecb3 100644
--- a/src/operator/quantization/quantized_pooling.cc
+++ b/src/operator/quantization/quantized_pooling.cc
@@ -23,7 +23,7 @@
 */
 #include <mxnet/op_attr_types.h>
 #include "../nn/pooling-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_pooling-inl.h"
 #endif
 
@@ -42,7 +42,7 @@ bool QuantizedPoolingShape(const nnvm::NodeAttrs& attrs,
   const int kernel_ndims = param.kernel.ndim();
   const int layout = param.GetLayout(data_ndims);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   CHECK(data_ndims == 4U || data_ndims == 5U)
         << "MKL-DNN QuantizedPoolingOp only supports 4D/5D layout yet, input should be 4D in"
         << "(batch, channel, y, x) or 5D in (batch, channel, d, y, x)";
@@ -140,7 +140,7 @@ bool QuantizedPoolingType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(in_type->size(), 3U);
   CHECK_EQ(out_type->size(), 3U);
   if (param.pool_type == pool_enum::kMaxPooling || param.pool_type == pool_enum::kAvgPooling) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     TYPE_ASSIGN_CHECK(*out_type, 0, (*in_type)[0]);
 #else
     TYPE_ASSIGN_CHECK(*in_type, 0, mshadow::kInt8);
@@ -164,7 +164,7 @@ inline static bool QuantizedPoolingStorageType(const nnvm::NodeAttrs &attrs,
   CHECK_EQ(in_attrs->size(), 3);
 
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const PoolingParam &param = nnvm::get<PoolingParam>(attrs.parsed);
   if (dev_mask == mshadow::cpu::kDevMask && SupportMKLDNNPooling(param)) {
     *dispatch_mode = DispatchMode::kFComputeEx;
diff --git a/src/operator/quantization/requantize.cc b/src/operator/quantization/requantize.cc
index 9ee299c..e4124e0 100644
--- a/src/operator/quantization/requantize.cc
+++ b/src/operator/quantization/requantize.cc
@@ -24,7 +24,7 @@
  */
 #include "./requantize-inl.h"
 #include "./quantize-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./mkldnn/mkldnn_requantize-inl.h"
 #endif
 
@@ -38,7 +38,7 @@ bool RequantizeStorageType(const nnvm::NodeAttrs& attrs,
                          std::vector<int> *in_attrs,
                          std::vector<int> *out_attrs) {
   *dispatch_mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
@@ -71,7 +71,7 @@ inference accuracy.
 // TODO(Xinyu): a temp solution to enable GluonCV INT8 flow,
 // will be reverted after the improvement of CachedOP is done.
 .set_attr<nnvm::FGradient>("FGradient", MakeZeroGradNodes)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNRequantizeForward)
 #else
diff --git a/src/operator/rnn.cc b/src/operator/rnn.cc
index 7f65870..7cb9f4a 100644
--- a/src/operator/rnn.cc
+++ b/src/operator/rnn.cc
@@ -27,9 +27,9 @@
 #include <iterator>
 
 #include "./rnn-inl.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "./nn/mkldnn/mkldnn_rnn-inl.h"
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 namespace mxnet {
 namespace op {
@@ -184,14 +184,14 @@ static std::vector<ResourceRequest> RNNResourceEx(const NodeAttrs& attrs, const
 #endif
   } else {
     request.emplace_back(ResourceRequest::kRandom);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     request.emplace_back(ResourceRequest::kTempSpace);
 #endif
   }
   return request;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 inline static bool RNNStorageType(const nnvm::NodeAttrs& attrs,
                                   const int dev_mask,
                                   DispatchMode* dispatch_mode,
@@ -199,11 +199,11 @@ inline static bool RNNStorageType(const nnvm::NodeAttrs& attrs,
                                   std::vector<int> *out_attrs) {
   const RNNParam& param = nnvm::get<RNNParam>(attrs.parsed);
   const bool support_mkldnn_rnn =
-      !param.use_sequence_length && dmlc::GetEnv("MXNET_USE_MKLDNN_RNN", 1);
+      !param.use_sequence_length && dmlc::GetEnv("MXNET_USE_ONEDNN_RNN", 1);
   return MKLDNNStorageType(attrs, dev_mask, support_mkldnn_rnn,
                            dispatch_mode, in_attrs, out_attrs);
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 struct RNNGrad {
   const char *op_name;
@@ -245,14 +245,14 @@ static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,
     itype = in_types[seq_len_input_idx];
   }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (ctx.dev_type == kCPU && SupportMKLDNNRnn(param, in_types[rnn_enum::kData])) {
     const mxnet::TShape& data_shape = in_shapes[rnn_enum::kData];
     state = OpStatePtr::Create<MKLDNNRnnOp>(param, data_shape[0],
         data_shape[1], data_shape[2]);
     return state;
   }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
   MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
     MSHADOW_TYPE_SWITCH(itype, IType, {
@@ -266,7 +266,7 @@ static OpStatePtr CreateRNNState(const nnvm::NodeAttrs &attrs,
   return state;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void RNNStatefulComputeExCPU(const OpStatePtr& state_ptr,
                                     const OpContext& ctx,
                                     const std::vector<NDArray>& inputs,
@@ -292,7 +292,7 @@ static void RNNStatefulGradComputeExCPU(const OpStatePtr& state_ptr,
     FallBackCompute(RNNStatefulGradCompute<cpu>, state_ptr, ctx, inputs, req, outputs);
   }
 }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 NNVM_REGISTER_OP(RNN)
 .add_alias("_npx_rnn")
@@ -402,7 +402,7 @@ The definition of GRU here is slightly different from paper but compatible with
 .set_attr<nnvm::FInferType>("FInferType", RNNType)
 .set_attr<FCreateOpState>("FCreateOpState", CreateRNNState)
 .set_attr<FStatefulCompute>("FStatefulCompute<cpu>", RNNStatefulCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", RNNStorageType)
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx<cpu>", RNNStatefulComputeExCPU)
@@ -443,7 +443,7 @@ NNVM_REGISTER_OP(_backward_RNN)
 .set_attr<bool>("TIsLayerOpBackward", true)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FStatefulCompute>("FStatefulCompute<cpu>", RNNStatefulGradCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FInferStorageType>("FInferStorageType", RNNStorageType)
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FStatefulComputeEx>("FStatefulComputeEx<cpu>", RNNStatefulGradComputeExCPU)
diff --git a/src/operator/subgraph/mkldnn/mkldnn_bn_relu_property.h b/src/operator/subgraph/mkldnn/mkldnn_bn_relu_property.h
index 84eb18b..c19d282 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_bn_relu_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_bn_relu_property.h
@@ -19,7 +19,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_BN_RELU_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_BN_RELU_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -138,5 +138,5 @@ class SgMKLDNNBNReLUProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_BN_RELU_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_common.h b/src/operator/subgraph/mkldnn/mkldnn_common.h
index 2d1d66f..c06f3f9 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_common.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_common.h
@@ -26,7 +26,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_COMMON_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_COMMON_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include <vector>
 
 namespace mxnet {
@@ -134,5 +134,5 @@ static void ConvertWeightBias2MKLDNN(NDArray *weight, NDArray *bias, bool has_bi
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_COMMON_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv-inl.h b/src/operator/subgraph/mkldnn/mkldnn_conv-inl.h
index 509d250..7428f34 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_conv-inl.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_conv-inl.h
@@ -19,7 +19,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_INL_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <utility>
@@ -61,5 +61,5 @@ enum MKLDNNConvOpOutputs { kOut, kMin, kMax };
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_INL_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv.cc b/src/operator/subgraph/mkldnn/mkldnn_conv.cc
index 4f703c3..1c9bb0c 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_conv.cc
+++ b/src/operator/subgraph/mkldnn/mkldnn_conv.cc
@@ -17,7 +17,7 @@
 * under the License.
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <utility>
 #include <vector>
@@ -734,4 +734,4 @@ NNVM_REGISTER_OP(_sg_mkldnn_conv)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv_property.h b/src/operator/subgraph/mkldnn/mkldnn_conv_property.h
index 6eaa930..28ee14f 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_conv_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_conv_property.h
@@ -19,7 +19,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -301,5 +301,5 @@ class SgMKLDNNConvProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h b/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h
index 21b29a6..4fc2cff 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_elemwisemul_post_quantize_property.h
@@ -26,7 +26,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -218,5 +218,5 @@ class ElemwiseMulPostQuantizeProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_ELEMWISEMUL_POST_QUANTIZE_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_fc-inl.h b/src/operator/subgraph/mkldnn/mkldnn_fc-inl.h
index 9a09d91..14177c4 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_fc-inl.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_fc-inl.h
@@ -19,7 +19,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_INL_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <utility>
@@ -79,5 +79,5 @@ static inline bool IsOutputUint8(const MKLDNNFCFullParam& full_param) {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_INL_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_fc.cc b/src/operator/subgraph/mkldnn/mkldnn_fc.cc
index e2b1807..0d81d26 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_fc.cc
+++ b/src/operator/subgraph/mkldnn/mkldnn_fc.cc
@@ -24,7 +24,7 @@
  * \author Ciyong Chen
 */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <utility>
 #include <vector>
@@ -670,4 +670,4 @@ NNVM_REGISTER_OP(_sg_mkldnn_fully_connected)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
diff --git a/src/operator/subgraph/mkldnn/mkldnn_fc_post_quantize_property.h b/src/operator/subgraph/mkldnn/mkldnn_fc_post_quantize_property.h
index aaa613c..e0328bf 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_fc_post_quantize_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_fc_post_quantize_property.h
@@ -26,7 +26,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_POST_QUANTIZE_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_POST_QUANTIZE_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -218,5 +218,5 @@ class SgMKLDNNFCPostQuantizeProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_POST_QUANTIZE_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_fc_property.h b/src/operator/subgraph/mkldnn/mkldnn_fc_property.h
index aecb3a7..5c455c5 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_fc_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_fc_property.h
@@ -26,7 +26,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -223,5 +223,5 @@ class SgMKLDNNFCProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_FC_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h
index 424d651..db2a6c0 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_align_scale_property.h
@@ -19,7 +19,7 @@
 
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <string>
 #include <vector>
@@ -167,5 +167,5 @@ class SgMKLDNNPostQuantizeAlignScaleProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_ALIGN_SCALE_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
index e2bb7ca..837ed6e 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
@@ -18,7 +18,7 @@
  */
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <set>
 #include <string>
@@ -179,5 +179,5 @@ class SgMKLDNNPostQuantizeProperty : public SubgraphProperty {
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // if MXNET_USE_MKLDNN == 1
+#endif  // if MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h b/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h
index 6436852..910fed6 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_base-inl.h
@@ -18,7 +18,7 @@
  */
 #ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_SUBGRAPH_BASE_INL_H_
 #define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_SUBGRAPH_BASE_INL_H_
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "../subgraph_property.h"
 
@@ -40,5 +40,5 @@ static inline bool SupportMKLDNNAttr(const std::shared_ptr<NodeAttr>& node_attr)
 }  // namespace op
 }  // namespace mxnet
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_SUBGRAPH_BASE_INL_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
index 07f06cd..5a6223f 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
+++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include "mkldnn_conv_property.h"
 #include "mkldnn_fc_property.h"
@@ -54,4 +54,4 @@ MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_QUANTIZE, SgMKLDNNPostQuantizeAlignScale
 
 }  // namespace op
 }  // namespace mxnet
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/src/operator/subgraph/partitioner/custom_subgraph_property.h b/src/operator/subgraph/partitioner/custom_subgraph_property.h
index cd41035..1ade2ed 100644
--- a/src/operator/subgraph/partitioner/custom_subgraph_property.h
+++ b/src/operator/subgraph/partitioner/custom_subgraph_property.h
@@ -212,7 +212,7 @@ class  CustomSubgraphProperty: public SubgraphProperty {
         arg_names.push_back(in_arg_names[i].c_str());
         const NDArray &in_arg = *(in_args_ptr[i]);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         // reorder data if in MKLDNN format
         if (in_arg.IsMKLDNNData()) {
           in_arg.Reorder2DefaultAsync();
@@ -246,7 +246,7 @@ class  CustomSubgraphProperty: public SubgraphProperty {
         aux_names.push_back(in_aux_names[i].c_str());
         const auto &in_aux = *(in_aux_ptr[i]);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
         // reorder data if in MKLDNN format
         if (in_aux.IsMKLDNNData()) {
           in_aux.Reorder2DefaultAsync();
diff --git a/src/operator/tensor/amp_cast.cc b/src/operator/tensor/amp_cast.cc
index 088cb9a..fa0fb3b 100644
--- a/src/operator/tensor/amp_cast.cc
+++ b/src/operator/tensor/amp_cast.cc
@@ -30,7 +30,7 @@ namespace op {
 DMLC_REGISTER_PARAMETER(AMPCastParam);
 DMLC_REGISTER_PARAMETER(AMPMultiCastParam);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void AMPCastExCPU(const nnvm::NodeAttrs& attrs,
                     const OpContext& ctx,
                     const std::vector<NDArray>& inputs,
@@ -116,7 +116,7 @@ inline static bool AMPMultiCastStorageType(const nnvm::NodeAttrs& attrs, const i
   return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs, out_attrs);
 }
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 
 NNVM_REGISTER_OP(amp_cast)
 .add_alias("_npi_amp_cast")
@@ -136,7 +136,7 @@ It casts only between low precision float/FP32 and does not do anything for othe
     return std::vector<bool>{true};
   })
 .set_attr<FCompute>("FCompute<cpu>", AMPCastCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", AMPCastStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", AMPCastExCPU)
@@ -155,7 +155,7 @@ NNVM_REGISTER_OP(_backward_amp_cast)
   [](const NodeAttrs& attrs){
     return std::vector<bool>{true};
   })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", AMPCastStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", AMPCastExCPU)
@@ -208,7 +208,7 @@ It casts only between low precision float/FP32 and does not do anything for othe
     return std::vector<bool>(num_args, true);
   })
 .set_attr<FCompute>("FCompute<cpu>", AMPMultiCastCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", AMPMultiCastStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", AMPMultiCastExCPU)
@@ -252,7 +252,7 @@ NNVM_REGISTER_OP(_backward_amp_multicast)
     int num_args = dmlc::get<AMPMultiCastParam>(attrs.parsed).num_outputs;
     return std::vector<bool>(num_args, true);
   })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FInferStorageType>("FInferStorageType", AMPMultiCastStorageType)
 .set_attr<FComputeEx>("FComputeEx<cpu>", AMPMultiCastExCPU)
diff --git a/src/operator/tensor/cast_storage-inl.h b/src/operator/tensor/cast_storage-inl.h
index e8fec30..aa2eb0d 100644
--- a/src/operator/tensor/cast_storage-inl.h
+++ b/src/operator/tensor/cast_storage-inl.h
@@ -34,7 +34,7 @@
 #ifdef __CUDACC__
 #include "./cast_storage-inl.cuh"
 #endif  // __CUDACC__
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_base-inl.h"
 #endif
 
@@ -397,7 +397,7 @@ void CastStorageComputeImpl(const OpContext& ctx,
   } else if (src_stype == kRowSparseStorage && dst_stype == kRowSparseStorage) {
     NDArray ret = output;
     CastStorageRspRspImpl<xpu>(ctx, input, &ret);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   } else if (src_stype == kDefaultStorage && dst_stype == kDefaultStorage) {
     CHECK_EQ(output.ctx().dev_type, input.ctx().dev_type);
     // If one of them uses the MKLDNN layout.
@@ -449,7 +449,7 @@ inline bool CastStorageInferStorageType(const nnvm::NodeAttrs& attrs,
   if (!dispatched && in_stype == kDefaultStorage && param_stype == kDefaultStorage) {
     // dns -> dns
     DispatchMode mode = DispatchMode::kFCompute;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     // If we use MKLDNN and the arrays are in CPU memory, the array may store
     // MKLDNN layout, we should convert its layout explicitly.
     if (dev_mask == kCPU)
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc
index 4690816..1620579 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_op_basic.cc
@@ -43,7 +43,7 @@ static void ElemwiseAddEx(const nnvm::NodeAttrs& attrs,
                           const std::vector<NDArray>& outputs) {
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 1U);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (SupportMKLDNNSum(inputs[0]) && SupportMKLDNNSum(inputs[1])) {
     MKLDNNRun(MKLDNNSumForward, attrs, ctx, inputs, req, outputs);
     return;
@@ -67,7 +67,7 @@ static inline bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
   bool ret = ElemwiseBinaryOp::PreferDenseStorageType<true, true, true>(
                attrs, dev_mask, dispatch_mode, in_attrs, out_attrs);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
     *dispatch_mode = DispatchMode::kFComputeFallback;
   } else if (dev_mask == mshadow::cpu::kDevMask
@@ -82,7 +82,7 @@ static inline bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs,
 MXNET_OPERATOR_REGISTER_BINARY(elemwise_add)
 .set_attr<FInferStorageType>("FInferStorageType", ElemwiseAddStorageType)
 .set_attr<FCompute>("FCompute<cpu>", ElemwiseBinaryOp::Compute<cpu, op::mshadow_op::plus>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 #endif
 .set_attr<FComputeEx>("FComputeEx<cpu>", ElemwiseAddEx)
@@ -121,7 +121,7 @@ static void _backward_ElemwiseAddEx(const nnvm::NodeAttrs& attrs,
                                     const std::vector<NDArray>& outputs) {
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 2U);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (inputs[0].IsMKLDNNData()) {
     MKLDNNRun(MKLDNNCopy, attrs, ctx, inputs[0], req[0], outputs[0]);
     MKLDNNRun(MKLDNNCopy, attrs, ctx, inputs[0], req[1], outputs[1]);
@@ -146,7 +146,7 @@ static inline bool ElemwiseAddBackwardStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 2);
   bool ret = ElemwiseStorageType<1, 2, true, true, true>(attrs, dev_mask, dispatch_mode,
                                                          in_attrs, out_attrs);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (dev_mask == mshadow::cpu::kDevMask && !MKLDNNEnvSet()) {
     *dispatch_mode = DispatchMode::kFComputeFallback;
   } else if (dev_mask == mshadow::cpu::kDevMask) {
@@ -165,7 +165,7 @@ NNVM_REGISTER_OP(_backward_add)
                                   return std::vector<std::pair<int, int> >{{0, 0},
                                                                            {0, 1}};
                                 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
diff --git a/src/operator/tensor/elemwise_sum.cc b/src/operator/tensor/elemwise_sum.cc
index c513e65..2e779b5 100644
--- a/src/operator/tensor/elemwise_sum.cc
+++ b/src/operator/tensor/elemwise_sum.cc
@@ -83,7 +83,7 @@ bool ElementWiseSumForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1U);
   bool ret = ElemwiseStorageAttr<false, true, false>(attrs, dev_mask, dispatch_mode,
                                                      in_attrs, out_attrs);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   // We should always use FComputeEx.
   if (dev_mask == mshadow::cpu::kDevMask
       && common::ContainsOnlyStorage(*in_attrs, kDefaultStorage)
@@ -94,7 +94,7 @@ bool ElementWiseSumForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   return ret;
 }
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static inline bool IsMKLDNNData(const std::vector<NDArray> &arrs) {
   for (auto &arr : arrs) {
     if (!arr.IsMKLDNNData())
@@ -113,7 +113,7 @@ void ElementWiseSumComputeExCPU(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
   if (req[0] == kNullOp) return;
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   if (IsMKLDNNData(inputs)) {
     MKLDNNRun(MKLDNNSumForward, attrs, ctx, inputs, req, outputs);
   } else if (common::ContainsOnlyStorage(inputs, kDefaultStorage)) {
@@ -180,7 +180,7 @@ The storage type of ``add_n`` output depends on storage types of inputs
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
   })
 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 #endif
 .set_attr<mxnet::FInferShape>("FInferShape", ElementWiseSumShape)
diff --git a/src/operator/tensor/elemwise_unary_op.h b/src/operator/tensor/elemwise_unary_op.h
index f5650d2..4d34c51 100644
--- a/src/operator/tensor/elemwise_unary_op.h
+++ b/src/operator/tensor/elemwise_unary_op.h
@@ -409,7 +409,7 @@ class UnaryOp : public OpBase {
       case kWriteInplace:
 // cannot check if ptrs are the same for MKLDNN because we may have
 // created copies of input when reordering. WriteInPlace will still write to original array
-#if MXNET_USE_MKLDNN == 0
+#if MXNET_USE_ONEDNN == 0
         CHECK_EQ(inputs[0].dptr_, outputs[0].dptr_);
 #endif
         break;
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index de80443..5107de8 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -204,7 +204,7 @@ static void CopyEx(const nnvm::NodeAttrs& attrs,
                    const std::vector<NDArray>& outputs) {
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   const auto in_stype = inputs[0].storage_type();
   const auto out_stype = outputs[0].storage_type();
   if (inputs[0].IsMKLDNNData()) {
@@ -215,7 +215,7 @@ static void CopyEx(const nnvm::NodeAttrs& attrs,
       FallBackCompute(UnaryOp::IdentityCompute<cpu>, attrs, ctx, inputs, req, outputs);
     return;
   }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   UnaryOp::IdentityComputeEx<cpu>(attrs, ctx, inputs, req, outputs);
 }
 
@@ -228,7 +228,7 @@ static inline bool CopyStorageType(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(out_attrs->size(), 1);
   bool ret = ElemwiseStorageType<1, 1, false, true, true>(attrs, dev_mask, dispatch_mode,
                                                           in_attrs, out_attrs);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   // We have to make sure all inputs are default layouts. Otherwise, we might
   // want to fallback.
   if (dev_mask == mshadow::cpu::kDevMask
@@ -236,7 +236,7 @@ static inline bool CopyStorageType(const nnvm::NodeAttrs& attrs,
       && out_attrs->at(0) == kDefaultStorage) {
     *dispatch_mode = DispatchMode::kFComputeEx;
   }
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   return ret;
 }
 
@@ -246,12 +246,12 @@ MXNET_OPERATOR_REGISTER_UNARY(_copy)
 .set_attr<FInferStorageType>("FInferStorageType", CopyStorageType)
 .set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", CopyEx)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
 .set_attr<bool>("TIsMKLDNN", true)
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 .set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
   [](const NodeAttrs& attrs){
     return std::vector<bool>{true};
@@ -269,11 +269,11 @@ NNVM_REGISTER_OP(_backward_copy)
 .set_attr<FInferStorageType>("FInferStorageType", CopyStorageType)
 .set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", CopyEx)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& n) {
   return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-})  // MXNET_USE_MKLDNN == 1
+})  // MXNET_USE_ONEDNN == 1
 #endif
 .set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
   [](const NodeAttrs& attrs){
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 7bc623b..49a3ed2 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -704,7 +704,7 @@ inline bool SliceForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   }
 
   if (in_stype == kDefaultStorage) {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (dev_mask == Context::kCPU && MKLDNNEnvSet()
         && SupportMKLDNNSlice(param)) {
       dispatched = storage_type_assign(&out_stype, kDefaultStorage,
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index a7cc36a..0b5846b 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -25,7 +25,7 @@
 // this will be invoked by gcc and compile CPU version
 #include "./matrix_op-inl.h"
 #include "./elemwise_unary_op.h"
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 #include "../nn/mkldnn/mkldnn_ops-inl.h"
 #include "../nn/mkldnn/mkldnn_base-inl.h"
 #include "../nn/mkldnn/mkldnn_slice-inl.h"
@@ -107,7 +107,7 @@ DMLC_REGISTER_PARAMETER(SqueezeParam);
 DMLC_REGISTER_PARAMETER(DepthToSpaceParam);
 DMLC_REGISTER_PARAMETER(SplitParam);
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void ReshapeComputeExCPU(const nnvm::NodeAttrs& attrs,
                                 const OpContext& ctx,
                                 const std::vector<NDArray>& inputs,
@@ -191,7 +191,7 @@ If the argument `reverse` is set to 1, then the special values are inferred from
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_reshape"})
 .set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ReshapeComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", ReshapeStorageType)
@@ -211,7 +211,7 @@ If the argument `reverse` is set to 1, then the special values are inferred from
 .add_argument("data", "NDArray-or-Symbol", "Input data to reshape.")
 .add_arguments(ReshapeParam::__FIELDS__());
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void FlattenEx(const nnvm::NodeAttrs& attrs,
                       const OpContext& ctx,
                       const std::vector<NDArray>& inputs,
@@ -266,7 +266,7 @@ Example::
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_backward_copy" })
 .set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", FlattenEx)
 .set_attr<FInferStorageType>("FInferStorageType", FlattenStorageType)
@@ -285,7 +285,7 @@ Example::
 .set_attr<THasDeterministicOutput>("THasDeterministicOutput", true)
 .add_argument("data", "NDArray-or-Symbol", "Input array.");
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void TransposeComputeExCPU(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
@@ -368,7 +368,7 @@ Examples::
   [](const NodeAttrs& n) {
     return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
 })
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", TransposeComputeExCPU)
 .set_attr<FInferStorageType>("FInferStorageType", TransposeStorageType)
@@ -377,7 +377,7 @@ Examples::
 .add_arguments(TransposeParam::__FIELDS__());
 
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 static void ExpandDimEx(const nnvm::NodeAttrs& attrs,
                         const OpContext& ctx,
                         const std::vector<NDArray>& inputs,
@@ -425,7 +425,7 @@ will return a new array with shape ``(2,1,3,4)``.
   })
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_reshape"})
 .set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 .set_attr<FComputeEx>("FComputeEx<cpu>", ExpandDimEx)
 .set_attr<FInferStorageType>("FInferStorageType", ExpandDimStorageType)
@@ -447,7 +447,7 @@ void SliceExCPU(const nnvm::NodeAttrs& attrs,
   auto in_stype = inputs[0].storage_type();
   if (in_stype == kCSRStorage) {
     SliceCsrImpl<cpu>(param, ctx, inputs[0], req[0], outputs[0]);
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   } else if (in_stype == kDefaultStorage) {
     if (SupportMKLDNN(inputs[0])) {
       MKLDNNRun(MKLDNNSlice, attrs, ctx, inputs[0], req[0], outputs[0]);
@@ -518,7 +518,7 @@ Example::
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_slice"})
 .set_attr<FCompute>("FCompute<cpu>", SliceOpForward<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", SliceExCPU)
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 .set_attr<bool>("TIsMKLDNN", true)
 #endif
 .add_argument("data", "NDArray-or-Symbol", "Source input")
diff --git a/src/serialization/cnpy.cc b/src/serialization/cnpy.cc
index f9532f8..fc895d1 100644
--- a/src/serialization/cnpy.cc
+++ b/src/serialization/cnpy.cc
@@ -247,7 +247,7 @@ void save_array(const std::string& fname, const NDArray& array_) {
   } else {
     array = array_;
     array.WaitToRead();
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (array.IsMKLDNNData()) {
       array = array.Reorder2Default();
     }
@@ -446,7 +446,7 @@ void save_array(mz_zip_archive* archive, const std::string& array_name, const ND
   } else {
     array = array_;
     array.WaitToRead();
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     if (array.IsMKLDNNData()) {
       array = array.Reorder2Default();
     }
diff --git a/src/storage/cpu_device_storage.h b/src/storage/cpu_device_storage.h
index 1987fa1..07c1fca 100644
--- a/src/storage/cpu_device_storage.h
+++ b/src/storage/cpu_device_storage.h
@@ -50,7 +50,7 @@ class CPUDeviceStorage {
   /*!
    * \brief Alignment of allocation.
    */
-#if MXNET_USE_MKLDNN == 1 || MXNET_USE_INTGEMM == 1
+#if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1
   // MKLDNN requires special alignment. 64 is used by the MKLDNN library in
   // memory allocation.
   static constexpr size_t alignment_ = kMKLDNNAlign;
diff --git a/src/storage/storage_manager_helpers.h b/src/storage/storage_manager_helpers.h
index dd5ff16..2732b68 100644
--- a/src/storage/storage_manager_helpers.h
+++ b/src/storage/storage_manager_helpers.h
@@ -120,7 +120,7 @@ class ContextHelperCPU : public ContextHelper {
   }
 
  private:
-#if MXNET_USE_MKLDNN == 1 || MXNET_USE_INTGEMM == 1
+#if MXNET_USE_ONEDNN == 1 || MXNET_USE_INTGEMM == 1
   // MKLDNN requires special alignment. 64 is used by the MKLDNN library in
   // memory allocation.
   static constexpr size_t alignment_ = kMKLDNNAlign;
diff --git a/tests/README.md b/tests/README.md
index 98b1cd5..7c4dc91 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -27,7 +27,7 @@
     ```
 1. Generate your Makefile and build along with the tests with cmake (specify appropriate flags)
     ```
-    cmake -DUSE_CUDNN=ON -DUSE_CUDA=ON -DUSE_MKLDNN=ON -DUSE_BLAS=Open -DCMAKE_BUILD_TYPE=Debug .. && make
+    cmake -DUSE_CUDNN=ON -DUSE_CUDA=ON -DUSE_ONEDNN=ON -DUSE_BLAS=Open -DCMAKE_BUILD_TYPE=Debug .. && make
     ```
 1.  Run tests
     ```
@@ -53,7 +53,7 @@ Ninja is a build tool (like make) that prioritizes building speed. If you will b
     ```
 1. When running cmake, add the `-GNinja` flag to specify cmake to generate a Ninja build file
     ```
-    cmake -DUSE_CUDNN=ON -DUSE_CUDA=ON -DUSE_MKLDNN=ON -DUSE_BLAS=Open -GNinja -DCMAKE_BUILD_TYPE=Debug ..
+    cmake -DUSE_CUDNN=ON -DUSE_CUDA=ON -DUSE_ONEDNN=ON -DUSE_BLAS=Open -GNinja -DCMAKE_BUILD_TYPE=Debug ..
     ```
 1. Run the ninja build file with
     ```
diff --git a/tests/cpp/include/test_mkldnn.h b/tests/cpp/include/test_mkldnn.h
index b727cd8..a7ab641 100644
--- a/tests/cpp/include/test_mkldnn.h
+++ b/tests/cpp/include/test_mkldnn.h
@@ -26,12 +26,12 @@
 #ifndef TEST_MKLDNN_H_
 #define TEST_MKLDNN_H_
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <set>
 #include <string>
 #include <vector>
-#include "../../../3rdparty/mkldnn/include/mkldnn_types.h"
+#include "../../../3rdparty/onednn/include/mkldnn_types.h"
 #include "../../../3rdparty/googletest/googletest/include/gtest/gtest.h"
 #include "../../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
 
@@ -621,5 +621,5 @@ inline void VerifySumResult(const std::vector<NDArray *> &in_arrs,
     ASSERT_EQ(d1[i] + d2[i], o[i]);
 }
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
 #endif  // TEST_MKLDNN_H_
diff --git a/tests/cpp/include/test_util.h b/tests/cpp/include/test_util.h
index f138219..c41d57c 100644
--- a/tests/cpp/include/test_util.h
+++ b/tests/cpp/include/test_util.h
@@ -807,7 +807,7 @@ static void AssertEqual(const std::vector<NDArray *> &in_arrs,
       tmp1.WaitToRead();
       tmp2.WaitToRead();
     }
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
     tmp1 = tmp1.Reorder2Default();
     tmp2 = tmp2.Reorder2Default();
 #endif
diff --git a/tests/cpp/operator/batchnorm_test.cc b/tests/cpp/operator/batchnorm_test.cc
index dab8c98..e66b0b7 100644
--- a/tests/cpp/operator/batchnorm_test.cc
+++ b/tests/cpp/operator/batchnorm_test.cc
@@ -1012,14 +1012,14 @@ TEST(BATCH_NORM, TestTiming_2D) {
   }
 MSHADOW_REAL_TYPE_SWITCH_EX(
   mshadow::kFloat32, DType, AccReal, {
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
   // MKL
   timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
     "MKL BatchNormProp<cpu> 2D",
     false, false,
     blank_kwargs_nocudnn,
     2, THISCOUNT);
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
   // CPU
   test::ScopeSet<volatile bool> disableMKL(&mxnet::op::batchnorm::disable_mkl, true);
   timingTest<BatchNormCoreOpProp, BNOperatorExecutor<DType, AccReal>>(
diff --git a/tests/cpp/operator/mkldnn_operator_test.cc b/tests/cpp/operator/mkldnn_operator_test.cc
index 8e86100..a88174d 100644
--- a/tests/cpp/operator/mkldnn_operator_test.cc
+++ b/tests/cpp/operator/mkldnn_operator_test.cc
@@ -23,7 +23,7 @@
  *  \author Alex Zai
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <mkldnn_types.h>
 #include <cmath>
@@ -1349,4 +1349,4 @@ TEST(IMPERATIVE, BNOp) {
   TestOpExBN(forward_attrs, backwards_attrs);
 }
 
-#endif  // MXNET_USE_MKLDNN == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/tests/cpp/operator/mkldnn_test.cc b/tests/cpp/operator/mkldnn_test.cc
index 816979b..566835f 100644
--- a/tests/cpp/operator/mkldnn_test.cc
+++ b/tests/cpp/operator/mkldnn_test.cc
@@ -23,7 +23,7 @@
  *  \author Da Zheng
  */
 
-#if MXNET_USE_MKLDNN == 1
+#if MXNET_USE_ONEDNN == 1
 
 #include <mkldnn_types.h>
 #include <cmath>
@@ -417,4 +417,4 @@ TEST(MKLDNN_NDArray, CopyFrom) {
   }
 }
 
-#endif  // MXNET_USE_MKLDNN  == 1
+#endif  // MXNET_USE_ONEDNN == 1
diff --git a/tests/cpp/storage/storage_test.cc b/tests/cpp/storage/storage_test.cc
index 497af35..3d957cd 100644
--- a/tests/cpp/storage/storage_test.cc
+++ b/tests/cpp/storage/storage_test.cc
@@ -48,7 +48,7 @@ TEST(Storage, Basic_CPU) {
 }
 
 TEST(Storage, CPU_MemAlign) {
-  #if MXNET_USE_MKLDNN == 1
+  #if MXNET_USE_ONEDNN == 1
   // MKLDNN requires special alignment. 64 is used by the MKLDNN library in
   // memory allocation.
     static constexpr size_t alignment_ = mxnet::kMKLDNNAlign;
diff --git a/tools/license_header.py b/tools/license_header.py
index 5c7abc9..44e7eac 100755
--- a/tools/license_header.py
+++ b/tools/license_header.py
@@ -70,7 +70,7 @@ _WHITE_LIST = [
                '3rdparty/dlpack',
                '3rdparty/dmlc-core',
                '3rdparty/googletest',
-               '3rdparty/mkldnn',
+               '3rdparty/onednn',
                '3rdparty/nvidia_cub',
                '3rdparty/onnx-tensorrt',
                '3rdparty/openmp',
@@ -78,7 +78,7 @@ _WHITE_LIST = [
                '3rdparty/tvm',
 
                # 3rdparty headerfiles under different licenses
-               'include/mkldnn',
+               'include/onednn',
 
                # Docs Sphinx themes under different licenses
                'docs/python_docs/themes',
diff --git a/tools/pip/doc/CPU_ADDITIONAL.md b/tools/pip/doc/CPU_ADDITIONAL.md
index d5b2f10..0f330b3 100644
--- a/tools/pip/doc/CPU_ADDITIONAL.md
+++ b/tools/pip/doc/CPU_ADDITIONAL.md
@@ -23,7 +23,7 @@ This package supports Linux, Mac OSX, and Windows platforms. You may also want t
 - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support.
 - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without ONEDNN.
 
 To use this package on Linux you need the `libquadmath.so.0` shared library. On
 Debian based systems, including Ubuntu, run `sudo apt install libquadmath0` to
diff --git a/tools/pip/doc/CU101_ADDITIONAL.md b/tools/pip/doc/CU101_ADDITIONAL.md
index 3d2c535..ef7f7d1 100644
--- a/tools/pip/doc/CU101_ADDITIONAL.md
+++ b/tools/pip/doc/CU101_ADDITIONAL.md
@@ -22,7 +22,7 @@ This package supports Linux and Windows platforms. You may also want to check:
 - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support.
 - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without oneDNN.
 
 To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html).
 
diff --git a/tools/pip/doc/CU102_ADDITIONAL.md b/tools/pip/doc/CU102_ADDITIONAL.md
index 6f62d56..c10ed71 100644
--- a/tools/pip/doc/CU102_ADDITIONAL.md
+++ b/tools/pip/doc/CU102_ADDITIONAL.md
@@ -22,7 +22,7 @@ This package supports Linux and Windows platforms. You may also want to check:
 - [mxnet-cu110](https://pypi.python.org/pypi/mxnet-cu110/) with CUDA-11.0 support.
 - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without oneDNN.
 
 To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html).
 
diff --git a/tools/pip/doc/CU110_ADDITIONAL.md b/tools/pip/doc/CU110_ADDITIONAL.md
index d442a8e..ce40ba9 100644
--- a/tools/pip/doc/CU110_ADDITIONAL.md
+++ b/tools/pip/doc/CU110_ADDITIONAL.md
@@ -22,7 +22,7 @@ This package supports Linux and Windows platforms. You may also want to check:
 - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support.
 - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without oneDNN.
 
 To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html).
 
diff --git a/tools/pip/doc/CU112_ADDITIONAL.md b/tools/pip/doc/CU112_ADDITIONAL.md
index 206c96a..ee0fe49 100644
--- a/tools/pip/doc/CU112_ADDITIONAL.md
+++ b/tools/pip/doc/CU112_ADDITIONAL.md
@@ -22,7 +22,7 @@ This package supports Linux and Windows platforms. You may also want to check:
 - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support.
 - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without oneDNN.
 
 To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html).
 
diff --git a/tools/pip/doc/NATIVE_ADDITIONAL.md b/tools/pip/doc/NATIVE_ADDITIONAL.md
index ef26962..012fb39 100644
--- a/tools/pip/doc/NATIVE_ADDITIONAL.md
+++ b/tools/pip/doc/NATIVE_ADDITIONAL.md
@@ -23,7 +23,7 @@ This package supports Linux and Windows platforms. You may also want to check:
 - [mxnet-cu102](https://pypi.python.org/pypi/mxnet-cu102/) with CUDA-10.2 support.
 - [mxnet-cu101](https://pypi.python.org/pypi/mxnet-cu101/) with CUDA-10.1 support.
 - [mxnet](https://pypi.python.org/pypi/mxnet/).
-- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without MKLDNN.
+- [mxnet-native](https://pypi.python.org/pypi/mxnet-native/) CPU variant without oneDNN.
 
 To download CUDA, check [CUDA download](https://developer.nvidia.com/cuda-downloads). For more instructions, check [CUDA Toolkit online documentation](http://docs.nvidia.com/cuda/index.html).
 
diff --git a/tools/pip/setup.py b/tools/pip/setup.py
index 896e137..f0c5844 100644
--- a/tools/pip/setup.py
+++ b/tools/pip/setup.py
@@ -149,8 +149,8 @@ short_description += ' This version uses {0}.'.format(' and '.join(libraries))
 package_data = {'mxnet': [os.path.join('mxnet', os.path.basename(LIB_PATH[0]))],
                 'dmlc_tracker': []}
 if Features().is_enabled("MKLDNN"):
-    shutil.copytree(os.path.join(CURRENT_DIR, 'mxnet-build/3rdparty/mkldnn/include'),
-                    os.path.join(CURRENT_DIR, 'mxnet/include/mkldnn'))
+    shutil.copytree(os.path.join(CURRENT_DIR, 'mxnet-build/3rdparty/onednn/include'),
+                    os.path.join(CURRENT_DIR, 'mxnet/include/onednn'))
 if platform.system() == 'Linux':
     libdir, mxdir = os.path.dirname(LIB_PATH[0]), os.path.join(CURRENT_DIR, 'mxnet')
     if os.path.exists(os.path.join(libdir, 'libgfortran.so.3')):
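
The packaging logic above, reduced to a standalone sketch: the real script gates on `Features().is_enabled("MKLDNN")` (the feature key keeps its legacy spelling in this commit), while `bundle_onednn_headers` and its parameters below are illustrative names, not part of the patch:

```python
import os
import shutil

def bundle_onednn_headers(build_dir: str, pkg_dir: str, enabled: bool) -> None:
    """Ship the renamed oneDNN header tree with the wheel only when the backend is built in."""
    if enabled:
        shutil.copytree(os.path.join(build_dir, '3rdparty', 'onednn', 'include'),
                        os.path.join(pkg_dir, 'include', 'onednn'))
```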
diff --git a/tools/source-exclude-artifacts.txt b/tools/source-exclude-artifacts.txt
index 1394ba2..59c3671 100644
--- a/tools/source-exclude-artifacts.txt
+++ b/tools/source-exclude-artifacts.txt
@@ -20,4 +20,4 @@
 #  not be included in source release archives due to licensing
 #  restrictions.
 
-3rdparty/mkldnn/doc
+3rdparty/onednn/doc
diff --git a/tools/staticbuild/README.md b/tools/staticbuild/README.md
index 684ca98..ad5767e 100644
--- a/tools/staticbuild/README.md
+++ b/tools/staticbuild/README.md
@@ -30,13 +30,13 @@ Ubuntu systems.
 ```
 tools/staticbuild/build.sh cu112
 ```
-This would build the mxnet package based on CUDA 11.2. Currently, we support variants cpu, native, cu101, cu102, cu110, and cu112. All of these variants expect native have MKL-DNN backend enabled. 
+This would build the mxnet package with CUDA 11.2 support. Currently, we support the variants cpu, native, cu101, cu102, cu110, and cu112. All of these variants except native have the oneDNN backend enabled.
 
 ```
 tools/staticbuild/build.sh cpu
 ```
 
-This would build the mxnet package based on MKL-DNN.
+This would build the mxnet package with the oneDNN backend enabled.
 
 As a result, you will have the complete set of static dependencies in `/staticdeps` in the root folder, as well as a statically linked `libmxnet.so` in `lib`. You can build your language binding against this `libmxnet.so`.
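
For completeness, here is one way to drive the same script programmatically; a sketch only, assuming it runs from the repository root on a supported system:

```python
import subprocess

variant = "cpu"  # one of: cpu, native, cu101, cu102, cu110, cu112
subprocess.run(["tools/staticbuild/build.sh", variant], check=True)
```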