You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by zh...@apache.org on 2021/12/21 00:14:58 UTC

[incubator-mxnet] branch v2.0.0.beta0 updated (9eaf198 -> f0ef9d8)

This is an automated email from the ASF dual-hosted git repository.

zhenghuijin pushed a change to branch v2.0.0.beta0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


 discard 9eaf198  [v2.0.0.beta0] Port #20628 (#20646)
    omit 4b017f2  [v2.0.0.beta0] License Fix (#20627)
     add 23af413  Fast cuDNN BatchNorm NHWC kernels support (#20615)
     add 2d090a2  [v2.0.0.beta0] License Update: **/*.md **/*.ipynb (#20628)
     add c0bdcee  [CI] Freeze array-api-test (#20631)
     add a77dcb0  Attempt to fix website build pipeline (#20634)
     add 4d48b06  [Master] Ignoring mass reformatting commits with git blame (#20578)
     add 36cb619  change nd -> np in imagenet_gen_qsym_onednn.py (#20399)
     add 7be60ca  [API Standardization] Add Linalg kernels: (diagonal, outer, tensordot, cross, trace, matrix_transpose) (#20638)
     add 4b73646  [Feature][Master] Clang-format tool to perform additional formatting and semantic checking of code.  (#20433)
     add 0d09770  [Master][CI][Bugfix] Clang-format-13 file needs to have right license header and install clang-format package.  (#20658)
     add abd293f  [FEATURE] Enable dynamic linking with MKL and compiler based OpenMP (#20474)
     add 5e04608  [submodule] Remove soon to be obsolete dnnl nomenclature from mxnet (#20606)
     add b94fda7  [Master] Port #20627 (#20645)
     add ab6a3f9  [API Standardization]Standardize MXNet NumPy Statistical & Linalg Functions (#20592)
     add f1f9669  Fix os_x_mklbuild.yml (#20668)
     add fda48e0  Added ::GCD and ::LCM: <numeric> [c++17] contains gcd and lcm implementation (#20583)
     add f46b25b  [Master] Clang-format description on a wiki (#20612)
     add 6bbe886  Fix Windows-GPU build for monolithic arch dll (#20466)
     add cd76b63  Disable debug log to avoid duplications (#20665)
     add ca87539   Permlink changes (#20674)
     add a6a7ab4  [CI] UPgrade windows CI (#20676)
     add 9e97732  A clang-format file can be removed from .gitignore (#20664)
     add caa2308  [2.0] Update Sparse Feature Related Error Message (#20402)
     add fb90650  [Performance] Add oneDNN support for temperature parameter in Softmax (#20567)
     add 6dff660  [2.0] Bump Python to >= 3.8 (#20593)
     add 481eba7  [API] Add positive (#20667)
     add 5f0efbb  [v2.0] RNN: use rnn_params (#20384)
     add 5d247f1  [master][bugfix] Remove exit 0 to avoid blocking in CI pipeline  (#20683)
     add 61a456b  [master][tests] __init__' file to avoid undefined variables (#20701)
     add 64999b4  [API] Add logaddexp (#20673)
     add 026dbf8  [BUGFIX] Fix #20293 (#20462)
     add 4f289f8  An option to colorize output during build (#20681)
     add d2107af  Add: break line entry before ternary (#20705)
     add 3256754  [CI] Add timeout and retry to linkcheck (#20708)
     add 197fbba  [API] Add linalg.svdvals (#20696)
     add fcbab28  [API] Add floor_divide (#20620)
     add 1584250  [API STD][SEARCH FUNC] Add keepdims=False to argmax/argmin (#20692)
     add 1a1464f  [API NEW][METHOD] Add mT, permute_dims (#20688)
     add 94fc557  [FEATURE] Add oneDNN support for numpy concatenate operator (#20652)
     add 54ca02a  [master] Make warning message when oneDNN is turned off less confusing (#20700)
     add 3b48957  Fix csr param description (#20698)
     add 79e1753  Prospector checker initial commit (#20684)
     add fb1d395  Add quantized batch_dot (#20680)
     add 0eeaa49  [master] Bring dnnl_readme.md on master up-to-date (#20670)
     add 922d9f5  Add async GPU dependency Engine (#20331)
     add 3dffdc1  [master] Add aliases for subgraph operators to be compatible with old models (#20679)
     add 943ab64  [API] Add bitwise_left/right_shift (#20587)
     add 75e4d1d  [API NEW][ARRAY METHOD] Add __Index__() and __array_namespace__() (#20689)
     add 9e6dd92  [API STD][LINALG] Standardize sort & linalg operators (#20694)
     add 630a144  [API NEW][SET FUNC] Add set functions (#20693)
     add 30734fb  Remove extra spaces between 'if' (#20721)
     add 9266a91  Optimize preparation of selfattn operators (#20682)
     add 683c974  [API] Standardize MXNet NumPy creation functions (#20572)
     add af1622e  [DOC] Fix migration guide document (#20716)
     add 16fed6e  [FEATURE] add oneDNN support for numpy transpose (#20419)
     add 36ed5e0  Port convolutions to cuDNN v8 API (#20635)
     add 52bc1bf  Fix scale bug in quantized batch_dot (#20735)
     add bfa71cf  [master][bugfix] Zero initialization to avoid error message on a Centos (#20582)
     add ebf3054  [LICENSE] Port #20709 (#20736)
     add e3c4da9  [NumPy] Wrap unravel_index backend implementation instead of fallback (#20730)
     add a2ad4db  [API NEW][LINALG] Add vector_norm, matrix_norm (#20703)
     add 1add250  [master][clang-format] Re-format cc. .h. .cu files; cond.  (#20704)
     add 930e140  Reintroduce next_impl in onednn deconvolution (#20663)
     add 71007d8  [API TESTS] Standardization and add more array api tests (#20725)
     add 4a19f7f  [master][ci][feature] Static code checker for CMake files (#20706)
     add 024d01e  Unify all names used to refer to oneDNN library in logs and docs to oneDNN (#20719)
     add 1a8f6e6  Improve stack operator performance by oneDNN (#20621)
     add 2e8e0ae  [master] Merge DNNL adaptive pooling with standard pooling (#20741)
     add 07e21fe  [master][style-fix] Clang-format comment style fix (#20744)
     add 9be61e1  [submodule] Upgrade oneDNN to v2.3.3 (#20752)
     add 26f9fa6  Unifying oneDNN post-quantization properties (#20724)
     add f67b222  Add oneDNN support for reduce operators (#20669)
     add ac66740  [2.0] Fix devices issues (#20732)
     add 45c7999  [API] Add new dlpack API (#20546)
     add ebc88e7  Fix sanity CI (#20763)
     add 5cbcbce  Remove identity operators from oneDNN optimized graph (#20712)
     add f60c1d2  Fix test_numpy_op tests & lacking asserts (#20756)
     add 40359ce  Automatic Layout Management (#20718)
     add a676da5  [CI] Workaround MKL CI timeout issue (#20777)
     add cea7ab1  Fix link check (#20773)
     add b555b54  [master] CI/CD updates to be more stable (#20740)
     add f0ef9d8  Use cuDNN for conv bias and bias grad (#20771)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user force-pushes a change (git push --force) and generates
a repository containing something like this:

 * -- * -- B -- O -- O -- O   (9eaf198)
            \
             N -- N -- N   refs/heads/v2.0.0.beta0 (f0ef9d8)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

No new revisions were added by this update.

Summary of changes:
 .../numpy_extension/__init__.py => .clang-format   |   42 +-
 .../test_quantization_gpu.py => .cmakelintrc       |   17 +-
 .../requirements => .git-blame-ignore-revs         |   20 +-
 .github/workflows/greetings.yml                    |    4 +
 .github/workflows/link_check.yml                   |   23 +-
 .github/workflows/os_x_mklbuild.yml                |   50 +
 .github/workflows/os_x_staticbuild.yml             |   34 +-
 .gitignore                                         |    1 -
 .licenserc.yaml                                    |   28 +-
 3rdparty/mshadow/mshadow/base.h                    |   60 +
 3rdparty/mshadow/mshadow/tensor.h                  |   91 ++
 3rdparty/onednn                                    |    2 +-
 CMakeLists.txt                                     |   51 +-
 CONTRIBUTORS.md                                    |    2 +-
 MKLDNN_README.md => DNNL_README.md                 |    2 +-
 LICENSE                                            |   89 +-
 README.md                                          |    2 +-
 benchmark/opperf/README.md                         |    2 +-
 benchmark/python/sparse/cast_storage.py            |    2 +-
 benchmark/python/sparse/dot.py                     |    4 +-
 benchmark/python/sparse/sparse_op.py               |    4 +-
 cd/README.md                                       |    8 +-
 cd/python/docker/Dockerfile                        |    4 +-
 cd/python/docker/test_python_image.sh              |    2 +-
 cd/python/pypi/pypi_package.sh                     |    3 +-
 cd/utils/artifact_repository.md                    |    4 +-
 cd/utils/artifact_repository.py                    |    2 +-
 cd/utils/test_artifact_repository.py               |    6 +-
 ci/build_windows.py                                |   29 +-
 ci/dev_menu.py                                     |    4 +-
 ci/docker/Dockerfile.build.centos7                 |    8 +-
 ci/docker/Dockerfile.build.ubuntu                  |   17 +-
 ci/docker/docker-compose.yml                       |    2 +-
 ci/docker/install/requirements                     |   17 +-
 ci/docker/runtime_functions.sh                     |  130 +-
 ci/jenkins/Jenkins_steps.groovy                    |   58 +-
 ci/jenkins/Jenkinsfile_unix_cpu                    |    3 +-
 ci/jenkins/Jenkinsfile_unix_gpu                    |    4 +-
 ci/other/pylintrc                                  |  460 ------
 ci/windows/test_py3_cpu.ps1                        |   14 +-
 ci/windows/test_py3_gpu.ps1                        |   20 +-
 cmake/ChooseBlas.cmake                             |   33 +-
 cmake/Utils.cmake                                  |    2 +-
 cmake/upstream/FindBLAS.cmake                      |    9 +-
 config/darwin.cmake                                |    2 +-
 config/distribution/darwin_cpu.cmake               |    2 +-
 .../{darwin_native.cmake => darwin_cpu_mkl.cmake}  |    5 +-
 config/distribution/darwin_native.cmake            |    2 +-
 config/distribution/linux_cpu.cmake                |    2 +-
 .../{linux_cpu.cmake => linux_cpu_mkl.cmake}       |    6 +-
 config/distribution/linux_cu100.cmake              |    2 +-
 config/distribution/linux_cu101.cmake              |    2 +-
 config/distribution/linux_cu102.cmake              |    2 +-
 config/distribution/linux_cu110.cmake              |    2 +-
 config/distribution/linux_cu112.cmake              |    2 +-
 config/distribution/linux_cu92.cmake               |    2 +-
 config/distribution/linux_native.cmake             |    2 +-
 config/linux.cmake                                 |    2 +-
 config/linux_gpu.cmake                             |    2 +-
 cpp-package/example/charRNN.cpp                    |    3 +-
 cpp-package/example/inference/README.md            |    2 +-
 .../multi_threaded_inference.cc                    |  152 +-
 cpp-package/example/utils.h                        |   56 +-
 cpp-package/include/mxnet-cpp/base.h               |   16 +-
 cpp-package/include/mxnet-cpp/contrib.h            |  135 +-
 cpp-package/include/mxnet-cpp/executor.h           |  143 +-
 cpp-package/include/mxnet-cpp/initializer.h        |   83 +-
 cpp-package/include/mxnet-cpp/io.h                 |   69 +-
 cpp-package/include/mxnet-cpp/kvstore.h            |   20 +-
 cpp-package/include/mxnet-cpp/lr_scheduler.h       |   45 +-
 cpp-package/include/mxnet-cpp/metric.h             |   41 +-
 cpp-package/include/mxnet-cpp/model.h              |   19 +-
 cpp-package/include/mxnet-cpp/ndarray.h            |  683 +++++----
 cpp-package/include/mxnet-cpp/op_map.h             |   76 +-
 cpp-package/include/mxnet-cpp/op_suppl.h           |  122 +-
 cpp-package/include/mxnet-cpp/op_util.h            |   14 +-
 cpp-package/include/mxnet-cpp/operator.h           |  186 +--
 cpp-package/include/mxnet-cpp/optimizer.h          |   80 +-
 cpp-package/include/mxnet-cpp/shape.h              |  351 +++--
 cpp-package/include/mxnet-cpp/symbol.h             |  362 ++---
 .../python/api/{context => device}/index.rst       |    4 +-
 docs/python_docs/python/api/index.rst              |   12 +-
 docs/python_docs/python/api/np/arrays.ndarray.rst  |    3 +-
 docs/python_docs/python/api/np/routines.math.rst   |    3 +
 docs/python_docs/python/api/npx/index.rst          |    2 +-
 .../inference/image_classification_jetson.md       |   10 +-
 .../python/tutorials/extend/customop.md            |    8 +-
 .../getting-started/crash-course/1-nparray.md      |    2 +-
 .../getting-started/crash-course/2-create-nn.md    |    2 +-
 .../getting-started/crash-course/4-components.md   |    6 +-
 .../getting-started/crash-course/5-datasets.md     |    2 +-
 .../getting-started/crash-course/6-train-nn.md     |   12 +-
 .../getting-started/crash-course/7-use-gpus.md     |   14 +-
 .../gluon_from_experiment_to_deployment.md         |   22 +-
 .../getting-started/gluon_migration_guide.md       |  228 ++-
 .../logistic_regression_explained.md               |   10 +-
 docs/python_docs/python/tutorials/index.rst        |    6 +-
 .../packages/gluon/blocks/custom-layer.md          |    2 +-
 .../python/tutorials/packages/gluon/blocks/nn.md   |    2 +-
 .../packages/gluon/blocks/save_load_params.md      |   14 +-
 .../tutorials/packages/gluon/data/datasets.md      |   16 +-
 .../tutorials/packages/gluon/image/info_gan.md     |   38 +-
 .../python/tutorials/packages/gluon/image/mnist.md |   22 +-
 .../python/tutorials/packages/gluon/text/gnmt.rst  |   24 +-
 .../packages/gluon/training/fit_api_tutorial.md    |   18 +-
 .../learning_rates/learning_rate_finder.md         |   28 +-
 .../learning_rates/learning_rate_schedules.md      |   14 +-
 .../python/tutorials/packages/kvstore/kvstore.md   |    8 +-
 .../python/tutorials/packages/np/cheat-sheet.md    |    8 +-
 .../python/tutorials/packages/np/np-vs-numpy.md    |   12 +-
 .../tutorials/packages/onnx/fine_tuning_gluon.md   |   22 +-
 .../packages/onnx/inference_on_onnx_model.md       |   10 +-
 .../python/tutorials/performance/backend/amp.md    |   26 +-
 .../mkldnn_readme.md => dnnl/dnnl_readme.md}       |  191 ++-
 .../performance/backend/{mkldnn => dnnl}/index.rst |   14 +-
 .../python/tutorials/performance/backend/index.rst |    8 +-
 .../tutorials/performance/backend/profiler.md      |   34 +-
 .../python/tutorials/performance/index.rst         |    6 +-
 docs/python_docs/requirements                      |    1 +
 .../src/_includes/get_started/cloud/cpu.md         |    2 +-
 .../src/_includes/get_started/cloud/gpu.md         |    2 +-
 .../cpp/docs/tutorials/multi_threaded_inference.md |    2 +-
 .../pages/api/cpp/docs/tutorials/subgraphAPI.md    |    8 +-
 .../src/pages/api/developer_guide/profiling.md     |    4 +-
 docs/static_site/src/pages/api/faq/cloud.md        |    4 +-
 docs/static_site/src/pages/api/faq/env_var.md      |   61 +-
 .../src/pages/api/faq/large_tensor_support.md      |    4 +-
 docs/static_site/src/pages/api/faq/perf.md         |    1 -
 .../src/pages/api/faq/tensor_inspector_tutorial.md |    2 +-
 .../src/pages/community/clang_format_guide.md      |   58 +
 docs/static_site/src/pages/community/code_guide.md |    2 +-
 .../src/pages/community/pull_request.md            |    2 +-
 example/README.md                                  |    2 +-
 example/adversary/adversary_generation.ipynb       |    2 +-
 example/bi-lstm-sort/bi-lstm-sort.ipynb            |    4 +-
 .../distributed_training-horovod/gluon_mnist.py    |    2 +-
 .../distributed_training/cifar10_kvstore_hvd.py    |    2 +-
 example/extensions/lib_api/init_lib.cc             |    1 -
 example/extensions/lib_api/libtest.cc              |    9 +-
 example/extensions/lib_custom_op/gemm_lib.cc       |   69 +-
 example/extensions/lib_custom_op/relu_lib.cc       |   68 +-
 example/extensions/lib_custom_op/relu_lib.cu       |   60 +-
 example/extensions/lib_custom_op/relu_lib.h        |   45 +-
 .../extensions/lib_custom_op/transposecsr_lib.cc   |  102 +-
 .../extensions/lib_custom_op/transposerowsp_lib.cc |   99 +-
 example/extensions/lib_external_ops/init_lib.cc    |    1 -
 example/extensions/lib_external_ops/min_ex-inl.h   |   20 +-
 example/extensions/lib_external_ops/min_ex.cc      |   17 +-
 example/extensions/lib_external_ops/min_ex.cu      |    4 +-
 example/extensions/lib_pass/pass_lib.cc            |    6 +-
 example/extensions/lib_subgraph/subgraph_lib.cc    |  182 +--
 example/gluon/image_classification.py              |   32 +-
 example/gluon/mnist/mnist.py                       |    8 +-
 example/gluon/super_resolution/super_resolution.py |   36 +-
 example/multi-task/multi-task-learning.ipynb       |   16 +-
 example/quantization/README.md                     |   10 +-
 example/quantization/imagenet_gen_qsym_onednn.py   |   12 +-
 example/quantization/imagenet_inference.py         |   10 +-
 example/recommenders/demo1-MF.ipynb                |   10 +-
 example/recommenders/demo2-dssm.ipynb              |    2 +-
 include/mxnet/base.h                               |  106 +-
 include/mxnet/c_api.h                              | 1278 ++++++++-------
 include/mxnet/c_api_error.h                        |   42 +-
 include/mxnet/c_api_test.h                         |   21 +-
 include/mxnet/engine.h                             |  186 ++-
 include/mxnet/executor.h                           |   82 +-
 include/mxnet/expr_operator.h                      |   11 +-
 include/mxnet/imperative.h                         |  112 +-
 include/mxnet/io.h                                 |   46 +-
 include/mxnet/ir/expr.h                            |    2 +-
 include/mxnet/kvstore.h                            |   42 +-
 include/mxnet/lib_api.h                            | 1286 ++++++++++------
 include/mxnet/libinfo.h                            |    9 +-
 include/mxnet/ndarray.h                            |   84 +-
 include/mxnet/node/container.h                     |   66 +-
 include/mxnet/node/node.h                          |   10 +-
 include/mxnet/op_attr_types.h                      |  112 +-
 include/mxnet/operator.h                           |  121 +-
 include/mxnet/operator_util.h                      |  105 +-
 include/mxnet/random_generator.h                   |   77 +-
 include/mxnet/resource.h                           |   74 +-
 include/mxnet/rtc.h                                |   18 +-
 include/mxnet/runtime/c_runtime_api.h              |   28 +-
 include/mxnet/runtime/container.h                  |   43 +-
 include/mxnet/runtime/container_ext.h              |  289 +++-
 include/mxnet/runtime/data_type.h                  |   22 +-
 include/mxnet/runtime/ffi_helper.h                 |   40 +-
 include/mxnet/runtime/memory.h                     |   52 +-
 include/mxnet/runtime/ndarray.h                    |    2 +-
 include/mxnet/runtime/ndarray_handle.h             |    4 +-
 include/mxnet/runtime/object.h                     |  193 +--
 include/mxnet/runtime/packed_func.h                |  349 +++--
 include/mxnet/runtime/py_arg.h                     |    3 +-
 include/mxnet/runtime/registry.h                   |   47 +-
 include/mxnet/storage.h                            |   46 +-
 include/mxnet/tensor_blob.h                        |  239 +--
 include/mxnet/tuple.h                              |  241 +--
 include/onednn/mkldnn.h                            |    1 -
 include/onednn/mkldnn.hpp                          |    1 -
 include/onednn/mkldnn_config.h                     |    1 -
 include/onednn/mkldnn_debug.h                      |    1 -
 include/onednn/mkldnn_dnnl_mangling.h              |    1 -
 include/onednn/mkldnn_types.h                      |    1 -
 include/onednn/mkldnn_version.h                    |    1 -
 licenses/LICENSE.bfloat16.txt                      |    9 +
 licenses/LICENSE.blockingconcurrentqueue.txt       |   26 +
 LICENSE => licenses/LICENSE.builtin_fp16.txt       |  323 ++--
 licenses/LICENSE.clang.txt                         |   63 +
 licenses/LICENSE.cma.txt                           |   22 +
 licenses/LICENSE.cmakeincludes.txt                 |   30 +
 licenses/LICENSE.concurrentqueue.txt               |   22 +
 .../LICENSE => licenses/LICENSE.ctc_include.txt    |    0
 licenses/LICENSE.deformable_im2col.txt             |   52 +
 LICENSE => licenses/LICENSE.dlpack.txt             |  177 +--
 licenses/LICENSE.erfinv.txt                        |   31 +
 licenses/LICENSE.findeigen3.txt                    |   22 +
 licenses/LICENSE.findjemalloc.txt                  |   31 +
 licenses/LICENSE.findpythonlibsnew.txt             |   33 +
 LICENSE => licenses/LICENSE.gmock_gen.txt          |  175 +--
 licenses/LICENSE.googlemock.txt                    |   28 +
 licenses/LICENSE.googletest.txt                    |   28 +
 licenses/LICENSE.im2col.txt                        |   49 +
 licenses/LICENSE.intgemm.txt                       |   70 +
 licenses/LICENSE.layer_norm_cpu.txt                |   27 +
 licenses/LICENSE.mersenne.txt                      |   30 +
 licenses/LICENSE.moderngpu.txt                     |   23 +
 .../LICENSE.modulated_deformable_convolution.txt   |   21 +
 licenses/LICENSE.modulated_deformable_im2col.txt   |   52 +
 .../LICENSE => licenses/LICENSE.mshadow.txt        |    0
 licenses/LICENSE.mx2onnx.txt                       |   26 +
 licenses/LICENSE.np_einsum.txt                     |   32 +
 licenses/LICENSE.nvidia_cub.txt                    |   24 +
 licenses/LICENSE.onednn.txt                        |  742 +++++++++
 licenses/LICENSE.onnx-tensorrt.txt                 |   22 +
 licenses/LICENSE.onnx.txt                          |   22 +
 LICENSE => licenses/LICENSE.openmp.txt             |  373 +++--
 licenses/LICENSE.picojson.txt                      |   25 +
 licenses/LICENSE.pool.txt                          |   49 +
 LICENSE => licenses/LICENSE.ps-lite.txt            |  179 +--
 licenses/LICENSE.rang.txt                          |   24 +
 LICENSE => licenses/LICENSE.tvm.txt                |  214 +--
 plugin/opencv/cv_api.cc                            |  151 +-
 plugin/opencv/cv_api.h                             |   38 +-
 plugin/sframe/iter_sframe.cc                       |   99 +-
 plugin/torch/torch_base.cc                         |    9 +-
 plugin/torch/torch_base.h                          |   54 +-
 plugin/torch/torch_criterion-inl.h                 |   86 +-
 plugin/torch/torch_criterion.cc                    |   13 +-
 plugin/torch/torch_criterion.cu                    |    7 +-
 plugin/torch/torch_function.cc                     |   56 +-
 plugin/torch/torch_function.h                      |  137 +-
 plugin/torch/torch_module-inl.h                    |  137 +-
 plugin/torch/torch_module.cc                       |   13 +-
 plugin/torch/torch_module.cu                       |    7 +-
 plugin/warpctc/warpctc-inl.h                       |  152 +-
 plugin/warpctc/warpctc.cc                          |   17 +-
 plugin/warpctc/warpctc.cu                          |    7 +-
 prospector.yaml                                    |  279 ++++
 python/mxnet/__init__.py                           |    3 +-
 python/mxnet/_ctypes/cached_op.py                  |   16 +-
 python/mxnet/_ffi/_ctypes/function.py              |   12 +-
 python/mxnet/_ffi/_ctypes/types.py                 |    4 +-
 python/mxnet/_ffi/_cython/base.pxi                 |    3 +-
 python/mxnet/_ffi/_cython/function.pxi             |   10 +-
 python/mxnet/amp/amp.py                            |   18 +-
 python/mxnet/amp/lists/symbol_bf16.py              |    5 +-
 python/mxnet/amp/lists/symbol_fp16.py              |   27 +-
 python/mxnet/amp/loss_scaler.py                    |    6 +-
 python/mxnet/base.py                               |    7 +
 python/mxnet/context.py                            |  286 +---
 python/mxnet/contrib/quantization.py               |   83 +-
 python/mxnet/{context.py => device.py}             |  130 +-
 python/mxnet/dlpack.py                             |   32 +-
 python/mxnet/executor.py                           |   29 +-
 python/mxnet/gluon/block.py                        |  167 +-
 .../contrib/data/vision/transforms/bbox/bbox.py    |    2 +-
 .../gluon/contrib/estimator/batch_processor.py     |    4 +-
 python/mxnet/gluon/contrib/estimator/estimator.py  |   66 +-
 .../mxnet/gluon/contrib/estimator/event_handler.py |    2 +-
 python/mxnet/gluon/data/batchify.py                |   14 +-
 python/mxnet/gluon/loss.py                         |    3 +-
 python/mxnet/gluon/metric.py                       |   42 +-
 python/mxnet/gluon/model_zoo/vision/alexnet.py     |   13 +-
 python/mxnet/gluon/model_zoo/vision/densenet.py    |   29 +-
 python/mxnet/gluon/model_zoo/vision/inception.py   |   13 +-
 python/mxnet/gluon/model_zoo/vision/mobilenet.py   |   62 +-
 python/mxnet/gluon/model_zoo/vision/resnet.py      |   63 +-
 python/mxnet/gluon/model_zoo/vision/squeezenet.py  |   23 +-
 python/mxnet/gluon/model_zoo/vision/vgg.py         |   53 +-
 python/mxnet/gluon/nn/activations.py               |    4 +-
 python/mxnet/gluon/nn/basic_layers.py              |   56 +-
 python/mxnet/gluon/nn/conv_layers.py               |   32 +-
 python/mxnet/gluon/parameter.py                    |  238 +--
 .../gluon/probability/distributions/categorical.py |    2 +-
 .../gluon/probability/distributions/cauchy.py      |    2 +-
 .../gluon/probability/distributions/constraint.py  |    2 +-
 .../gluon/probability/distributions/divergence.py  |    2 +-
 .../gluon/probability/distributions/geometric.py   |    2 +-
 .../distributions/multivariate_normal.py           |   15 +-
 .../distributions/transformed_distribution.py      |    4 +-
 .../probability/transformation/transformation.py   |    4 +-
 python/mxnet/gluon/rnn/conv_rnn_cell.py            |   16 +-
 python/mxnet/gluon/rnn/rnn_cell.py                 |   72 +-
 python/mxnet/gluon/rnn/rnn_layer.py                |  117 +-
 python/mxnet/gluon/trainer.py                      |   39 +-
 python/mxnet/gluon/utils.py                        |   84 +-
 python/mxnet/initializer.py                        |  119 ++
 python/mxnet/model.py                              |    2 +-
 python/mxnet/ndarray/contrib.py                    |    4 +-
 python/mxnet/ndarray/ndarray.py                    |   37 +-
 python/mxnet/ndarray/numpy/_op.py                  |  548 +++++--
 python/mxnet/ndarray/numpy/linalg.py               |   14 +-
 python/mxnet/ndarray/numpy/random.py               |  239 +--
 python/mxnet/ndarray/numpy_extension/_op.py        |    1 +
 python/mxnet/ndarray/numpy_extension/random.py     |   60 +-
 python/mxnet/ndarray/sparse.py                     |   18 +-
 python/mxnet/numpy/__init__.py                     |    2 +
 python/mxnet/numpy/fallback.py                     |    1 -
 python/mxnet/numpy/io.py                           |   12 +-
 python/mxnet/numpy/linalg.py                       |  559 ++++++-
 python/mxnet/numpy/multiarray.py                   | 1621 +++++++++++++++++---
 python/mxnet/numpy/random.py                       |  143 +-
 python/mxnet/numpy/set_functions.py                |  113 ++
 python/mxnet/numpy/type_functions.py               |  163 ++
 python/mxnet/numpy/utils.py                        |  154 +-
 python/mxnet/numpy_dispatch_protocol.py            |    2 +
 python/mxnet/numpy_extension/__init__.py           |    2 +-
 python/mxnet/numpy_extension/random.py             |   45 +-
 python/mxnet/numpy_op_fallback.py                  |    8 +-
 python/mxnet/onnx/mx2onnx/LICENSE                  |   44 -
 python/mxnet/onnx/mx2onnx/_export_onnx.py          |   22 +-
 .../_op_translations/_op_translations_opset12.py   |   25 +-
 .../_op_translations/_op_translations_opset13.py   |   25 +-
 python/mxnet/optimizer/optimizer.py                |    6 +
 python/mxnet/optimizer/updater.py                  |    2 +-
 python/mxnet/random.py                             |   30 +-
 python/mxnet/symbol/symbol.py                      |   15 +-
 python/mxnet/test_utils.py                         |   98 +-
 python/mxnet/util.py                               |  215 ++-
 rat-excludes                                       |   12 +
 src/api/_api_internal/_api_internal.cc             |    4 +-
 src/api/operator/numpy/linalg/np_eig.cc            |    2 +-
 src/api/operator/numpy/linalg/np_eigvals.cc        |    2 +-
 src/api/operator/numpy/linalg/np_lstsq.cc          |    2 +-
 src/api/operator/numpy/linalg/np_matrix_rank.cc    |    4 +-
 src/api/operator/numpy/linalg/np_norm.cc           |    2 +-
 src/api/operator/numpy/linalg/np_pinv.cc           |    4 +-
 src/api/operator/numpy/linalg/np_potrf.cc          |    2 +-
 src/api/operator/numpy/linalg/np_tensorinv.cc      |    2 +-
 src/api/operator/numpy/linalg/np_tensorsolve.cc    |    2 +-
 src/api/operator/numpy/np_bincount_op.cc           |    2 +-
 .../numpy/np_broadcast_reduce_op_boolean.cc        |    2 +-
 .../operator/numpy/np_broadcast_reduce_op_index.cc |    2 +-
 .../operator/numpy/np_broadcast_reduce_op_value.cc |    2 +-
 src/api/operator/numpy/np_cross.cc                 |    2 +-
 src/api/operator/numpy/np_cumsum.cc                |    2 +-
 src/api/operator/numpy/np_delete_op.cc             |    2 +-
 src/api/operator/numpy/np_diff_op.cc               |    2 +-
 src/api/operator/numpy/np_ediff1d_op.cc            |    2 +-
 src/api/operator/numpy/np_einsum_op.cc             |    2 +-
 src/api/operator/numpy/np_elemwise_broadcast_op.cc |   35 +
 .../operator/numpy/np_elemwise_unary_op_basic.cc   |    2 +-
 src/api/operator/numpy/np_fill_diagonal_op.cc      |    2 +-
 src/api/operator/numpy/np_histogram_op.cc          |    2 +-
 src/api/operator/numpy/np_init_op.cc               |   64 +-
 src/api/operator/numpy/np_insert_op.cc             |    6 +-
 src/api/operator/numpy/np_interp_op.cc             |    2 +-
 src/api/operator/numpy/np_matrix_op.cc             |   54 +-
 src/api/operator/numpy/np_moments_op.cc            |    6 +-
 src/api/operator/numpy/np_nan_to_num_op.cc         |    2 +-
 src/api/operator/numpy/np_ordering_op.cc           |    8 +-
 src/api/operator/numpy/np_pad_op.cc                |    2 +-
 src/api/operator/numpy/np_percentile_op.cc         |    2 +-
 src/api/operator/numpy/np_repeat_op.cc             |    2 +-
 src/api/operator/numpy/np_tensordot_op.cc          |    4 +-
 src/api/operator/numpy/np_trace_op.cc              |    2 +-
 src/api/operator/numpy/np_tri_op.cc                |    8 +-
 src/api/operator/numpy/np_tril_op.cc               |    2 +-
 src/api/operator/numpy/np_triu_op.cc               |    2 +-
 src/api/operator/numpy/np_unique_op.cc             |    2 +-
 src/api/operator/numpy/np_where_op.cc              |    4 +-
 src/api/operator/numpy/np_window_op.cc             |    2 +-
 src/api/operator/numpy/random/np_choice_op.cc      |    2 +-
 src/api/operator/numpy/random/np_exponential_op.cc |    2 +-
 src/api/operator/numpy/random/np_laplace_op.cc     |    2 +-
 .../operator/numpy/random/np_location_scale_op.cc  |    4 +-
 src/api/operator/numpy/random/np_multinomial_op.cc |    2 +-
 src/api/operator/numpy/random/np_pareto_op.cc      |    2 +-
 src/api/operator/numpy/random/np_power_op.cc       |    2 +-
 src/api/operator/numpy/random/np_rayleigh_op.cc    |    2 +-
 src/api/operator/numpy/random/np_weibull_op.cc     |    2 +-
 .../operator/numpy_extension/npx_activation_op.cc  |    2 +-
 .../operator/numpy_extension/npx_arange_like_op.cc |    2 +-
 .../operator/numpy_extension/npx_batch_dot_op.cc   |    2 +-
 .../operator/numpy_extension/npx_batch_norm_op.cc  |    2 +-
 .../numpy_extension/npx_broadcast_like_op.cc       |    2 +-
 .../numpy_extension/npx_control_flow_op.cc         |    6 +-
 .../operator/numpy_extension/npx_convolution_op.cc |    4 +-
 .../numpy_extension/npx_deconvolution_op.cc        |    4 +-
 src/api/operator/numpy_extension/npx_dropout_op.cc |    2 +-
 .../operator/numpy_extension/npx_embedding_op.cc   |    2 +-
 .../numpy_extension/npx_fully_connected_op.cc      |    2 +-
 .../operator/numpy_extension/npx_group_norm_op.cc  |    2 +-
 .../operator/numpy_extension/npx_layer_norm_op.cc  |    2 +-
 .../operator/numpy_extension/npx_leaky_relu_op.cc  |    2 +-
 src/api/operator/numpy_extension/npx_one_hot_op.cc |    2 +-
 src/api/operator/numpy_extension/npx_pick_op.cc    |    2 +-
 src/api/operator/numpy_extension/npx_pooling_op.cc |    2 +-
 src/api/operator/numpy_extension/npx_rnn_op.cc     |    2 +-
 src/api/operator/numpy_extension/npx_softmax_op.cc |   14 +-
 src/api/operator/numpy_extension/npx_topk_op.cc    |    2 +-
 src/api/operator/random/np_gamma_op.cc             |    2 +-
 src/api/operator/random/np_normal_op.cc            |    2 +-
 src/api/operator/random/np_randint_op.cc           |    2 +-
 src/api/operator/random/np_uniform_op.cc           |    2 +-
 src/api/operator/tensor/indexing_op.cc             |    2 +-
 src/api/operator/tensor/matrix_op.cc               |    2 +-
 .../npx_group_norm_op.cc => tensor/unravel.cc}     |   43 +-
 src/api/operator/ufunc_helper.cc                   |    8 +-
 src/c_api/c_api.cc                                 |   70 +-
 src/c_api/c_api_ndarray.cc                         |    3 +-
 src/c_api/c_api_profile.cc                         |    2 +-
 src/c_api/c_api_symbolic.cc                        |    6 +-
 src/common/alm.cc                                  |  209 +++
 src/common/alm.h                                   |  100 ++
 src/common/cuda/cudnn_cxx.cc                       |  324 ++++
 src/common/cuda/cudnn_cxx.h                        |  326 ++++
 src/common/cuda/nvtx.h                             |   19 +-
 src/common/cuda/rtc/backward_functions-inl.h       |   58 +
 src/common/cuda/rtc/forward_functions-inl.h        |   62 +
 src/common/cuda/rtc/util-inl.h                     |  186 ++-
 src/common/cuda/utils.h                            |   16 +-
 src/common/exec_utils.h                            |    4 +-
 src/common/object_pool.h                           |    4 +-
 src/common/utils.cc                                |    8 +
 src/common/utils.h                                 |  165 +-
 src/engine/engine.cc                               |   28 +-
 src/engine/naive_engine.cc                         |   19 +-
 src/engine/stream_manager.h                        |   24 +-
 src/engine/threaded_engine.cc                      |  222 ++-
 src/engine/threaded_engine.h                       |   40 +-
 src/engine/threaded_engine_perdevice.cc            |   68 +-
 src/engine/threaded_engine_pooled.cc               |   35 +-
 src/imperative/attach_op_execs_pass.cc             |    8 +-
 src/imperative/attach_op_resource_pass.cc          |    5 +-
 src/imperative/cached_op.cc                        |   21 +-
 src/imperative/cached_op.h                         |    3 +
 src/imperative/exec_pass.h                         |    2 +-
 src/imperative/imperative.cc                       |   19 +-
 src/imperative/imperative_utils.h                  |  120 +-
 src/imperative/inplace_addto_detect_pass.cc        |    7 +-
 src/initialize.cc                                  |   28 +-
 src/initialize.h                                   |    8 +-
 src/io/batchify.cc                                 |    2 +-
 src/io/dataset.cc                                  |    4 +-
 src/io/iter_prefetcher.h                           |    6 +-
 src/kvstore/comm.h                                 |   49 +-
 src/kvstore/gpu_topology.h                         |    4 +-
 src/kvstore/gradient_compression.cc                |    4 -
 src/kvstore/kvstore_dist.h                         |   37 +-
 src/kvstore/kvstore_dist_server.h                  |   10 +-
 src/kvstore/kvstore_local.h                        |    7 +-
 src/kvstore/p3store_dist.h                         |    9 +-
 src/ndarray/ndarray.cc                             |  579 ++++---
 src/ndarray/ndarray_function-inl.h                 |    2 +-
 src/ndarray/ndarray_function.cu                    |    4 +-
 src/nnvm/gradient.cc                               |   10 +-
 src/nnvm/plan_memory.cc                            |    6 +-
 src/operator/contrib/adamw.cu                      |    2 +
 src/operator/contrib/adaptive_avg_pooling-inl.h    |   35 +-
 src/operator/contrib/adaptive_avg_pooling.cc       |   61 +-
 src/operator/contrib/batch_norm_relu.cc            |   32 +-
 src/operator/contrib/bilinear_resize-inl.h         |   16 +-
 src/operator/contrib/boolean_mask.cc               |    2 +-
 src/operator/contrib/bounding_box-inl.h            |   30 +-
 src/operator/contrib/bounding_box.cu               |    6 +-
 src/operator/contrib/deformable_psroi_pooling.cc   |   40 +-
 src/operator/contrib/deformable_psroi_pooling.cu   |   40 +-
 src/operator/contrib/intgemm/prepare_weight_op.cc  |    6 +-
 src/operator/contrib/multi_lamb.cc                 |   12 +-
 src/operator/contrib/multi_lamb.cu                 |   12 +-
 src/operator/contrib/multi_lans.cc                 |    8 +-
 src/operator/contrib/multi_lans.cu                 |   12 +-
 src/operator/contrib/multi_lars-inl.h              |    8 +-
 src/operator/control_flow.cc                       |    6 +-
 src/operator/correlation.cc                        |   12 +-
 src/operator/cudnn_ops.cc                          |  855 +++++++++++
 src/operator/cudnn_ops.h                           |  281 ++++
 src/operator/custom/ndarray_op.cc                  |   10 +-
 src/operator/elemwise_op_common.h                  |   14 +-
 src/operator/leaky_relu.cc                         |   57 +-
 src/operator/mshadow_op.h                          |  194 ++-
 src/operator/mxnet_op.h                            |  164 +-
 src/operator/nn/activation.cc                      |   32 +-
 src/operator/nn/batch_norm-inl.h                   |   10 +-
 src/operator/nn/batch_norm.cc                      |   51 +-
 src/operator/nn/batch_norm.cu                      |   68 +-
 src/operator/nn/concat-inl.h                       |   34 +-
 src/operator/nn/concat.cc                          |   75 +-
 src/operator/nn/concat.cu                          |    5 +-
 src/operator/nn/convolution-inl.h                  |    1 +
 src/operator/nn/convolution.cc                     |   91 +-
 src/operator/nn/convolution.cu                     |  183 ++-
 src/operator/nn/cudnn/cudnn_batch_norm-inl.h       |  307 ----
 src/operator/nn/cudnn/cudnn_batch_norm.cc          |  125 --
 src/operator/nn/cudnn/cudnn_batch_norm.cu          |  263 ++++
 src/operator/nn/cudnn/cudnn_batch_norm.h           |   59 +
 src/operator/nn/cudnn/cudnn_convolution-inl.h      |   24 +-
 src/operator/nn/cudnn/cudnn_deconvolution-inl.h    |   18 +-
 src/operator/nn/cudnn/cudnn_pooling-inl.h          |   48 +-
 src/operator/nn/deconvolution.cc                   |   53 +-
 src/operator/nn/deconvolution.cu                   |  172 +--
 src/operator/nn/dnnl/dnnl_act-inl.h                |  114 ++
 src/operator/nn/dnnl/dnnl_act.cc                   |  321 ++++
 .../mkldnn_base-inl.h => dnnl/dnnl_base-inl.h}     |  429 +++---
 .../{mkldnn/mkldnn_base.cc => dnnl/dnnl_base.cc}   |  409 +++--
 src/operator/nn/dnnl/dnnl_batch_dot-inl.h          |  133 ++
 src/operator/nn/dnnl/dnnl_batch_dot.cc             |  216 +++
 .../dnnl_batch_norm-inl.h}                         |  256 ++--
 .../mkldnn_concat-inl.h => dnnl/dnnl_concat-inl.h} |   38 +-
 src/operator/nn/dnnl/dnnl_concat.cc                |  130 ++
 src/operator/nn/dnnl/dnnl_convolution-inl.h        |  171 +++
 .../dnnl_convolution.cc}                           |  457 +++---
 .../{mkldnn/mkldnn_copy.cc => dnnl/dnnl_copy.cc}   |   28 +-
 .../dnnl_deconvolution-inl.h}                      |  234 ++-
 .../dnnl_deconvolution.cc}                         |  216 +--
 src/operator/nn/dnnl/dnnl_fully_connected-inl.h    |  142 ++
 src/operator/nn/dnnl/dnnl_fully_connected.cc       |  328 ++++
 .../dnnl_layer_norm-inl.h}                         |   57 +-
 .../dnnl_layer_norm.cc}                            |  176 ++-
 src/operator/nn/dnnl/dnnl_log_softmax.cc           |  210 +++
 src/operator/nn/dnnl/dnnl_lrn-inl.h                |  262 ++++
 src/operator/nn/dnnl/dnnl_ops-inl.h                |  205 +++
 src/operator/nn/dnnl/dnnl_pooling-inl.h            |  199 +++
 src/operator/nn/dnnl/dnnl_pooling.cc               |  418 +++++
 src/operator/nn/dnnl/dnnl_reduce-inl.h             |  108 ++
 src/operator/nn/dnnl/dnnl_reduce.cc                |  236 +++
 .../dnnl_reshape-inl.h}                            |   31 +-
 src/operator/nn/dnnl/dnnl_reshape.cc               |  145 ++
 .../mkldnn_rnn-inl.h => dnnl/dnnl_rnn-inl.h}       |  297 ++--
 .../nn/{mkldnn/mkldnn_rnn.cc => dnnl/dnnl_rnn.cc}  |  484 +++---
 .../mkldnn_slice-inl.h => dnnl/dnnl_slice-inl.h}   |   43 +-
 .../{mkldnn/mkldnn_slice.cc => dnnl/dnnl_slice.cc} |   68 +-
 src/operator/nn/dnnl/dnnl_softmax-inl.h            |  154 ++
 src/operator/nn/dnnl/dnnl_softmax.cc               |  231 +++
 src/operator/nn/dnnl/dnnl_softmax_output.cc        |  124 ++
 src/operator/nn/dnnl/dnnl_stack.cc                 |  123 ++
 src/operator/nn/dnnl/dnnl_sum.cc                   |  135 ++
 src/operator/nn/dnnl/dnnl_transpose-inl.h          |   73 +
 src/operator/nn/dnnl/dnnl_transpose.cc             |  146 ++
 src/operator/nn/fully_connected.cc                 |   32 +-
 src/operator/nn/layer_norm.cc                      |  131 +-
 src/operator/nn/layer_norm_cpu.h                   |  108 ++
 src/operator/nn/log_softmax.cc                     |   28 +-
 src/operator/nn/lrn.cc                             |   34 +-
 src/operator/nn/mkldnn/mkldnn_act-inl.h            |  114 --
 src/operator/nn/mkldnn/mkldnn_act.cc               |  325 ----
 src/operator/nn/mkldnn/mkldnn_batch_dot-inl.h      |   69 -
 src/operator/nn/mkldnn/mkldnn_batch_dot.cc         |  127 --
 src/operator/nn/mkldnn/mkldnn_concat.cc            |  129 --
 src/operator/nn/mkldnn/mkldnn_convolution-inl.h    |  172 ---
 .../nn/mkldnn/mkldnn_fully_connected-inl.h         |  143 --
 src/operator/nn/mkldnn/mkldnn_fully_connected.cc   |  328 ----
 src/operator/nn/mkldnn/mkldnn_log_softmax.cc       |  214 ---
 src/operator/nn/mkldnn/mkldnn_lrn-inl.h            |  267 ----
 src/operator/nn/mkldnn/mkldnn_ops-inl.h            |  197 ---
 src/operator/nn/mkldnn/mkldnn_pooling-inl.h        |  178 ---
 src/operator/nn/mkldnn/mkldnn_pooling.cc           |  405 -----
 src/operator/nn/mkldnn/mkldnn_reshape.cc           |  148 --
 src/operator/nn/mkldnn/mkldnn_softmax.cc           |  214 ---
 src/operator/nn/mkldnn/mkldnn_softmax_output.cc    |  126 --
 src/operator/nn/mkldnn/mkldnn_sum.cc               |  137 --
 src/operator/nn/mkldnn/mkldnn_transpose.cc         |  151 --
 src/operator/nn/pool.cuh                           |   19 -
 src/operator/nn/pooling-inl.h                      |   25 +-
 src/operator/nn/pooling.cc                         |  106 +-
 src/operator/nn/pooling.cu                         |    2 +-
 src/operator/nn/softmax-inl.h                      |   32 +-
 src/operator/nn/softmax.cc                         |   42 +-
 src/operator/npx_control_flow.cc                   |    6 +-
 src/operator/numpy/linalg/np_lstsq.cc              |    6 +-
 src/operator/numpy/linalg/np_norm.cc               |    4 +-
 src/operator/numpy/np_bincount_op.cc               |    6 +-
 src/operator/numpy/np_boolean_mask_assign.cc       |    6 +-
 src/operator/numpy/np_broadcast_reduce_op.cc       |    6 +-
 src/operator/numpy/np_broadcast_reduce_op.h        |   25 +-
 src/operator/numpy/np_broadcast_reduce_op_value.h  |   59 +-
 .../numpy/np_broadcast_reduce_op_value_mean.cc     |    5 +
 .../numpy/np_broadcast_reduce_op_value_sum.cc      |    5 +
 src/operator/numpy/np_delete_op-inl.h              |    8 +-
 src/operator/numpy/np_delete_op.cc                 |    6 +-
 src/operator/numpy/np_einsum_op-inl.h              |    4 +-
 .../numpy/np_elemwise_broadcast_logic_op.h         |   24 +-
 src/operator/numpy/np_elemwise_broadcast_op.h      |  430 +++++-
 src/operator/numpy/np_elemwise_broadcast_op_add.cc |   37 +-
 src/operator/numpy/np_elemwise_broadcast_op_add.cu |    5 +-
 .../numpy/np_elemwise_broadcast_op_extended.cc     |   88 +-
 .../numpy/np_elemwise_broadcast_op_extended_thi.cc |  184 +++
 .../numpy/np_elemwise_broadcast_op_extended_thi.cu |   72 +
 src/operator/numpy/np_elemwise_broadcast_op_lae.cc |   79 +
 ...t_op_add.cu => np_elemwise_broadcast_op_lae.cu} |   19 +-
 src/operator/numpy/np_elemwise_broadcast_op_mod.cc |   37 +-
 src/operator/numpy/np_elemwise_broadcast_op_mod.cu |    5 +-
 src/operator/numpy/np_elemwise_broadcast_op_mul.cc |   37 +-
 src/operator/numpy/np_elemwise_broadcast_op_mul.cu |    4 +-
 src/operator/numpy/np_elemwise_broadcast_op_pow.cc |   38 +-
 src/operator/numpy/np_elemwise_broadcast_op_pow.cu |    5 +-
 .../numpy/np_elemwise_broadcast_op_scalar.cc       |   41 +-
 .../numpy/np_elemwise_broadcast_op_scalar.cu       |   22 +-
 src/operator/numpy/np_elemwise_broadcast_op_sub.cc |   37 +-
 src/operator/numpy/np_elemwise_broadcast_op_sub.cu |    4 +-
 ...wise_broadcast_op_mod.cu => np_floor_divide.cc} |   16 +-
 .../operator/numpy/np_floor_divide.cu              |   11 +-
 src/operator/numpy/np_init_op.cc                   |   22 +-
 src/operator/numpy/np_init_op.cu                   |    5 +-
 src/operator/numpy/np_init_op.h                    |  208 +++
 src/operator/numpy/np_insert_op_scalar-inl.h       |    6 +-
 src/operator/numpy/np_insert_op_slice-inl.h        |    6 +-
 src/operator/numpy/np_insert_op_tensor-inl.h       |    6 +-
 src/operator/numpy/np_interp_op.cc                 |    6 +-
 src/operator/numpy/np_matrix_op-inl.h              |   16 +
 src/operator/numpy/np_matrix_op.cc                 |  220 +--
 src/operator/numpy/np_matrix_op.cu                 |    8 -
 src/operator/numpy/np_moments_op.cc                |    6 +-
 src/operator/numpy/np_percentile_op.cc             |    6 +-
 src/operator/numpy/np_true_divide-inl.h            |   70 +-
 src/operator/numpy/np_true_divide.cc               |   12 +-
 src/operator/numpy/np_unique_op.cc                 |    7 +-
 src/operator/numpy/random/np_bernoulli_op.cc       |    6 +-
 src/operator/numpy/random/np_exponential_op.cc     |    6 +-
 src/operator/numpy/random/np_pareto_op.cc          |    6 +-
 src/operator/numpy/random/np_power_op.cc           |    6 +-
 src/operator/numpy/random/np_rayleigh_op.cc        |    6 +-
 src/operator/numpy/random/np_weibull_op.cc         |    6 +-
 src/operator/operator_common.h                     |   13 +-
 src/operator/operator_tune.cc                      |   17 +
 src/operator/operator_util.cc                      |   15 -
 src/operator/optimizer_op-inl.h                    |    6 +-
 src/operator/optimizer_op.cc                       |    4 +-
 src/operator/optimizer_op.cu                       |    4 +-
 src/operator/quantization/dequantize.cc            |    8 +-
 .../dnnl_dequantize-inl.h}                         |   79 +-
 .../dnnl_quantize-inl.h}                           |   66 +-
 .../dnnl_quantize_v2-inl.h}                        |   80 +-
 .../dnnl_quantized_act.cc}                         |   22 +-
 .../dnnl_quantized_batch_norm.cc}                  |   70 +-
 .../dnnl_quantized_concat.cc}                      |   62 +-
 .../dnnl_quantized_conv.cc}                        |   49 +-
 .../dnnl_quantized_elemwise_add.cc}                |   98 +-
 .../dnnl_quantized_flatten.cc}                     |   22 +-
 .../dnnl_quantized_fully_connected.cc}             |   50 +-
 .../dnnl_quantized_ops-inl.h}                      |   20 +-
 .../dnnl_quantized_pooling.cc}                     |   23 +-
 .../dnnl_requantize-inl.h}                         |   58 +-
 src/operator/quantization/quantize.cc              |    6 +-
 src/operator/quantization/quantize_graph_pass.cc   |    2 +-
 src/operator/quantization/quantize_v2.cc           |    8 +-
 src/operator/quantization/quantized_batch_norm.cc  |    4 +-
 src/operator/quantization/quantized_concat.cc      |   43 +-
 src/operator/quantization/quantized_conv.cc        |    8 +-
 .../quantization/quantized_elemwise_add.cc         |    4 +-
 .../quantization/quantized_fully_connected.cc      |   10 +-
 src/operator/quantization/quantized_pooling.cc     |   10 +-
 src/operator/quantization/requantize.cc            |    6 +-
 src/operator/random/sampler.h                      |    8 +-
 src/operator/random/shuffle_op.cu                  |    4 +-
 src/operator/rnn.cc                                |   22 +-
 src/operator/sequence_last-inl.h                   |   12 +-
 src/operator/softmax_output.cc                     |   16 +-
 src/operator/subgraph/build_subgraph.cc            |    6 +-
 src/operator/subgraph/dnnl/dnnl_batch_dot.cc       |  176 +++
 .../subgraph/dnnl/dnnl_batch_dot_property.h        |   99 ++
 .../dnnl_bn_relu_property.h}                       |   29 +-
 .../{mkldnn/mkldnn_common.h => dnnl/dnnl_common.h} |   60 +-
 .../mkldnn_conv-inl.h => dnnl/dnnl_conv-inl.h}     |   36 +-
 .../{mkldnn/mkldnn_conv.cc => dnnl/dnnl_conv.cc}   |  429 +++---
 .../dnnl_conv_property.h}                          |   41 +-
 .../{mkldnn/mkldnn_fc-inl.h => dnnl/dnnl_fc-inl.h} |   38 +-
 .../{mkldnn/mkldnn_fc.cc => dnnl/dnnl_fc.cc}       |  319 ++--
 .../dnnl_fc_property.h}                            |   41 +-
 .../subgraph/dnnl/dnnl_identity_property.h         |  168 ++
 .../dnnl_post_quantize_align_scale_property.h}     |   23 +-
 .../dnnl_post_quantize_property.h}                 |  154 +-
 .../dnnl_subgraph_base-inl.h}                      |   10 +-
 .../subgraph/dnnl/dnnl_subgraph_property.cc        |   61 +
 .../dnnl_transformer-inl.h}                        |   10 +-
 .../dnnl_transformer.cc}                           |  359 ++---
 .../dnnl_transformer_qk_property.h}                |   35 +-
 .../dnnl_transformer_valatt_property.h}            |   33 +-
 .../mkldnn/mkldnn_fc_post_quantize_property.h      |  231 ---
 .../mkldnn/mkldnn_post_quantize_property.h         |  182 ---
 .../subgraph/mkldnn/mkldnn_subgraph_property.cc    |   63 -
 .../mkldnn_transformer_post_quantize_property.h    |  201 ---
 .../partitioner/custom_subgraph_property.h         |    8 +-
 src/operator/subgraph/tensorrt/nnvm_to_onnx.cc     |    5 +-
 src/operator/subgraph/tensorrt/onnx_to_tensorrt.h  |   12 +-
 src/operator/subgraph/tensorrt/tensorrt-inl.h      |    5 +-
 src/operator/tensor/amp_cast.cc                    |   85 +-
 src/operator/tensor/broadcast_reduce-inl.h         |    6 +-
 src/operator/tensor/broadcast_reduce_op.h          |   25 +-
 src/operator/tensor/broadcast_reduce_sum_value.cc  |    7 +
 src/operator/tensor/cast_storage-inl.h             |   18 +-
 src/operator/tensor/dot-inl.h                      |   31 +-
 src/operator/tensor/dot.cc                         |   19 +-
 src/operator/tensor/elemwise_binary_broadcast_op.h |   43 +-
 src/operator/tensor/elemwise_binary_op-inl.h       |   16 +-
 src/operator/tensor/elemwise_binary_op.h           |   32 +-
 src/operator/tensor/elemwise_binary_op_basic.cc    |   26 +-
 src/operator/tensor/elemwise_binary_scalar_op.h    |    8 +-
 src/operator/tensor/elemwise_sum.cc                |   17 +-
 src/operator/tensor/elemwise_unary_op.h            |   16 +-
 src/operator/tensor/elemwise_unary_op_basic.cc     |   13 +-
 src/operator/tensor/elemwise_unary_op_logexp.cc    |    7 +-
 src/operator/tensor/elemwise_unary_op_pow.cc       |    7 +-
 src/operator/tensor/histogram.cc                   |    6 +-
 src/operator/tensor/indexing_op.cu                 |   11 +-
 src/operator/tensor/indexing_op.h                  |   12 +-
 src/operator/tensor/la_op-inl.h                    |   20 +-
 src/operator/tensor/la_op.h                        |   12 +-
 src/operator/tensor/matrix_op-inl.h                |    6 +-
 src/operator/tensor/matrix_op.cc                   |  119 +-
 src/operator/tensor/matrix_op.cu                   |   10 +-
 src/operator/tensor/ravel.cc                       |    1 +
 src/operator/tensor/ravel.h                        |    8 +-
 src/operator/tensor/reduce_rtc.cc                  |   12 +-
 src/operator/tensor/sort_op.h                      |   10 +-
 src/operator/tensor/square_sum.cc                  |    2 +
 src/operator/tensor/square_sum.cu                  |    2 +
 src/profiler/aggregate_stats.cc                    |   12 +-
 src/resource.cc                                    |   15 +-
 src/runtime/container.cc                           |    4 +-
 src/serialization/cnpy.cc                          |   12 +-
 src/storage/cpu_device_storage.h                   |    9 +-
 src/storage/cpu_shared_storage_manager.h           |    4 +-
 src/storage/gpu_device_storage.h                   |   24 +-
 src/storage/naive_storage_manager.h                |    6 +-
 src/storage/pinned_memory_storage.h                |    4 +-
 src/storage/pooled_storage_manager.h               |   68 +-
 src/storage/storage.cc                             |   11 +-
 src/storage/storage_manager.h                      |    3 +-
 src/storage/storage_manager_helpers.h              |    4 +-
 tests/CMakeLists.txt                               |    2 +-
 tests/cpp/engine/engine_shutdown_test.cc           |   18 +-
 tests/cpp/engine/omp_test.cc                       |   35 +-
 tests/cpp/engine/thread_local_test.cc              |   65 +-
 tests/cpp/engine/threaded_engine_test.cc           |  332 ++--
 tests/cpp/include/test_core_op.h                   |  192 +--
 tests/cpp/include/{test_mkldnn.h => test_dnnl.h}   |  290 ++--
 tests/cpp/include/test_legacy_op.h                 |  245 +--
 tests/cpp/include/test_ndarray_utils.h             |  115 +-
 tests/cpp/include/test_op.h                        |   90 +-
 tests/cpp/include/test_op_runner.h                 |  143 +-
 tests/cpp/include/test_perf.h                      |  119 +-
 tests/cpp/include/test_tune.h                      |  122 +-
 tests/cpp/include/test_util.h                      |  249 ++-
 tests/cpp/kvstore/gpu_topology_test.cc             |  279 ++--
 tests/cpp/misc/base.cc                             |   30 +-
 tests/cpp/operator/activation_perf.cc              |   69 +-
 tests/cpp/operator/batchnorm_test.cc               |  873 +++++------
 tests/cpp/operator/coreop_perf.cc                  |   61 +-
 ...ldnn_operator_test.cc => dnnl_operator_test.cc} |  109 +-
 .../cpp/operator/{mkldnn_test.cc => dnnl_test.cc}  |  201 ++-
 tests/cpp/operator/dropout_perf.cc                 |   58 +-
 tests/cpp/operator/fully_conn_perf.cc              |   62 +-
 tests/cpp/operator/krprod_test.cc                  |  116 +-
 tests/cpp/operator/runner/core_op_runner_test.cc   |  196 ++-
 tests/cpp/operator/slice_channel_perf.cc           |   52 +-
 tests/cpp/operator/tune/operator_tune_test.cc      |   66 +-
 tests/cpp/storage/storage_test.cc                  |   37 +-
 tests/cpp/test_main.cc                             |   23 +-
 tests/nightly/dist_device_sync_kvstore_byteps.py   |   16 +-
 tests/nightly/dist_device_sync_kvstore_horovod.py  |    4 +-
 tests/nightly/estimator/test_estimator_cnn.py      |    2 +-
 tests/nightly/estimator/test_sentiment_rnn.py      |    2 +-
 .../__init__.py                                    |   18 +-
 .../model_backwards_compat_inference.py            |    2 +-
 .../model_backwards_compat_train.py                |    2 +-
 tests/nightly/test_large_array.py                  |    6 +-
 tests/nightly/test_np_large_array.py               |    4 +-
 tests/nightly/test_np_random.py                    |   18 +-
 tests/python/array-api/test_data_interchange.py    |   65 +
 .../data/test_dnnl_test_dnnl_model_model1.json}    |    0
 .../{mkl => dnnl}/subgraphs/subgraph_common.py     |   24 +-
 .../{mkl => dnnl}/subgraphs/test_conv_subgraph.py  |   46 +-
 .../{mkl => dnnl}/subgraphs/test_fc_subgraph.py    |   35 +-
 .../subgraphs/test_matmul_subgraph.py}             |   51 +-
 tests/python/{mkl => dnnl}/test_amp.py             |    2 +-
 tests/python/{mkl => dnnl}/test_bf16_operator.py   |    2 +-
 .../{mkl/test_mkldnn.py => dnnl/test_dnnl.py}      |   45 +-
 .../test_quantization_dnnl.py}                     |    2 -
 tests/python/gpu/test_amp.py                       |    6 +-
 tests/python/gpu/test_amp_init.py                  |   96 +-
 tests/python/gpu/test_deferred_compute_gpu.py      |    2 +-
 tests/python/gpu/test_device.py                    |    4 +-
 tests/python/gpu/test_extensions_gpu.py            |    2 +-
 tests/python/gpu/test_fusion.py                    |    2 +-
 tests/python/gpu/test_gluon_gpu.py                 |  277 ++--
 tests/python/gpu/test_gluon_model_zoo_gpu.py       |    7 +-
 tests/python/gpu/test_gluon_transforms.py          |    4 +-
 tests/python/gpu/test_kvstore_gpu.py               |    8 +-
 tests/python/gpu/test_nccl.py                      |    2 +-
 .../gpu/{test_numpy_op.py => test_numpy_einsum.py} |    4 +-
 tests/python/gpu/test_numpy_fallback.py            |   14 +-
 tests/python/gpu/test_operator_gpu.py              |   92 +-
 tests/python/gpu/test_profiler_gpu.py              |    6 +-
 tests/python/gpu/test_tvm_op_gpu.py                |    4 +-
 tests/python/profiling/test_nvtx.py                |    2 +-
 tests/python/quantization/test_quantization.py     |  100 +-
 tests/python/test_quantization_gpu.py              |    4 +-
 tests/python/unittest/common.py                    |    2 +-
 tests/python/unittest/test_contrib_control_flow.py |   12 +-
 tests/python/unittest/test_contrib_operator.py     |   12 +-
 tests/python/unittest/test_contrib_stes_op.py      |   18 +-
 tests/python/unittest/test_deferred_compute.py     |   42 +-
 tests/python/unittest/test_exc_handling.py         |   24 +-
 tests/python/unittest/test_executor.py             |   20 +-
 tests/python/unittest/test_extensions.py           |    2 +-
 tests/python/unittest/test_gluon.py                |  143 +-
 .../python/unittest/test_gluon_batch_processor.py  |   12 +-
 tests/python/unittest/test_gluon_control_flow.py   |   52 +-
 tests/python/unittest/test_gluon_data.py           |    2 +-
 tests/python/unittest/test_gluon_estimator.py      |  100 +-
 tests/python/unittest/test_gluon_probability_v2.py |   14 +-
 tests/python/unittest/test_gluon_rnn.py            |  153 +-
 tests/python/unittest/test_loss.py                 |    6 +-
 tests/python/unittest/test_ndarray.py              |    8 +-
 tests/python/unittest/test_numpy_default_dtype.py  |    4 +-
 tests/python/unittest/test_numpy_gluon.py          |   16 +-
 .../python/unittest/test_numpy_interoperability.py |  226 +--
 tests/python/unittest/test_numpy_loss.py           |    6 +-
 tests/python/unittest/test_numpy_ndarray.py        |  124 +-
 tests/python/unittest/test_numpy_op.py             | 1468 ++++++++++++++----
 tests/python/unittest/test_operator.py             |  283 ++--
 tests/python/unittest/test_optimizer.py            |    2 +-
 tests/python/unittest/test_random.py               |    4 +-
 tests/python/unittest/test_sparse_ndarray.py       |    8 +-
 tests/python/unittest/test_sparse_operator.py      |   30 +-
 tests/python/unittest/test_subgraph.py             |   14 +-
 tests/python/unittest/test_subgraph_op.py          |   22 +-
 tests/python/unittest/test_thread_local.py         |   30 +-
 tests/tutorials/test_sanity_tutorials.py           |    6 +-
 tests/tutorials/test_tutorials.py                  |    4 +-
 tools/dependencies/README.md                       |    6 +-
 tools/dependencies/make_shared_dependencies.sh     |   11 +-
 tools/dependencies/mkl.sh                          |   49 +
 tools/dependencies/numpy_mkl.sh                    |   45 +
 tools/dependencies/opencv.sh                       |    2 +-
 .../requirements => tools/git-pre-commit           |   20 +-
 tools/im2rec.cc                                    |  275 ++--
 tools/license_header.py                            |   12 +-
 .../requirements => tools/lint/clang_format_ci.sh  |   18 +-
 tools/lint/git-clang-format-13                     |  586 +++++++
 tools/pip/doc/CPU_ADDITIONAL.md                    |    2 +-
 tools/pip/doc/CU101_ADDITIONAL.md                  |    2 +-
 tools/pip/doc/CU102_ADDITIONAL.md                  |    2 +-
 tools/pip/doc/CU110_ADDITIONAL.md                  |    2 +-
 tools/pip/doc/CU112_ADDITIONAL.md                  |    2 +-
 tools/pip/doc/NATIVE_ADDITIONAL.md                 |    2 +-
 tools/staticbuild/README.md                        |    4 +-
 tools/staticbuild/build.sh                         |    7 +-
 tools/staticbuild/build_lib.sh                     |   10 +-
 861 files changed, 33765 insertions(+), 22382 deletions(-)
 copy python/mxnet/numpy_extension/__init__.py => .clang-format (52%)
 copy tests/python/test_quantization_gpu.py => .cmakelintrc (63%)
 copy docs/python_docs/requirements => .git-blame-ignore-revs (77%)
 create mode 100644 .github/workflows/os_x_mklbuild.yml
 rename MKLDNN_README.md => DNNL_README.md (83%)
 delete mode 100644 ci/other/pylintrc
 copy config/distribution/{darwin_native.cmake => darwin_cpu_mkl.cmake} (90%)
 copy config/distribution/{linux_cpu.cmake => linux_cpu_mkl.cmake} (90%)
 rename docs/python_docs/python/api/{context => device}/index.rst (95%)
 rename docs/python_docs/python/tutorials/performance/backend/{mkldnn/mkldnn_readme.md => dnnl/dnnl_readme.md} (57%)
 rename docs/python_docs/python/tutorials/performance/backend/{mkldnn => dnnl}/index.rst (78%)
 create mode 100644 docs/static_site/src/pages/community/clang_format_guide.md
 mode change 100755 => 100644 include/mxnet/tensor_blob.h
 delete mode 120000 include/onednn/mkldnn.h
 delete mode 120000 include/onednn/mkldnn.hpp
 delete mode 120000 include/onednn/mkldnn_config.h
 delete mode 120000 include/onednn/mkldnn_debug.h
 delete mode 120000 include/onednn/mkldnn_dnnl_mangling.h
 delete mode 120000 include/onednn/mkldnn_types.h
 delete mode 120000 include/onednn/mkldnn_version.h
 create mode 100644 licenses/LICENSE.bfloat16.txt
 create mode 100644 licenses/LICENSE.blockingconcurrentqueue.txt
 copy LICENSE => licenses/LICENSE.builtin_fp16.txt (50%)
 create mode 100644 licenses/LICENSE.clang.txt
 create mode 100644 licenses/LICENSE.cma.txt
 create mode 100644 licenses/LICENSE.cmakeincludes.txt
 create mode 100644 licenses/LICENSE.concurrentqueue.txt
 copy 3rdparty/ctc_include/LICENSE => licenses/LICENSE.ctc_include.txt (100%)
 create mode 100644 licenses/LICENSE.deformable_im2col.txt
 copy LICENSE => licenses/LICENSE.dlpack.txt (56%)
 create mode 100644 licenses/LICENSE.erfinv.txt
 create mode 100644 licenses/LICENSE.findeigen3.txt
 create mode 100644 licenses/LICENSE.findjemalloc.txt
 create mode 100644 licenses/LICENSE.findpythonlibsnew.txt
 copy LICENSE => licenses/LICENSE.gmock_gen.txt (56%)
 create mode 100644 licenses/LICENSE.googlemock.txt
 create mode 100644 licenses/LICENSE.googletest.txt
 create mode 100644 licenses/LICENSE.im2col.txt
 create mode 100644 licenses/LICENSE.intgemm.txt
 create mode 100644 licenses/LICENSE.layer_norm_cpu.txt
 create mode 100644 licenses/LICENSE.mersenne.txt
 create mode 100644 licenses/LICENSE.moderngpu.txt
 create mode 100644 licenses/LICENSE.modulated_deformable_convolution.txt
 create mode 100644 licenses/LICENSE.modulated_deformable_im2col.txt
 copy 3rdparty/mshadow/LICENSE => licenses/LICENSE.mshadow.txt (100%)
 create mode 100644 licenses/LICENSE.mx2onnx.txt
 create mode 100644 licenses/LICENSE.np_einsum.txt
 create mode 100644 licenses/LICENSE.nvidia_cub.txt
 create mode 100644 licenses/LICENSE.onednn.txt
 create mode 100644 licenses/LICENSE.onnx-tensorrt.txt
 create mode 100644 licenses/LICENSE.onnx.txt
 copy LICENSE => licenses/LICENSE.openmp.txt (50%)
 create mode 100644 licenses/LICENSE.picojson.txt
 create mode 100644 licenses/LICENSE.pool.txt
 copy LICENSE => licenses/LICENSE.ps-lite.txt (56%)
 create mode 100644 licenses/LICENSE.rang.txt
 copy LICENSE => licenses/LICENSE.tvm.txt (56%)
 create mode 100644 prospector.yaml
 copy python/mxnet/{context.py => device.py} (63%)
 create mode 100644 python/mxnet/numpy/set_functions.py
 create mode 100644 python/mxnet/numpy/type_functions.py
 delete mode 100644 python/mxnet/onnx/mx2onnx/LICENSE
 copy src/api/operator/{numpy_extension/npx_group_norm_op.cc => tensor/unravel.cc} (60%)
 create mode 100644 src/common/alm.cc
 create mode 100644 src/common/alm.h
 create mode 100644 src/common/cuda/cudnn_cxx.cc
 create mode 100644 src/common/cuda/cudnn_cxx.h
 create mode 100644 src/operator/cudnn_ops.cc
 create mode 100644 src/operator/cudnn_ops.h
 delete mode 100644 src/operator/nn/cudnn/cudnn_batch_norm-inl.h
 delete mode 100644 src/operator/nn/cudnn/cudnn_batch_norm.cc
 create mode 100644 src/operator/nn/cudnn/cudnn_batch_norm.cu
 create mode 100644 src/operator/nn/cudnn/cudnn_batch_norm.h
 create mode 100644 src/operator/nn/dnnl/dnnl_act-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_act.cc
 rename src/operator/nn/{mkldnn/mkldnn_base-inl.h => dnnl/dnnl_base-inl.h} (52%)
 rename src/operator/nn/{mkldnn/mkldnn_base.cc => dnnl/dnnl_base.cc} (56%)
 create mode 100644 src/operator/nn/dnnl/dnnl_batch_dot-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_batch_dot.cc
 rename src/operator/nn/{mkldnn/mkldnn_batch_norm-inl.h => dnnl/dnnl_batch_norm-inl.h} (59%)
 rename src/operator/nn/{mkldnn/mkldnn_concat-inl.h => dnnl/dnnl_concat-inl.h} (56%)
 create mode 100644 src/operator/nn/dnnl/dnnl_concat.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_convolution-inl.h
 rename src/operator/nn/{mkldnn/mkldnn_convolution.cc => dnnl/dnnl_convolution.cc} (54%)
 rename src/operator/nn/{mkldnn/mkldnn_copy.cc => dnnl/dnnl_copy.cc} (69%)
 rename src/operator/nn/{mkldnn/mkldnn_deconvolution-inl.h => dnnl/dnnl_deconvolution-inl.h} (56%)
 rename src/operator/nn/{mkldnn/mkldnn_deconvolution.cc => dnnl/dnnl_deconvolution.cc} (57%)
 create mode 100644 src/operator/nn/dnnl/dnnl_fully_connected-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_fully_connected.cc
 rename src/operator/nn/{mkldnn/mkldnn_layer_norm-inl.h => dnnl/dnnl_layer_norm-inl.h} (57%)
 rename src/operator/nn/{mkldnn/mkldnn_layer_norm.cc => dnnl/dnnl_layer_norm.cc} (51%)
 create mode 100644 src/operator/nn/dnnl/dnnl_log_softmax.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_lrn-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_ops-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_pooling-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_pooling.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_reduce-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_reduce.cc
 rename src/operator/nn/{mkldnn/mkldnn_reshape-inl.h => dnnl/dnnl_reshape-inl.h} (62%)
 create mode 100644 src/operator/nn/dnnl/dnnl_reshape.cc
 rename src/operator/nn/{mkldnn/mkldnn_rnn-inl.h => dnnl/dnnl_rnn-inl.h} (61%)
 rename src/operator/nn/{mkldnn/mkldnn_rnn.cc => dnnl/dnnl_rnn.cc} (76%)
 rename src/operator/nn/{mkldnn/mkldnn_slice-inl.h => dnnl/dnnl_slice-inl.h} (54%)
 rename src/operator/nn/{mkldnn/mkldnn_slice.cc => dnnl/dnnl_slice.cc} (52%)
 create mode 100644 src/operator/nn/dnnl/dnnl_softmax-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_softmax.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_softmax_output.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_stack.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_sum.cc
 create mode 100644 src/operator/nn/dnnl/dnnl_transpose-inl.h
 create mode 100644 src/operator/nn/dnnl/dnnl_transpose.cc
 create mode 100644 src/operator/nn/layer_norm_cpu.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_act-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_act.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_batch_dot-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_batch_dot.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_concat.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_convolution-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_fully_connected-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_fully_connected.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_log_softmax.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_lrn-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_ops-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_pooling-inl.h
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_pooling.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_reshape.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_softmax.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_softmax_output.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_sum.cc
 delete mode 100644 src/operator/nn/mkldnn/mkldnn_transpose.cc
 create mode 100644 src/operator/numpy/np_elemwise_broadcast_op_extended_thi.cc
 create mode 100644 src/operator/numpy/np_elemwise_broadcast_op_extended_thi.cu
 create mode 100644 src/operator/numpy/np_elemwise_broadcast_op_lae.cc
 copy src/operator/numpy/{np_elemwise_broadcast_op_add.cu => np_elemwise_broadcast_op_lae.cu} (61%)
 copy src/operator/numpy/{np_elemwise_broadcast_op_mod.cu => np_floor_divide.cc} (60%)
 copy example/extensions/lib_external_ops/min_ex.cu => src/operator/numpy/np_floor_divide.cu (78%)
 rename src/operator/quantization/{mkldnn/mkldnn_dequantize-inl.h => dnnl/dnnl_dequantize-inl.h} (52%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantize-inl.h => dnnl/dnnl_quantize-inl.h} (55%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantize_v2-inl.h => dnnl/dnnl_quantize_v2-inl.h} (66%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_act.cc => dnnl/dnnl_quantized_act.cc} (66%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_batch_norm.cc => dnnl/dnnl_quantized_batch_norm.cc} (71%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_concat.cc => dnnl/dnnl_quantized_concat.cc} (68%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_conv.cc => dnnl/dnnl_quantized_conv.cc} (64%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_elemwise_add.cc => dnnl/dnnl_quantized_elemwise_add.cc} (73%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_flatten.cc => dnnl/dnnl_quantized_flatten.cc} (73%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_fully_connected.cc => dnnl/dnnl_quantized_fully_connected.cc} (73%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_ops-inl.h => dnnl/dnnl_quantized_ops-inl.h} (59%)
 rename src/operator/quantization/{mkldnn/mkldnn_quantized_pooling.cc => dnnl/dnnl_quantized_pooling.cc} (64%)
 rename src/operator/quantization/{mkldnn/mkldnn_requantize-inl.h => dnnl/dnnl_requantize-inl.h} (70%)
 create mode 100644 src/operator/subgraph/dnnl/dnnl_batch_dot.cc
 create mode 100644 src/operator/subgraph/dnnl/dnnl_batch_dot_property.h
 rename src/operator/subgraph/{mkldnn/mkldnn_bn_relu_property.h => dnnl/dnnl_bn_relu_property.h} (82%)
 rename src/operator/subgraph/{mkldnn/mkldnn_common.h => dnnl/dnnl_common.h} (72%)
 rename src/operator/subgraph/{mkldnn/mkldnn_conv-inl.h => dnnl/dnnl_conv-inl.h} (55%)
 rename src/operator/subgraph/{mkldnn/mkldnn_conv.cc => dnnl/dnnl_conv.cc} (60%)
 rename src/operator/subgraph/{mkldnn/mkldnn_conv_property.h => dnnl/dnnl_conv_property.h} (90%)
 rename src/operator/subgraph/{mkldnn/mkldnn_fc-inl.h => dnnl/dnnl_fc-inl.h} (61%)
 rename src/operator/subgraph/{mkldnn/mkldnn_fc.cc => dnnl/dnnl_fc.cc} (67%)
 rename src/operator/subgraph/{mkldnn/mkldnn_fc_property.h => dnnl/dnnl_fc_property.h} (85%)
 create mode 100644 src/operator/subgraph/dnnl/dnnl_identity_property.h
 rename src/operator/subgraph/{mkldnn/mkldnn_post_quantize_align_scale_property.h => dnnl/dnnl_post_quantize_align_scale_property.h} (87%)
 rename src/operator/subgraph/{mkldnn/mkldnn_elemwisemul_post_quantize_property.h => dnnl/dnnl_post_quantize_property.h} (54%)
 rename src/operator/subgraph/{mkldnn/mkldnn_subgraph_base-inl.h => dnnl/dnnl_subgraph_base-inl.h} (78%)
 create mode 100644 src/operator/subgraph/dnnl/dnnl_subgraph_property.cc
 rename src/operator/subgraph/{mkldnn/mkldnn_transformer-inl.h => dnnl/dnnl_transformer-inl.h} (87%)
 rename src/operator/subgraph/{mkldnn/mkldnn_transformer.cc => dnnl/dnnl_transformer.cc} (71%)
 rename src/operator/subgraph/{mkldnn/mkldnn_transformer_qk_property.h => dnnl/dnnl_transformer_qk_property.h} (87%)
 rename src/operator/subgraph/{mkldnn/mkldnn_transformer_valatt_property.h => dnnl/dnnl_transformer_valatt_property.h} (89%)
 delete mode 100644 src/operator/subgraph/mkldnn/mkldnn_fc_post_quantize_property.h
 delete mode 100644 src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
 delete mode 100644 src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
 delete mode 100644 src/operator/subgraph/mkldnn/mkldnn_transformer_post_quantize_property.h
 rename tests/cpp/include/{test_mkldnn.h => test_dnnl.h} (66%)
 rename tests/cpp/operator/{mkldnn_operator_test.cc => dnnl_operator_test.cc} (93%)
 rename tests/cpp/operator/{mkldnn_test.cc => dnnl_test.cc} (64%)
 copy docs/python_docs/requirements => tests/nightly/model_backwards_compatibility_check/__init__.py (77%)
 create mode 100644 tests/python/array-api/test_data_interchange.py
 rename tests/python/{mkl/data/test_mkldnn_test_mkldnn_model_model1.json => dnnl/data/test_dnnl_test_dnnl_model_model1.json} (100%)
 rename tests/python/{mkl => dnnl}/subgraphs/subgraph_common.py (93%)
 rename tests/python/{mkl => dnnl}/subgraphs/test_conv_subgraph.py (96%)
 rename tests/python/{mkl => dnnl}/subgraphs/test_fc_subgraph.py (87%)
 rename tests/python/{mkl/subgraphs/test_transformer_subgraph.py => dnnl/subgraphs/test_matmul_subgraph.py} (67%)
 rename tests/python/{mkl => dnnl}/test_amp.py (98%)
 rename tests/python/{mkl => dnnl}/test_bf16_operator.py (99%)
 rename tests/python/{mkl/test_mkldnn.py => dnnl/test_dnnl.py} (96%)
 rename tests/python/{mkl/test_quantization_mkldnn.py => dnnl/test_quantization_dnnl.py} (92%)
 rename tests/python/gpu/{test_numpy_op.py => test_numpy_einsum.py} (99%)
 create mode 100755 tools/dependencies/mkl.sh
 create mode 100755 tools/dependencies/numpy_mkl.sh
 copy docs/python_docs/requirements => tools/git-pre-commit (77%)
 copy docs/python_docs/requirements => tools/lint/clang_format_ci.sh (77%)
 mode change 100644 => 100755
 create mode 100755 tools/lint/git-clang-format-13