You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by la...@apache.org on 2021/10/13 15:28:22 UTC

[incubator-mxnet] branch master updated: [FEATURE] Enable dynamic linking with MKL and compiler based OpenMP (#20474)

This is an automated email from the ASF dual-hosted git repository.

lausen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new abd293f  [FEATURE] Enable dynamic linking with MKL and compiler based OpenMP (#20474)
abd293f is described below

commit abd293fca19cb92edd2e66288b5217ac8c0ace4c
Author: Anna Karbownik <an...@intel.com>
AuthorDate: Wed Oct 13 17:25:36 2021 +0200

    [FEATURE] Enable dynamic linking with MKL and compiler based OpenMP (#20474)
    
    OneMKL 2021.3 fixed linking OpenMP while using SDL and MKL_THREADING_LAYER set to GNU.
    
    * Disabling SDL with MKL threading on Windows
    
    * Use multi-threading 'on' as the default option
    
    * Sets the interface layer for Intel oneAPI MKL at runtime
    
    * Clean up the apt cache
    
    * Moving mkl runtime initialization to the function
    
    * Cleaning MKL find_path cmake directories
    
    * [WIP] Adding GitHub runner for macOS to check MKL-specific changes
    
    This is a temporary change to check that adding MKL runtime support
    won't crash macOS.
    
    * clang format + mkl workflow rename
    
    * Fixing some formatting + installing patchelf
    
    * Setting up macOS rpath for MKL libraries
    
    * Run only mkl tests
    
    * Fix for finding MKL libraries on macOS by FindBLAS.cmake.
    Turn off SDL for MKL on macOS as it needs fixes.
    
    * Enable linking MXNet with MKL static libraries on macOS
    
    Add proper mkl_threading flags for macOS.
    Enable all tests that are for macOS + MKL tests.
    Rebuild numpy with MKL BLAS (instead of OpenBLAS).
    
    * Excluding MKL bf16 tests as the CI macOS machines seem not to have AVX-512
    support.
---
 .github/workflows/os_x_mklbuild.yml            | 50 ++++++++++++++++++++++++++
 ci/docker/Dockerfile.build.ubuntu              | 12 +++----
 cmake/ChooseBlas.cmake                         | 33 ++++++++++++-----
 cmake/upstream/FindBLAS.cmake                  |  9 +++--
 config/distribution/darwin_cpu_mkl.cmake       | 34 ++++++++++++++++++
 config/distribution/linux_cpu_mkl.cmake        | 34 ++++++++++++++++++
 src/initialize.cc                              | 28 ++++++++++++++-
 src/initialize.h                               |  8 ++++-
 tools/dependencies/make_shared_dependencies.sh | 11 +++++-
 tools/dependencies/mkl.sh                      | 49 +++++++++++++++++++++++++
 tools/dependencies/numpy_mkl.sh                | 45 +++++++++++++++++++++++
 tools/dependencies/opencv.sh                   |  2 +-
 tools/staticbuild/build.sh                     |  7 +++-
 tools/staticbuild/build_lib.sh                 | 10 ++++--
 14 files changed, 308 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/os_x_mklbuild.yml b/.github/workflows/os_x_mklbuild.yml
new file mode 100644
index 0000000..9f6bee4
--- /dev/null
+++ b/.github/workflows/os_x_mklbuild.yml
@@ -0,0 +1,50 @@
+name: mkl continuous build
+
+on: [push, pull_request]
+
+jobs:
+  macosx-x86_64:
+    runs-on: macos-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Compilation cache
+        uses: actions/cache@v2
+        with:
+          path: ~/.ccache
+          # We include the commit sha in the cache key, as new cache entries are
+          # only created if there is no existing entry for the key yet.
+          key: ${{ runner.os }}-ccache-${{ github.sha }}
+          # Restore any ccache cache entry, if none for
+          # ${{ runner.os }}-ccache-${{ github.sha }} exists
+          restore-keys: |
+            ${{ runner.os }}-ccache
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.6'
+          architecture: x64
+
+      - name: Install Dependencies
+        run: |
+          brew install nasm automake ninja libtool cmake pkgconfig protobuf hdf5 zlib ccache
+          ccache -M 500M  # Limit the ccache size; Github's overall cache limit is 5GB
+          python -m pip install -r ci/docker/install/requirements
+        shell: bash
+
+      - name: Build project
+        run: |
+          ./tools/staticbuild/build.sh cpu mkl
+
+      - name: Setup Python
+        run: |
+          python -m pip install --user -e python
+
+      - name: Test project
+        run: |
+          python -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'not test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial'
+          MXNET_ENGINE_TYPE=NaiveEngine python -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'test_operator and not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial'
+          python -m pytest --durations=50 --verbose tests/python/unittest/ -k 'not (test_subgraph or test_custom_op or test_external_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'serial'
+          python -m pytest -n 4 --durations=50 --verbose tests/python/mkl -k 'not test_bf16_operator'
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index 843d0f9..f8963d3 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -36,13 +36,12 @@ WORKDIR /work/deps
 SHELL ["/bin/bash", "-c"]
 RUN export DEBIAN_FRONTEND=noninteractive && \
     export OS_RELEASE="$(cat /etc/os-release)" && \
+    apt-get clean && \
     apt-get update && \
     apt-get install -y wget software-properties-common && \
-    if [[ ${OS_RELEASE} == *"Bionic"* ]]; then \
-        wget -qO - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add -; \
-        apt-add-repository "deb https://apt.repos.intel.com/mkl all main"; \
-        INTEL_MKL="-2020.0-088"; \
-    fi && \
+    wget -qO - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB -O - | apt-key add -; \
+    add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"; \
+    INTEL_MKL="-2021.3.0"; \
     apt-get update && \
     apt-get install -y \
         ## Utilities
@@ -65,7 +64,8 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         g++ \
         g++-7 \
         g++-8 \
-        intel-mkl${INTEL_MKL} \
+        intel-oneapi-mkl${INTEL_MKL} \
+        intel-oneapi-mkl-devel${INTEL_MKL} \
         libomp-dev \
         ## Dependencies
         libgomp1 \
diff --git a/cmake/ChooseBlas.cmake b/cmake/ChooseBlas.cmake
index b4d025e..ed6c733 100644
--- a/cmake/ChooseBlas.cmake
+++ b/cmake/ChooseBlas.cmake
@@ -23,10 +23,12 @@ set(INTEL_OPT_ROOT "/opt/intel" CACHE PATH "Folder contains root-installed intel
 
 if(DEFINED USE_BLAS)
   set(BLAS "${USE_BLAS}")
-else()
-  # Setting up BLAS_mkl_MKLROOT for non-Ubuntu 20.04 OSes
-  find_path(BLAS_mkl_MKLROOT mkl PATHS $ENV{MKLROOT} ${INTEL_HOME_ROOT} ${INTEL_OPT_ROOT})
-  if(NOT BLAS_mkl_MKLROOT STREQUAL "BLAS_mkl_MKLROOT-NOTFOUND")
+endif()
+if(USE_BLAS MATCHES "MKL" OR USE_BLAS MATCHES "mkl" OR NOT DEFINED USE_BLAS)
+  find_path(MKL_INCLUDE_DIR mkl_version.h
+    PATHS $ENV{MKLROOT} ${INTEL_HOME_ROOT}/mkl ${INTEL_OPT_ROOT}/mkl ${INTEL_OPT_ROOT}/oneapi/mkl/latest
+    PATH_SUFFIXES mkl latest include)
+  if(NOT MKL_INCLUDE_DIR STREQUAL "MKL_INCLUDE_DIR-NOTFOUND")
     set(BLAS "MKL")
   endif()
 endif()
@@ -122,15 +124,20 @@ set(FORTRAN_DIR \\\"\$\{CMAKE_Fortran_IMPLICIT_LINK_DIRECTORIES\}\\\")
   endif()
 elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
   # ---[ MKL Options
+  file(STRINGS ${MKL_INCLUDE_DIR}/mkl_version.h MKL_VERSION_DEF REGEX "INTEL_MKL_VERSION")
+  string(REGEX MATCH "([0-9]+)" MKL_VERSION ${MKL_VERSION_DEF})
   if(UNIX)
     # Single dynamic library interface leads to conflicts between intel omp and llvm omp
     # https://github.com/apache/incubator-mxnet/issues/17641
-    option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" OFF)
+    # Fixed in oneMKL 2021.3: [MKLD-11109] MKL is opening libgomp.so instead of
+    # libgomp.so.1 while SDL=1 & MKL_THREADING_LAYER=GNU
+    cmake_dependent_option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON
+      "NOT BLA_STATIC;MKL_VERSION GREATER_EQUAL 20210003" OFF)
   else()
     option(MKL_USE_SINGLE_DYNAMIC_LIBRARY "Use single dynamic library interface" ON)
   endif()
   cmake_dependent_option(BLA_STATIC "Use static libraries" ON "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF)
-  cmake_dependent_option(MKL_MULTI_THREADED  "Use multi-threading"  ON "NOT MKL_USE_SINGLE_DYNAMIC_LIBRARY" OFF)
+  option(MKL_MULTI_THREADED  "Use multi-threading" ON)
 
   if(BLA_VENDOR)
       message(FATAL_ERROR "Do not set BLA_VENDOR manually. MKL version (BLA_VENDOR) is selected based on MKL_USE_SINGLE_DYNAMIC_LIBRARY, "
@@ -160,15 +167,23 @@ elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl")
       endif()
     endif()
   endif()
-  # Setting up BLAS_mkl_MKLROOT for non-Ubuntu 20.04 OSes
-  find_path(BLAS_mkl_MKLROOT mkl PATHS $ENV{MKLROOT} ${INTEL_HOME_ROOT} ${INTEL_OPT_ROOT})
+  # In case of oneAPI 2021.3 if MKL_INCLUDE_DIR points to the subdirectory 'include',
+  # use the parent directory 'latest' instead
+  file(TO_CMAKE_PATH "${MKL_INCLUDE_DIR}" BLAS_mkl_MKLROOT)
+  get_filename_component(BLAS_mkl_MKLROOT_LAST_DIR "${BLAS_mkl_MKLROOT}" NAME)
+  if(BLAS_mkl_MKLROOT_LAST_DIR STREQUAL "include")
+      get_filename_component(BLAS_mkl_MKLROOT "${BLAS_mkl_MKLROOT}" DIRECTORY)
+  endif()
   find_package(BLAS)
-  find_path(MKL_INCLUDE_DIR mkl.h HINTS ${INTEL_HOME_ROOT}/mkl ${INTEL_OPT_ROOT}/mkl PATHS ENV MKLROOT PATH_SUFFIXES include mkl REQUIRED)
   include_directories(SYSTEM ${MKL_INCLUDE_DIR})
   list(APPEND mshadow_LINKER_LIBS ${BLAS_LIBRARIES})
+  if(USE_INT64_TENSOR_SIZE)
+    add_definitions(-DUSE_INT64_TENSOR_SIZE=1)
+  endif()
   add_definitions(-DMSHADOW_USE_CBLAS=0)
   add_definitions(-DMSHADOW_USE_MKL=1)
   add_definitions(-DMXNET_USE_BLAS_MKL=1)
+  message("-- Found MKL (version: ${MKL_VERSION})")
 elseif(BLAS STREQUAL "apple")
   find_package(Accelerate REQUIRED)
   include_directories(SYSTEM ${Accelerate_INCLUDE_DIR})
diff --git a/cmake/upstream/FindBLAS.cmake b/cmake/upstream/FindBLAS.cmake
index 934dd44..b8fbca3 100644
--- a/cmake/upstream/FindBLAS.cmake
+++ b/cmake/upstream/FindBLAS.cmake
@@ -320,7 +320,10 @@ if(BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
           set(BLAS_mkl_DLL_SUFFIX "_dll")
         endif()
       else()
-        if(BLA_STATIC)
+        # MXNET NOTE: The second 2 lines differs from CMake source by ${CMAKE_CURRENT_LIST_DIR}
+        # replaced with ${CMAKE_ROOT}/Modules
+        # https://gitlab.kitware.com/cmake/cmake/-/issues/20548
+        if(BLA_STATIC AND NOT APPLE)
           set(BLAS_mkl_START_GROUP "-Wl,--start-group")
           set(BLAS_mkl_END_GROUP "-Wl,--end-group")
         else()
@@ -527,7 +530,9 @@ if(BLA_VENDOR MATCHES "Intel" OR BLA_VENDOR STREQUAL "All")
           "compiler/lib/${BLAS_mkl_ARCH_NAME}"
           "mkl/lib" "mkl/lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}"
           "mkl/lib/${BLAS_mkl_ARCH_NAME}"
-          "lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}")
+          "lib" "lib/${BLAS_mkl_ARCH_NAME}_${BLAS_mkl_OS_NAME}"
+          "lib/${BLAS_mkl_ARCH_NAME}"
+          )
 
       foreach(IT ${BLAS_SEARCH_LIBS})
         string(REPLACE " " ";" SEARCH_LIBS ${IT})
diff --git a/config/distribution/darwin_cpu_mkl.cmake b/config/distribution/darwin_cpu_mkl.cmake
new file mode 100644
index 0000000..f4e54a8
--- /dev/null
+++ b/config/distribution/darwin_cpu_mkl.cmake
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(CMAKE_BUILD_TYPE "Distribution" CACHE STRING "Build type")
+set(CFLAGS "-mno-avx" CACHE STRING "CFLAGS")
+set(CXXFLAGS "-mno-avx" CACHE STRING "CXXFLAGS")
+
+set(USE_BLAS "mkl" CACHE STRING "BLAS Vendor")
+set(BLA_STATIC ON CACHE BOOL "Use static libraries")
+
+set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
+set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
+set(USE_OPENMP OFF CACHE BOOL "Build with Openmp support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
+set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
+set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
+set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
+set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
+set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo")
+set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support")
diff --git a/config/distribution/linux_cpu_mkl.cmake b/config/distribution/linux_cpu_mkl.cmake
new file mode 100644
index 0000000..3f8dcfc
--- /dev/null
+++ b/config/distribution/linux_cpu_mkl.cmake
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set(CMAKE_BUILD_TYPE "Distribution" CACHE STRING "Build type")
+set(CFLAGS "-mno-avx" CACHE STRING "CFLAGS")
+set(CXXFLAGS "-mno-avx" CACHE STRING "CXXFLAGS")
+
+set(USE_BLAS "mkl" CACHE STRING "BLAS Vendor")
+set(BLA_STATIC ON CACHE BOOL "Use static libraries")
+
+set(USE_CUDA OFF CACHE BOOL "Build with CUDA support")
+set(USE_OPENCV ON CACHE BOOL "Build with OpenCV support")
+set(USE_OPENMP ON CACHE BOOL "Build with Openmp support")
+set(USE_ONEDNN ON CACHE BOOL "Build with ONEDNN support")
+set(USE_LAPACK ON CACHE BOOL "Build with lapack support")
+set(USE_TVM_OP OFF CACHE BOOL "Enable use of TVM operator build system.")
+set(USE_SSE ON CACHE BOOL "Build with x86 SSE instruction support")
+set(USE_F16C OFF CACHE BOOL "Build with x86 F16C instruction support")
+set(USE_LIBJPEG_TURBO ON CACHE BOOL "Build with libjpeg-turbo")
+set(USE_DIST_KVSTORE ON CACHE BOOL "Build with DIST_KVSTORE support")
diff --git a/src/initialize.cc b/src/initialize.cc
index 225eb2d..5cb5b63 100644
--- a/src/initialize.cc
+++ b/src/initialize.cc
@@ -22,6 +22,7 @@
  * \brief initialize mxnet library
  */
 #include "initialize.h"
+
 #include <algorithm>
 #include <csignal>
 
@@ -52,8 +53,9 @@ void win_err(char** err) {
 #endif
 
 #include <dmlc/logging.h>
-#include <mxnet/engine.h>
 #include <mxnet/c_api.h>
+#include <mxnet/engine.h>
+
 #include "./engine/openmp.h"
 #include "./operator/custom/custom-inl.h"
 #if MXNET_USE_OPENCV
@@ -62,6 +64,10 @@ void win_err(char** err) {
 #include "common/utils.h"
 #include "engine/openmp.h"
 
+#if defined(MKL_USE_SINGLE_DYNAMIC_LIBRARY)
+#include <mkl.h>
+#endif
+
 namespace mxnet {
 
 // pthread_atfork handlers, delegated to LibraryInitializer members.
@@ -89,6 +95,7 @@ LibraryInitializer::LibraryInitializer()
       cpu_worker_nthreads_(dmlc::GetEnv("MXNET_CPU_WORKER_NTHREADS", 1)),
       mp_cv_num_threads_(dmlc::GetEnv("MXNET_MP_OPENCV_NUM_THREADS", 0)) {
   dmlc::InitLogging("mxnet");
+  init_mkl_dynamic_library();
   engine::OpenMP::Get();  // force OpenMP initialization
   install_pthread_atfork_handlers();
 }
@@ -223,6 +230,25 @@ void LibraryInitializer::install_pthread_atfork_handlers() {
 #endif
 }
 
+void LibraryInitializer::init_mkl_dynamic_library() {
+#if !(defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__))
+#if MKL_USE_SINGLE_DYNAMIC_LIBRARY
+#if USE_INT64_TENSOR_SIZE
+  int interface = MKL_INTERFACE_ILP64;
+#else
+  int interface = MKL_INTERFACE_LP64;
+#endif
+#if defined(__INTEL_LLVM_COMPILER) || defined(__APPLE__)
+  mkl_set_threading_layer(MKL_THREADING_INTEL);
+#else
+  mkl_set_threading_layer(MKL_THREADING_GNU);
+  interface += MKL_INTERFACE_GNU;
+#endif
+  mkl_set_interface_layer(interface);
+#endif
+#endif
+}
+
 #if MXNET_USE_SIGNAL_HANDLER && DMLC_LOG_STACK_TRACE
 
 static inline void printStackTrace(FILE* out = stderr, const unsigned int max_frames = 63) {
diff --git a/src/initialize.h b/src/initialize.h
index 490a3ee..5a1062a 100644
--- a/src/initialize.h
+++ b/src/initialize.h
@@ -23,8 +23,9 @@
  */
 
 #include <cstdlib>
-#include <string>
 #include <map>
+#include <string>
+
 #include "dmlc/io.h"
 
 #ifndef MXNET_INITIALIZE_H_
@@ -92,6 +93,11 @@ class LibraryInitializer {
   void install_pthread_atfork_handlers();
 
   /**
+   * Sets the interface and threading layer for Intel® oneAPI MKL at run time.
+   * Use with the Single Dynamic Library.
+   */
+  void init_mkl_dynamic_library();
+  /**
    * Install signal handlers (UNIX). Has no effect on Windows.
    */
   void install_signal_handlers();
diff --git a/tools/dependencies/make_shared_dependencies.sh b/tools/dependencies/make_shared_dependencies.sh
index 96d3561..ac05aa9 100755
--- a/tools/dependencies/make_shared_dependencies.sh
+++ b/tools/dependencies/make_shared_dependencies.sh
@@ -49,7 +49,7 @@ download () {
     fi
 }
 
-if [[ ! $PLATFORM == 'darwin' ]]; then
+if [[ ! $PLATFORM == 'darwin' ]] && [[ ! $BLAS == 'mkl' ]]; then
     source ${DIR}/openblas.sh
 fi
 source $DIR/libz.sh
@@ -64,6 +64,15 @@ source $DIR/protobuf.sh
 source $DIR/cityhash.sh
 source $DIR/zmq.sh
 source $DIR/lz4.sh
+if [[ $BLAS == 'mkl' ]]; then
+    source ${DIR}/mkl.sh
+    source ${DIR}/numpy_mkl.sh
+    if [[ $PLATFORM == 'darwin' ]]; then
+        # export this path to find iomp5 needed by MKL according to Intel Link Line Advisor
+        export DYLD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/opt/intel/oneapi/compiler/${INTEL_MKL}/mac/compiler
+    fi
+fi
+
 
 export LIBRARY_PATH=${LIBRARY_PATH}:$(dirname $(find $DEPS_PATH -type f -name 'libprotoc*' | grep protobuf | head -n 1)):$DEPS_PATH/lib:$DEPS_PATH/lib64
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$(dirname $(find $DEPS_PATH -type f -name 'libprotoc*' | grep protobuf | head -n 1)):$DEPS_PATH/lib:$DEPS_PATH/lib64
diff --git a/tools/dependencies/mkl.sh b/tools/dependencies/mkl.sh
new file mode 100755
index 0000000..863ffd2
--- /dev/null
+++ b/tools/dependencies/mkl.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script downloads OneMKL
+set -ex
+export INTEL_MKL="2021.3.0"
+if [[  (! -e /opt/intel/oneapi/mkl/) ]]; then
+    >&2 echo "Downloading mkl..."
+
+    if [[ $PLATFORM == 'darwin' ]]; then
+        download \
+            https://registrationcenter-download.intel.com/akdlm/irc_nas/17960/m_onemkl_p_${INTEL_MKL}.517_offline.dmg \
+            ${DEPS_PATH}/m_onemkl_p_${INTEL_MKL}.517_offline.dmg
+        hdiutil attach ${DEPS_PATH}/m_onemkl_p_${INTEL_MKL}.517_offline.dmg
+        pushd /Volumes/m_onemkl_p_${INTEL_MKL}.517_offline/bootstrapper.app/Contents/MacOS/
+        sudo ./install.sh --silent --eula accept
+        popd
+    elif [[ $PLATFORM == 'linux' ]]; then
+        # use wget to fetch the Intel repository public key
+        download \
+            https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
+            ${DEPS_PATH}/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+        # add to your apt sources keyring so that archives signed with this key will be trusted.
+        apt-key add ${DEPS_PATH}/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+        # remove the public key
+        rm ${DEPS_PATH}/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+        add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+        apt-get update && \
+        apt install -y intel-oneapi-mkl-${INTEL_MKL} intel-oneapi-mkl-common-${INTEL_MKL} intel-oneapi-mkl-devel-${INTEL_MKL}
+    else
+        >&2 echo "Not available"
+    fi
+fi
diff --git a/tools/dependencies/numpy_mkl.sh b/tools/dependencies/numpy_mkl.sh
new file mode 100755
index 0000000..ba00390
--- /dev/null
+++ b/tools/dependencies/numpy_mkl.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script rebuilds numpy so that it will use MKL BLAS instead of OpenBLAS.
+set -x
+
+# check if numpy uses openblas
+set +e
+python3 -c "from numpy import show_config; show_config()" | grep 'openblas_info'
+if [[ $? -eq 0 ]] && [[ -e /opt/intel/oneapi/mkl/ ]] && [[ ! -e ~/.numpy-site.cfg ]]; then
+  # create file and add to it MKL configuration
+  if [[ $PLATFORM == 'darwin' ]]; then
+    echo "[mkl]
+  library_dirs = /opt/intel/oneapi/compiler/$INTEL_MKL/mac/compiler/lib:/opt/intel/oneapi/mkl/$INTEL_MKL/lib
+  include_dirs = /opt/intel/oneapi/mkl/$INTEL_MKL/include
+  libraries = mkl_rt,iomp5
+  extra_link_args = -Wl,-rpath,/opt/intel/oneapi/mkl/$INTEL_MKL/lib,-rpath,/opt/intel/oneapi/compiler/$INTEL_MKL/mac/compiler/lib" >> ~/.numpy-site.cfg
+  else
+    echo "[mkl]
+  library_dirs = /opt/intel/oneapi/compiler/$INTEL_MKL/linux/compiler/lib/intel64_lin:/opt/intel/oneapi/mkl/$INTEL_MKL/lib/intel64
+  include_dirs = /opt/intel/oneapi/mkl/$INTEL_MKL/include
+  libraries = mkl_rt,iomp5
+  extra_link_args = -Wl,-rpath,/opt/intel/oneapi/mkl/$INTEL_MKL/lib/intel64,-rpath,/opt/intel/oneapi/compiler/$INTEL_MKL/linux/compiler/lib/intel64_lin" >> ~/.numpy-site.cfg
+  fi
+
+  # reinstall numpy to use MKL BLAS
+  pip3 install numpy==1.19.5 --no-binary numpy --force-reinstall --no-cache-dir
+fi
+set -e
diff --git a/tools/dependencies/opencv.sh b/tools/dependencies/opencv.sh
index fce8c15..17d594b 100755
--- a/tools/dependencies/opencv.sh
+++ b/tools/dependencies/opencv.sh
@@ -22,7 +22,7 @@
 set -ex
 OPENCV_VERSION=3.4.2
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
-if [[ $PLATFORM == 'linux' ]]; then
+if [[ $PLATFORM == 'linux' ]] && [[ $BLAS == 'open' ]]; then
     OPENCV_LAPACK_OPTIONS=" \
           -D OpenBLAS_HOME=$DEPS_PATH \
           -D OpenBLAS_INCLUDE_DIR=$DEPS_PATH/include \
diff --git a/tools/staticbuild/build.sh b/tools/staticbuild/build.sh
index 5ed4ac6..b610844 100755
--- a/tools/staticbuild/build.sh
+++ b/tools/staticbuild/build.sh
@@ -18,18 +18,23 @@
 # under the License.
 
 if [ $# -lt 1 ]; then
-    >&2 echo "Usage: build.sh <VARIANT>"
+    >&2 echo "Usage: build.sh <VARIANT> <BLAS>"
 fi
 
 export CURDIR=$PWD
 export DEPS_PATH=$PWD/staticdeps
 export VARIANT=$(echo $1 | tr '[:upper:]' '[:lower:]')
 export PLATFORM=$(uname | tr '[:upper:]' '[:lower:]')
+export BLAS=$(echo $2 | tr '[:upper:]' '[:lower:]')
 
 if [[ $VARIANT == darwin* ]]; then
     export VARIANT="darwin"
 fi
 
+if [[ ! $BLAS ]]; then
+    export BLAS="open"
+fi
+
 NUM_PROC=1
 if [[ ! -z $(command -v nproc) ]]; then
     NUM_PROC=$(nproc)
diff --git a/tools/staticbuild/build_lib.sh b/tools/staticbuild/build_lib.sh
index 4293870..6fbc255 100755
--- a/tools/staticbuild/build_lib.sh
+++ b/tools/staticbuild/build_lib.sh
@@ -20,7 +20,11 @@
 set -eo pipefail
 
 # This script builds the libraries of mxnet.
-cmake_config=${CURDIR}/config/distribution/${PLATFORM}_${VARIANT}.cmake
+if [[ ! $BLAS ]] || [[ $BLAS == 'open' ]]; then
+    cmake_config=${CURDIR}/config/distribution/${PLATFORM}_${VARIANT}.cmake
+else
+    cmake_config=${CURDIR}/config/distribution/${PLATFORM}_${VARIANT}_${BLAS}.cmake
+fi
 if [[ ! -f $cmake_config ]]; then
     >&2 echo "Couldn't find cmake config $make_config for the current settings."
     exit 1
@@ -42,7 +46,9 @@ cd -
 rm -rf lib; mkdir lib;
 if [[ $PLATFORM == 'linux' ]]; then
     cp -L build/libmxnet.so lib/libmxnet.so
-    cp -L $(ldd lib/libmxnet.so | grep libgfortran |  awk '{print $3}') lib/
+    if [[ $BLAS == 'open' ]]; then
+        cp -L $(ldd lib/libmxnet.so | grep libgfortran |  awk '{print $3}') lib/
+    fi
 elif [[ $PLATFORM == 'darwin' ]]; then
     cp -L build/libmxnet.dylib lib/libmxnet.dylib
 fi